From db89b6e600b2926c608844cb0d5c6e7e7388156c Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 8 May 2026 14:27:27 -0700 Subject: [PATCH 01/16] feat: add pet-hatch locator for Hatch environments (Fixes #450) Adds a new pet-hatch crate that detects Hatch-managed virtual environments so they are no longer misclassified as plain Venv by downstream consumers. Implementation matches Hatch's actual storage layout from src/hatch/env/virtual.py: - Default storage: <data_dir>/env/virtual/<project_name>/<project_id>/<env_name> (3 levels deep) - HATCH_DATA_DIR env var honoured; never silently falls back to platform default when set - Project-local storage configured via dirs.env.virtual in pyproject.toml / hatch.toml is detected (the `.hatch` example from the issue) - Locator inserted before Venv so Hatch claims its envs first - 15 unit tests covering layout depth (rejects 2 / 4 levels), HATCH_DATA_DIR semantics, project-local config, and platform defaults --- Cargo.lock | 14 + crates/pet-core/src/lib.rs | 1 + crates/pet-core/src/python_environment.rs | 1 + crates/pet-hatch/Cargo.toml | 16 + crates/pet-hatch/src/lib.rs | 875 ++++++++++++++++++++++ crates/pet/Cargo.toml | 1 + crates/pet/src/jsonrpc.rs | 1 + crates/pet/src/locators.rs | 2 + 8 files changed, 911 insertions(+) create mode 100644 crates/pet-hatch/Cargo.toml create mode 100644 crates/pet-hatch/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index c17dcf32..c25cd0d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -448,6 +448,7 @@ dependencies = [ "pet-env-var-path", "pet-fs", "pet-global-virtualenvs", + "pet-hatch", "pet-homebrew", "pet-jsonrpc", "pet-linux-global-python", @@ -549,6 +550,19 @@ dependencies = [ "pet-virtualenv", ] +[[package]] +name = "pet-hatch" +version = "0.1.0" +dependencies = [ + "log", + "pet-core", + "pet-fs", + "pet-python-utils", + "serde", + "tempfile", + "toml 0.9.7", +] + [[package]] name = "pet-homebrew" version = "0.1.0" diff --git a/crates/pet-core/src/lib.rs b/crates/pet-core/src/lib.rs index fe8f4018..a4460405 100644 --- a/crates/pet-core/src/lib.rs +++ b/crates/pet-core/src/lib.rs @@ -43,6 +43,7 @@ pub struct Configuration { 
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum LocatorKind { Conda, + Hatch, Homebrew, LinuxGlobal, MacCommandLineTools, diff --git a/crates/pet-core/src/python_environment.rs b/crates/pet-core/src/python_environment.rs index 7bbfe5b8..c6d0c595 100644 --- a/crates/pet-core/src/python_environment.rs +++ b/crates/pet-core/src/python_environment.rs @@ -19,6 +19,7 @@ pub enum PythonEnvironmentKind { PyenvVirtualEnv, // Pyenv virtualenvs. Pipenv, Poetry, + Hatch, MacPythonOrg, MacCommandLineTools, LinuxGlobal, diff --git a/crates/pet-hatch/Cargo.toml b/crates/pet-hatch/Cargo.toml new file mode 100644 index 00000000..c49291c1 --- /dev/null +++ b/crates/pet-hatch/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "pet-hatch" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +pet-core = { path = "../pet-core" } +pet-fs = { path = "../pet-fs" } +pet-python-utils = { path = "../pet-python-utils" } +serde = { version = "1.0.226", features = ["derive"] } +toml = "0.9.7" +log = "0.4.21" + +[dev-dependencies] +tempfile = "3.13" diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs new file mode 100644 index 00000000..a9c9c0b9 --- /dev/null +++ b/crates/pet-hatch/src/lib.rs @@ -0,0 +1,875 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Hatch (<https://hatch.pypa.io/>) environment locator. +//! +//! Hatch creates standard PEP 405 virtual environments (with a `pyvenv.cfg`), +//! but stores them in a known nested layout under its data directory. The +//! default layout is: +//! +//! ```text +//! <data_dir>/env/virtual/<project_name>/<project_id>/<env_name>/ +//! ``` +//! +//! where `<data_dir>` is the platform-specific Hatch data directory and +//! `<project_id>` is a hash of the project root path. This is exactly three +//! components deep relative to `<data_dir>/env/virtual` (see Hatch's +//! `src/hatch/env/virtual.py` — `app_virtual_env_path`). +//! +//! In addition, projects can configure a custom storage location via +//! 
`[tool.hatch.dirs.env]` in `pyproject.toml` or `[dirs.env]` in +//! `hatch.toml`, e.g.: +//! +//! ```toml +//! [tool.hatch.dirs.env] +//! virtual = ".hatch" +//! ``` +//! +//! When the configured `virtual` path is relative or matches `~/.virtualenvs`, +//! Hatch uses a flat layout: `//`. + +use std::{ + fs, + path::{Path, PathBuf}, + sync::{Arc, Mutex}, +}; + +use log::trace; +use pet_core::{ + env::PythonEnv, + os_environment::Environment, + python_environment::{PythonEnvironment, PythonEnvironmentBuilder, PythonEnvironmentKind}, + pyvenv_cfg::PyVenvCfg, + reporter::Reporter, + Configuration, Locator, LocatorKind, RefreshStatePersistence, +}; +use pet_fs::path::norm_case; +use pet_python_utils::executable::{find_executable, find_executables}; +use serde::Deserialize; + +/// Subdirectory under the Hatch data directory where the default +/// "virtual" environment storage lives. +/// +/// See `EnvironmentInterface.isolated_data_directory` and the `virtual` +/// plugin's `PLUGIN_NAME` in Hatch's source. +const VIRTUAL_ENV_SUBDIR: [&str; 2] = ["env", "virtual"]; + +pub struct Hatch { + /// Default storage directory for Hatch virtual environments — i.e. + /// `/env/virtual`. Resolved at construction. None if the + /// directory does not yet exist (it is created lazily by Hatch). + default_virtual_dir: Option, + /// Workspace directories supplied via configuration. Used to discover + /// project-local Hatch environments via parsed `dirs.env.virtual` config. 
+ workspace_directories: Arc>>, +} + +impl Default for Hatch { + fn default() -> Self { + Self::from(&pet_core::os_environment::EnvironmentApi::new()) + } +} + +impl Hatch { + pub fn new() -> Self { + Self::default() + } + + pub fn from(environment: &dyn Environment) -> Self { + Self { + default_virtual_dir: get_default_virtual_dir(environment), + workspace_directories: Arc::new(Mutex::new(Vec::new())), + } + } +} + +impl Locator for Hatch { + fn get_kind(&self) -> LocatorKind { + LocatorKind::Hatch + } + + fn refresh_state(&self) -> RefreshStatePersistence { + RefreshStatePersistence::ConfiguredOnly + } + + fn supported_categories(&self) -> Vec { + vec![PythonEnvironmentKind::Hatch] + } + + fn configure(&self, config: &Configuration) { + let mut ws = self + .workspace_directories + .lock() + .expect("workspace_directories mutex poisoned"); + ws.clear(); + if let Some(dirs) = config.workspace_directories.as_ref() { + ws.extend(dirs.iter().cloned()); + } + } + + fn try_from(&self, env: &PythonEnv) -> Option { + // Determine the prefix (sysprefix) of this environment. + let prefix = env.prefix.clone().or_else(|| { + env.executable + .parent() + .and_then(Path::parent) + .map(Path::to_path_buf) + })?; + + // A pyvenv.cfg must be present — Hatch envs are always venvs. + let cfg = PyVenvCfg::find(&prefix)?; + + // Case 1: prefix lives in the default `/env/virtual` storage, + // exactly three components deep: + // /// + if let Some(storage) = self.default_virtual_dir.as_deref() { + if let Some(env_name) = match_default_storage_layout(&prefix, storage) { + trace!( + "Hatch env (default storage) {} found at {}", + env_name, + env.executable.display() + ); + return Some(build_env(&prefix, &cfg, env_name, None, &env.executable)); + } + } + + // Case 2: prefix lives one level under a workspace's configured + // `dirs.env.virtual` directory (flat layout). 
+ let workspaces = self + .workspace_directories + .lock() + .expect("workspace_directories mutex poisoned") + .clone(); + for workspace in &workspaces { + for virtual_dir in resolve_project_virtual_dirs(workspace) { + if prefix_is_directly_under(&prefix, &virtual_dir) { + let env_name = prefix + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(); + trace!( + "Hatch env (project-local) {} found at {}", + env_name, + env.executable.display() + ); + return Some(build_env( + &prefix, + &cfg, + env_name, + Some(workspace.clone()), + &env.executable, + )); + } + } + } + + None + } + + fn find(&self, reporter: &dyn Reporter) { + // 1. Walk the default storage directory. + if let Some(storage) = self.default_virtual_dir.as_deref() { + for env in find_envs_in_default_storage(storage) { + reporter.report_environment(&env); + } + } + + // 2. Walk project-local virtual directories for each configured workspace. + let workspaces = self + .workspace_directories + .lock() + .expect("workspace_directories mutex poisoned") + .clone(); + for workspace in &workspaces { + for virtual_dir in resolve_project_virtual_dirs(workspace) { + for env in find_envs_in_flat_dir(&virtual_dir, Some(workspace.clone())) { + reporter.report_environment(&env); + } + } + } + } +} + +// --------------------------------------------------------------------------- +// Hatch data directory resolution +// --------------------------------------------------------------------------- + +/// Resolves `/env/virtual`, the directory Hatch uses for its +/// `virtual` environment plugin by default. +/// +/// Resolution order matches Hatch itself: +/// 1. `HATCH_DATA_DIR` env var (then append `env/virtual`). +/// 2. Platform default for `platformdirs.user_data_dir("hatch", appauthor=False)` +/// (then append `env/virtual`). +/// +/// Returns `None` if the resulting directory does not exist on disk. 
+fn get_default_virtual_dir(environment: &dyn Environment) -> Option { + // If HATCH_DATA_DIR is set and non-empty, Hatch *only* uses that location — + // it never falls back to the platform default. Mirror that behaviour: return + // the env/virtual subdir when it exists on disk, otherwise None. Do not + // fall through to platform defaults, or we'd risk attributing platform- + // default envs to Hatch when the user has redirected Hatch elsewhere. + if let Some(custom) = environment.get_env_var("HATCH_DATA_DIR".to_string()) { + if !custom.is_empty() { + let path = append_virtual_subdir(PathBuf::from(custom)); + return if path.is_dir() { + Some(norm_case(path)) + } else { + None + }; + } + } + let path = append_virtual_subdir(platform_default_data_dir(environment)?); + if path.is_dir() { + Some(norm_case(path)) + } else { + None + } +} + +fn append_virtual_subdir(data_dir: PathBuf) -> PathBuf { + let mut path = data_dir; + for segment in VIRTUAL_ENV_SUBDIR { + path.push(segment); + } + path +} + +/// Platform default for Hatch's data directory. +/// +/// Mirrors `platformdirs.user_data_dir("hatch", appauthor=False)`. +#[cfg(target_os = "linux")] +fn platform_default_data_dir(environment: &dyn Environment) -> Option { + if let Some(xdg) = environment.get_env_var("XDG_DATA_HOME".to_string()) { + if !xdg.is_empty() { + return Some(PathBuf::from(xdg).join("hatch")); + } + } + Some( + environment + .get_user_home()? + .join(".local") + .join("share") + .join("hatch"), + ) +} + +#[cfg(target_os = "macos")] +fn platform_default_data_dir(environment: &dyn Environment) -> Option { + Some( + environment + .get_user_home()? + .join("Library") + .join("Application Support") + .join("hatch"), + ) +} + +#[cfg(target_os = "windows")] +fn platform_default_data_dir(environment: &dyn Environment) -> Option { + // Windows: %USERPROFILE%\AppData\Local\hatch (matches platformdirs with + // appauthor=False). 
Equivalent to %LOCALAPPDATA%\hatch when LOCALAPPDATA + // is set, which is the common case. + if let Some(local) = environment.get_env_var("LOCALAPPDATA".to_string()) { + if !local.is_empty() { + return Some(PathBuf::from(local).join("hatch")); + } + } + Some( + environment + .get_user_home()? + .join("AppData") + .join("Local") + .join("hatch"), + ) +} + +#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))] +fn platform_default_data_dir(environment: &dyn Environment) -> Option { + Some( + environment + .get_user_home()? + .join(".local") + .join("share") + .join("hatch"), + ) +} + +// --------------------------------------------------------------------------- +// Layout matching +// --------------------------------------------------------------------------- + +/// If `prefix` lives exactly three components deep under `storage` +/// (i.e. `///`), return the +/// final component (``). +fn match_default_storage_layout(prefix: &Path, storage: &Path) -> Option { + let normalized = norm_case(prefix); + let rel = normalized.strip_prefix(storage).ok()?; + let parts: Vec<_> = rel.iter().collect(); + if parts.len() == 3 { + Some(parts[2].to_string_lossy().to_string()) + } else { + None + } +} + +/// True iff `prefix`'s parent equals `dir` (case-insensitive on Windows). 
+fn prefix_is_directly_under(prefix: &Path, dir: &Path) -> bool { + match prefix.parent() { + Some(parent) => norm_case(parent) == norm_case(dir), + None => false, + } +} + +// --------------------------------------------------------------------------- +// Project config (pyproject.toml / hatch.toml) parsing +// --------------------------------------------------------------------------- + +#[derive(Deserialize, Default)] +struct PyProject { + tool: Option, +} + +#[derive(Deserialize, Default)] +struct PyProjectTool { + hatch: Option, +} + +#[derive(Deserialize, Default)] +struct HatchConfig { + dirs: Option, +} + +#[derive(Deserialize, Default)] +struct HatchDirs { + env: Option, +} + +/// Read the configured `dirs.env.virtual` paths for a workspace and resolve +/// each to an absolute directory. Both `pyproject.toml` (`[tool.hatch.dirs.env]`) +/// and a top-level `hatch.toml` (`[dirs.env]`) are checked. +/// +/// Each value may be relative (resolved against the workspace root) or +/// absolute. Returns an empty Vec if the workspace is not a Hatch project, +/// or if no `virtual` value is configured. 
+fn resolve_project_virtual_dirs(workspace: &Path) -> Vec { + let mut dirs = Vec::new(); + for raw in read_configured_virtual_paths(workspace) { + let resolved = if Path::new(&raw).is_absolute() { + PathBuf::from(&raw) + } else { + workspace.join(&raw) + }; + if resolved.is_dir() { + dirs.push(norm_case(resolved)); + } + } + dirs +} + +fn read_configured_virtual_paths(workspace: &Path) -> Vec { + let mut paths = Vec::new(); + // pyproject.toml: [tool.hatch.dirs.env] + if let Ok(contents) = fs::read_to_string(workspace.join("pyproject.toml")) { + if let Ok(pyproject) = toml::from_str::(&contents) { + if let Some(virtual_value) = pyproject + .tool + .and_then(|t| t.hatch) + .and_then(|h| h.dirs) + .and_then(|d| d.env) + .and_then(|env| env.get("virtual").cloned()) + .and_then(|v| v.as_str().map(str::to_string)) + { + paths.push(virtual_value); + } + } + } + // hatch.toml: [dirs.env] + if let Ok(contents) = fs::read_to_string(workspace.join("hatch.toml")) { + if let Ok(hatch) = toml::from_str::(&contents) { + if let Some(virtual_value) = hatch + .dirs + .and_then(|d| d.env) + .and_then(|env| env.get("virtual").cloned()) + .and_then(|v| v.as_str().map(str::to_string)) + { + paths.push(virtual_value); + } + } + } + paths +} + +// --------------------------------------------------------------------------- +// Discovery +// --------------------------------------------------------------------------- + +/// Walk `////` and report +/// each leaf venv discovered. 
+fn find_envs_in_default_storage(storage: &Path) -> Vec { + let mut envs = Vec::new(); + let project_dirs = match fs::read_dir(storage) { + Ok(d) => d, + Err(_) => return envs, + }; + for project_entry in project_dirs.filter_map(Result::ok) { + let project_dir = project_entry.path(); + if !project_dir.is_dir() { + continue; + } + let id_dirs = match fs::read_dir(&project_dir) { + Ok(d) => d, + Err(_) => continue, + }; + for id_entry in id_dirs.filter_map(Result::ok) { + let id_dir = id_entry.path(); + if !id_dir.is_dir() { + continue; + } + let env_dirs = match fs::read_dir(&id_dir) { + Ok(d) => d, + Err(_) => continue, + }; + for env_entry in env_dirs.filter_map(Result::ok) { + let env_dir = env_entry.path(); + if !env_dir.is_dir() { + continue; + } + if let Some(env) = build_env_from_prefix(&env_dir, None) { + envs.push(env); + } + } + } + } + envs +} + +/// Walk `//` and report each venv discovered. +fn find_envs_in_flat_dir(dir: &Path, project: Option) -> Vec { + let mut envs = Vec::new(); + let entries = match fs::read_dir(dir) { + Ok(d) => d, + Err(_) => return envs, + }; + for entry in entries.filter_map(Result::ok) { + let env_dir = entry.path(); + if !env_dir.is_dir() { + continue; + } + if let Some(env) = build_env_from_prefix(&env_dir, project.clone()) { + envs.push(env); + } + } + envs +} + +fn build_env_from_prefix( + prefix: &Path, + project_path: Option, +) -> Option { + let cfg = PyVenvCfg::find(prefix)?; + let executable = find_executable(prefix)?; + let env_name = cfg + .prompt + .clone() + .or_else(|| prefix.file_name().map(|n| n.to_string_lossy().to_string())); + Some( + PythonEnvironmentBuilder::new(Some(PythonEnvironmentKind::Hatch)) + .name(env_name) + .executable(Some(executable)) + .version(cfg.version) + .prefix(Some(prefix.to_path_buf())) + .symlinks(Some(find_executables(prefix))) + .project(project_path) + .build(), + ) +} + +fn build_env( + prefix: &Path, + cfg: &PyVenvCfg, + fallback_name: String, + project_path: Option, + executable: 
&Path, +) -> PythonEnvironment { + let env_name = cfg.prompt.clone().unwrap_or(fallback_name); + PythonEnvironmentBuilder::new(Some(PythonEnvironmentKind::Hatch)) + .name(Some(env_name)) + .executable(Some(executable.to_path_buf())) + .version(cfg.version.clone()) + .prefix(Some(prefix.to_path_buf())) + .symlinks(Some(find_executables(prefix))) + .project(project_path) + .build() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use tempfile::TempDir; + + struct TestEnv { + home: Option, + vars: HashMap, + } + + impl Environment for TestEnv { + fn get_user_home(&self) -> Option { + self.home.clone() + } + fn get_root(&self) -> Option { + None + } + fn get_env_var(&self, key: String) -> Option { + self.vars.get(&key).cloned() + } + fn get_know_global_search_locations(&self) -> Vec { + vec![] + } + } + + fn write_pyvenv_cfg(prefix: &Path, prompt: &str, version: &str) { + fs::create_dir_all(prefix).unwrap(); + fs::write( + prefix.join("pyvenv.cfg"), + format!("home = /usr/bin\nversion = {version}\nprompt = {prompt}\n"), + ) + .unwrap(); + } + + fn write_python_exe(prefix: &Path) -> PathBuf { + let bin = prefix.join(if cfg!(windows) { "Scripts" } else { "bin" }); + fs::create_dir_all(&bin).unwrap(); + let exe = bin.join(if cfg!(windows) { + "python.exe" + } else { + "python" + }); + fs::write(&exe, b"").unwrap(); + exe + } + + fn make_locator(default_virtual_dir: Option) -> Hatch { + Hatch { + default_virtual_dir, + workspace_directories: Arc::new(Mutex::new(vec![])), + } + } + + #[test] + fn kind_and_supported_categories() { + let locator = make_locator(None); + assert_eq!(locator.get_kind(), LocatorKind::Hatch); + assert_eq!( + locator.supported_categories(), + vec![PythonEnvironmentKind::Hatch] + ); + } + + #[test] + fn try_from_identifies_env_in_default_storage_three_levels_deep() { + // Layout: /// + let temp = TempDir::new().unwrap(); + let storage = temp.path().join("env").join("virtual"); + let prefix = 
storage.join("myproj").join("ABCDEF12").join("default"); + write_pyvenv_cfg(&prefix, "default", "3.12.1"); + let exe = write_python_exe(&prefix); + + let locator = make_locator(Some(norm_case(&storage))); + let env = PythonEnv::new(exe, Some(prefix.clone()), None); + let identified = locator.try_from(&env).expect("Hatch env should match"); + assert_eq!(identified.kind, Some(PythonEnvironmentKind::Hatch)); + assert_eq!(identified.name, Some("default".to_string())); + assert_eq!(identified.version, Some("3.12.1".to_string())); + assert_eq!(identified.prefix, Some(norm_case(&prefix))); + assert!(identified.project.is_none()); + } + + #[test] + fn try_from_rejects_two_levels_deep_under_storage() { + // PR #451's broken assumption: only 2 components deep. + let temp = TempDir::new().unwrap(); + let storage = temp.path().join("env").join("virtual"); + let prefix = storage.join("myproj-hash").join("default"); + write_pyvenv_cfg(&prefix, "default", "3.12.1"); + let exe = write_python_exe(&prefix); + + let locator = make_locator(Some(norm_case(&storage))); + let env = PythonEnv::new(exe, Some(prefix), None); + assert!(locator.try_from(&env).is_none()); + } + + #[test] + fn try_from_rejects_four_levels_deep_under_storage() { + let temp = TempDir::new().unwrap(); + let storage = temp.path().join("env").join("virtual"); + let prefix = storage.join("a").join("b").join("c").join("d"); + write_pyvenv_cfg(&prefix, "d", "3.12.1"); + let exe = write_python_exe(&prefix); + + let locator = make_locator(Some(norm_case(&storage))); + let env = PythonEnv::new(exe, Some(prefix), None); + assert!(locator.try_from(&env).is_none()); + } + + #[test] + fn try_from_returns_none_for_plain_venv() { + let temp = TempDir::new().unwrap(); + let prefix = temp.path().join(".venv"); + write_pyvenv_cfg(&prefix, "venv", "3.12.1"); + let exe = write_python_exe(&prefix); + + let locator = make_locator(Some(temp.path().join("nonexistent"))); + let env = PythonEnv::new(exe, Some(prefix), None); + 
assert!(locator.try_from(&env).is_none()); + } + + #[test] + fn try_from_identifies_project_local_env_via_pyproject() { + let temp = TempDir::new().unwrap(); + let project = temp.path().join("project"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[project]\nname = \"foo\"\n\n[tool.hatch.dirs.env]\nvirtual = \".hatch\"\n", + ) + .unwrap(); + let virtual_dir = project.join(".hatch"); + let prefix = virtual_dir.join("default"); + write_pyvenv_cfg(&prefix, "default", "3.11.0"); + let exe = write_python_exe(&prefix); + + let locator = Hatch { + default_virtual_dir: None, + workspace_directories: Arc::new(Mutex::new(vec![project.clone()])), + }; + let env = PythonEnv::new(exe, Some(prefix), None); + let identified = locator.try_from(&env).expect("project-local env match"); + assert_eq!(identified.kind, Some(PythonEnvironmentKind::Hatch)); + assert_eq!(identified.project, Some(norm_case(&project))); + assert_eq!(identified.name, Some("default".to_string())); + } + + #[test] + fn try_from_identifies_project_local_env_via_hatch_toml() { + let temp = TempDir::new().unwrap(); + let project = temp.path().join("project"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("hatch.toml"), + b"[dirs.env]\nvirtual = \".hatch\"\n", + ) + .unwrap(); + let prefix = project.join(".hatch").join("default"); + write_pyvenv_cfg(&prefix, "default", "3.11.0"); + let exe = write_python_exe(&prefix); + + let locator = Hatch { + default_virtual_dir: None, + workspace_directories: Arc::new(Mutex::new(vec![project.clone()])), + }; + let env = PythonEnv::new(exe, Some(prefix), None); + let identified = locator.try_from(&env).expect("project-local env match"); + assert_eq!(identified.project, Some(norm_case(&project))); + } + + #[test] + fn try_from_rejects_project_local_without_dirs_env_config() { + let temp = TempDir::new().unwrap(); + let project = temp.path().join("project"); + fs::create_dir_all(&project).unwrap(); + // 
pyproject.toml is present but does not configure dirs.env.virtual. + fs::write( + project.join("pyproject.toml"), + b"[project]\nname = \"foo\"\n[tool.hatch.envs.default]\n", + ) + .unwrap(); + let prefix = project.join(".hatch").join("default"); + write_pyvenv_cfg(&prefix, "default", "3.11.0"); + let exe = write_python_exe(&prefix); + + let locator = Hatch { + default_virtual_dir: None, + workspace_directories: Arc::new(Mutex::new(vec![project])), + }; + let env = PythonEnv::new(exe, Some(prefix), None); + assert!(locator.try_from(&env).is_none()); + } + + #[test] + fn find_reports_envs_in_default_storage() { + let temp = TempDir::new().unwrap(); + let storage = temp.path().join("env").join("virtual"); + for env_name in ["default", "test"] { + let prefix = storage.join("myproj").join("AbCdEf12").join(env_name); + write_pyvenv_cfg(&prefix, env_name, "3.12.1"); + write_python_exe(&prefix); + } + // A bogus shallower entry should be ignored (no pyvenv.cfg here). + fs::create_dir_all(storage.join("orphan")).unwrap(); + + let envs = find_envs_in_default_storage(&storage); + assert_eq!(envs.len(), 2); + for env in envs { + assert_eq!(env.kind, Some(PythonEnvironmentKind::Hatch)); + assert_eq!(env.version.as_deref(), Some("3.12.1")); + } + } + + #[test] + fn find_reports_project_local_envs() { + let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[tool.hatch.dirs.env]\nvirtual = \".hatch\"\n", + ) + .unwrap(); + let prefix = project.join(".hatch").join("default"); + write_pyvenv_cfg(&prefix, "default", "3.11.0"); + write_python_exe(&prefix); + + let virtual_dirs = resolve_project_virtual_dirs(&project); + assert_eq!(virtual_dirs.len(), 1); + let envs = find_envs_in_flat_dir(&virtual_dirs[0], Some(project.clone())); + assert_eq!(envs.len(), 1); + assert_eq!(envs[0].project, Some(norm_case(&project))); + } + + #[test] + fn 
resolve_project_virtual_dirs_skips_non_hatch_projects() { + let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[project]\nname = \"foo\"\n", + ) + .unwrap(); + assert!(resolve_project_virtual_dirs(&project).is_empty()); + } + + #[test] + fn resolve_project_virtual_dirs_supports_absolute_path() { + let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + let absolute = temp.path().join("custom-envs"); + fs::create_dir_all(&absolute).unwrap(); + fs::write( + project.join("pyproject.toml"), + format!( + "[tool.hatch.dirs.env]\nvirtual = \"{}\"\n", + absolute.display().to_string().replace('\\', "\\\\") + ), + ) + .unwrap(); + + let dirs = resolve_project_virtual_dirs(&project); + assert_eq!(dirs, vec![norm_case(&absolute)]); + } + + #[cfg(target_os = "linux")] + #[test] + fn data_dir_uses_xdg_data_home_when_set() { + let temp = TempDir::new().unwrap(); + let mut vars = HashMap::new(); + vars.insert( + "XDG_DATA_HOME".to_string(), + temp.path().to_string_lossy().to_string(), + ); + let env = TestEnv { + home: Some(PathBuf::from("/home/test")), + vars, + }; + assert_eq!( + platform_default_data_dir(&env), + Some(temp.path().join("hatch")) + ); + } + + #[cfg(target_os = "linux")] + #[test] + fn data_dir_falls_back_to_local_share_on_linux() { + let env = TestEnv { + home: Some(PathBuf::from("/home/test")), + vars: HashMap::new(), + }; + assert_eq!( + platform_default_data_dir(&env), + Some(PathBuf::from("/home/test/.local/share/hatch")) + ); + } + + #[cfg(target_os = "macos")] + #[test] + fn data_dir_uses_application_support_on_macos() { + let env = TestEnv { + home: Some(PathBuf::from("/Users/test")), + vars: HashMap::new(), + }; + assert_eq!( + platform_default_data_dir(&env), + Some(PathBuf::from( + "/Users/test/Library/Application Support/hatch" + )) + ); + } + + #[cfg(target_os = 
"windows")] + #[test] + fn data_dir_uses_localappdata_on_windows() { + let mut vars = HashMap::new(); + vars.insert( + "LOCALAPPDATA".to_string(), + "C:\\Users\\test\\AppData\\Local".to_string(), + ); + let env = TestEnv { + home: Some(PathBuf::from("C:\\Users\\test")), + vars, + }; + assert_eq!( + platform_default_data_dir(&env), + Some(PathBuf::from("C:\\Users\\test\\AppData\\Local\\hatch")) + ); + } + + #[test] + fn default_virtual_dir_honours_hatch_data_dir_env_var() { + let temp = TempDir::new().unwrap(); + let virt = temp.path().join("env").join("virtual"); + fs::create_dir_all(&virt).unwrap(); + let mut vars = HashMap::new(); + vars.insert( + "HATCH_DATA_DIR".to_string(), + temp.path().to_string_lossy().to_string(), + ); + let env = TestEnv { + home: Some(temp.path().to_path_buf()), + vars, + }; + assert_eq!(get_default_virtual_dir(&env), Some(norm_case(virt))); + } + + #[test] + fn default_virtual_dir_does_not_fall_back_when_hatch_data_dir_is_set() { + // If HATCH_DATA_DIR is set, Hatch only uses that location. We must + // never silently fall through to the platform default — that could + // misattribute platform-default envs to Hatch when the user has + // redirected Hatch elsewhere. + let temp = TempDir::new().unwrap(); + // Set HATCH_DATA_DIR to a directory whose env/virtual subdir does not exist. 
+ let mut vars = HashMap::new(); + vars.insert( + "HATCH_DATA_DIR".to_string(), + temp.path().to_string_lossy().to_string(), + ); + let env = TestEnv { + home: Some(temp.path().to_path_buf()), + vars, + }; + assert_eq!(get_default_virtual_dir(&env), None); + } +} diff --git a/crates/pet/Cargo.toml b/crates/pet/Cargo.toml index 36515074..ee5ac5be 100644 --- a/crates/pet/Cargo.toml +++ b/crates/pet/Cargo.toml @@ -26,6 +26,7 @@ pet-jsonrpc = { path = "../pet-jsonrpc" } pet-fs = { path = "../pet-fs" } pet-pyenv = { path = "../pet-pyenv" } pet-poetry = { path = "../pet-poetry" } +pet-hatch = { path = "../pet-hatch" } pet-reporter = { path = "../pet-reporter" } pet-virtualenvwrapper = { path = "../pet-virtualenvwrapper" } pet-python-utils = { path = "../pet-python-utils" } diff --git a/crates/pet/src/jsonrpc.rs b/crates/pet/src/jsonrpc.rs index c4baabd2..85e7a543 100644 --- a/crates/pet/src/jsonrpc.rs +++ b/crates/pet/src/jsonrpc.rs @@ -2070,6 +2070,7 @@ mod tests { LocatorKind::VirtualEnvWrapper, RefreshStatePersistence::Stateless, ), + (LocatorKind::Hatch, RefreshStatePersistence::ConfiguredOnly), (LocatorKind::Venv, RefreshStatePersistence::Stateless), (LocatorKind::VirtualEnv, RefreshStatePersistence::Stateless), #[cfg(unix)] diff --git a/crates/pet/src/locators.rs b/crates/pet/src/locators.rs index 6a78e7e5..c25473bf 100644 --- a/crates/pet/src/locators.rs +++ b/crates/pet/src/locators.rs @@ -10,6 +10,7 @@ use pet_core::python_environment::{ PythonEnvironment, PythonEnvironmentBuilder, PythonEnvironmentKind, }; use pet_core::Locator; +use pet_hatch::Hatch; use pet_linux_global_python::LinuxGlobalPython; use pet_mac_commandlinetools::MacCmdLineTools; use pet_mac_python_org::MacPythonOrg; @@ -69,6 +70,7 @@ pub fn create_locators( locators.push(poetry_locator); locators.push(Arc::new(PipEnv::from(environment))); locators.push(Arc::new(VirtualEnvWrapper::from(environment))); + locators.push(Arc::new(Hatch::from(environment))); locators.push(Arc::new(Venv::new())); // 
VirtualEnv is the most generic, hence should be the last. locators.push(Arc::new(VirtualEnv::new())); From b25b3a5bc89a799727c06d9290d151c8f18ad803 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 8 May 2026 14:39:57 -0700 Subject: [PATCH 02/16] fix: address PR #460 review feedback - Cache resolved virtual dirs in configure() so try_from() does not re-parse pyproject.toml / hatch.toml on every executable identification attempt. - Expand ~ (and ${HOME}/${USERNAME}) in configured dirs.env.virtual values via pet_fs::path::expand_path so values like '~/.virtualenvs' resolve against the user home. - Build the new cache outside the workspace_virtual_dirs lock to keep disk I/O out of the critical section. - Serialize env-var-mutating tests via a per-binary mutex so cargo's default multi-threaded harness cannot race. --- crates/pet-hatch/src/lib.rs | 178 ++++++++++++++++++++++++++++-------- 1 file changed, 141 insertions(+), 37 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index a9c9c0b9..7f5c2e23 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -43,7 +43,7 @@ use pet_core::{ reporter::Reporter, Configuration, Locator, LocatorKind, RefreshStatePersistence, }; -use pet_fs::path::norm_case; +use pet_fs::path::{expand_path, norm_case}; use pet_python_utils::executable::{find_executable, find_executables}; use serde::Deserialize; @@ -54,14 +54,19 @@ use serde::Deserialize; /// plugin's `PLUGIN_NAME` in Hatch's source. const VIRTUAL_ENV_SUBDIR: [&str; 2] = ["env", "virtual"]; +/// Per-workspace cache of resolved Hatch virtual directories. Each entry is +/// `(workspace_root, resolved_virtual_dirs)` and is populated by `configure()`. +type WorkspaceVirtualDirs = Vec<(PathBuf, Vec)>; + pub struct Hatch { /// Default storage directory for Hatch virtual environments — i.e. /// `/env/virtual`. Resolved at construction. None if the /// directory does not yet exist (it is created lazily by Hatch). 
default_virtual_dir: Option, - /// Workspace directories supplied via configuration. Used to discover - /// project-local Hatch environments via parsed `dirs.env.virtual` config. - workspace_directories: Arc>>, + /// Per-workspace resolved virtual directories, computed during + /// `configure()` so that hot-path identification (`try_from`) does no + /// disk I/O or TOML parsing. + workspace_virtual_dirs: Arc>, } impl Default for Hatch { @@ -78,7 +83,7 @@ impl Hatch { pub fn from(environment: &dyn Environment) -> Self { Self { default_virtual_dir: get_default_virtual_dir(environment), - workspace_directories: Arc::new(Mutex::new(Vec::new())), + workspace_virtual_dirs: Arc::new(Mutex::new(Vec::new())), } } } @@ -97,14 +102,21 @@ impl Locator for Hatch { } fn configure(&self, config: &Configuration) { - let mut ws = self - .workspace_directories - .lock() - .expect("workspace_directories mutex poisoned"); - ws.clear(); + // Precompute and cache each workspace's resolved Hatch virtual dirs so + // `try_from()` does not have to re-read/parse pyproject.toml/hatch.toml + // on every executable identification attempt. We build the new cache + // *outside* the lock to keep disk I/O out of the critical section. + let mut new_cache: WorkspaceVirtualDirs = Vec::new(); if let Some(dirs) = config.workspace_directories.as_ref() { - ws.extend(dirs.iter().cloned()); + for workspace in dirs { + let virtual_dirs = resolve_project_virtual_dirs(workspace); + new_cache.push((workspace.clone(), virtual_dirs)); + } } + *self + .workspace_virtual_dirs + .lock() + .expect("workspace_virtual_dirs mutex poisoned") = new_cache; } fn try_from(&self, env: &PythonEnv) -> Option { @@ -136,13 +148,13 @@ impl Locator for Hatch { // Case 2: prefix lives one level under a workspace's configured // `dirs.env.virtual` directory (flat layout). 
let workspaces = self - .workspace_directories + .workspace_virtual_dirs .lock() - .expect("workspace_directories mutex poisoned") + .expect("workspace_virtual_dirs mutex poisoned") .clone(); - for workspace in &workspaces { - for virtual_dir in resolve_project_virtual_dirs(workspace) { - if prefix_is_directly_under(&prefix, &virtual_dir) { + for (workspace, virtual_dirs) in &workspaces { + for virtual_dir in virtual_dirs { + if prefix_is_directly_under(&prefix, virtual_dir) { let env_name = prefix .file_name() .map(|n| n.to_string_lossy().to_string()) @@ -176,13 +188,13 @@ impl Locator for Hatch { // 2. Walk project-local virtual directories for each configured workspace. let workspaces = self - .workspace_directories + .workspace_virtual_dirs .lock() - .expect("workspace_directories mutex poisoned") + .expect("workspace_virtual_dirs mutex poisoned") .clone(); - for workspace in &workspaces { - for virtual_dir in resolve_project_virtual_dirs(workspace) { - for env in find_envs_in_flat_dir(&virtual_dir, Some(workspace.clone())) { + for (workspace, virtual_dirs) in &workspaces { + for virtual_dir in virtual_dirs { + for env in find_envs_in_flat_dir(virtual_dir, Some(workspace.clone())) { reporter.report_environment(&env); } } @@ -355,10 +367,14 @@ struct HatchDirs { fn resolve_project_virtual_dirs(workspace: &Path) -> Vec { let mut dirs = Vec::new(); for raw in read_configured_virtual_paths(workspace) { - let resolved = if Path::new(&raw).is_absolute() { - PathBuf::from(&raw) + // Expand ~ and ${HOME}/${USERNAME} so configured values like + // "~/.virtualenvs" resolve to the user home rather than being + // joined onto the workspace as a relative path. 
+ let expanded = expand_path(PathBuf::from(&raw)); + let resolved = if expanded.is_absolute() { + expanded } else { - workspace.join(&raw) + workspace.join(expanded) }; if resolved.is_dir() { dirs.push(norm_case(resolved)); @@ -507,8 +523,15 @@ fn build_env( mod tests { use super::*; use std::collections::HashMap; + use std::sync::Mutex as StdMutex; use tempfile::TempDir; + /// Serializes any test that mutates process-global environment variables + /// (HOME / USERPROFILE / etc.) so cargo's default multi-threaded harness + /// cannot race. Use `let _guard = ENV_LOCK.lock()...;` at the top of any + /// test that reads or writes those variables. + static ENV_LOCK: StdMutex<()> = StdMutex::new(()); + struct TestEnv { home: Option, vars: HashMap, @@ -553,7 +576,23 @@ mod tests { fn make_locator(default_virtual_dir: Option) -> Hatch { Hatch { default_virtual_dir, - workspace_directories: Arc::new(Mutex::new(vec![])), + workspace_virtual_dirs: Arc::new(Mutex::new(vec![])), + } + } + + /// Build a locator with a single configured workspace whose virtual dirs + /// have been resolved up-front (mirrors what `configure()` does). 
+ fn make_locator_with_workspace( + default_virtual_dir: Option, + workspace: &Path, + ) -> Hatch { + let virtual_dirs = resolve_project_virtual_dirs(workspace); + Hatch { + default_virtual_dir, + workspace_virtual_dirs: Arc::new(Mutex::new(vec![( + workspace.to_path_buf(), + virtual_dirs, + )])), } } @@ -640,10 +679,7 @@ mod tests { write_pyvenv_cfg(&prefix, "default", "3.11.0"); let exe = write_python_exe(&prefix); - let locator = Hatch { - default_virtual_dir: None, - workspace_directories: Arc::new(Mutex::new(vec![project.clone()])), - }; + let locator = make_locator_with_workspace(None, &project); let env = PythonEnv::new(exe, Some(prefix), None); let identified = locator.try_from(&env).expect("project-local env match"); assert_eq!(identified.kind, Some(PythonEnvironmentKind::Hatch)); @@ -665,10 +701,7 @@ mod tests { write_pyvenv_cfg(&prefix, "default", "3.11.0"); let exe = write_python_exe(&prefix); - let locator = Hatch { - default_virtual_dir: None, - workspace_directories: Arc::new(Mutex::new(vec![project.clone()])), - }; + let locator = make_locator_with_workspace(None, &project); let env = PythonEnv::new(exe, Some(prefix), None); let identified = locator.try_from(&env).expect("project-local env match"); assert_eq!(identified.project, Some(norm_case(&project))); @@ -689,10 +722,7 @@ mod tests { write_pyvenv_cfg(&prefix, "default", "3.11.0"); let exe = write_python_exe(&prefix); - let locator = Hatch { - default_virtual_dir: None, - workspace_directories: Arc::new(Mutex::new(vec![project])), - }; + let locator = make_locator_with_workspace(None, &project); let env = PythonEnv::new(exe, Some(prefix), None); assert!(locator.try_from(&env).is_none()); } @@ -771,6 +801,80 @@ mod tests { assert_eq!(dirs, vec![norm_case(&absolute)]); } + #[test] + fn resolve_project_virtual_dirs_expands_tilde() { + // A configured value of "~/.virtualenvs" must resolve against the + // user's home directory, not be joined onto the workspace as a + // relative path. 
We fake $HOME / %USERPROFILE% to point at a + // tempdir we control, then make sure the expanded path is what we + // get back. + // + // `expand_path()` reads HOME / USERPROFILE from the *process* env, so + // this test mutates global state. We serialize against any other + // env-mutating test in this crate via `ENV_LOCK` so cargo's default + // multi-threaded harness cannot race. + let _guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner()); + + let temp = TempDir::new().unwrap(); + let fake_home = temp.path().join("home"); + let virtualenvs = fake_home.join(".virtualenvs"); + fs::create_dir_all(&virtualenvs).unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[tool.hatch.dirs.env]\nvirtual = \"~/.virtualenvs\"\n", + ) + .unwrap(); + + let prev_home = std::env::var_os("HOME"); + let prev_user_profile = std::env::var_os("USERPROFILE"); + std::env::set_var("HOME", &fake_home); + std::env::set_var("USERPROFILE", &fake_home); + + let dirs = resolve_project_virtual_dirs(&project); + + // Restore env regardless of assertion outcome. + match prev_home { + Some(v) => std::env::set_var("HOME", v), + None => std::env::remove_var("HOME"), + } + match prev_user_profile { + Some(v) => std::env::set_var("USERPROFILE", v), + None => std::env::remove_var("USERPROFILE"), + } + + assert_eq!(dirs, vec![norm_case(&virtualenvs)]); + } + + #[test] + fn configure_caches_workspace_virtual_dirs() { + // try_from() must not re-read pyproject.toml on every call; configure() + // is responsible for resolving and caching the virtual dirs once. 
+ let temp = TempDir::new().unwrap(); + let project = temp.path().join("project"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[tool.hatch.dirs.env]\nvirtual = \".hatch\"\n", + ) + .unwrap(); + let virtual_dir = project.join(".hatch"); + fs::create_dir_all(&virtual_dir).unwrap(); + + let locator = make_locator(None); + let config = Configuration { + workspace_directories: Some(vec![project.clone()]), + ..Configuration::default() + }; + locator.configure(&config); + + let cached = locator.workspace_virtual_dirs.lock().unwrap().clone(); + assert_eq!(cached.len(), 1); + assert_eq!(cached[0].0, project); + assert_eq!(cached[0].1, vec![norm_case(&virtual_dir)]); + } + #[cfg(target_os = "linux")] #[test] fn data_dir_uses_xdg_data_home_when_set() { From ec40512da6d156c30a0808f29177a4f96ac8a866 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 8 May 2026 15:07:32 -0700 Subject: [PATCH 03/16] fix: order Hatch before VirtualEnvWrapper in locator chain (PR #460) A Hatch project can configure dirs.env.virtual = '~/.virtualenvs' (or any path that overlaps with WORKON_HOME). With Hatch placed after VirtualEnvWrapper, those envs would be claimed as VirtualEnvWrapper before Hatch ever saw them. Move Hatch ahead so it gets first claim on workspace-configured envs. 
--- crates/pet/src/jsonrpc.rs | 2 +- crates/pet/src/locators.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/pet/src/jsonrpc.rs b/crates/pet/src/jsonrpc.rs index 85e7a543..a13578ef 100644 --- a/crates/pet/src/jsonrpc.rs +++ b/crates/pet/src/jsonrpc.rs @@ -2066,11 +2066,11 @@ mod tests { RefreshStatePersistence::SyncedDiscoveryState, ), (LocatorKind::PipEnv, RefreshStatePersistence::ConfiguredOnly), + (LocatorKind::Hatch, RefreshStatePersistence::ConfiguredOnly), ( LocatorKind::VirtualEnvWrapper, RefreshStatePersistence::Stateless, ), - (LocatorKind::Hatch, RefreshStatePersistence::ConfiguredOnly), (LocatorKind::Venv, RefreshStatePersistence::Stateless), (LocatorKind::VirtualEnv, RefreshStatePersistence::Stateless), #[cfg(unix)] diff --git a/crates/pet/src/locators.rs b/crates/pet/src/locators.rs index c25473bf..a0b84205 100644 --- a/crates/pet/src/locators.rs +++ b/crates/pet/src/locators.rs @@ -69,8 +69,12 @@ pub fn create_locators( locators.push(Arc::new(Uv::from(environment))); locators.push(poetry_locator); locators.push(Arc::new(PipEnv::from(environment))); - locators.push(Arc::new(VirtualEnvWrapper::from(environment))); + // Hatch must run before VirtualEnvWrapper: a Hatch project can configure + // `dirs.env.virtual = "~/.virtualenvs"` (or any other directory that + // overlaps with `WORKON_HOME`), and we want Hatch to claim its envs + // first when the workspace marks them as Hatch-managed. locators.push(Arc::new(Hatch::from(environment))); + locators.push(Arc::new(VirtualEnvWrapper::from(environment))); locators.push(Arc::new(Venv::new())); // VirtualEnv is the most generic, hence should be the last. 
locators.push(Arc::new(VirtualEnv::new())); From b2a819bca276382b60e2951184cd46b00793701d Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Sat, 9 May 2026 19:00:38 -0700 Subject: [PATCH 04/16] fix: address PR #460 round-2 review feedback - get_default_virtual_dir(): no longer requires the path to exist on disk at construction time. The long-lived locator graph is built once at server startup; users may install Hatch (or create the first env) later in the same process. find() now re-checks existence at call time so newly-created envs are discovered without a restart. - try_from(): do the cheap path-shape classification (default storage / configured workspace dirs) BEFORE reading pyvenv.cfg, so non-Hatch venvs flowing through the locator chain do not pay an extra filesystem read. - try_from(): inspect the workspace cache under the lock and capture the match instead of cloning the entire cache on every identification attempt. --- crates/pet-hatch/src/lib.rs | 133 ++++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 60 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index 7f5c2e23..c6464d16 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -128,61 +128,76 @@ impl Locator for Hatch { .map(Path::to_path_buf) })?; - // A pyvenv.cfg must be present — Hatch envs are always venvs. - let cfg = PyVenvCfg::find(&prefix)?; - + // Do the cheap path-shape classification *first* so we don't pay for + // a `pyvenv.cfg` filesystem read on every non-Hatch venv that flows + // through the locator chain. 
+ // // Case 1: prefix lives in the default `/env/virtual` storage, // exactly three components deep: // /// + let mut classification: Option<(String, Option)> = None; if let Some(storage) = self.default_virtual_dir.as_deref() { if let Some(env_name) = match_default_storage_layout(&prefix, storage) { - trace!( - "Hatch env (default storage) {} found at {}", - env_name, - env.executable.display() - ); - return Some(build_env(&prefix, &cfg, env_name, None, &env.executable)); + classification = Some((env_name, None)); } } // Case 2: prefix lives one level under a workspace's configured - // `dirs.env.virtual` directory (flat layout). - let workspaces = self - .workspace_virtual_dirs - .lock() - .expect("workspace_virtual_dirs mutex poisoned") - .clone(); - for (workspace, virtual_dirs) in &workspaces { - for virtual_dir in virtual_dirs { - if prefix_is_directly_under(&prefix, virtual_dir) { - let env_name = prefix - .file_name() - .map(|n| n.to_string_lossy().to_string()) - .unwrap_or_default(); - trace!( - "Hatch env (project-local) {} found at {}", - env_name, - env.executable.display() - ); - return Some(build_env( - &prefix, - &cfg, - env_name, - Some(workspace.clone()), - &env.executable, - )); + // `dirs.env.virtual` directory (flat layout). Inspect the cached + // workspaces under the lock and capture the match instead of cloning + // the entire cache. + if classification.is_none() { + let cache = self + .workspace_virtual_dirs + .lock() + .expect("workspace_virtual_dirs mutex poisoned"); + 'workspaces: for (workspace, virtual_dirs) in cache.iter() { + for virtual_dir in virtual_dirs { + if prefix_is_directly_under(&prefix, virtual_dir) { + let env_name = prefix + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(); + classification = Some((env_name, Some(workspace.clone()))); + break 'workspaces; + } } } } - None + let (env_name, project_path) = classification?; + + // Now that we know this is (likely) a Hatch env, read pyvenv.cfg. 
+ // Hatch always writes one; if it's missing this isn't actually a + // Hatch-managed env. + let cfg = PyVenvCfg::find(&prefix)?; + + trace!( + "Hatch env {} found at {}", + env_name, + env.executable.display() + ); + Some(build_env( + &prefix, + &cfg, + env_name, + project_path, + &env.executable, + )) } fn find(&self, reporter: &dyn Reporter) { - // 1. Walk the default storage directory. + // 1. Walk the default storage directory if it currently exists. We + // re-check existence here (rather than caching the result of the + // check at construction) because the long-lived locator graph is + // built once at server startup; the user may install Hatch or + // create their first env after that point and we still want to + // discover it without a restart. if let Some(storage) = self.default_virtual_dir.as_deref() { - for env in find_envs_in_default_storage(storage) { - reporter.report_environment(&env); + if storage.is_dir() { + for env in find_envs_in_default_storage(storage) { + reporter.report_environment(&env); + } } } @@ -214,29 +229,23 @@ impl Locator for Hatch { /// 2. Platform default for `platformdirs.user_data_dir("hatch", appauthor=False)` /// (then append `env/virtual`). /// -/// Returns `None` if the resulting directory does not exist on disk. +/// The returned path may not exist on disk yet; callers must check existence +/// at use time. This lets us correctly identify Hatch envs created later in +/// the same long-lived PET process without a restart. fn get_default_virtual_dir(environment: &dyn Environment) -> Option { - // If HATCH_DATA_DIR is set and non-empty, Hatch *only* uses that location — - // it never falls back to the platform default. Mirror that behaviour: return - // the env/virtual subdir when it exists on disk, otherwise None. Do not - // fall through to platform defaults, or we'd risk attributing platform- - // default envs to Hatch when the user has redirected Hatch elsewhere. 
+ // If HATCH_DATA_DIR is set and non-empty, Hatch *only* uses that location + // — it never falls back to the platform default. Mirror that behaviour. + // Do not fall through to platform defaults, or we'd risk attributing + // platform-default envs to Hatch when the user has redirected Hatch + // elsewhere. if let Some(custom) = environment.get_env_var("HATCH_DATA_DIR".to_string()) { if !custom.is_empty() { - let path = append_virtual_subdir(PathBuf::from(custom)); - return if path.is_dir() { - Some(norm_case(path)) - } else { - None - }; + return Some(norm_case(append_virtual_subdir(PathBuf::from(custom)))); } } - let path = append_virtual_subdir(platform_default_data_dir(environment)?); - if path.is_dir() { - Some(norm_case(path)) - } else { - None - } + Some(norm_case(append_virtual_subdir(platform_default_data_dir( + environment, + )?))) } fn append_virtual_subdir(data_dir: PathBuf) -> PathBuf { @@ -962,18 +971,22 @@ mod tests { // If HATCH_DATA_DIR is set, Hatch only uses that location. We must // never silently fall through to the platform default — that could // misattribute platform-default envs to Hatch when the user has - // redirected Hatch elsewhere. + // redirected Hatch elsewhere. The path itself does not need to + // exist at construction time (it may be created later in the + // process lifetime); we only require that the returned value + // points at HATCH_DATA_DIR/env/virtual, not the platform default. let temp = TempDir::new().unwrap(); - // Set HATCH_DATA_DIR to a directory whose env/virtual subdir does not exist. 
+ let custom = temp.path().join("does-not-exist-yet"); let mut vars = HashMap::new(); vars.insert( "HATCH_DATA_DIR".to_string(), - temp.path().to_string_lossy().to_string(), + custom.to_string_lossy().to_string(), ); let env = TestEnv { home: Some(temp.path().to_path_buf()), vars, }; - assert_eq!(get_default_virtual_dir(&env), None); + let expected = norm_case(custom.join("env").join("virtual")); + assert_eq!(get_default_virtual_dir(&env), Some(expected)); } } From 29ad0dcd908cebda5c14ea374d031b0d1b937e55 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Sat, 9 May 2026 19:16:56 -0700 Subject: [PATCH 05/16] fix: address PR #460 round-3 review feedback - resolve_project_virtual_dirs(): cache configured paths regardless of whether the directory exists on disk yet. A user may configure 'virtual = .hatch' and create the env later in this process lifetime; we now recognise it without requiring the client to re-send 'configure'. find_envs_in_flat_dir() already handles missing dirs by returning empty. - default_virtual_dir field doc: corrected to reflect that the path may not exist on disk; existence is rechecked at find() time. - Module docs: aligned with implementation. All workspace-configured 'dirs.env.virtual' paths use the flat layout, regardless of whether they are relative, absolute, or use ~ expansion. --- crates/pet-hatch/src/lib.rs | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index c6464d16..cd825087 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -25,8 +25,10 @@ //! virtual = ".hatch" //! ``` //! -//! When the configured `virtual` path is relative or matches `~/.virtualenvs`, -//! Hatch uses a flat layout: `//`. +//! For these workspace-configured locations Hatch uses a flat layout: +//! `//`. Configured paths may be relative +//! (resolved against the workspace root), absolute, or use `~` / +//! 
`${HOME}` style expansion (e.g. `~/.virtualenvs`). use std::{ fs, @@ -60,8 +62,12 @@ type WorkspaceVirtualDirs = Vec<(PathBuf, Vec)>; pub struct Hatch { /// Default storage directory for Hatch virtual environments — i.e. - /// `/env/virtual`. Resolved at construction. None if the - /// directory does not yet exist (it is created lazily by Hatch). + /// `/env/virtual`. Resolved at construction. The path may not + /// exist on disk yet (Hatch creates it lazily on first use); existence + /// is re-checked by `find()` at discovery time so envs created later in + /// this process lifetime are still discoverable without a restart. + /// `None` only when the platform data directory itself cannot be + /// resolved (e.g. no home directory). default_virtual_dir: Option, /// Per-workspace resolved virtual directories, computed during /// `configure()` so that hot-path identification (`try_from`) does no @@ -370,9 +376,15 @@ struct HatchDirs { /// each to an absolute directory. Both `pyproject.toml` (`[tool.hatch.dirs.env]`) /// and a top-level `hatch.toml` (`[dirs.env]`) are checked. /// -/// Each value may be relative (resolved against the workspace root) or -/// absolute. Returns an empty Vec if the workspace is not a Hatch project, -/// or if no `virtual` value is configured. +/// Each value may be relative (resolved against the workspace root), +/// absolute, or use `~` / `${HOME}` expansion. Returns an empty Vec if the +/// workspace is not a Hatch project, or if no `virtual` value is configured. +/// +/// The returned paths are cached regardless of whether they currently exist +/// on disk — a user may configure `virtual = ".hatch"` and create the env +/// later in this process lifetime, and we want subsequent `try_from()` +/// calls to recognise it without requiring the client to re-send `configure`. +/// `find_envs_in_flat_dir()` handles missing directories at discovery time. 
fn resolve_project_virtual_dirs(workspace: &Path) -> Vec { let mut dirs = Vec::new(); for raw in read_configured_virtual_paths(workspace) { @@ -385,9 +397,7 @@ fn resolve_project_virtual_dirs(workspace: &Path) -> Vec { } else { workspace.join(expanded) }; - if resolved.is_dir() { - dirs.push(norm_case(resolved)); - } + dirs.push(norm_case(resolved)); } dirs } From 4da16a53d021fba75dc0537323918a41b09720db Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Sun, 10 May 2026 12:17:06 -0700 Subject: [PATCH 06/16] chore: align pet-hatch serde version with workspace style (PR #460) Use serde 1.0.152 to match the dominant version string used across the workspace. Both 1.0.152 and 1.0.226 are semver-compatible and resolve to the same compiled crate, but matching the workspace style avoids inconsistency. Keep toml at 0.9.7 (already used by pet-uv). --- crates/pet-hatch/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pet-hatch/Cargo.toml b/crates/pet-hatch/Cargo.toml index c49291c1..dc3f6340 100644 --- a/crates/pet-hatch/Cargo.toml +++ b/crates/pet-hatch/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true pet-core = { path = "../pet-core" } pet-fs = { path = "../pet-fs" } pet-python-utils = { path = "../pet-python-utils" } -serde = { version = "1.0.226", features = ["derive"] } +serde = { version = "1.0.152", features = ["derive"] } toml = "0.9.7" log = "0.4.21" From beef4e54c92a9e18ffd6bf56ab20c72eb8ba353f Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 08:25:16 -0700 Subject: [PATCH 07/16] fix: expand and trim HATCH_DATA_DIR; skip empty configured virtual paths (PR #460) --- crates/pet-hatch/src/lib.rs | 113 +++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 3 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index cd825087..ea9fa737 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -245,8 +245,13 @@ fn 
get_default_virtual_dir(environment: &dyn Environment) -> Option { // platform-default envs to Hatch when the user has redirected Hatch // elsewhere. if let Some(custom) = environment.get_env_var("HATCH_DATA_DIR".to_string()) { - if !custom.is_empty() { - return Some(norm_case(append_virtual_subdir(PathBuf::from(custom)))); + let trimmed = custom.trim(); + if !trimmed.is_empty() { + // Expand ~ / ${HOME} / ${USERNAME} so a value like + // `HATCH_DATA_DIR=~/.local/share/hatch` resolves to the user + // home rather than a literal `~` directory. + let expanded = expand_path(PathBuf::from(trimmed)); + return Some(norm_case(append_virtual_subdir(expanded))); } } Some(norm_case(append_virtual_subdir(platform_default_data_dir( @@ -388,10 +393,17 @@ struct HatchDirs { fn resolve_project_virtual_dirs(workspace: &Path) -> Vec { let mut dirs = Vec::new(); for raw in read_configured_virtual_paths(workspace) { + // Skip empty/whitespace values. Without this, `virtual = ""` would + // resolve to the workspace root and we'd misclassify any venv + // directly under the workspace (e.g. `./.venv`) as Hatch-managed. + let trimmed = raw.trim(); + if trimmed.is_empty() { + continue; + } // Expand ~ and ${HOME}/${USERNAME} so configured values like // "~/.virtualenvs" resolve to the user home rather than being // joined onto the workspace as a relative path. - let expanded = expand_path(PathBuf::from(&raw)); + let expanded = expand_path(PathBuf::from(trimmed)); let resolved = if expanded.is_absolute() { expanded } else { @@ -999,4 +1011,99 @@ mod tests { let expected = norm_case(custom.join("env").join("virtual")); assert_eq!(get_default_virtual_dir(&env), Some(expected)); } + + #[test] + fn default_virtual_dir_expands_tilde_in_hatch_data_dir() { + // A value like `HATCH_DATA_DIR=~/.local/share/hatch` must be + // expanded against the user's home rather than be treated as a + // literal `~` directory. 
+ let _guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner()); + + let temp = TempDir::new().unwrap(); + let fake_home = temp.path().join("home"); + fs::create_dir_all(&fake_home).unwrap(); + + let prev_home = std::env::var_os("HOME"); + let prev_user_profile = std::env::var_os("USERPROFILE"); + std::env::set_var("HOME", &fake_home); + std::env::set_var("USERPROFILE", &fake_home); + + let mut vars = HashMap::new(); + vars.insert( + "HATCH_DATA_DIR".to_string(), + "~/.local/share/hatch".to_string(), + ); + let env = TestEnv { + home: Some(fake_home.clone()), + vars, + }; + let resolved = get_default_virtual_dir(&env); + + match prev_home { + Some(v) => std::env::set_var("HOME", v), + None => std::env::remove_var("HOME"), + } + match prev_user_profile { + Some(v) => std::env::set_var("USERPROFILE", v), + None => std::env::remove_var("USERPROFILE"), + } + + let expected = norm_case( + fake_home + .join(".local") + .join("share") + .join("hatch") + .join("env") + .join("virtual"), + ); + assert_eq!(resolved, Some(expected)); + } + + #[test] + fn default_virtual_dir_treats_whitespace_hatch_data_dir_as_unset() { + // Whitespace-only HATCH_DATA_DIR must be treated as unset so we + // fall back to the platform default rather than resolving to + // a literal whitespace directory. + let temp = TempDir::new().unwrap(); + let mut vars = HashMap::new(); + vars.insert("HATCH_DATA_DIR".to_string(), " ".to_string()); + let env = TestEnv { + home: Some(temp.path().to_path_buf()), + vars, + }; + // Should NOT be the literal " /env/virtual"; should resolve via + // the platform default (or None if home is unavailable). + let resolved = get_default_virtual_dir(&env); + if let Some(p) = resolved { + assert!(!p.to_string_lossy().contains(" ")); + } + } + + #[test] + fn resolve_project_virtual_dirs_skips_empty_value() { + // `virtual = ""` must not resolve to the workspace root and + // misclassify unrelated venvs under the workspace as Hatch. 
+ let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[tool.hatch.dirs.env]\nvirtual = \"\"\n", + ) + .unwrap(); + assert!(resolve_project_virtual_dirs(&project).is_empty()); + } + + #[test] + fn resolve_project_virtual_dirs_skips_whitespace_value() { + let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[tool.hatch.dirs.env]\nvirtual = \" \"\n", + ) + .unwrap(); + assert!(resolve_project_virtual_dirs(&project).is_empty()); + } } From 3a1e3022290cbd04b3aac5c74af59c176c8f8905 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 09:38:17 -0700 Subject: [PATCH 08/16] perf: avoid Vec alloc and double norm_case in pet-hatch hot path (PR #460) --- crates/pet-hatch/src/lib.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index ea9fa737..3beb374f 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -337,18 +337,27 @@ fn platform_default_data_dir(environment: &dyn Environment) -> Option { fn match_default_storage_layout(prefix: &Path, storage: &Path) -> Option { let normalized = norm_case(prefix); let rel = normalized.strip_prefix(storage).ok()?; - let parts: Vec<_> = rel.iter().collect(); - if parts.len() == 3 { - Some(parts[2].to_string_lossy().to_string()) - } else { - None + // Iterate components directly to avoid a per-call Vec allocation on the + // identification hot path. We need exactly three components. 
+ let mut iter = rel.iter(); + let _project_name = iter.next()?; + let _project_id = iter.next()?; + let venv_name = iter.next()?; + if iter.next().is_some() { + return None; } + Some(venv_name.to_string_lossy().to_string()) } /// True iff `prefix`'s parent equals `dir` (case-insensitive on Windows). +/// +/// `dir` is expected to be already normalized via `norm_case()` (entries +/// cached in `resolve_project_virtual_dirs()` always are), so we only +/// normalize `prefix.parent()` here — avoiding redundant `GetLongPathNameW` +/// / case-folding work on Windows in the identification hot path. fn prefix_is_directly_under(prefix: &Path, dir: &Path) -> bool { match prefix.parent() { - Some(parent) => norm_case(parent) == norm_case(dir), + Some(parent) => norm_case(parent) == dir, None => false, } } From ba241fad8de49b57cf8966fe12f544d883ccc9db Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 13:14:23 -0700 Subject: [PATCH 09/16] fix: env-name allowlist guard + matrix support for configured Hatch virtual dirs (PR #460) Also fix CI test for tilde expansion to compare paths component-wise (Windows mixed-separator). --- crates/pet-hatch/src/lib.rs | 297 ++++++++++++++++++++++++++++++++---- 1 file changed, 271 insertions(+), 26 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index 3beb374f..4d45ac62 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -31,6 +31,7 @@ //! `${HOME}` style expansion (e.g. `~/.virtualenvs`). use std::{ + collections::HashSet, fs, path::{Path, PathBuf}, sync::{Arc, Mutex}, @@ -56,9 +57,18 @@ use serde::Deserialize; /// plugin's `PLUGIN_NAME` in Hatch's source. const VIRTUAL_ENV_SUBDIR: [&str; 2] = ["env", "virtual"]; -/// Per-workspace cache of resolved Hatch virtual directories. Each entry is -/// `(workspace_root, resolved_virtual_dirs)` and is populated by `configure()`. 
-type WorkspaceVirtualDirs = Vec<(PathBuf, Vec)>; +/// Per-workspace cache of resolved Hatch virtual directories and the set +/// of declared env names for that workspace. Each entry is +/// `(workspace_root, resolved_virtual_dirs, allowed_env_names)` and is +/// populated by `configure()`. +/// +/// `allowed_env_names` is used as a Hatch-specific guard when matching +/// venvs in workspace-configured `dirs.env.virtual` directories: a shared +/// directory like `~/.virtualenvs` can contain non-Hatch envs (created by +/// virtualenvwrapper, plain `venv`, etc.), so we only claim a venv if its +/// leaf directory name matches one of the env names declared in the +/// project's Hatch configuration. +type WorkspaceVirtualDirs = Vec<(PathBuf, Vec, HashSet)>; pub struct Hatch { /// Default storage directory for Hatch virtual environments — i.e. @@ -108,21 +118,23 @@ impl Locator for Hatch { } fn configure(&self, config: &Configuration) { - // Precompute and cache each workspace's resolved Hatch virtual dirs so - // `try_from()` does not have to re-read/parse pyproject.toml/hatch.toml - // on every executable identification attempt. We build the new cache - // *outside* the lock to keep disk I/O out of the critical section. + // Precompute and cache each workspace's resolved Hatch virtual dirs + // and declared env names so `try_from()` does not have to re-read + // or re-parse pyproject.toml / hatch.toml on every executable + // identification attempt. We build the new cache *outside* the + // lock to keep disk I/O out of the critical section. 
let mut new_cache: WorkspaceVirtualDirs = Vec::new(); if let Some(dirs) = config.workspace_directories.as_ref() { for workspace in dirs { let virtual_dirs = resolve_project_virtual_dirs(workspace); - new_cache.push((workspace.clone(), virtual_dirs)); + let env_names = resolve_project_env_names(workspace); + new_cache.push((workspace.clone(), virtual_dirs, env_names)); } } *self .workspace_virtual_dirs .lock() - .expect("workspace_virtual_dirs mutex poisoned") = new_cache; + .unwrap_or_else(|p| p.into_inner()) = new_cache; } fn try_from(&self, env: &PythonEnv) -> Option { @@ -152,18 +164,28 @@ impl Locator for Hatch { // `dirs.env.virtual` directory (flat layout). Inspect the cached // workspaces under the lock and capture the match instead of cloning // the entire cache. + // + // Because configured `dirs.env.virtual` may point at a shared + // directory (e.g. `~/.virtualenvs`), we additionally require that + // the venv's leaf directory name matches one of the env names + // declared in the workspace's Hatch configuration. Otherwise an + // unrelated virtualenvwrapper / `venv` env in the same directory + // would be misclassified as Hatch-managed. if classification.is_none() { let cache = self .workspace_virtual_dirs .lock() - .expect("workspace_virtual_dirs mutex poisoned"); - 'workspaces: for (workspace, virtual_dirs) in cache.iter() { + .unwrap_or_else(|p| p.into_inner()); + 'workspaces: for (workspace, virtual_dirs, env_names) in cache.iter() { for virtual_dir in virtual_dirs { if prefix_is_directly_under(&prefix, virtual_dir) { let env_name = prefix .file_name() .map(|n| n.to_string_lossy().to_string()) .unwrap_or_default(); + if !env_name_matches(&env_name, env_names) { + continue; + } classification = Some((env_name, Some(workspace.clone()))); break 'workspaces; } @@ -208,14 +230,16 @@ impl Locator for Hatch { } // 2. Walk project-local virtual directories for each configured workspace. 
+ // Apply the same env-name guard as `try_from()` so shared directories + // (e.g. `~/.virtualenvs`) only yield the workspace's declared envs. let workspaces = self .workspace_virtual_dirs .lock() - .expect("workspace_virtual_dirs mutex poisoned") + .unwrap_or_else(|p| p.into_inner()) .clone(); - for (workspace, virtual_dirs) in &workspaces { + for (workspace, virtual_dirs, env_names) in &workspaces { for virtual_dir in virtual_dirs { - for env in find_envs_in_flat_dir(virtual_dir, Some(workspace.clone())) { + for env in find_envs_in_flat_dir(virtual_dir, Some(workspace.clone()), env_names) { reporter.report_environment(&env); } } @@ -379,6 +403,7 @@ struct PyProjectTool { #[derive(Deserialize, Default)] struct HatchConfig { dirs: Option, + envs: Option, } #[derive(Deserialize, Default)] @@ -456,6 +481,47 @@ fn read_configured_virtual_paths(workspace: &Path) -> Vec { paths } +/// Hatch's `default` environment is always implicitly available — Hatch +/// docs: "every project has a `default` environment". So even when +/// `[tool.hatch.envs.*]` declares no env, `default` is still a valid +/// env name. We include it in the allowlist unconditionally. +const HATCH_IMPLICIT_DEFAULT_ENV: &str = "default"; + +/// Read the set of Hatch env names declared for `workspace`. Reads +/// `[tool.hatch.envs.]` from `pyproject.toml` and `[envs.]` +/// from `hatch.toml`. The implicit `default` env is always included. +/// +/// Used as a Hatch-specific guard so that venvs in a configured but +/// potentially shared `dirs.env.virtual` directory (e.g. `~/.virtualenvs`) +/// are only claimed when their leaf directory name matches a declared +/// env name — otherwise unrelated virtualenvwrapper / `venv` envs in +/// the same directory would be misclassified as Hatch. +fn resolve_project_env_names(workspace: &Path) -> HashSet { + let mut names = HashSet::new(); + names.insert(HATCH_IMPLICIT_DEFAULT_ENV.to_string()); + // pyproject.toml: [tool.hatch.envs.] 
+ if let Ok(contents) = fs::read_to_string(workspace.join("pyproject.toml")) { + if let Ok(pyproject) = toml::from_str::(&contents) { + if let Some(envs) = pyproject.tool.and_then(|t| t.hatch).and_then(|h| h.envs) { + for key in envs.keys() { + names.insert(key.clone()); + } + } + } + } + // hatch.toml: [envs.] + if let Ok(contents) = fs::read_to_string(workspace.join("hatch.toml")) { + if let Ok(hatch) = toml::from_str::(&contents) { + if let Some(envs) = hatch.envs { + for key in envs.keys() { + names.insert(key.clone()); + } + } + } + } + names +} + // --------------------------------------------------------------------------- // Discovery // --------------------------------------------------------------------------- @@ -500,8 +566,46 @@ fn find_envs_in_default_storage(storage: &Path) -> Vec { envs } -/// Walk `//` and report each venv discovered. -fn find_envs_in_flat_dir(dir: &Path, project: Option) -> Vec { +/// Returns true if `leaf` (a directory name) matches one of the declared +/// Hatch env names in `allowed`. +/// +/// Hatch's matrix feature creates per-variant directories named +/// `.` (e.g. `test.py3.10`), so a leaf matches if it +/// equals a declared name *or* starts with `"."`. +/// +/// On case-insensitive filesystems (Windows / default macOS) the on-disk +/// leaf may differ in case from the TOML key; compare lowercased on those +/// platforms. +fn env_name_matches(leaf: &str, allowed: &HashSet) -> bool { + fn normalize(s: &str) -> String { + #[cfg(any(windows, target_os = "macos"))] + { + s.to_lowercase() + } + #[cfg(not(any(windows, target_os = "macos")))] + { + s.to_string() + } + } + let leaf_n = normalize(leaf); + allowed.iter().any(|name| { + let n = normalize(name); + if n.is_empty() { + return false; + } + leaf_n == n || leaf_n.starts_with(&format!("{n}.")) + }) +} + +/// Walk `//` and report each venv discovered. 
`env_names` +/// is the allow-list of leaf directory names that are considered Hatch +/// envs (so a shared dir like `~/.virtualenvs` only yields envs the +/// workspace actually declares). +fn find_envs_in_flat_dir( + dir: &Path, + project: Option, + env_names: &HashSet, +) -> Vec { let mut envs = Vec::new(); let entries = match fs::read_dir(dir) { Ok(d) => d, @@ -512,6 +616,13 @@ fn find_envs_in_flat_dir(dir: &Path, project: Option) -> Vec n.to_string_lossy().to_string(), + None => continue, + }; + if !env_name_matches(&leaf, env_names) { + continue; + } if let Some(env) = build_env_from_prefix(&env_dir, project.clone()) { envs.push(env); } @@ -627,11 +738,13 @@ mod tests { workspace: &Path, ) -> Hatch { let virtual_dirs = resolve_project_virtual_dirs(workspace); + let env_names = resolve_project_env_names(workspace); Hatch { default_virtual_dir, workspace_virtual_dirs: Arc::new(Mutex::new(vec![( workspace.to_path_buf(), virtual_dirs, + env_names, )])), } } @@ -803,7 +916,8 @@ mod tests { let virtual_dirs = resolve_project_virtual_dirs(&project); assert_eq!(virtual_dirs.len(), 1); - let envs = find_envs_in_flat_dir(&virtual_dirs[0], Some(project.clone())); + let env_names = resolve_project_env_names(&project); + let envs = find_envs_in_flat_dir(&virtual_dirs[0], Some(project.clone()), &env_names); assert_eq!(envs.len(), 1); assert_eq!(envs[0].project, Some(norm_case(&project))); } @@ -1057,15 +1171,22 @@ mod tests { None => std::env::remove_var("USERPROFILE"), } - let expected = norm_case( - fake_home - .join(".local") - .join("share") - .join("hatch") - .join("env") - .join("virtual"), - ); - assert_eq!(resolved, Some(expected)); + // Compare via path components rather than byte-exact strings: on + // Windows, `expand_path` may preserve the forward-slash separators + // present in the input value (`~/.local/share/hatch`) while + // `PathBuf::join` adds backslashes, leading to a mixed-separator + // representation that still refers to the same logical path. 
Path + // component iteration is separator-agnostic. + let resolved = resolved.expect("HATCH_DATA_DIR resolution returned None"); + let expected = fake_home + .join(".local") + .join("share") + .join("hatch") + .join("env") + .join("virtual"); + let expected_components: Vec<_> = expected.components().collect(); + let resolved_components: Vec<_> = resolved.components().collect(); + assert_eq!(resolved_components, expected_components); } #[test] @@ -1115,4 +1236,128 @@ mod tests { .unwrap(); assert!(resolve_project_virtual_dirs(&project).is_empty()); } + + #[test] + fn resolve_project_env_names_includes_implicit_default() { + // Hatch always provides a `default` env, even if `[tool.hatch.envs.*]` + // declares none. + let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[tool.hatch.dirs.env]\nvirtual = \".hatch\"\n", + ) + .unwrap(); + let names = resolve_project_env_names(&project); + assert!(names.contains("default")); + } + + #[test] + fn resolve_project_env_names_reads_declared_envs() { + let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[tool.hatch.envs.default]\n[tool.hatch.envs.test]\n[tool.hatch.envs.docs]\n", + ) + .unwrap(); + let names = resolve_project_env_names(&project); + assert!(names.contains("default")); + assert!(names.contains("test")); + assert!(names.contains("docs")); + } + + #[test] + fn find_envs_in_flat_dir_filters_non_declared_envs() { + // A shared `dirs.env.virtual` directory (e.g. ~/.virtualenvs) may + // contain envs created by other tools. Only envs whose leaf + // directory name matches a declared Hatch env should be claimed. + let temp = TempDir::new().unwrap(); + let shared = temp.path().join("shared"); + fs::create_dir_all(&shared).unwrap(); + + // Hatch-managed env. 
+ let hatch_env = shared.join("default"); + write_pyvenv_cfg(&hatch_env, "default", "3.11.0"); + write_python_exe(&hatch_env); + + // Unrelated env (e.g. virtualenvwrapper) in the same dir. + let foreign = shared.join("some-other-project"); + write_pyvenv_cfg(&foreign, "some-other-project", "3.11.0"); + write_python_exe(&foreign); + + let mut names = HashSet::new(); + names.insert("default".to_string()); + let envs = find_envs_in_flat_dir(&shared, None, &names); + assert_eq!(envs.len(), 1); + assert_eq!(envs[0].prefix, Some(hatch_env)); + } + + #[test] + fn find_envs_in_flat_dir_accepts_matrix_variants() { + // Hatch matrix envs land on disk as `.` (e.g. + // `test.py3.10`). They must still be claimed by the declared env + // `test`. + let temp = TempDir::new().unwrap(); + let shared = temp.path().join("shared"); + fs::create_dir_all(&shared).unwrap(); + + let v1 = shared.join("test.py3.10"); + write_pyvenv_cfg(&v1, "test.py3.10", "3.10.0"); + write_python_exe(&v1); + let v2 = shared.join("test.py3.11"); + write_pyvenv_cfg(&v2, "test.py3.11", "3.11.0"); + write_python_exe(&v2); + // Foreign env must still be rejected. + let foreign = shared.join("unrelated"); + write_pyvenv_cfg(&foreign, "unrelated", "3.11.0"); + write_python_exe(&foreign); + + let mut names = HashSet::new(); + names.insert("test".to_string()); + let envs = find_envs_in_flat_dir(&shared, None, &names); + assert_eq!(envs.len(), 2); + } + + #[cfg(any(windows, target_os = "macos"))] + #[test] + fn env_name_matches_is_case_insensitive_on_case_folding_filesystems() { + let mut names = HashSet::new(); + names.insert("Default".to_string()); + assert!(env_name_matches("default", &names)); + assert!(env_name_matches("DEFAULT", &names)); + } + + #[test] + fn try_from_rejects_unknown_leaf_under_configured_virtual_dir() { + // Workspace declares only `default`. A sibling venv created by + // another tool in the same configured `virtual` directory must + // not be claimed. 
+ let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + let shared = temp.path().join("shared"); + fs::create_dir_all(&shared).unwrap(); + fs::write( + project.join("pyproject.toml"), + format!( + "[tool.hatch.dirs.env]\nvirtual = \"{}\"\n[tool.hatch.envs.default]\n", + shared.display().to_string().replace('\\', "\\\\") + ), + ) + .unwrap(); + + let foreign = shared.join("some-other-project"); + write_pyvenv_cfg(&foreign, "some-other-project", "3.11.0"); + let exe = write_python_exe(&foreign); + + let locator = make_locator_with_workspace(None, &project); + let env = PythonEnv::new(exe, Some(foreign), None); + assert!( + locator.try_from(&env).is_none(), + "Hatch should not claim non-declared envs in a shared virtual dir" + ); + } } From 16e07dff4456263742da300da2e703d37e42811d Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 13:26:38 -0700 Subject: [PATCH 10/16] perf: dedupe TOML parsing + precompute env-name matcher (PR #460) Address Copilot review: configure() now parses pyproject.toml and hatch.toml once each per workspace via read_workspace_hatch_sections(), and the env-name allowlist is precomputed into EnvNameMatcher so the try_from() hot path avoids per-call to_lowercase()/format!() allocations. --- crates/pet-hatch/src/lib.rs | 275 +++++++++++++++++++++--------------- 1 file changed, 158 insertions(+), 117 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index 4d45ac62..8d043a30 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -59,16 +59,18 @@ const VIRTUAL_ENV_SUBDIR: [&str; 2] = ["env", "virtual"]; /// Per-workspace cache of resolved Hatch virtual directories and the set /// of declared env names for that workspace. Each entry is -/// `(workspace_root, resolved_virtual_dirs, allowed_env_names)` and is +/// `(workspace_root, resolved_virtual_dirs, env_matcher)` and is /// populated by `configure()`. 
/// -/// `allowed_env_names` is used as a Hatch-specific guard when matching +/// `env_matcher` is used as a Hatch-specific guard when matching /// venvs in workspace-configured `dirs.env.virtual` directories: a shared /// directory like `~/.virtualenvs` can contain non-Hatch envs (created by /// virtualenvwrapper, plain `venv`, etc.), so we only claim a venv if its /// leaf directory name matches one of the env names declared in the -/// project's Hatch configuration. -type WorkspaceVirtualDirs = Vec<(PathBuf, Vec, HashSet)>; +/// project's Hatch configuration. The matcher pre-normalizes names so the +/// `try_from()` hot path avoids per-call `to_lowercase()` / `format!()` +/// allocations over the allowlist. +type WorkspaceVirtualDirs = Vec<(PathBuf, Vec, EnvNameMatcher)>; pub struct Hatch { /// Default storage directory for Hatch virtual environments — i.e. @@ -126,9 +128,15 @@ impl Locator for Hatch { let mut new_cache: WorkspaceVirtualDirs = Vec::new(); if let Some(dirs) = config.workspace_directories.as_ref() { for workspace in dirs { - let virtual_dirs = resolve_project_virtual_dirs(workspace); - let env_names = resolve_project_env_names(workspace); - new_cache.push((workspace.clone(), virtual_dirs, env_names)); + // Single parse of pyproject.toml + hatch.toml per workspace + // — both `virtual_dirs` and `env_names` come from the same + // TOML sections, so we read each file once here. 
+ let (virtual_dirs, env_names) = resolve_workspace_hatch_config(workspace); + new_cache.push(( + workspace.clone(), + virtual_dirs, + EnvNameMatcher::from_names(env_names), + )); } } *self @@ -176,14 +184,14 @@ impl Locator for Hatch { .workspace_virtual_dirs .lock() .unwrap_or_else(|p| p.into_inner()); - 'workspaces: for (workspace, virtual_dirs, env_names) in cache.iter() { + 'workspaces: for (workspace, virtual_dirs, matcher) in cache.iter() { for virtual_dir in virtual_dirs { if prefix_is_directly_under(&prefix, virtual_dir) { let env_name = prefix .file_name() .map(|n| n.to_string_lossy().to_string()) .unwrap_or_default(); - if !env_name_matches(&env_name, env_names) { + if !matcher.matches(&env_name) { continue; } classification = Some((env_name, Some(workspace.clone()))); @@ -237,9 +245,9 @@ impl Locator for Hatch { .lock() .unwrap_or_else(|p| p.into_inner()) .clone(); - for (workspace, virtual_dirs, env_names) in &workspaces { + for (workspace, virtual_dirs, matcher) in &workspaces { for virtual_dir in virtual_dirs { - for env in find_envs_in_flat_dir(virtual_dir, Some(workspace.clone()), env_names) { + for env in find_envs_in_flat_dir(virtual_dir, Some(workspace.clone()), matcher) { reporter.report_environment(&env); } } @@ -411,26 +419,58 @@ struct HatchDirs { env: Option, } -/// Read the configured `dirs.env.virtual` paths for a workspace and resolve -/// each to an absolute directory. Both `pyproject.toml` (`[tool.hatch.dirs.env]`) -/// and a top-level `hatch.toml` (`[dirs.env]`) are checked. -/// -/// Each value may be relative (resolved against the workspace root), -/// absolute, or use `~` / `${HOME}` expansion. Returns an empty Vec if the -/// workspace is not a Hatch project, or if no `virtual` value is configured. 
-/// -/// The returned paths are cached regardless of whether they currently exist -/// on disk — a user may configure `virtual = ".hatch"` and create the env -/// later in this process lifetime, and we want subsequent `try_from()` -/// calls to recognise it without requiring the client to re-send `configure`. -/// `find_envs_in_flat_dir()` handles missing directories at discovery time. -fn resolve_project_virtual_dirs(workspace: &Path) -> Vec { +/// Parse `pyproject.toml`'s `[tool.hatch]` table and `hatch.toml` (which +/// has the same shape as `HatchConfig`) for `workspace`, returning both +/// in a single pass. Returns `(pyproject_hatch, hatch_toml)` where each +/// is `None` if the corresponding file is missing or unparseable. +fn read_workspace_hatch_sections(workspace: &Path) -> (Option, Option) { + let pyproject = fs::read_to_string(workspace.join("pyproject.toml")) + .ok() + .and_then(|s| toml::from_str::(&s).ok()) + .and_then(|pp| pp.tool) + .and_then(|t| t.hatch); + let hatch_toml = fs::read_to_string(workspace.join("hatch.toml")) + .ok() + .and_then(|s| toml::from_str::(&s).ok()); + (pyproject, hatch_toml) +} + +fn extract_virtual_paths(sections: &(Option, Option)) -> Vec { + let mut paths = Vec::new(); + for section in [§ions.0, §ions.1].iter().copied().flatten() { + if let Some(virtual_value) = section + .dirs + .as_ref() + .and_then(|d| d.env.as_ref()) + .and_then(|env| env.get("virtual")) + .and_then(|v| v.as_str().map(str::to_string)) + { + paths.push(virtual_value); + } + } + paths +} + +fn extract_env_names(sections: &(Option, Option)) -> HashSet { + let mut names = HashSet::new(); + names.insert(HATCH_IMPLICIT_DEFAULT_ENV.to_string()); + for section in [§ions.0, §ions.1].iter().copied().flatten() { + if let Some(envs) = section.envs.as_ref() { + for key in envs.keys() { + names.insert(key.clone()); + } + } + } + names +} + +fn resolve_virtual_paths_against_workspace(workspace: &Path, raw: Vec) -> Vec { let mut dirs = Vec::new(); - for raw in 
read_configured_virtual_paths(workspace) { + for raw_value in raw { // Skip empty/whitespace values. Without this, `virtual = ""` would // resolve to the workspace root and we'd misclassify any venv // directly under the workspace (e.g. `./.venv`) as Hatch-managed. - let trimmed = raw.trim(); + let trimmed = raw_value.trim(); if trimmed.is_empty() { continue; } @@ -448,37 +488,34 @@ fn resolve_project_virtual_dirs(workspace: &Path) -> Vec { dirs } -fn read_configured_virtual_paths(workspace: &Path) -> Vec { - let mut paths = Vec::new(); - // pyproject.toml: [tool.hatch.dirs.env] - if let Ok(contents) = fs::read_to_string(workspace.join("pyproject.toml")) { - if let Ok(pyproject) = toml::from_str::(&contents) { - if let Some(virtual_value) = pyproject - .tool - .and_then(|t| t.hatch) - .and_then(|h| h.dirs) - .and_then(|d| d.env) - .and_then(|env| env.get("virtual").cloned()) - .and_then(|v| v.as_str().map(str::to_string)) - { - paths.push(virtual_value); - } - } - } - // hatch.toml: [dirs.env] - if let Ok(contents) = fs::read_to_string(workspace.join("hatch.toml")) { - if let Ok(hatch) = toml::from_str::(&contents) { - if let Some(virtual_value) = hatch - .dirs - .and_then(|d| d.env) - .and_then(|env| env.get("virtual").cloned()) - .and_then(|v| v.as_str().map(str::to_string)) - { - paths.push(virtual_value); - } - } - } - paths +/// Single entry point used by `configure()`: parses `pyproject.toml` and +/// `hatch.toml` ONCE each per workspace and derives both the resolved +/// virtual directories and the declared env names from the same parse. +fn resolve_workspace_hatch_config(workspace: &Path) -> (Vec, HashSet) { + let sections = read_workspace_hatch_sections(workspace); + let virtual_dirs = + resolve_virtual_paths_against_workspace(workspace, extract_virtual_paths(§ions)); + let env_names = extract_env_names(§ions); + (virtual_dirs, env_names) +} + +/// Read the configured `dirs.env.virtual` paths for a workspace and resolve +/// each to an absolute directory. 
Both `pyproject.toml` (`[tool.hatch.dirs.env]`) +/// and a top-level `hatch.toml` (`[dirs.env]`) are checked. +/// +/// Each value may be relative (resolved against the workspace root), +/// absolute, or use `~` / `${HOME}` expansion. Returns an empty Vec if the +/// workspace is not a Hatch project, or if no `virtual` value is configured. +/// +/// The returned paths are cached regardless of whether they currently exist +/// on disk — a user may configure `virtual = ".hatch"` and create the env +/// later in this process lifetime, and we want subsequent `try_from()` +/// calls to recognise it without requiring the client to re-send `configure`. +/// `find_envs_in_flat_dir()` handles missing directories at discovery time. +#[cfg(test)] +fn resolve_project_virtual_dirs(workspace: &Path) -> Vec { + let sections = read_workspace_hatch_sections(workspace); + resolve_virtual_paths_against_workspace(workspace, extract_virtual_paths(§ions)) } /// Hatch's `default` environment is always implicitly available — Hatch @@ -496,30 +533,10 @@ const HATCH_IMPLICIT_DEFAULT_ENV: &str = "default"; /// are only claimed when their leaf directory name matches a declared /// env name — otherwise unrelated virtualenvwrapper / `venv` envs in /// the same directory would be misclassified as Hatch. +#[cfg(test)] fn resolve_project_env_names(workspace: &Path) -> HashSet { - let mut names = HashSet::new(); - names.insert(HATCH_IMPLICIT_DEFAULT_ENV.to_string()); - // pyproject.toml: [tool.hatch.envs.] - if let Ok(contents) = fs::read_to_string(workspace.join("pyproject.toml")) { - if let Ok(pyproject) = toml::from_str::(&contents) { - if let Some(envs) = pyproject.tool.and_then(|t| t.hatch).and_then(|h| h.envs) { - for key in envs.keys() { - names.insert(key.clone()); - } - } - } - } - // hatch.toml: [envs.] 
- if let Ok(contents) = fs::read_to_string(workspace.join("hatch.toml")) { - if let Ok(hatch) = toml::from_str::(&contents) { - if let Some(envs) = hatch.envs { - for key in envs.keys() { - names.insert(key.clone()); - } - } - } - } - names + let sections = read_workspace_hatch_sections(workspace); + extract_env_names(§ions) } // --------------------------------------------------------------------------- @@ -566,45 +583,66 @@ fn find_envs_in_default_storage(storage: &Path) -> Vec { envs } -/// Returns true if `leaf` (a directory name) matches one of the declared -/// Hatch env names in `allowed`. +/// Pre-normalized allowlist of declared Hatch env names for a workspace, +/// used to filter venvs in a configured `dirs.env.virtual` directory. /// /// Hatch's matrix feature creates per-variant directories named /// `.` (e.g. `test.py3.10`), so a leaf matches if it -/// equals a declared name *or* starts with `"."`. +/// equals a declared name *or* starts with `"."`. We precompute +/// both the normalized name and its `"."` prefix so the hot path +/// (`try_from()` / `find_envs_in_flat_dir()`) avoids per-call `format!()` +/// allocations. /// /// On case-insensitive filesystems (Windows / default macOS) the on-disk -/// leaf may differ in case from the TOML key; compare lowercased on those -/// platforms. -fn env_name_matches(leaf: &str, allowed: &HashSet) -> bool { - fn normalize(s: &str) -> String { - #[cfg(any(windows, target_os = "macos"))] - { - s.to_lowercase() - } - #[cfg(not(any(windows, target_os = "macos")))] - { - s.to_string() - } +/// leaf may differ in case from the TOML key, so we lowercase both sides +/// on those platforms at construction time. +#[derive(Clone, Default, Debug)] +struct EnvNameMatcher { + /// (normalized_name, normalized_name + ".") pairs. 
+ entries: Vec<(String, String)>, +} + +fn normalize_env_name(s: &str) -> String { + #[cfg(any(windows, target_os = "macos"))] + { + s.to_lowercase() } - let leaf_n = normalize(leaf); - allowed.iter().any(|name| { - let n = normalize(name); - if n.is_empty() { - return false; + #[cfg(not(any(windows, target_os = "macos")))] + { + s.to_string() + } +} + +impl EnvNameMatcher { + fn from_names>(names: I) -> Self { + let mut entries: Vec<(String, String)> = Vec::new(); + for raw in names { + let n = normalize_env_name(&raw); + if n.is_empty() { + continue; + } + let prefix = format!("{n}."); + entries.push((n, prefix)); } - leaf_n == n || leaf_n.starts_with(&format!("{n}.")) - }) + Self { entries } + } + + fn matches(&self, leaf: &str) -> bool { + let leaf_n = normalize_env_name(leaf); + self.entries + .iter() + .any(|(n, p)| leaf_n == *n || leaf_n.starts_with(p.as_str())) + } } -/// Walk `//` and report each venv discovered. `env_names` +/// Walk `//` and report each venv discovered. `matcher` /// is the allow-list of leaf directory names that are considered Hatch /// envs (so a shared dir like `~/.virtualenvs` only yields envs the /// workspace actually declares). 
fn find_envs_in_flat_dir( dir: &Path, project: Option, - env_names: &HashSet, + matcher: &EnvNameMatcher, ) -> Vec { let mut envs = Vec::new(); let entries = match fs::read_dir(dir) { @@ -620,7 +658,7 @@ fn find_envs_in_flat_dir( Some(n) => n.to_string_lossy().to_string(), None => continue, }; - if !env_name_matches(&leaf, env_names) { + if !matcher.matches(&leaf) { continue; } if let Some(env) = build_env_from_prefix(&env_dir, project.clone()) { @@ -744,7 +782,7 @@ mod tests { workspace_virtual_dirs: Arc::new(Mutex::new(vec![( workspace.to_path_buf(), virtual_dirs, - env_names, + EnvNameMatcher::from_names(env_names), )])), } } @@ -916,8 +954,8 @@ mod tests { let virtual_dirs = resolve_project_virtual_dirs(&project); assert_eq!(virtual_dirs.len(), 1); - let env_names = resolve_project_env_names(&project); - let envs = find_envs_in_flat_dir(&virtual_dirs[0], Some(project.clone()), &env_names); + let matcher = EnvNameMatcher::from_names(resolve_project_env_names(&project)); + let envs = find_envs_in_flat_dir(&virtual_dirs[0], Some(project.clone()), &matcher); assert_eq!(envs.len(), 1); assert_eq!(envs[0].project, Some(norm_case(&project))); } @@ -1288,9 +1326,10 @@ mod tests { write_pyvenv_cfg(&foreign, "some-other-project", "3.11.0"); write_python_exe(&foreign); - let mut names = HashSet::new(); - names.insert("default".to_string()); - let envs = find_envs_in_flat_dir(&shared, None, &names); + let mut raw = HashSet::new(); + raw.insert("default".to_string()); + let matcher = EnvNameMatcher::from_names(raw); + let envs = find_envs_in_flat_dir(&shared, None, &matcher); assert_eq!(envs.len(), 1); assert_eq!(envs[0].prefix, Some(hatch_env)); } @@ -1315,9 +1354,10 @@ mod tests { write_pyvenv_cfg(&foreign, "unrelated", "3.11.0"); write_python_exe(&foreign); - let mut names = HashSet::new(); - names.insert("test".to_string()); - let envs = find_envs_in_flat_dir(&shared, None, &names); + let mut raw = HashSet::new(); + raw.insert("test".to_string()); + let matcher = 
EnvNameMatcher::from_names(raw); + let envs = find_envs_in_flat_dir(&shared, None, &matcher); assert_eq!(envs.len(), 2); } @@ -1326,8 +1366,9 @@ mod tests { fn env_name_matches_is_case_insensitive_on_case_folding_filesystems() { let mut names = HashSet::new(); names.insert("Default".to_string()); - assert!(env_name_matches("default", &names)); - assert!(env_name_matches("DEFAULT", &names)); + let matcher = EnvNameMatcher::from_names(names); + assert!(matcher.matches("default")); + assert!(matcher.matches("DEFAULT")); } #[test] From b716320cc653ffee1ebed6ac86b2037092d369f3 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 13:34:07 -0700 Subject: [PATCH 11/16] chore: revert to .unwrap() on mutex + fix stale doc reference (PR #460) --- crates/pet-hatch/src/lib.rs | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index 8d043a30..8d196a13 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -139,10 +139,7 @@ impl Locator for Hatch { )); } } - *self - .workspace_virtual_dirs - .lock() - .unwrap_or_else(|p| p.into_inner()) = new_cache; + *self.workspace_virtual_dirs.lock().unwrap() = new_cache; } fn try_from(&self, env: &PythonEnv) -> Option { @@ -180,10 +177,7 @@ impl Locator for Hatch { // unrelated virtualenvwrapper / `venv` env in the same directory // would be misclassified as Hatch-managed. if classification.is_none() { - let cache = self - .workspace_virtual_dirs - .lock() - .unwrap_or_else(|p| p.into_inner()); + let cache = self.workspace_virtual_dirs.lock().unwrap(); 'workspaces: for (workspace, virtual_dirs, matcher) in cache.iter() { for virtual_dir in virtual_dirs { if prefix_is_directly_under(&prefix, virtual_dir) { @@ -240,11 +234,7 @@ impl Locator for Hatch { // 2. Walk project-local virtual directories for each configured workspace. // Apply the same env-name guard as `try_from()` so shared directories // (e.g. 
`~/.virtualenvs`) only yield the workspace's declared envs. - let workspaces = self - .workspace_virtual_dirs - .lock() - .unwrap_or_else(|p| p.into_inner()) - .clone(); + let workspaces = self.workspace_virtual_dirs.lock().unwrap().clone(); for (workspace, virtual_dirs, matcher) in &workspaces { for virtual_dir in virtual_dirs { for env in find_envs_in_flat_dir(virtual_dir, Some(workspace.clone()), matcher) { @@ -383,10 +373,12 @@ fn match_default_storage_layout(prefix: &Path, storage: &Path) -> Option /// True iff `prefix`'s parent equals `dir` (case-insensitive on Windows). /// -/// `dir` is expected to be already normalized via `norm_case()` (entries -/// cached in `resolve_project_virtual_dirs()` always are), so we only -/// normalize `prefix.parent()` here — avoiding redundant `GetLongPathNameW` -/// / case-folding work on Windows in the identification hot path. +/// `dir` is expected to be already normalized via `norm_case()` — entries +/// cached on the `Hatch` locator are normalized at `configure()`-time by +/// `resolve_virtual_paths_against_workspace()` (called from +/// `resolve_workspace_hatch_config()`), so we only normalize +/// `prefix.parent()` here — avoiding redundant `GetLongPathNameW` / +/// case-folding work on Windows in the identification hot path. 
fn prefix_is_directly_under(prefix: &Path, dir: &Path) -> bool { match prefix.parent() { Some(parent) => norm_case(parent) == dir, From ea5b4996a5a96dd9ebb0ed1919225f3fe623dcb4 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 13:42:21 -0700 Subject: [PATCH 12/16] fix: reject unexpanded tilde paths in HATCH_DATA_DIR / dirs.env.virtual (PR #460) --- crates/pet-hatch/src/lib.rs | 61 +++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index 8d196a13..812702bf 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -273,6 +273,13 @@ fn get_default_virtual_dir(environment: &dyn Environment) -> Option { // `HATCH_DATA_DIR=~/.local/share/hatch` resolves to the user // home rather than a literal `~` directory. let expanded = expand_path(PathBuf::from(trimmed)); + // If the home directory is unavailable, `expand_path()` returns + // the input verbatim. Don't normalize a leading `~` into a + // literal directory under cwd — bail out so Hatch envs are not + // attributed to a bogus path. + if path_starts_with_tilde(&expanded) { + return None; + } return Some(norm_case(append_virtual_subdir(expanded))); } } @@ -289,6 +296,14 @@ fn append_virtual_subdir(data_dir: PathBuf) -> PathBuf { path } +/// Returns true if `path` still begins with a literal `~`, indicating that +/// `expand_path()` could not resolve the user's home directory (no HOME / +/// USERPROFILE set). Such paths must not be normalized or joined against +/// the workspace root, since `~` was not the user's intended directory. +fn path_starts_with_tilde(path: &Path) -> bool { + path.to_str().is_some_and(|s| s.starts_with('~')) +} + /// Platform default for Hatch's data directory. /// /// Mirrors `platformdirs.user_data_dir("hatch", appauthor=False)`. 
@@ -470,6 +485,12 @@ fn resolve_virtual_paths_against_workspace(workspace: &Path, raw: Vec) - // "~/.virtualenvs" resolve to the user home rather than being // joined onto the workspace as a relative path. let expanded = expand_path(PathBuf::from(trimmed)); + // If the home directory is unavailable, `expand_path()` returns + // the input verbatim. Skip such entries rather than joining a + // literal `~` onto the workspace root (e.g. `<workspace>/~/...`). + if path_starts_with_tilde(&expanded) { + continue; + } let resolved = if expanded.is_absolute() { expanded } else { @@ -1031,6 +1052,46 @@ mod tests { assert_eq!(dirs, vec![norm_case(&virtualenvs)]); } + #[test] + fn resolve_project_virtual_dirs_skips_unexpanded_tilde() { + // If HOME / USERPROFILE are unset, `expand_path("~/.virtualenvs")` + // returns the input verbatim. We must not join `~` onto the + // workspace root (yielding `<workspace>/~/.virtualenvs`) or pass + // a tilde-prefixed path through `norm_case()` — both would + // misclassify unrelated envs. 
+ let _guard = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner()); + + let temp = TempDir::new().unwrap(); + let project = temp.path().join("proj"); + fs::create_dir_all(&project).unwrap(); + fs::write( + project.join("pyproject.toml"), + b"[tool.hatch.dirs.env]\nvirtual = \"~/.virtualenvs\"\n", + ) + .unwrap(); + + let prev_home = std::env::var_os("HOME"); + let prev_user_profile = std::env::var_os("USERPROFILE"); + std::env::remove_var("HOME"); + std::env::remove_var("USERPROFILE"); + + let dirs = resolve_project_virtual_dirs(&project); + + match prev_home { + Some(v) => std::env::set_var("HOME", v), + None => std::env::remove_var("HOME"), + } + match prev_user_profile { + Some(v) => std::env::set_var("USERPROFILE", v), + None => std::env::remove_var("USERPROFILE"), + } + + assert!( + dirs.is_empty(), + "unexpanded tilde paths must not be claimed: got {dirs:?}" + ); + } + #[test] fn configure_caches_workspace_virtual_dirs() { // try_from() must not re-read pyproject.toml on every call; configure() From 4225e5c0e2c7825025fabb9edd5ca960408e28f0 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 13:50:57 -0700 Subject: [PATCH 13/16] chore: use .expect() on workspace mutex to match codebase pattern (PR #460) --- crates/pet-hatch/src/lib.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index 812702bf..27604da8 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -139,7 +139,10 @@ impl Locator for Hatch { )); } } - *self.workspace_virtual_dirs.lock().unwrap() = new_cache; + *self + .workspace_virtual_dirs + .lock() + .expect("workspace_virtual_dirs mutex poisoned") = new_cache; } fn try_from(&self, env: &PythonEnv) -> Option { @@ -177,7 +180,10 @@ impl Locator for Hatch { // unrelated virtualenvwrapper / `venv` env in the same directory // would be misclassified as Hatch-managed. 
if classification.is_none() { - let cache = self.workspace_virtual_dirs.lock().unwrap(); + let cache = self + .workspace_virtual_dirs + .lock() + .expect("workspace_virtual_dirs mutex poisoned"); 'workspaces: for (workspace, virtual_dirs, matcher) in cache.iter() { for virtual_dir in virtual_dirs { if prefix_is_directly_under(&prefix, virtual_dir) { @@ -234,7 +240,11 @@ impl Locator for Hatch { // 2. Walk project-local virtual directories for each configured workspace. // Apply the same env-name guard as `try_from()` so shared directories // (e.g. `~/.virtualenvs`) only yield the workspace's declared envs. - let workspaces = self.workspace_virtual_dirs.lock().unwrap().clone(); + let workspaces = self + .workspace_virtual_dirs + .lock() + .expect("workspace_virtual_dirs mutex poisoned") + .clone(); for (workspace, virtual_dirs, matcher) in &workspaces { for virtual_dir in virtual_dirs { for env in find_envs_in_flat_dir(virtual_dir, Some(workspace.clone()), matcher) { From 66b67fae954ee43e8c19a111825533a126591dd6 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 14:03:00 -0700 Subject: [PATCH 14/16] perf: Arc-wrap workspace cache entries; macOS no longer assumed case-insensitive (PR #460) --- crates/pet-hatch/src/lib.rs | 86 ++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index 27604da8..09f4e1c4 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -57,20 +57,28 @@ use serde::Deserialize; /// plugin's `PLUGIN_NAME` in Hatch's source. const VIRTUAL_ENV_SUBDIR: [&str; 2] = ["env", "virtual"]; -/// Per-workspace cache of resolved Hatch virtual directories and the set -/// of declared env names for that workspace. Each entry is -/// `(workspace_root, resolved_virtual_dirs, env_matcher)` and is -/// populated by `configure()`. 
+/// Per-workspace cache entry: workspace root, resolved +/// `dirs.env.virtual` paths, and the precomputed env-name allowlist. /// -/// `env_matcher` is used as a Hatch-specific guard when matching -/// venvs in workspace-configured `dirs.env.virtual` directories: a shared +/// `matcher` is used as a Hatch-specific guard when matching venvs in +/// workspace-configured `dirs.env.virtual` directories: a shared /// directory like `~/.virtualenvs` can contain non-Hatch envs (created by /// virtualenvwrapper, plain `venv`, etc.), so we only claim a venv if its /// leaf directory name matches one of the env names declared in the /// project's Hatch configuration. The matcher pre-normalizes names so the /// `try_from()` hot path avoids per-call `to_lowercase()` / `format!()` /// allocations over the allowlist. -type WorkspaceVirtualDirs = Vec<(PathBuf, Vec, EnvNameMatcher)>; +struct WorkspaceEntry { + workspace: PathBuf, + virtual_dirs: Vec, + matcher: EnvNameMatcher, +} + +/// Per-workspace cache populated by `configure()`. Entries are wrapped in +/// `Arc` so `find()` can snapshot the cache (clone the Vec of Arcs) and +/// release the lock cheaply before doing filesystem I/O — no deep +/// `Vec` / matcher clone per call. +type WorkspaceVirtualDirs = Vec>; pub struct Hatch { /// Default storage directory for Hatch virtual environments — i.e. @@ -132,11 +140,11 @@ impl Locator for Hatch { // — both `virtual_dirs` and `env_names` come from the same // TOML sections, so we read each file once here. 
let (virtual_dirs, env_names) = resolve_workspace_hatch_config(workspace); - new_cache.push(( - workspace.clone(), + new_cache.push(Arc::new(WorkspaceEntry { + workspace: workspace.clone(), virtual_dirs, - EnvNameMatcher::from_names(env_names), - )); + matcher: EnvNameMatcher::from_names(env_names), + })); } } *self @@ -184,17 +192,17 @@ impl Locator for Hatch { .workspace_virtual_dirs .lock() .expect("workspace_virtual_dirs mutex poisoned"); - 'workspaces: for (workspace, virtual_dirs, matcher) in cache.iter() { - for virtual_dir in virtual_dirs { + 'workspaces: for entry in cache.iter() { + for virtual_dir in &entry.virtual_dirs { if prefix_is_directly_under(&prefix, virtual_dir) { let env_name = prefix .file_name() .map(|n| n.to_string_lossy().to_string()) .unwrap_or_default(); - if !matcher.matches(&env_name) { + if !entry.matcher.matches(&env_name) { continue; } - classification = Some((env_name, Some(workspace.clone()))); + classification = Some((env_name, Some(entry.workspace.clone()))); break 'workspaces; } } @@ -238,16 +246,22 @@ impl Locator for Hatch { } // 2. Walk project-local virtual directories for each configured workspace. - // Apply the same env-name guard as `try_from()` so shared directories - // (e.g. `~/.virtualenvs`) only yield the workspace's declared envs. - let workspaces = self + // Snapshot the cache (cheap `Arc` clones) under the lock, then + // release the lock before doing filesystem I/O. Apply the same + // env-name guard as `try_from()` so shared directories (e.g. + // `~/.virtualenvs`) only yield the workspace's declared envs. 
+ let workspaces: Vec> = self .workspace_virtual_dirs .lock() .expect("workspace_virtual_dirs mutex poisoned") .clone(); - for (workspace, virtual_dirs, matcher) in &workspaces { - for virtual_dir in virtual_dirs { - for env in find_envs_in_flat_dir(virtual_dir, Some(workspace.clone()), matcher) { + for entry in &workspaces { + for virtual_dir in &entry.virtual_dirs { + for env in find_envs_in_flat_dir( + virtual_dir, + Some(entry.workspace.clone()), + &entry.matcher, + ) { reporter.report_environment(&env); } } @@ -616,9 +630,13 @@ fn find_envs_in_default_storage(storage: &Path) -> Vec { /// (`try_from()` / `find_envs_in_flat_dir()`) avoids per-call `format!()` /// allocations. /// -/// On case-insensitive filesystems (Windows / default macOS) the on-disk -/// leaf may differ in case from the TOML key, so we lowercase both sides -/// on those platforms at construction time. +/// On case-insensitive filesystems (default on Windows) the on-disk leaf +/// may differ in case from the TOML key, so we lowercase both sides on +/// Windows at construction time. macOS volumes can be either case-sensitive +/// (default APFS) or case-insensitive (HFS+ / case-insensitive APFS), and +/// `norm_case()` itself does not case-fold on macOS — so we keep the +/// allowlist comparison byte-exact there to stay consistent with how paths +/// are normalized elsewhere in this crate. #[derive(Clone, Default, Debug)] struct EnvNameMatcher { /// (normalized_name, normalized_name + ".") pairs. 
@@ -626,11 +644,11 @@ struct EnvNameMatcher { } fn normalize_env_name(s: &str) -> String { - #[cfg(any(windows, target_os = "macos"))] + #[cfg(windows)] { s.to_lowercase() } - #[cfg(not(any(windows, target_os = "macos")))] + #[cfg(not(windows))] { s.to_string() } @@ -802,11 +820,11 @@ mod tests { let env_names = resolve_project_env_names(workspace); Hatch { default_virtual_dir, - workspace_virtual_dirs: Arc::new(Mutex::new(vec![( - workspace.to_path_buf(), + workspace_virtual_dirs: Arc::new(Mutex::new(vec![Arc::new(WorkspaceEntry { + workspace: workspace.to_path_buf(), virtual_dirs, - EnvNameMatcher::from_names(env_names), - )])), + matcher: EnvNameMatcher::from_names(env_names), + })])), } } @@ -1126,8 +1144,8 @@ mod tests { let cached = locator.workspace_virtual_dirs.lock().unwrap().clone(); assert_eq!(cached.len(), 1); - assert_eq!(cached[0].0, project); - assert_eq!(cached[0].1, vec![norm_case(&virtual_dir)]); + assert_eq!(cached[0].workspace, project); + assert_eq!(cached[0].virtual_dirs, vec![norm_case(&virtual_dir)]); } #[cfg(target_os = "linux")] @@ -1424,9 +1442,9 @@ mod tests { assert_eq!(envs.len(), 2); } - #[cfg(any(windows, target_os = "macos"))] + #[cfg(windows)] #[test] - fn env_name_matches_is_case_insensitive_on_case_folding_filesystems() { + fn env_name_matches_is_case_insensitive_on_windows() { let mut names = HashSet::new(); names.insert("Default".to_string()); let matcher = EnvNameMatcher::from_names(names); From 95c95163d2d867a32ccb8ef0b4e3d61ec1552d9a Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 14:26:12 -0700 Subject: [PATCH 15/16] perf: snapshot workspace cache in try_from() to minimize lock hold (PR #460) --- crates/pet-hatch/src/lib.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index 09f4e1c4..ca7fd5ad 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -188,10 +188,14 @@ impl Locator for Hatch { // 
unrelated virtualenvwrapper / `venv` env in the same directory // would be misclassified as Hatch-managed. if classification.is_none() { - let cache = self + // Snapshot the cache (cheap `Arc` clones) under the lock and + // release it before iterating, to keep `configure()` from being + // blocked by callers on the hot identification path. + let cache: Vec> = self .workspace_virtual_dirs .lock() - .expect("workspace_virtual_dirs mutex poisoned"); + .expect("workspace_virtual_dirs mutex poisoned") + .clone(); 'workspaces: for entry in cache.iter() { for virtual_dir in &entry.virtual_dirs { if prefix_is_directly_under(&prefix, virtual_dir) { From 7509df2d67f27fbb9a8d7bb662ce23f58b948447 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Mon, 11 May 2026 19:16:38 -0700 Subject: [PATCH 16/16] test: canonicalize temp roots in pet-hatch tests for Windows CI (PR #460) --- crates/pet-hatch/src/lib.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/crates/pet-hatch/src/lib.rs b/crates/pet-hatch/src/lib.rs index ca7fd5ad..b1db2f8d 100644 --- a/crates/pet-hatch/src/lib.rs +++ b/crates/pet-hatch/src/lib.rs @@ -795,6 +795,25 @@ mod tests { .unwrap(); } + /// Canonicalize a temp path for test comparisons. On Windows, `TempDir` + /// roots can come back as 8.3 short names (e.g. `C:\Users\RUNNER~1\...`) + /// while paths surfaced via `fs::read_dir` or env-var expansion are in + /// long form (`C:\Users\runneradmin\...`). Without this both sides of + /// `PathBuf` equality checks would not match on CI runners. The + /// `\\?\` verbatim prefix added by `fs::canonicalize` is stripped so the + /// resulting path matches what production code produces. 
+ fn canonicalize_for_test(p: &Path) -> PathBuf { + let canon = fs::canonicalize(p).unwrap_or_else(|_| p.to_path_buf()); + #[cfg(windows)] + { + let s = canon.to_string_lossy().to_string(); + if let Some(stripped) = s.strip_prefix(r"\\?\") { + return PathBuf::from(stripped); + } + } + canon + } + fn write_python_exe(prefix: &Path) -> PathBuf { let bin = prefix.join(if cfg!(windows) { "Scripts" } else { "bin" }); fs::create_dir_all(&bin).unwrap(); @@ -1268,6 +1287,10 @@ mod tests { let temp = TempDir::new().unwrap(); let fake_home = temp.path().join("home"); fs::create_dir_all(&fake_home).unwrap(); + // Canonicalize so 8.3 short names on Windows CI runners don't + // cause spurious path mismatches when comparing against the + // value produced by `expand_path` + `norm_case`. + let fake_home = canonicalize_for_test(&fake_home); let prev_home = std::env::var_os("HOME"); let prev_user_profile = std::env::var_os("USERPROFILE"); @@ -1400,6 +1423,10 @@ mod tests { let temp = TempDir::new().unwrap(); let shared = temp.path().join("shared"); fs::create_dir_all(&shared).unwrap(); + // Canonicalize so 8.3 short names on Windows CI runners don't + // cause spurious path mismatches when comparing prefixes that + // were surfaced via `fs::read_dir`. + let shared = canonicalize_for_test(&shared); // Hatch-managed env. let hatch_env = shared.join("default");