From 544367ee9083fb684cb0c899419dc9806f6857c4 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 12 Jun 2026 18:55:45 +0200 Subject: [PATCH 1/5] Read metadata of installed packages lazily --- crates/oak_db/src/inputs.rs | 34 +++-- crates/oak_db/src/lib.rs | 1 + crates/oak_db/src/package.rs | 112 ++++++++++++++ crates/oak_db/src/tests/db.rs | 11 +- crates/oak_db/src/tests/file_imports.rs | 24 +-- crates/oak_db/src/tests/file_imports_at.rs | 11 +- crates/oak_db/src/tests/file_resolve.rs | 8 +- crates/oak_db/src/tests/file_resolve_at.rs | 28 ++-- crates/oak_db/src/tests/file_root.rs | 11 +- crates/oak_db/src/tests/inputs.rs | 11 +- crates/oak_db/src/tests/package_resolve.rs | 6 +- crates/oak_ide/tests/integration/rename.rs | 13 +- crates/oak_ide/tests/integration/support.rs | 10 +- .../oak_package_metadata/src/description.rs | 6 +- crates/oak_package_metadata/src/namespace.rs | 2 +- crates/oak_scan/src/inputs.rs | 35 +++-- crates/oak_scan/src/library.rs | 51 +++++-- crates/oak_scan/src/packages.rs | 46 +++--- crates/oak_scan/src/scheduler.rs | 5 +- crates/oak_scan/src/tests/stale.rs | 14 +- crates/oak_scan/tests/integration/library.rs | 138 ++++++++++++++---- 21 files changed, 407 insertions(+), 170 deletions(-) create mode 100644 crates/oak_db/src/package.rs diff --git a/crates/oak_db/src/inputs.rs b/crates/oak_db/src/inputs.rs index 4c697196d6..15b9b2ce4e 100644 --- a/crates/oak_db/src/inputs.rs +++ b/crates/oak_db/src/inputs.rs @@ -5,6 +5,7 @@ use oak_package_metadata::namespace::Namespace; use crate::Db; use crate::File; +use crate::FileRevision; /// Salsa-tracked root directory. /// @@ -200,12 +201,28 @@ pub struct Package { // `Package` directly because salsa inputs are lifetime-free. #[returns(ref)] pub name: String, - /// Installed-package version (from `DESCRIPTION`). `None` for - /// workspace packages. + /// Mtime of the package's `DESCRIPTION` file, or [`FileRevision::zero`] + /// when it can't be stat'd. Drives the lazy [`Package::description`] + /// query (and the `version` / `collation` derivations on top of it), the + /// same way `namespace_revision` drives [`Package::namespace`]. We stat + /// `DESCRIPTION` at scan time but parse it only on demand. + pub description_revision: FileRevision, + /// Mtime of the package's `NAMESPACE` file, or [`FileRevision::zero`] + /// when it can't be stat'd. The lazy [`Package::namespace`] query reads + /// it so a watcher that bumps it on a `NAMESPACE` change forces the next + /// parse to re-read disk, exactly like [`File::revision`] drives + /// [`File::source_text`]. We don't read or parse `NAMESPACE` at scan + /// time, only stat it, so installed packages the user never imports cost + /// nothing beyond the stat. + pub namespace_revision: FileRevision, + /// In-memory `NAMESPACE`, checked by [`Package::namespace`] before it + /// touches disk. Mirrors [`File::source_text_override`]: `None` means + /// "read from disk". The scanners always leave it `None`. It's the + /// injection point unit tests use to supply a namespace for a synthetic + /// path with no file behind it, and the natural hook for an editor + /// editing a package's `NAMESPACE` live. #[returns(ref)] - pub version: Option, - #[returns(ref)] - pub namespace: Namespace, + pub namespace_override: Option, /// R source files belonging to this package (the `R/*.R` files), in /// R's load order. When DESCRIPTION's `Collate:` directive is /// present, this is exactly the files it lists, in that order; @@ -235,11 +252,4 @@ pub struct Package { /// stays `Some(self)`, file lives in one of the two containers. #[returns(ref)] pub scripts: Vec, - /// The basename ordering from `DESCRIPTION`'s `Collate` field, if - /// present. `None` when the field is absent (R defaults to - /// alphabetical load order). Changes only when `DESCRIPTION` - /// itself changes, so this anchor is independent of `files` (which - /// bumps when R/ files are added or removed). - #[returns(ref)] - pub collation: Option>, } diff --git a/crates/oak_db/src/lib.rs b/crates/oak_db/src/lib.rs index 777b530b12..563d50ef53 100644 --- a/crates/oak_db/src/lib.rs +++ b/crates/oak_db/src/lib.rs @@ -9,6 +9,7 @@ mod identifier; mod imports; mod inputs; mod name; +mod package; mod package_resolve; mod parse; mod storage; diff --git a/crates/oak_db/src/package.rs b/crates/oak_db/src/package.rs new file mode 100644 index 0000000000..ece86fab53 --- /dev/null +++ b/crates/oak_db/src/package.rs @@ -0,0 +1,112 @@ +use std::fs; +use std::io; + +use oak_package_metadata::description::Description; +use oak_package_metadata::namespace::Namespace; +use stdext::result::ResultExt; + +use crate::Db; +use crate::Package; + +#[salsa::tracked] +impl Package { + /// The package's parsed `NAMESPACE`. + /// + /// Returns the in-memory override if one is set + /// (`namespace_override`), otherwise reads `NAMESPACE` from disk and + /// parses it. Lazy and tracked so we only pay the read and the R-parse + /// for packages whose imports actually get resolved. Parsing every + /// installed package's `NAMESPACE` eagerly would dominate the cost of + /// scanning a library with hundreds of packages, so we defer it. + /// + /// A missing or unparseable `NAMESPACE` yields an empty `Namespace`. + #[salsa::tracked(returns(ref))] + pub fn namespace(self, db: &dyn Db) -> Namespace { + if let Some(namespace) = self.namespace_override(db) { + return namespace.clone(); + } + + // Reading `namespace_revision` makes this memo depend on it even though + // the value isn't used here: bumping the revision is what forces a + // re-read. + let _ = self.namespace_revision(db); + + let Some(dir) = self + .description_path(db) + .as_path() + .and_then(|path| path.parent()) + else { + return Namespace::default(); + }; + + let namespace_path = dir.join("NAMESPACE"); + let text = match fs::read_to_string(namespace_path.as_std_path()) { + Ok(text) => text, + // A package needn't ship a `NAMESPACE`, so absence is the normal + // case and stays quiet. A file that exists but can't be read is + // logged so the failure isn't silently read as "no namespace". + Err(err) if err.kind() == io::ErrorKind::NotFound => return Namespace::default(), + Err(err) => { + log::error!("Failed to read `{namespace_path}`: {err:?}"); + return Namespace::default(); + }, + }; + + Namespace::parse(&text).log_err().unwrap_or_default() + } + + /// The package's `Version:`, parsed lazily from `DESCRIPTION`. `None` + /// when the file is missing or has no version. + /// + /// A narrow query over [`Package::description`]: editing `DESCRIPTION` + /// without changing `Version:` backdates here, so downstream isn't + /// disturbed. + #[salsa::tracked(returns(ref))] + pub fn version(self, db: &dyn Db) -> Option { + self.description(db) + .as_ref() + .map(|description| description.version.clone()) + } + + /// The basename order from `DESCRIPTION`'s `Collate:` field, parsed + /// lazily. `None` when the field (or the file) is absent. Narrow query + /// over [`Package::description`], same backdating story as + /// [`Package::version`]. + #[salsa::tracked(returns(ref))] + pub fn collation(self, db: &dyn Db) -> Option> { + self.description(db) + .as_ref() + .and_then(|description| description.collate()) + } + + /// The package's parsed `DESCRIPTION`, or `None` when it's missing or + /// unparseable. + /// + /// Lazy and tracked, keyed on `description_revision`. Reading is deferred + /// so the library scanner can register an installed package without + /// touching `DESCRIPTION` at all (it takes the name from the directory). + /// + /// `Description` is `PartialEq`, so salsa backdates this query when a + /// `description_revision` bump turns out to leave the parsed content + /// unchanged. The narrow `version` / `collation` queries on top firewall + /// the other direction: a real `DESCRIPTION` edit that doesn't touch + /// `Version:` / `Collate:` re-runs this query but backdates there. + #[salsa::tracked(returns(ref))] + pub(crate) fn description(self, db: &dyn Db) -> Option { + let _ = self.description_revision(db); + + let path = self.description_path(db).as_path()?; + let text = match fs::read_to_string(path.as_std_path()) { + Ok(text) => text, + // A missing `DESCRIPTION` is the normal "gone after a rescan" case + // and stays quiet. A file that exists but can't be read is logged + // rather than silently treated as absent. + Err(err) if err.kind() == io::ErrorKind::NotFound => return None, + Err(err) => { + log::error!("Failed to read `{path}`: {err:?}"); + return None; + }, + }; + Description::parse(&text).log_err() + } +} diff --git a/crates/oak_db/src/tests/db.rs b/crates/oak_db/src/tests/db.rs index f79fdde73a..68724435b9 100644 --- a/crates/oak_db/src/tests/db.rs +++ b/crates/oak_db/src/tests/db.rs @@ -1,6 +1,5 @@ use std::collections::HashSet; -use oak_package_metadata::namespace::Namespace; use salsa::Setter; use crate::tests::test_db::file_path; @@ -36,11 +35,11 @@ fn test_file_by_path_finds_workspace_package_file() { &db, file_path("proj/DESCRIPTION"), "mypkg".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), vec![], Vec::new(), - None, ); let file = File::new( &db, @@ -64,11 +63,11 @@ fn test_file_by_path_finds_library_package_file() { &db, file_path("libs/foo/DESCRIPTION"), "foo".to_string(), - Some("1.0.0".to_string()), - Namespace::default(), + FileRevision::zero(), + FileRevision::zero(), + None, vec![], Vec::new(), - None, ); let file = File::new( &db, diff --git a/crates/oak_db/src/tests/file_imports.rs b/crates/oak_db/src/tests/file_imports.rs index 4436ccd42f..c744e3ca80 100644 --- a/crates/oak_db/src/tests/file_imports.rs +++ b/crates/oak_db/src/tests/file_imports.rs @@ -22,11 +22,11 @@ fn make_installed(db: &mut TestDb, name: &str) -> (crate::Root, Package) { db, file_path(&format!("libs/{name}/DESCRIPTION")), name.to_string(), - Some("1.0.0".to_string()), - Namespace::default(), + FileRevision::zero(), + FileRevision::zero(), + None, Vec::new(), Vec::new(), - None, ); root.set_packages(db).to(vec![pkg]); (root, pkg) @@ -142,11 +142,11 @@ fn test_package_file_emits_namespace_and_collation_layers() { &db, file_path("w/pkg/DESCRIPTION"), "pkg".to_string(), - None, - namespace, + FileRevision::zero(), + FileRevision::zero(), + Some(namespace), Vec::new(), Vec::new(), - None, ); let first = File::new( &db, @@ -215,11 +215,11 @@ fn test_testthat_file_sees_helpers_package_and_testthat() { &db, file_path("w/pkg/DESCRIPTION"), "pkg".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), Vec::new(), Vec::new(), - None, ); let r_file = File::new( @@ -292,11 +292,11 @@ fn test_package_r_file_does_not_take_testthat_path() { &db, file_path("w/pkg/DESCRIPTION"), "pkg".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), Vec::new(), Vec::new(), - None, ); let r_file = File::new( &db, @@ -338,11 +338,11 @@ fn test_testthat_file_includes_top_level_library_calls() { &db, file_path("w/pkg/DESCRIPTION"), "pkg".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), Vec::new(), Vec::new(), - None, ); let r_file = File::new( &db, diff --git a/crates/oak_db/src/tests/file_imports_at.rs b/crates/oak_db/src/tests/file_imports_at.rs index 8a372b1420..be115179c0 100644 --- a/crates/oak_db/src/tests/file_imports_at.rs +++ b/crates/oak_db/src/tests/file_imports_at.rs @@ -1,5 +1,4 @@ use biome_rowan::TextSize; -use oak_package_metadata::namespace::Namespace; use salsa::Setter; use crate::tests::test_db::file_path; @@ -43,11 +42,11 @@ fn install_packages(db: &mut TestDb, names: &[&str]) -> Vec { db, file_path(&format!("libs/{name}/DESCRIPTION")), name.to_string(), - Some("1.0.0".to_string()), - Namespace::default(), + FileRevision::zero(), + FileRevision::zero(), + None, Vec::new(), Vec::new(), - None, ); root.set_packages(db).to(vec![pkg]); roots.push(root); @@ -65,11 +64,11 @@ fn install_workspace_package(db: &mut TestDb, name: &str) -> Package { db, file_path(&format!("workspace/{name}/DESCRIPTION")), name.to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), Vec::new(), Vec::new(), - None, ); root.set_packages(db).to(vec![pkg]); db.workspace_roots().set_roots(db).to(vec![root]); diff --git a/crates/oak_db/src/tests/file_resolve.rs b/crates/oak_db/src/tests/file_resolve.rs index 7f539a3d0f..2ef14efce8 100644 --- a/crates/oak_db/src/tests/file_resolve.rs +++ b/crates/oak_db/src/tests/file_resolve.rs @@ -547,11 +547,11 @@ fn test_resolve_unbound_name_in_package_does_not_cycle() { &db, file_path("/w/pkg/DESCRIPTION"), "pkg".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - oak_package_metadata::namespace::Namespace::default(), Vec::new(), Vec::new(), - None, ); let a = File::new( @@ -589,11 +589,11 @@ fn test_resolve_walks_package_files_for_lazy_lookups() { &db, file_path("/w/pkg/DESCRIPTION"), "pkg".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - oak_package_metadata::namespace::Namespace::default(), Vec::new(), Vec::new(), - None, ); let a = File::new( diff --git a/crates/oak_db/src/tests/file_resolve_at.rs b/crates/oak_db/src/tests/file_resolve_at.rs index cae5a22ed6..2b906b04bb 100644 --- a/crates/oak_db/src/tests/file_resolve_at.rs +++ b/crates/oak_db/src/tests/file_resolve_at.rs @@ -66,11 +66,11 @@ fn install_workspace_package(db: &mut TestDb, name: &str) -> (Root, Package) { db, file_path(&format!("workspace/{name}/DESCRIPTION")), name.to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), Vec::new(), Vec::new(), - None, ); root.set_packages(db).to(vec![pkg]); db.workspace_roots().set_roots(db).to(vec![root]); @@ -363,9 +363,9 @@ fn install_package( exports: &[&str], files: &[(&str, &str)], ) -> (Root, Package) { - let (prefix, version) = match kind { - RootKind::Library => ("library", Some("1.0.0".to_string())), - RootKind::Workspace => ("workspace", None), + let prefix = match kind { + RootKind::Library => "library", + RootKind::Workspace => "workspace", }; let root = Root::new( db, @@ -382,11 +382,11 @@ fn install_package( db, file_path(&format!("{prefix}/{name}/DESCRIPTION")), name.to_string(), - version, - namespace, + FileRevision::zero(), + FileRevision::zero(), + Some(namespace), Vec::new(), Vec::new(), - None, ); let pkg_files: Vec = files .iter() @@ -484,11 +484,11 @@ fn test_namespace_import_pkg_makes_export_resolve_in_package_file() { &db, file_path("workspace/mypkg/DESCRIPTION"), "mypkg".to_string(), - None, - ns, + FileRevision::zero(), + FileRevision::zero(), + Some(ns), Vec::new(), Vec::new(), - None, ); let source = "bar\n"; let ws_file = File::new( @@ -536,11 +536,11 @@ fn test_namespace_importfrom_makes_export_resolve_in_package_file() { &db, file_path("workspace/mypkg/DESCRIPTION"), "mypkg".to_string(), - None, - ns, + FileRevision::zero(), + FileRevision::zero(), + Some(ns), Vec::new(), Vec::new(), - None, ); let ws_file = File::new( &db, diff --git a/crates/oak_db/src/tests/file_root.rs b/crates/oak_db/src/tests/file_root.rs index 413749cc2d..b85d5e7420 100644 --- a/crates/oak_db/src/tests/file_root.rs +++ b/crates/oak_db/src/tests/file_root.rs @@ -1,4 +1,3 @@ -use oak_package_metadata::namespace::Namespace; use salsa::Setter; use crate::tests::test_db::file_path; @@ -70,11 +69,11 @@ fn test_root_dispatches_through_library_package_when_set() { &db, file_path("libs/mypkg/DESCRIPTION"), "mypkg".to_string(), - Some("1.0.0".to_string()), - Namespace::default(), + FileRevision::zero(), + FileRevision::zero(), + None, Vec::new(), Vec::new(), - None, ); pkg_root.set_packages(&mut db).to(vec![pkg]); db.library_roots().set_roots(&mut db).to(vec![pkg_root]); @@ -103,11 +102,11 @@ fn test_root_dispatches_through_workspace_package_when_set() { &db, file_path("proj/DESCRIPTION"), "mypkg".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), Vec::new(), Vec::new(), - None, ); pkg_root.set_packages(&mut db).to(vec![pkg]); db.workspace_roots().set_roots(&mut db).to(vec![pkg_root]); diff --git a/crates/oak_db/src/tests/inputs.rs b/crates/oak_db/src/tests/inputs.rs index 4e8f00e33f..9fd41a2699 100644 --- a/crates/oak_db/src/tests/inputs.rs +++ b/crates/oak_db/src/tests/inputs.rs @@ -1,4 +1,3 @@ -use oak_package_metadata::namespace::Namespace; use salsa::Setter; use crate::tests::test_db::file_path; @@ -18,11 +17,11 @@ fn make_workspace_package(db: &mut OakDatabase, name: &str) -> (Root, Package) { db, file_path(&format!("workspace/{name}/DESCRIPTION")), name.to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), Vec::new(), Vec::new(), - None, ); root.set_packages(db).to(vec![pkg]); (root, pkg) @@ -34,11 +33,11 @@ fn make_installed_package(db: &mut OakDatabase, name: &str) -> (Root, Package) { db, file_path(&format!("libs/{name}/DESCRIPTION")), name.to_string(), - Some("1.0.0".to_string()), - Namespace::default(), + FileRevision::zero(), + FileRevision::zero(), + None, Vec::new(), Vec::new(), - None, ); root.set_packages(db).to(vec![pkg]); (root, pkg) diff --git a/crates/oak_db/src/tests/package_resolve.rs b/crates/oak_db/src/tests/package_resolve.rs index 5ea796cd22..c3046284f8 100644 --- a/crates/oak_db/src/tests/package_resolve.rs +++ b/crates/oak_db/src/tests/package_resolve.rs @@ -31,11 +31,11 @@ fn setup_package( db, file_path(&format!("workspace/{pkg_name}/DESCRIPTION")), pkg_name.to_string(), - None, - namespace, + FileRevision::zero(), + FileRevision::zero(), + Some(namespace), Vec::new(), Vec::new(), - None, ); let file_entities: Vec = files diff --git a/crates/oak_ide/tests/integration/rename.rs b/crates/oak_ide/tests/integration/rename.rs index b4994a15df..0a34a18c1f 100644 --- a/crates/oak_ide/tests/integration/rename.rs +++ b/crates/oak_ide/tests/integration/rename.rs @@ -10,7 +10,6 @@ use oak_db::Root; use oak_db::RootKind; use oak_ide::prepare_rename; use oak_ide::rename; -use oak_package_metadata::namespace::Namespace; use salsa::Setter; use url::Url; @@ -251,7 +250,7 @@ fn place_in_workspace_scripts(db: &mut OakDatabase, files: Vec) { /// package back-pointer set, and register it under a workspace root. Returns /// the created `File`s in order. fn build_workspace_package(db: &mut OakDatabase, files: &[(&str, &str)]) -> Vec { - let pkg = empty_package(db, "file:///project/pkg/DESCRIPTION", None); + let pkg = empty_package(db, "file:///project/pkg/DESCRIPTION"); let created: Vec = files .iter() .map(|(name, contents)| { @@ -276,7 +275,7 @@ fn build_workspace_package(db: &mut OakDatabase, files: &[(&str, &str)]) -> Vec< /// Build a single installed-package file under a library root. fn build_library_package_file(db: &mut OakDatabase, contents: &str) -> File { - let pkg = empty_package(db, "file:///lib/pkg/DESCRIPTION", Some("1.0".to_string())); + let pkg = empty_package(db, "file:///lib/pkg/DESCRIPTION"); let url = FilePath::from_url(&Url::parse("file:///lib/pkg/R/foo.R").unwrap()); let file = File::new( db, @@ -293,15 +292,15 @@ fn build_library_package_file(db: &mut OakDatabase, contents: &str) -> File { file } -fn empty_package(db: &OakDatabase, description_url: &str, version: Option) -> Package { +fn empty_package(db: &OakDatabase, description_url: &str) -> Package { Package::new( db, FilePath::from_url(&Url::parse(description_url).unwrap()), "pkg".to_string(), - version, - Namespace::default(), + FileRevision::zero(), + FileRevision::zero(), + None, vec![], vec![], - None, ) } diff --git a/crates/oak_ide/tests/integration/support.rs b/crates/oak_ide/tests/integration/support.rs index 3853fb5387..93299479e4 100644 --- a/crates/oak_ide/tests/integration/support.rs +++ b/crates/oak_ide/tests/integration/support.rs @@ -89,18 +89,16 @@ fn install_pkg( file_name: &str, contents: &str, ) -> File { - let (pkg_url, file_url, root_url, version) = match kind { + let (pkg_url, file_url, root_url) = match kind { RootKind::Library => ( lib_url(&format!("{name}/DESCRIPTION")), lib_url(&format!("{name}/R/{file_name}")), lib_url(name), - Some("1.0.0".to_string()), ), RootKind::Workspace => ( workspace_url(&format!("{name}/DESCRIPTION")), workspace_url(&format!("{name}/R/{file_name}")), workspace_url(name), - None, ), }; let namespace = Namespace { @@ -111,11 +109,11 @@ fn install_pkg( db, FilePath::from_url(&pkg_url), name.to_string(), - version, - namespace, + FileRevision::zero(), + FileRevision::zero(), + Some(namespace), Vec::new(), Vec::new(), - None, ); let file = File::new( db, diff --git a/crates/oak_package_metadata/src/description.rs b/crates/oak_package_metadata/src/description.rs index fe27bc4a6f..be6c2208f2 100644 --- a/crates/oak_package_metadata/src/description.rs +++ b/crates/oak_package_metadata/src/description.rs @@ -1,7 +1,7 @@ use crate::dcf::Dcf; /// Parsed DESCRIPTION file -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub struct Description { pub name: String, pub version: String, @@ -17,12 +17,12 @@ pub struct Description { pub fields: Dcf, } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum Repository { CRAN, } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum Priority { Base, Recommended, diff --git a/crates/oak_package_metadata/src/namespace.rs b/crates/oak_package_metadata/src/namespace.rs index 9edefe38ac..e81efc1843 100644 --- a/crates/oak_package_metadata/src/namespace.rs +++ b/crates/oak_package_metadata/src/namespace.rs @@ -8,7 +8,7 @@ use oak_core::syntax_ext::RIdentifierExt; use stdext::SortedVec; /// Parsed NAMESPACE file -#[derive(Default, Clone, Debug)] +#[derive(Default, Clone, Debug, PartialEq, Eq)] pub struct Namespace { /// Names of objects exported with `export()` pub exports: SortedVec, diff --git a/crates/oak_scan/src/inputs.rs b/crates/oak_scan/src/inputs.rs index 3c5b0482ff..25cfa60880 100644 --- a/crates/oak_scan/src/inputs.rs +++ b/crates/oak_scan/src/inputs.rs @@ -27,7 +27,6 @@ use oak_db::File; use oak_db::FileRevision; use oak_db::Package; use oak_db::Root; -use oak_package_metadata::namespace::Namespace; use salsa::Setter; use crate::lookup::package_by_path; @@ -181,9 +180,16 @@ pub trait RootExt { /// /// Identity is keyed on `description_path`: if `self.packages` /// already contains a `Package` with this URL, that entity is - /// reused and its `name` / `version` / `namespace` / `collation` - /// fields are updated in place. Salsa backdates each setter call - /// when the value doesn't actually change. + /// reused and its `name` / `description_revision` / + /// `namespace_revision` fields are updated in place. Salsa backdates + /// each setter call when the value doesn't actually change. + /// + /// `description_revision` and `namespace_revision` are the `DESCRIPTION` + /// and `NAMESPACE` mtimes. The version, collation, and namespace are + /// parsed lazily by [`Package::version`] / [`Package::collation`] / + /// [`Package::namespace`], so the scanner never reads or parses those + /// files (the workspace scanner reads `DESCRIPTION` separately, for the + /// name and file ordering, before calling this). /// /// Files are reused by URL via [`Db::file_by_path`]; see /// [`FileEntry`] for the content-preservation semantics. Wiring @@ -194,11 +200,10 @@ pub trait RootExt { db: &mut DB, description_path: FilePath, name: String, - version: Option, - namespace: Namespace, + description_revision: FileRevision, + namespace_revision: FileRevision, files: Vec, scripts: Vec, - collation: Option>, ) -> Package; /// Drop this root from its live container, rehoming its files and packages @@ -227,11 +232,10 @@ impl RootExt for Root { db: &mut DB, description_path: FilePath, name: String, - version: Option, - namespace: Namespace, + description_revision: FileRevision, + namespace_revision: FileRevision, files: Vec, scripts: Vec, - collation: Option>, ) -> Package { // `package_by_path()` finds the existing entity whether it's already // in `self.packages` (rescan, common path) or in @@ -240,9 +244,8 @@ impl RootExt for Root { let pkg = match package_by_path(db, &description_path) { Some(p) => { p.set_name(db).to(name); - p.set_version(db).to(version); - p.set_namespace(db).to(namespace); - p.set_collation(db).to(collation); + p.set_description_revision(db).to(description_revision); + p.set_namespace_revision(db).to(namespace_revision); remove_from_stale_packages(db, p); p }, @@ -250,11 +253,11 @@ impl RootExt for Root { db, description_path, name, - version, - namespace, + description_revision, + namespace_revision, + None, Vec::new(), Vec::new(), - collation, ), }; diff --git a/crates/oak_scan/src/library.rs b/crates/oak_scan/src/library.rs index 6300a6e22c..431308027d 100644 --- a/crates/oak_scan/src/library.rs +++ b/crates/oak_scan/src/library.rs @@ -25,7 +25,7 @@ use salsa::Setter; use walkdir::WalkDir; use crate::inputs::RootExt; -use crate::packages::read_package_metadata; +use crate::packages::file_revision; /// Reconcile `LibraryRoots` to exactly `paths`. Called through /// [`crate::DbScan::set_library_paths`]. Order in `LibraryRoots.roots` @@ -82,21 +82,48 @@ fn scan_new_library_path(db: &mut DB, scan_path: &Path, path: if !entry.file_type().is_dir() { continue; } - let Some(pkg) = read_package_metadata(entry.path()) else { + let Some(pkg) = register_installed_package(db, root, entry.path()) else { continue; }; - packages.push(root.set_package( - db, - pkg.description_path, - pkg.name, - pkg.version, - pkg.namespace, - pkg.files, - pkg.scripts, - pkg.collation, - )); + packages.push(pkg); } root.set_packages(db).to(packages); root } + +/// Register one installed package under `root` without reading any of its +/// files. +/// +/// R installs packages at `//`, so the directory basename +/// is the package name. That lets us skip reading `DESCRIPTION` here: we only +/// confirm it exists (that's what marks the directory as a package) and stat +/// `DESCRIPTION` / `NAMESPACE` for the revisions that drive the lazy metadata +/// queries. Version, collation, and namespace are parsed on demand, only if +/// the package is ever imported. Returns `None` for a directory with no +/// `DESCRIPTION` or a non-UTF8 name. +fn register_installed_package( + db: &mut DB, + root: Root, + package_dir: &Path, +) -> Option { + let description_file = package_dir.join("DESCRIPTION"); + if !description_file.is_file() { + return None; + } + + let name = package_dir.file_name()?.to_str()?.to_string(); + let description_revision = file_revision(&description_file); + let namespace_revision = file_revision(&package_dir.join("NAMESPACE")); + let description_path = FilePath::from_path_buf(description_file)?; + + Some(root.set_package( + db, + description_path, + name, + description_revision, + namespace_revision, + Vec::new(), + Vec::new(), + )) +} diff --git a/crates/oak_scan/src/packages.rs b/crates/oak_scan/src/packages.rs index c7247ceaa6..533482e6e2 100644 --- a/crates/oak_scan/src/packages.rs +++ b/crates/oak_scan/src/packages.rs @@ -13,7 +13,6 @@ use filetime::FileTime; use ignore::WalkBuilder; use oak_db::FileRevision; use oak_package_metadata::description::Description; -use oak_package_metadata::namespace::Namespace; use stdext::result::ResultExt; use crate::inputs::FileEntry; @@ -32,8 +31,13 @@ pub(crate) struct PackageEntry { /// same `Package:` field, and dedup picks one of them per root. pub description_path: FilePath, pub name: String, - pub version: Option, - pub namespace: Namespace, + /// Mtime of `DESCRIPTION`, stat'd during the walk. Drives the lazy + /// `Package::description()` query (and `version` / `collation` on top). + pub description_revision: FileRevision, + /// Mtime of the package's `NAMESPACE`, stat'd during the walk. Drives + /// the lazy `Package::namespace()` query. That query is the only place + /// `NAMESPACE` gets read and parsed. The walk only stats it. + pub namespace_revision: FileRevision, /// `R/*.R` files: the package's loadable namespace. pub files: Vec, /// R files inside the package directory but outside `R/`: tests/, @@ -44,32 +48,40 @@ pub(crate) struct PackageEntry { pub collation: Option>, } -/// Read a package's `DESCRIPTION` / `NAMESPACE` metadata. Returns `None` if +/// Read a package's `DESCRIPTION` for its name and `Collate:` order, and stat +/// `DESCRIPTION` / `NAMESPACE` for their revisions. Returns `None` if /// `DESCRIPTION` is missing or malformed. The returned entry has empty `files` /// and `scripts`; callers fill those separately. /// -/// The library scanner uses this directly: installed packages have no `.R` -/// sources under `R/` (it holds the lazy-load db), so their `files` come from -/// a cache later, not from a directory scan. The workspace scanner wraps this -/// in [`read_workspace_package`], which discovers sources by walking the tree. +/// `NAMESPACE` is only stat'd here, never read or parsed. The lazy +/// [`oak_db::Package::namespace`] query does the parse. `DESCRIPTION` is read +/// and parsed because the walk needs the `Package:` name (the workspace +/// directory name isn't authoritative) and the `Collate:` order to sort +/// `R/*.R`. The parsed `collation` only orders this walk's files. It isn't +/// pushed to salsa. [`oak_db::Package::version`] and +/// [`oak_db::Package::collation`] re-read `DESCRIPTION` lazily off the +/// revision. +/// +/// Workspace-only: [`read_workspace_package`] wraps this and discovers sources +/// by walking the tree. The library scanner skips it entirely and registers +/// installed packages without reading `DESCRIPTION` (it takes the name from +/// the directory). pub(crate) fn read_package_metadata(package_dir: &Path) -> Option { - let description_path = package_dir.join("DESCRIPTION"); - let description_text = fs::read_to_string(&description_path).ok()?; + let description_file = package_dir.join("DESCRIPTION"); + let description_text = fs::read_to_string(&description_file).ok()?; let description = Description::parse(&description_text).log_err()?; - let description_path = FilePath::from_path_buf(description_path)?; - let namespace = fs::read_to_string(package_dir.join("NAMESPACE")) - .ok() - .and_then(|text| Namespace::parse(&text).log_err()) - .unwrap_or_default(); + let description_revision = file_revision(&description_file); + let namespace_revision = file_revision(&package_dir.join("NAMESPACE")); + let description_path = FilePath::from_path_buf(description_file)?; let collation = description.collate(); Some(PackageEntry { description_path, name: description.name, - version: Some(description.version), - namespace, + description_revision, + namespace_revision, files: Vec::new(), scripts: Vec::new(), collation, diff --git a/crates/oak_scan/src/scheduler.rs b/crates/oak_scan/src/scheduler.rs index a1a66c142f..e3ba4fc987 100644 --- a/crates/oak_scan/src/scheduler.rs +++ b/crates/oak_scan/src/scheduler.rs @@ -133,11 +133,10 @@ impl ScanCompleted { db, pkg.description_path, pkg.name, - pkg.version, - pkg.namespace, + pkg.description_revision, + pkg.namespace_revision, pkg.files, pkg.scripts, - pkg.collation, ) }) .collect(); diff --git a/crates/oak_scan/src/tests/stale.rs b/crates/oak_scan/src/tests/stale.rs index 0d77e6a8c6..1506f68547 100644 --- a/crates/oak_scan/src/tests/stale.rs +++ b/crates/oak_scan/src/tests/stale.rs @@ -8,11 +8,11 @@ use aether_path::FilePath; use oak_db::Db; use oak_db::DbInputs; use oak_db::File; +use oak_db::FileRevision; use oak_db::OakDatabase; use oak_db::Package; use oak_db::Root; use oak_db::RootKind; -use oak_package_metadata::namespace::Namespace; use salsa::Setter; use url::Url; @@ -77,11 +77,11 @@ fn test_set_stale_clears_package_on_editor_owned_package_file() { &db, file_path("/proj/DESCRIPTION"), "p".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), vec![], Vec::new(), - None, ); let file = File::new( &db, @@ -113,11 +113,11 @@ fn test_set_stale_routes_pkg_scripts_to_stale() { &db, file_path("/proj/DESCRIPTION"), "p".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), vec![], Vec::new(), - None, ); let test_file = File::new( &db, @@ -148,11 +148,11 @@ fn test_set_stale_routes_editor_owned_pkg_scripts_to_orphan() { &db, file_path("/proj/DESCRIPTION"), "p".to_string(), + FileRevision::zero(), + FileRevision::zero(), None, - Namespace::default(), vec![], Vec::new(), - None, ); let test_file = File::new( &db, diff --git a/crates/oak_scan/tests/integration/library.rs b/crates/oak_scan/tests/integration/library.rs index 917058351e..33e0486b9e 100644 --- a/crates/oak_scan/tests/integration/library.rs +++ b/crates/oak_scan/tests/integration/library.rs @@ -10,7 +10,7 @@ use oak_db::OakDatabase; use oak_db::Package; use oak_db::Root; use oak_db::RootKind; -use oak_package_metadata::namespace::Namespace; +use oak_package_metadata::namespace::Import; use oak_scan::DbScan; use oak_scan::RootExt; use salsa::Setter; @@ -63,6 +63,95 @@ fn test_scan_library_discovers_package() { assert_eq!(packages[0].version(&db), &Some("1.0.0".to_string()),); } +#[test] +fn test_package_namespace_reads_from_disk() { + let tmp = tempfile::tempdir().unwrap(); + let pkg_dir = tmp.path().join("dplyr"); + write_package(&pkg_dir, "dplyr", &[]); + // The scanner only stats `NAMESPACE`; the lazy `Package::namespace` query + // is the first thing to actually read and parse it. + fs::write( + pkg_dir.join("NAMESPACE"), + "export(mutate)\nexport(select)\nimportFrom(rlang, sym)\n", + ) + .unwrap(); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let pkg = db.library_roots().roots(&db)[0].packages(&db)[0]; + let namespace = pkg.namespace(&db); + assert_eq!(namespace.exports.to_vec(), vec!["mutate", "select"]); + assert_eq!(namespace.imports, vec![Import { + name: "sym".to_string(), + package: "rlang".to_string(), + }]); +} + +#[test] +fn test_package_namespace_empty_when_file_absent() { + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("dplyr"), "dplyr", &[]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let pkg = db.library_roots().roots(&db)[0].packages(&db)[0]; + let namespace = pkg.namespace(&db); + assert!(namespace.exports.is_empty()); + assert!(namespace.imports.is_empty()); +} + +#[test] +fn test_package_namespace_rereads_when_revision_bumps() { + // `Package::namespace` memoizes the parsed `NAMESPACE`. A later disk edit + // is invisible until something tells salsa the memo is stale, and the only + // such signal is the `namespace_revision` read inside `namespace()`. So + // this test rewrites the file, bumps the revision, and checks the second + // read reflects the edit. Drop that revision read and the assertion below + // would see the stale `mutate` export. + let tmp = tempfile::tempdir().unwrap(); + let pkg_dir = tmp.path().join("dplyr"); + write_package(&pkg_dir, "dplyr", &[]); + fs::write(pkg_dir.join("NAMESPACE"), "export(mutate)\n").unwrap(); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let pkg = db.library_roots().roots(&db)[0].packages(&db)[0]; + assert_eq!(pkg.namespace(&db).exports.to_vec(), vec!["mutate"]); + + fs::write(pkg_dir.join("NAMESPACE"), "export(select)\n").unwrap(); + pkg.set_namespace_revision(&mut db) + .to(oak_db::FileRevision::from(1u128)); + assert_eq!(pkg.namespace(&db).exports.to_vec(), vec!["select"]); +} + +#[test] +fn test_package_version_rereads_when_revision_bumps() { + // Guards the `description_revision` read inside `Package::description`, + // which `version()` reads through. Same shape as the `NAMESPACE` test: + // drop the revision read and the second assertion sees the stale `1.0.0`. + let tmp = tempfile::tempdir().unwrap(); + let pkg_dir = tmp.path().join("dplyr"); + write_package(&pkg_dir, "dplyr", &[]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let pkg = db.library_roots().roots(&db)[0].packages(&db)[0]; + assert_eq!(pkg.version(&db), &Some("1.0.0".to_string())); + + fs::write( + pkg_dir.join("DESCRIPTION"), + "Package: dplyr\nVersion: 2.0.0\n", + ) + .unwrap(); + pkg.set_description_revision(&mut db) + .to(oak_db::FileRevision::from(1u128)); + assert_eq!(pkg.version(&db), &Some("2.0.0".to_string())); +} + #[test] fn test_scan_multiple_library_paths_preserve_order() { let tmp1 = tempfile::tempdir().unwrap(); @@ -204,7 +293,7 @@ fn test_set_library_paths_re_add_preserves_package_identity() { use salsa::plumbing::AsId; let tmp = tempfile::tempdir().unwrap(); - write_package(&tmp.path().join("pkg"), "dplyr", &[("a.R", "x <- 1\n")]); + write_package(&tmp.path().join("dplyr"), "dplyr", &[("a.R", "x <- 1\n")]); let mut db = OakDatabase::new(); db.set_library_paths(&[tmp.path().to_path_buf()]); @@ -298,11 +387,10 @@ fn test_set_package_longer_root_wins_after_shorter_claims_first() { &mut db, desc_path.clone(), "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), Vec::new(), Vec::new(), - None, ); register_package(&mut db, short, p1); assert_eq!(db.root_by_package(p1), Some(short)); @@ -311,11 +399,10 @@ fn test_set_package_longer_root_wins_after_shorter_claims_first() { &mut db, desc_path, "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), Vec::new(), Vec::new(), - None, ); register_package(&mut db, long, p2); // Same entity; now in both roots' `packages`. `root_by_package` prefers @@ -335,11 +422,10 @@ fn test_set_package_shorter_root_does_not_steal_from_longer() { &mut db, desc_path.clone(), "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), Vec::new(), Vec::new(), - None, ); register_package(&mut db, long, p1); assert_eq!(db.root_by_package(p1), Some(long)); @@ -348,11 +434,10 @@ fn test_set_package_shorter_root_does_not_steal_from_longer() { &mut db, desc_path, "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), Vec::new(), Vec::new(), - None, ); register_package(&mut db, short, p2); // Same entity; now in both roots' `packages`. `root_by_package` keeps the @@ -383,22 +468,20 @@ fn test_all_files_emits_shared_file_once_under_deepest_root() { &mut db, desc_path.clone(), "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), files.clone(), Vec::new(), - None, ); register_package(&mut db, short, p1); let p2 = long.set_package( &mut db, desc_path, "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), files, Vec::new(), - None, ); register_package(&mut db, long, p2); assert_eq!(p1, p2); @@ -441,14 +524,13 @@ fn test_upsert_re_promotes_editor_owned_file_from_orphan() { &mut db, file_path("/lib/pkg/DESCRIPTION"), "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), vec![FileEntry { path: r_path.clone(), revision: oak_db::FileRevision::zero(), }], Vec::new(), - None, ); // Same `File` entity, editor content preserved, package backpointer set. @@ -478,11 +560,10 @@ fn test_set_package_stale_resurrection_changes_owning_root() { &mut db, desc_path.clone(), "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), Vec::new(), Vec::new(), - None, ); register_package(&mut db, old, p1); assert_eq!(db.root_by_package(p1), Some(old)); @@ -497,11 +578,10 @@ fn test_set_package_stale_resurrection_changes_owning_root() { &mut db, desc_path, "pkg".to_string(), - None, - Namespace::default(), + oak_db::FileRevision::zero(), + oak_db::FileRevision::zero(), Vec::new(), Vec::new(), - None, ); register_package(&mut db, new, p2); assert_eq!(p1, p2); From 59eb794335785dd6060c01ece96727d9c3a01839 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 17 Jun 2026 01:15:11 +0200 Subject: [PATCH 2/5] Fix rebase conflicts --- crates/oak_db/src/tests/file_resolve.rs | 12 ++++++------ crates/oak_db/src/tests/package_resolve.rs | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/oak_db/src/tests/file_resolve.rs b/crates/oak_db/src/tests/file_resolve.rs index 2ef14efce8..904c28b7c8 100644 --- a/crates/oak_db/src/tests/file_resolve.rs +++ b/crates/oak_db/src/tests/file_resolve.rs @@ -633,11 +633,11 @@ fn test_resolve_if_else_in_collated_file_offers_both() { &db, file_path("/w/pkg/DESCRIPTION"), "pkg".to_string(), - None, - oak_package_metadata::namespace::Namespace::default(), + FileRevision::zero(), + FileRevision::zero(), + Some(oak_package_metadata::namespace::Namespace::default()), Vec::new(), Vec::new(), - None, ); let a = File::new( @@ -685,11 +685,11 @@ fn test_resolve_collated_sequential_redef_resolves_to_last() { &db, file_path("/w/pkg/DESCRIPTION"), "pkg".to_string(), - None, - oak_package_metadata::namespace::Namespace::default(), + FileRevision::zero(), + FileRevision::zero(), + Some(oak_package_metadata::namespace::Namespace::default()), Vec::new(), Vec::new(), - None, ); let a = File::new( diff --git a/crates/oak_db/src/tests/package_resolve.rs b/crates/oak_db/src/tests/package_resolve.rs index c3046284f8..4471e0083c 100644 --- a/crates/oak_db/src/tests/package_resolve.rs +++ b/crates/oak_db/src/tests/package_resolve.rs @@ -205,11 +205,11 @@ fn test_reexport_via_import_from_resolves_to_source() { &db, file_path("workspace/tibble/DESCRIPTION"), "tibble".to_string(), - None, - tibble_ns, + FileRevision::zero(), + FileRevision::zero(), + Some(tibble_ns), Vec::new(), Vec::new(), - None, ); let tibble_file = File::new( &db, @@ -232,11 +232,11 @@ fn test_reexport_via_import_from_resolves_to_source() { &db, file_path("workspace/dplyr/DESCRIPTION"), "dplyr".to_string(), - None, - dplyr_ns, + FileRevision::zero(), + FileRevision::zero(), + Some(dplyr_ns), Vec::new(), Vec::new(), - None, ); let dplyr_file = File::new( &db, From 1a5bb03cef8062736e53e280cdbd42e1aa5f28eb Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 17 Jun 2026 11:35:20 +0200 Subject: [PATCH 3/5] Report untracked query if stat failed --- crates/oak_db/src/file.rs | 6 +++--- crates/oak_db/src/file_revision.rs | 20 ++++++++++++++++++++ crates/oak_db/src/package.rs | 10 +++++----- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/crates/oak_db/src/file.rs b/crates/oak_db/src/file.rs index 0854c60a10..fe63da068e 100644 --- a/crates/oak_db/src/file.rs +++ b/crates/oak_db/src/file.rs @@ -8,6 +8,7 @@ use oak_semantic::semantic_index::SymbolTable; use oak_semantic::use_def_map::UseDefMap; use crate::db::root_by_file; +use crate::file_revision::report_untracked_if_zero; use crate::imports::SalsaImportsResolver; use crate::parse::OakParse; use crate::Db; @@ -80,9 +81,8 @@ impl File { return text.clone(); } - // Reading `revision` makes this memo depend on it even though the value - // isn't otherwise used here: bumping the revision is what forces a re-read. - let _ = self.revision(db); + // Depend on `revision()` so a bump forces a re-read + report_untracked_if_zero(db, self.revision(db)); let FilePath::File(path) = self.path(db) else { // Our virtual documents (e.g. untitled://) are push-based, the diff --git a/crates/oak_db/src/file_revision.rs b/crates/oak_db/src/file_revision.rs index 160f9b2cdd..56c63e2c77 100644 --- a/crates/oak_db/src/file_revision.rs +++ b/crates/oak_db/src/file_revision.rs @@ -33,6 +33,26 @@ impl From for FileRevision { } } +/// Report an untracked read when `revision` is the zero sentinel. +/// +/// A query that reads a revision input already records the normal salsa +/// dependency, so a real mtime bump re-runs it. Zero is different: it means we +/// never got a trustworthy mtime, e.g. a transient stat failure on a network +/// drive. Nothing guarantees the revision will ever move off zero, because a +/// file that recovers without changing produces no watcher event. Reporting an +/// untracked read makes salsa re-run the calling query on the next revision, so +/// it retries the disk read instead of pinning its first cached result forever. +/// +/// The retry only persists while the file stays both reachable and at zero. A +/// successful read that returns the same bytes backdates, so nothing downstream +/// re-runs, and a deleted or evicted file leaves the live graph and stops being +/// queried at all. +pub(crate) fn report_untracked_if_zero(db: &dyn crate::Db, revision: FileRevision) { + if revision == FileRevision::zero() { + db.report_untracked_read(); + } +} + impl From for FileRevision { fn from(value: filetime::FileTime) -> Self { // `seconds()` is i64, `nanoseconds()` is u32. diff --git a/crates/oak_db/src/package.rs b/crates/oak_db/src/package.rs index ece86fab53..17e2d50b38 100644 --- a/crates/oak_db/src/package.rs +++ b/crates/oak_db/src/package.rs @@ -5,6 +5,7 @@ use oak_package_metadata::description::Description; use oak_package_metadata::namespace::Namespace; use stdext::result::ResultExt; +use crate::file_revision::report_untracked_if_zero; use crate::Db; use crate::Package; @@ -26,10 +27,8 @@ impl Package { return namespace.clone(); } - // Reading `namespace_revision` makes this memo depend on it even though - // the value isn't used here: bumping the revision is what forces a - // re-read. - let _ = self.namespace_revision(db); + // Depend on `namespace_revision()` so a bump forces a re-read + report_untracked_if_zero(db, self.namespace_revision(db)); let Some(dir) = self .description_path(db) @@ -93,7 +92,8 @@ impl Package { /// `Version:` / `Collate:` re-runs this query but backdates there. #[salsa::tracked(returns(ref))] pub(crate) fn description(self, db: &dyn Db) -> Option { - let _ = self.description_revision(db); + // Depend on `description_revision()` so a bump forces a re-read + report_untracked_if_zero(db, self.description_revision(db)); let path = self.description_path(db).as_path()?; let text = match fs::read_to_string(path.as_std_path()) { From 993ab43b768327d49d5cac39ce81632fa7f808f7 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 17 Jun 2026 12:05:04 +0200 Subject: [PATCH 4/5] Move `Package` to package.rs --- crates/oak_db/src/inputs.rs | 76 +---------------------------------- crates/oak_db/src/lib.rs | 2 +- crates/oak_db/src/package.rs | 77 +++++++++++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 77 deletions(-) diff --git a/crates/oak_db/src/inputs.rs b/crates/oak_db/src/inputs.rs index 15b9b2ce4e..cd4df6227d 100644 --- a/crates/oak_db/src/inputs.rs +++ b/crates/oak_db/src/inputs.rs @@ -1,11 +1,10 @@ use std::collections::HashSet; use aether_path::FilePath; -use oak_package_metadata::namespace::Namespace; use crate::Db; use crate::File; -use crate::FileRevision; +use crate::Package; /// Salsa-tracked root directory. /// @@ -180,76 +179,3 @@ impl StaleRoot { Self::new(db, HashSet::new(), vec![]) } } - -#[salsa::input(debug)] -pub struct Package { - /// URL of the package's `DESCRIPTION` file. Stable identity across - /// rescans and workspace / library churn: scanners look up an - /// existing `Package` by this URL before creating a new one. Two - /// packages with the same `Package:` name can coexist on disk and the - /// URL distinguishes them. - /// - /// The package's owning [`Root`] is not stored as a field. It is - /// derived from live-graph containment via [`Db::root_by_package`]: a - /// package belongs to whichever `Root.packages` currently holds it. - /// Workspace-vs-library is then `root.kind(db)`. - #[returns(ref)] - pub description_path: FilePath, - // TODO(salsa): Expose a tracked `name_interned(db) -> Name<'db>` - // method so `db.package_by_name()` and other lookups key on the - // interned id rather than the string. Can't store `Name<'db>` on - // `Package` directly because salsa inputs are lifetime-free. - #[returns(ref)] - pub name: String, - /// Mtime of the package's `DESCRIPTION` file, or [`FileRevision::zero`] - /// when it can't be stat'd. Drives the lazy [`Package::description`] - /// query (and the `version` / `collation` derivations on top of it), the - /// same way `namespace_revision` drives [`Package::namespace`]. We stat - /// `DESCRIPTION` at scan time but parse it only on demand. - pub description_revision: FileRevision, - /// Mtime of the package's `NAMESPACE` file, or [`FileRevision::zero`] - /// when it can't be stat'd. The lazy [`Package::namespace`] query reads - /// it so a watcher that bumps it on a `NAMESPACE` change forces the next - /// parse to re-read disk, exactly like [`File::revision`] drives - /// [`File::source_text`]. We don't read or parse `NAMESPACE` at scan - /// time, only stat it, so installed packages the user never imports cost - /// nothing beyond the stat. - pub namespace_revision: FileRevision, - /// In-memory `NAMESPACE`, checked by [`Package::namespace`] before it - /// touches disk. Mirrors [`File::source_text_override`]: `None` means - /// "read from disk". The scanners always leave it `None`. It's the - /// injection point unit tests use to supply a namespace for a synthetic - /// path with no file behind it, and the natural hook for an editor - /// editing a package's `NAMESPACE` live. - #[returns(ref)] - pub namespace_override: Option, - /// R source files belonging to this package (the `R/*.R` files), in - /// R's load order. When DESCRIPTION's `Collate:` directive is - /// present, this is exactly the files it lists, in that order; - /// files in `R/` not listed are excluded (matching R's loader, - /// Writing R Extensions §1.1.1). When `Collate:` is absent, files - /// are in case-insensitive alphabetical order. TODO(diagnostics): - /// Lint files missing from collation. - /// - /// Per-package granularity: adding or removing a file in one - /// package doesn't invalidate tracked queries reading another - /// package's files. - /// - /// **Placement invariant.** A file present here must have - /// `package(db) == Some(self)`, and a file with - /// `package == Some(self)` must live here or in [`Self::scripts`]. - /// Call this setter only through `oak_scan`'s helpers, which keep - /// the back-pointer and the container in sync. - #[returns(ref)] - pub files: Vec, - /// Other R files inside the package directory that aren't part of the - /// loadable namespace: `tests/`, `inst/`, `data-raw/`, etc. These get LSP - /// analysis (parse, semantic index) but aren't loaded with the package, so - /// name resolution treats them as standalone scripts that just happen to - /// live next to the package's code. - /// - /// **Placement invariant.** Same as [`Self::files`]: backpointer - /// stays `Some(self)`, file lives in one of the two containers. - #[returns(ref)] - pub scripts: Vec, -} diff --git a/crates/oak_db/src/lib.rs b/crates/oak_db/src/lib.rs index 563d50ef53..7ccac6802f 100644 --- a/crates/oak_db/src/lib.rs +++ b/crates/oak_db/src/lib.rs @@ -33,11 +33,11 @@ pub use identifier::NamespacePart; pub use inputs::LibraryRoots; pub use inputs::LiveRoot; pub use inputs::OrphanRoot; -pub use inputs::Package; pub use inputs::Root; pub use inputs::RootKind; pub use inputs::StaleRoot; pub use inputs::WorkspaceRoots; pub use name::Name; +pub use package::Package; pub use package_resolve::PackageVisibility; pub use storage::OakDatabase; diff --git a/crates/oak_db/src/package.rs b/crates/oak_db/src/package.rs index 17e2d50b38..bdee5779a0 100644 --- a/crates/oak_db/src/package.rs +++ b/crates/oak_db/src/package.rs @@ -1,13 +1,88 @@ use std::fs; use std::io; +use aether_path::FilePath; use oak_package_metadata::description::Description; use oak_package_metadata::namespace::Namespace; use stdext::result::ResultExt; use crate::file_revision::report_untracked_if_zero; use crate::Db; -use crate::Package; +use crate::File; +use crate::FileRevision; + +#[salsa::input(debug)] +pub struct Package { + /// URL of the package's `DESCRIPTION` file. Stable identity across + /// rescans and workspace / library churn: scanners look up an + /// existing `Package` by this URL before creating a new one. Two + /// packages with the same `Package:` name can coexist on disk and the + /// URL distinguishes them. + /// + /// The package's owning [`Root`] is not stored as a field. It is + /// derived from live-graph containment via [`Db::root_by_package`]: a + /// package belongs to whichever `Root.packages` currently holds it. + /// Workspace-vs-library is then `root.kind(db)`. + #[returns(ref)] + pub description_path: FilePath, + // TODO(salsa): Expose a tracked `name_interned(db) -> Name<'db>` + // method so `db.package_by_name()` and other lookups key on the + // interned id rather than the string. Can't store `Name<'db>` on + // `Package` directly because salsa inputs are lifetime-free. + #[returns(ref)] + pub name: String, + /// Mtime of the package's `DESCRIPTION` file, or [`FileRevision::zero`] + /// when it can't be stat'd. Drives the lazy [`Package::description`] + /// query (and the `version` / `collation` derivations on top of it), the + /// same way `namespace_revision` drives [`Package::namespace`]. We stat + /// `DESCRIPTION` at scan time but parse it only on demand. + pub description_revision: FileRevision, + /// Mtime of the package's `NAMESPACE` file, or [`FileRevision::zero`] + /// when it can't be stat'd. The lazy [`Package::namespace`] query reads + /// it so a watcher that bumps it on a `NAMESPACE` change forces the next + /// parse to re-read disk, exactly like [`File::revision`] drives + /// [`File::source_text`]. We don't read or parse `NAMESPACE` at scan + /// time, only stat it, so installed packages the user never imports cost + /// nothing beyond the stat. + pub namespace_revision: FileRevision, + /// In-memory `NAMESPACE`, checked by [`Package::namespace`] before it + /// touches disk. Mirrors [`File::source_text_override`]: `None` means + /// "read from disk". The scanners always leave it `None`. It's the + /// injection point unit tests use to supply a namespace for a synthetic + /// path with no file behind it, and the natural hook for an editor + /// editing a package's `NAMESPACE` live. + #[returns(ref)] + pub namespace_override: Option, + /// R source files belonging to this package (the `R/*.R` files), in + /// R's load order. When DESCRIPTION's `Collate:` directive is + /// present, this is exactly the files it lists, in that order; + /// files in `R/` not listed are excluded (matching R's loader, + /// Writing R Extensions §1.1.1). When `Collate:` is absent, files + /// are in case-insensitive alphabetical order. TODO(diagnostics): + /// Lint files missing from collation. + /// + /// Per-package granularity: adding or removing a file in one + /// package doesn't invalidate tracked queries reading another + /// package's files. + /// + /// **Placement invariant.** A file present here must have + /// `package(db) == Some(self)`, and a file with + /// `package == Some(self)` must live here or in [`Self::scripts`]. + /// Call this setter only through `oak_scan`'s helpers, which keep + /// the back-pointer and the container in sync. + #[returns(ref)] + pub files: Vec, + /// Other R files inside the package directory that aren't part of the + /// loadable namespace: `tests/`, `inst/`, `data-raw/`, etc. These get LSP + /// analysis (parse, semantic index) but aren't loaded with the package, so + /// name resolution treats them as standalone scripts that just happen to + /// live next to the package's code. + /// + /// **Placement invariant.** Same as [`Self::files`]: backpointer + /// stays `Some(self)`, file lives in one of the two containers. + #[returns(ref)] + pub scripts: Vec, +} #[salsa::tracked] impl Package { From bc0615d4705c87baa64d577e143794a496bd5149 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 17 Jun 2026 12:13:19 +0200 Subject: [PATCH 5/5] Address code review --- crates/oak_db/src/package.rs | 25 +++++++++++-------------- crates/oak_scan/src/packages.rs | 3 ++- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/crates/oak_db/src/package.rs b/crates/oak_db/src/package.rs index bdee5779a0..a50fa8de08 100644 --- a/crates/oak_db/src/package.rs +++ b/crates/oak_db/src/package.rs @@ -93,7 +93,7 @@ impl Package { /// parses it. Lazy and tracked so we only pay the read and the R-parse /// for packages whose imports actually get resolved. Parsing every /// installed package's `NAMESPACE` eagerly would dominate the cost of - /// scanning a library with hundreds of packages, so we defer it. + /// scanning a library with hundreds of packages, so we defer it (#1265). /// /// A missing or unparseable `NAMESPACE` yields an empty `Namespace`. #[salsa::tracked(returns(ref))] @@ -114,19 +114,17 @@ impl Package { }; let namespace_path = dir.join("NAMESPACE"); - let text = match fs::read_to_string(namespace_path.as_std_path()) { - Ok(text) => text, + match fs::read_to_string(namespace_path.as_std_path()) { + Ok(text) => Namespace::parse(&text).log_err().unwrap_or_default(), // A package needn't ship a `NAMESPACE`, so absence is the normal // case and stays quiet. A file that exists but can't be read is // logged so the failure isn't silently read as "no namespace". - Err(err) if err.kind() == io::ErrorKind::NotFound => return Namespace::default(), + Err(err) if err.kind() == io::ErrorKind::NotFound => Namespace::default(), Err(err) => { log::error!("Failed to read `{namespace_path}`: {err:?}"); - return Namespace::default(); + Namespace::default() }, - }; - - Namespace::parse(&text).log_err().unwrap_or_default() + } } /// The package's `Version:`, parsed lazily from `DESCRIPTION`. `None` @@ -171,17 +169,16 @@ impl Package { report_untracked_if_zero(db, self.description_revision(db)); let path = self.description_path(db).as_path()?; - let text = match fs::read_to_string(path.as_std_path()) { - Ok(text) => text, + match fs::read_to_string(path.as_std_path()) { + Ok(text) => Description::parse(&text).log_err(), // A missing `DESCRIPTION` is the normal "gone after a rescan" case // and stays quiet. A file that exists but can't be read is logged // rather than silently treated as absent. - Err(err) if err.kind() == io::ErrorKind::NotFound => return None, + Err(err) if err.kind() == io::ErrorKind::NotFound => None, Err(err) => { log::error!("Failed to read `{path}`: {err:?}"); - return None; + None }, - }; - Description::parse(&text).log_err() + } } } diff --git a/crates/oak_scan/src/packages.rs b/crates/oak_scan/src/packages.rs index 533482e6e2..9ba1df84b7 100644 --- a/crates/oak_scan/src/packages.rs +++ b/crates/oak_scan/src/packages.rs @@ -56,7 +56,8 @@ pub(crate) struct PackageEntry { /// `NAMESPACE` is only stat'd here, never read or parsed. The lazy /// [`oak_db::Package::namespace`] query does the parse. `DESCRIPTION` is read /// and parsed because the walk needs the `Package:` name (the workspace -/// directory name isn't authoritative) and the `Collate:` order to sort +/// directory name isn't authoritative, unlike an installed package's folder +/// name) and the `Collate:` order to sort /// `R/*.R`. The parsed `collation` only orders this walk's files. It isn't /// pushed to salsa. [`oak_db::Package::version`] and /// [`oak_db::Package::collation`] re-read `DESCRIPTION` lazily off the