-
Notifications
You must be signed in to change notification settings - Fork 28
Read metadata of installed packages lazily #1273
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: oak/salsa-29-optional-contents
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ use oak_package_metadata::namespace::Namespace; | |
|
|
||
| use crate::Db; | ||
| use crate::File; | ||
| use crate::FileRevision; | ||
|
|
||
| /// Salsa-tracked root directory. | ||
| /// | ||
|
|
@@ -200,12 +201,28 @@ pub struct Package { | |
| // `Package` directly because salsa inputs are lifetime-free. | ||
| #[returns(ref)] | ||
| pub name: String, | ||
| /// Installed-package version (from `DESCRIPTION`). `None` for | ||
| /// workspace packages. | ||
| /// Mtime of the package's `DESCRIPTION` file, or [`FileRevision::zero`] | ||
| /// when it can't be stat'd. Drives the lazy [`Package::description`] | ||
| /// query (and the `version` / `collation` derivations on top of it), the | ||
| /// same way `namespace_revision` drives [`Package::namespace`]. We stat | ||
| /// `DESCRIPTION` at scan time but parse it only on demand. | ||
| pub description_revision: FileRevision, | ||
| /// Mtime of the package's `NAMESPACE` file, or [`FileRevision::zero`] | ||
| /// when it can't be stat'd. The lazy [`Package::namespace`] query reads | ||
| /// it so a watcher that bumps it on a `NAMESPACE` change forces the next | ||
| /// parse to re-read disk, exactly like [`File::revision`] drives | ||
| /// [`File::source_text`]. We don't read or parse `NAMESPACE` at scan | ||
| /// time, only stat it, so installed packages the user never imports cost | ||
| /// nothing beyond the stat. | ||
| pub namespace_revision: FileRevision, | ||
| /// In-memory `NAMESPACE`, checked by [`Package::namespace`] before it | ||
| /// touches disk. Mirrors [`File::source_text_override`]: `None` means | ||
| /// "read from disk". The scanners always leave it `None`. It's the | ||
| /// injection point unit tests use to supply a namespace for a synthetic | ||
| /// path with no file behind it, and the natural hook for an editor | ||
| /// editing a package's `NAMESPACE` live. | ||
| #[returns(ref)] | ||
| pub version: Option<String>, | ||
| #[returns(ref)] | ||
| pub namespace: Namespace, | ||
| pub namespace_override: Option<Namespace>, | ||
|
Comment on lines
+218
to
+225
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But not for description?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually it seems nothing currently uses this |
||
| /// R source files belonging to this package (the `R/*.R` files), in | ||
| /// R's load order. When DESCRIPTION's `Collate:` directive is | ||
| /// present, this is exactly the files it lists, in that order; | ||
|
|
@@ -235,11 +252,4 @@ pub struct Package { | |
| /// stays `Some(self)`, file lives in one of the two containers. | ||
| #[returns(ref)] | ||
| pub scripts: Vec<File>, | ||
| /// The basename ordering from `DESCRIPTION`'s `Collate` field, if | ||
| /// present. `None` when the field is absent (R defaults to | ||
| /// alphabetical load order). Changes only when `DESCRIPTION` | ||
| /// itself changes, so this anchor is independent of `files` (which | ||
| /// bumps when R/ files are added or removed). | ||
| #[returns(ref)] | ||
| pub collation: Option<Vec<String>>, | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,112 @@ | ||||||
| use std::fs; | ||||||
| use std::io; | ||||||
|
|
||||||
| use oak_package_metadata::description::Description; | ||||||
| use oak_package_metadata::namespace::Namespace; | ||||||
| use stdext::result::ResultExt; | ||||||
|
|
||||||
| use crate::Db; | ||||||
| use crate::Package; | ||||||
|
|
||||||
| #[salsa::tracked] | ||||||
| impl Package { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe move all of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And merge with |
||||||
| /// The package's parsed `NAMESPACE`. | ||||||
| /// | ||||||
| /// Returns the in-memory override if one is set | ||||||
| /// (`namespace_override`), otherwise reads `NAMESPACE` from disk and | ||||||
| /// parses it. Lazy and tracked so we only pay the read and the R-parse | ||||||
| /// for packages whose imports actually get resolved. Parsing every | ||||||
| /// installed package's `NAMESPACE` eagerly would dominate the cost of | ||||||
| /// scanning a library with hundreds of packages, so we defer it. | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| /// | ||||||
| /// A missing or unparseable `NAMESPACE` yields an empty `Namespace`. | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's interesting to decide when to use I generally agree with this approach for this one though
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh actually i see |
||||||
| #[salsa::tracked(returns(ref))] | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. lru bound for this and
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i have 600 packages installed fwiw 😬 |
||||||
| pub fn namespace(self, db: &dyn Db) -> Namespace { | ||||||
| if let Some(namespace) = self.namespace_override(db) { | ||||||
| return namespace.clone(); | ||||||
| } | ||||||
|
|
||||||
| // Reading `namespace_revision` makes this memo depend on it even though | ||||||
| // the value isn't used here: bumping the revision is what forces a | ||||||
| // re-read. | ||||||
| let _ = self.namespace_revision(db); | ||||||
|
|
||||||
| let Some(dir) = self | ||||||
| .description_path(db) | ||||||
| .as_path() | ||||||
| .and_then(|path| path.parent()) | ||||||
| else { | ||||||
| return Namespace::default(); | ||||||
| }; | ||||||
|
|
||||||
| let namespace_path = dir.join("NAMESPACE"); | ||||||
| let text = match fs::read_to_string(namespace_path.as_std_path()) { | ||||||
| Ok(text) => text, | ||||||
| // A package needn't ship a `NAMESPACE`, so absence is the normal | ||||||
| // case and stays quiet. A file that exists but can't be read is | ||||||
| // logged so the failure isn't silently read as "no namespace". | ||||||
| Err(err) if err.kind() == io::ErrorKind::NotFound => return Namespace::default(), | ||||||
| Err(err) => { | ||||||
| log::error!("Failed to read `{namespace_path}`: {err:?}"); | ||||||
| return Namespace::default(); | ||||||
| }, | ||||||
| }; | ||||||
|
|
||||||
| Namespace::parse(&text).log_err().unwrap_or_default() | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move this up into |
||||||
| } | ||||||
|
|
||||||
| /// The package's `Version:`, parsed lazily from `DESCRIPTION`. `None` | ||||||
| /// when the file is missing or has no version. | ||||||
| /// | ||||||
| /// A narrow query over [`Package::description`]: editing `DESCRIPTION` | ||||||
| /// without changing `Version:` backdates here, so downstream isn't | ||||||
| /// disturbed. | ||||||
|
Comment on lines
+61
to
+63
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice |
||||||
| #[salsa::tracked(returns(ref))] | ||||||
| pub fn version(self, db: &dyn Db) -> Option<String> { | ||||||
| self.description(db) | ||||||
| .as_ref() | ||||||
| .map(|description| description.version.clone()) | ||||||
| } | ||||||
|
|
||||||
| /// The basename order from `DESCRIPTION`'s `Collate:` field, parsed | ||||||
| /// lazily. `None` when the field (or the file) is absent. Narrow query | ||||||
| /// over [`Package::description`], same backdating story as | ||||||
| /// [`Package::version`]. | ||||||
| #[salsa::tracked(returns(ref))] | ||||||
| pub fn collation(self, db: &dyn Db) -> Option<Vec<String>> { | ||||||
| self.description(db) | ||||||
| .as_ref() | ||||||
| .and_then(|description| description.collate()) | ||||||
| } | ||||||
|
|
||||||
| /// The package's parsed `DESCRIPTION`, or `None` when it's missing or | ||||||
| /// unparseable. | ||||||
| /// | ||||||
| /// Lazy and tracked, keyed on `description_revision`. Reading is deferred | ||||||
| /// so the library scanner can register an installed package without | ||||||
| /// touching `DESCRIPTION` at all (it takes the name from the directory). | ||||||
| /// | ||||||
| /// `Description` is `PartialEq`, so salsa backdates this query when a | ||||||
| /// `description_revision` bump turns out to leave the parsed content | ||||||
| /// unchanged. The narrow `version` / `collation` queries on top firewall | ||||||
| /// the other direction: a real `DESCRIPTION` edit that doesn't touch | ||||||
| /// `Version:` / `Collate:` re-runs this query but backdates there. | ||||||
| #[salsa::tracked(returns(ref))] | ||||||
| pub(crate) fn description(self, db: &dyn Db) -> Option<Description> { | ||||||
| let _ = self.description_revision(db); | ||||||
|
|
||||||
| let path = self.description_path(db).as_path()?; | ||||||
| let text = match fs::read_to_string(path.as_std_path()) { | ||||||
| Ok(text) => text, | ||||||
| // A missing `DESCRIPTION` is the normal "gone after a rescan" case | ||||||
| // and stays quiet. A file that exists but can't be read is logged | ||||||
| // rather than silently treated as absent. | ||||||
| Err(err) if err.kind() == io::ErrorKind::NotFound => return None, | ||||||
| Err(err) => { | ||||||
| log::error!("Failed to read `{path}`: {err:?}"); | ||||||
| return None; | ||||||
| }, | ||||||
| }; | ||||||
| Description::parse(&text).log_err() | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move into |
||||||
| } | ||||||
| } | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the "cant be stat'd" case of returning zero, we could end up stuck with an outdated version of the file right? Like we read it once and never again because we never move off 0?
Is it better to force it to read it every time in the case where we cant determine a FileRevision?
(I have no idea if you can even do that with salsa's caching model)