From 87cffb2736ad16ddb88d266fe455460d8cabb3c4 Mon Sep 17 00:00:00 2001 From: Mossa Date: Mon, 4 May 2026 14:19:09 +0200 Subject: [PATCH] feat: add --recursive to dvs get Adds `--recursive` / `-r` to `dvs get` (CLI) and `recursive = NULL` to `dvs_get()` (R), matching the existing flag on `dvs status`. Without the flag, directory paths return only direct children; with it, all descendants. --- dvs-cli/src/main.rs | 22 +- dvs-rpkg/R/dvs-commands.R | 3 +- dvs-rpkg/R/dvs-wrappers.R | 31 +- dvs-rpkg/man/dvs_get.Rd | 10 +- dvs-rpkg/man/dvs_get_impl.Rd | 9 +- dvs-rpkg/man/dvs_status.Rd | 6 +- dvs-rpkg/man/dvs_status_impl.Rd | 6 +- dvs-rpkg/src/rust/Cargo.lock | 2 +- dvs-rpkg/src/rust/Cargo.toml | 2 +- dvs-rpkg/src/rust/lib.rs | 27 +- dvs-rpkg/tests/testthat/test-get.R | 13 + dvs/src/files/status.rs | 2 +- dvs/src/globbing.rs | 156 ++++++-- ui/CLAUDE.md | 11 +- ui/helpers.sh | 2 +- ui/main.sh | 9 +- ui/main_parallel.sh | 7 +- ui/main_progress.sh | 7 +- ui/main_recursive.sh | 551 +++++++++++++++++++++++++++++ ui/main_status.sh | 7 +- 20 files changed, 798 insertions(+), 85 deletions(-) create mode 100755 ui/main_recursive.sh diff --git a/dvs-cli/src/main.rs b/dvs-cli/src/main.rs index 4963cc3b..f18ee569 100644 --- a/dvs-cli/src/main.rs +++ b/dvs-cli/src/main.rs @@ -76,12 +76,16 @@ pub enum Command { }, /// Retrieves the given files from dvs storage. You can use a glob or paths. /// If you pass a directory and a glob, the glob will be ran from that directory. - /// At least one path or --glob must be provided + /// At least one path or --glob must be provided. Get { - #[clap(required_unless_present = "glob")] paths: Vec, #[clap(long, short)] glob: Option, + /// Recursively include files in subdirectories for directory inputs. + /// Without this flag, directories return only their direct children. + /// Has no effect when no explicit paths are given. + #[clap(long, short)] + recursive: bool, /// Show what would be retrieved without making any actual changes #[clap(long)] dry_run: bool, @@ -405,14 +409,22 @@ fn try_main() -> Result<()> { Command::Get { paths, glob, + recursive, dry_run, } => { + if paths.is_empty() && glob.is_none() { + bail!( + "dvs get with no path or --glob would restore every tracked file. \ + If that's the intent, use `dvs get --glob '**/*'`." + ); + } let config = Config::find(¤t_dir).ok_or_else(|| anyhow!("Not in a DVS repository"))??; let dvs_paths = DvsPaths::from_cwd(&config)?; - let all_paths: Vec<_> = resolve_paths_for_get(paths, glob.as_deref(), &dvs_paths)? - .into_iter() - .collect(); + let all_paths: Vec<_> = + resolve_paths_for_get(paths, glob.as_deref(), &dvs_paths, recursive)? + .into_iter() + .collect(); if all_paths.is_empty() { return Err(anyhow!("No files to get")); } diff --git a/dvs-rpkg/R/dvs-commands.R b/dvs-rpkg/R/dvs-commands.R index 6c4a561a..54eccc61 100644 --- a/dvs-rpkg/R/dvs-commands.R +++ b/dvs-rpkg/R/dvs-commands.R @@ -80,7 +80,7 @@ dvs_status <- function( #' @inherit dvs_get_impl title description params #' @rdname dvs_get #' @export -dvs_get <- function(paths = character(0), glob = NULL, dry_run = NULL) { +dvs_get <- function(paths = character(0), glob = NULL, recursive = NULL, dry_run = NULL) { dvs_set_threads_impl(getOption("dvs.num_threads")) progress_callback <- NULL if (!isTRUE(dry_run)) { @@ -90,6 +90,7 @@ dvs_get <- function(paths = character(0), glob = NULL, dry_run = NULL) { get_data_frame <- dvs_get_impl( paths = paths, glob = glob, + recursive = recursive, dry_run = dry_run, progress_callback = progress_callback ) diff --git a/dvs-rpkg/R/dvs-wrappers.R b/dvs-rpkg/R/dvs-wrappers.R index 1349a11f..2f293a60 100644 --- a/dvs-rpkg/R/dvs-wrappers.R +++ b/dvs-rpkg/R/dvs-wrappers.R @@ -139,26 +139,33 @@ dvs_add_impl <- function(paths = character(0), message = NULL, glob = NULL, dry_ .val } -# Generated from Rust fn `dvs_get` (lib.rs:436:15) +# Generated from Rust fn `dvs_get` (lib.rs:447:15) #' @title Retrieve files from DVS storage into the working directory -#' @description Fetches the specified files from DVS storage and writes them to their original paths in the working directory. +#' @description Fetches the specified files from DVS storage and writes them to their original paths in the working directory. Calling `dvs_get()` with no arguments restores every tracked file in the repository. #' @param paths Character vector of file paths to retrieve from DVS storage. #' @param glob Optional glob pattern to select files (e.g. `"data/*.csv"`). #' Globs use a literal path separator: `*.csv` only matches files in the #' target directory and will not match `subdir/file.csv`. Use `**/*.csv` to #' match recursively across subdirectories. +#' @param recursive If `TRUE`, directory inputs include all descendants; +#' if `FALSE` or `NULL` (default), only direct children of the directory +#' are returned. The flag only constrains descendants of paths passed +#' explicitly — when `paths` is empty, every tracked file is returned +#' regardless of nesting depth, and `recursive` has no effect. #' @param dry_run If `TRUE`, report what would be retrieved without writing files. #' @param progress_callback Optional handle to enable progress bar display. #' @keywords internal #' @source Generated by miniextendr from Rust fn `dvs_get` -dvs_get_impl <- function(paths = character(0), glob = NULL, dry_run = NULL, progress_callback = NULL) { +dvs_get_impl <- function(paths = character(0), glob = NULL, recursive = NULL, dry_run = NULL, progress_callback = NULL) { stopifnot( "'glob' must be NULL or character" = is.null(glob) || is.character(glob), "'glob' must be NULL or have length 1" = is.null(glob) || length(glob) == 1L, + "'recursive' must be NULL or logical" = is.null(recursive) || is.logical(recursive), + "'recursive' must be NULL or have length 1" = is.null(recursive) || length(recursive) == 1L, "'dry_run' must be NULL or logical" = is.null(dry_run) || is.logical(dry_run), "'dry_run' must be NULL or have length 1" = is.null(dry_run) || length(dry_run) == 1L ) - .val <- .Call(C_dvs_get, .call = match.call(), paths, glob, dry_run, progress_callback) + .val <- .Call(C_dvs_get, .call = match.call(), paths, glob, recursive, dry_run, progress_callback) if (inherits(.val, "rust_error_value") && isTRUE(attr(.val, "__rust_error__"))) return(.miniextendr_raise_condition(.val, sys.call())) .val } @@ -192,11 +199,15 @@ dvs_init_impl <- function(storage_path, root_dir = NULL, group = NULL, metadata_ invisible(.val) } -# Generated from Rust fn `dvs_status` (lib.rs:294:15) +# Generated from Rust fn `dvs_status` (lib.rs:298:15) #' @title Report the sync status of DVS-managed files #' @description Reports the sync status of DVS-managed files. By default all statuses are shown; pass a character vector of status names (e.g. `c("current", "absent")`) to restrict output. #' @param paths Character vector of file or directory paths to check status for. -#' @param recursive If `TRUE`, recursively include files in subdirectories. +#' @param recursive If `TRUE`, directory inputs include all descendants; +#' if `FALSE` or `NULL` (default), only direct children of the directory +#' are returned. The flag only constrains descendants of paths passed +#' explicitly — when `paths` is empty, every tracked file is returned +#' regardless of nesting depth, and `recursive` has no effect. #' @param status Character vector of statuses to include. Valid values are #' `"current"`, `"absent"`, and `"unsynced"`. When empty (default), all #' statuses are shown. @@ -214,7 +225,7 @@ dvs_status_impl <- function(paths = character(0), recursive = NULL, status = c(" .val } -# Generated from Rust fn `dvs_version` (lib.rs:494:8) +# Generated from Rust fn `dvs_version` (lib.rs:511:8) #' @source Generated by miniextendr from Rust fn `dvs_version` #' @export dvs_version <- function() { @@ -223,7 +234,7 @@ dvs_version <- function() { .val } -# Generated from Rust fn `dvs_set_threads` (lib.rs:480:15) +# Generated from Rust fn `dvs_set_threads` (lib.rs:497:15) #' @title Set the number of threads used by DVS parallel operations #' @description Controls the thread pool size for add, get, and status operations. Pass `NULL` to revert to automatic detection. #' @param threads Integer number of threads, or `NULL` to reset. @@ -240,7 +251,7 @@ dvs_set_threads_impl <- function(threads = NULL) { invisible(.val) } -# Generated from Rust fn `format_byte_size` (lib.rs:488:8) +# Generated from Rust fn `format_byte_size` (lib.rs:505:8) #' @title Format a byte count as a human-readable size string #' @param size_bytes non-negative integer representing file sizes in bytes. #' @source Generated by miniextendr from Rust fn `format_byte_size` @@ -256,7 +267,7 @@ format_byte_size <- function(size_bytes) { .val } -# Generated from Rust fn `set_dvs_log_level` (lib.rs:523:8) +# Generated from Rust fn `set_dvs_log_level` (lib.rs:540:8) #' @title Set the log level for DVS internals #' @description Controls which log messages from the DVS internals are routed to R's console. `error` and `warn` go to [stderr()]; `info`, `debug`, and `trace` go to stdout. #' @param level Character string giving the desired log level. The default diff --git a/dvs-rpkg/man/dvs_get.Rd b/dvs-rpkg/man/dvs_get.Rd index 8856e40e..24d77bc7 100644 --- a/dvs-rpkg/man/dvs_get.Rd +++ b/dvs-rpkg/man/dvs_get.Rd @@ -4,7 +4,7 @@ \alias{dvs_get} \title{Retrieve files from DVS storage into the working directory} \usage{ -dvs_get(paths = character(0), glob = NULL, dry_run = NULL) +dvs_get(paths = character(0), glob = NULL, recursive = NULL, dry_run = NULL) } \arguments{ \item{paths}{Character vector of file paths to retrieve from DVS storage.} @@ -14,8 +14,14 @@ Globs use a literal path separator: \verb{*.csv} only matches files in the target directory and will not match \code{subdir/file.csv}. Use \verb{**/*.csv} to match recursively across subdirectories.} +\item{recursive}{If \code{TRUE}, directory inputs include all descendants; +if \code{FALSE} or \code{NULL} (default), only direct children of the directory +are returned. The flag only constrains descendants of paths passed +explicitly — when \code{paths} is empty, every tracked file is returned +regardless of nesting depth, and \code{recursive} has no effect.} + \item{dry_run}{If \code{TRUE}, report what would be retrieved without writing files.} } \description{ -Fetches the specified files from DVS storage and writes them to their original paths in the working directory. +Fetches the specified files from DVS storage and writes them to their original paths in the working directory. Calling \code{dvs_get()} with no arguments restores every tracked file in the repository. } diff --git a/dvs-rpkg/man/dvs_get_impl.Rd b/dvs-rpkg/man/dvs_get_impl.Rd index 5986016e..2087f58f 100644 --- a/dvs-rpkg/man/dvs_get_impl.Rd +++ b/dvs-rpkg/man/dvs_get_impl.Rd @@ -10,6 +10,7 @@ Generated by miniextendr from Rust fn \code{dvs_get} dvs_get_impl( paths = character(0), glob = NULL, + recursive = NULL, dry_run = NULL, progress_callback = NULL ) @@ -22,11 +23,17 @@ Globs use a literal path separator: \verb{*.csv} only matches files in the target directory and will not match \code{subdir/file.csv}. Use \verb{**/*.csv} to match recursively across subdirectories.} +\item{recursive}{If \code{TRUE}, directory inputs include all descendants; +if \code{FALSE} or \code{NULL} (default), only direct children of the directory +are returned. The flag only constrains descendants of paths passed +explicitly — when \code{paths} is empty, every tracked file is returned +regardless of nesting depth, and \code{recursive} has no effect.} + \item{dry_run}{If \code{TRUE}, report what would be retrieved without writing files.} \item{progress_callback}{Optional handle to enable progress bar display.} } \description{ -Fetches the specified files from DVS storage and writes them to their original paths in the working directory. +Fetches the specified files from DVS storage and writes them to their original paths in the working directory. Calling \code{dvs_get()} with no arguments restores every tracked file in the repository. } \keyword{internal} diff --git a/dvs-rpkg/man/dvs_status.Rd b/dvs-rpkg/man/dvs_status.Rd index 773c0d9c..8ff99328 100644 --- a/dvs-rpkg/man/dvs_status.Rd +++ b/dvs-rpkg/man/dvs_status.Rd @@ -13,7 +13,11 @@ dvs_status( \arguments{ \item{paths}{Character vector of file or directory paths to check status for.} -\item{recursive}{If \code{TRUE}, recursively include files in subdirectories.} +\item{recursive}{If \code{TRUE}, directory inputs include all descendants; +if \code{FALSE} or \code{NULL} (default), only direct children of the directory +are returned. The flag only constrains descendants of paths passed +explicitly — when \code{paths} is empty, every tracked file is returned +regardless of nesting depth, and \code{recursive} has no effect.} \item{status}{Character vector of statuses to include. Valid values are \code{"current"}, \code{"absent"}, and \code{"unsynced"}. When empty (default), all diff --git a/dvs-rpkg/man/dvs_status_impl.Rd b/dvs-rpkg/man/dvs_status_impl.Rd index 02301281..cf4f3f61 100644 --- a/dvs-rpkg/man/dvs_status_impl.Rd +++ b/dvs-rpkg/man/dvs_status_impl.Rd @@ -16,7 +16,11 @@ dvs_status_impl( \arguments{ \item{paths}{Character vector of file or directory paths to check status for.} -\item{recursive}{If \code{TRUE}, recursively include files in subdirectories.} +\item{recursive}{If \code{TRUE}, directory inputs include all descendants; +if \code{FALSE} or \code{NULL} (default), only direct children of the directory +are returned. The flag only constrains descendants of paths passed +explicitly — when \code{paths} is empty, every tracked file is returned +regardless of nesting depth, and \code{recursive} has no effect.} \item{status}{Character vector of statuses to include. Valid values are \code{"current"}, \code{"absent"}, and \code{"unsynced"}. When empty (default), all diff --git a/dvs-rpkg/src/rust/Cargo.lock b/dvs-rpkg/src/rust/Cargo.lock index b26bfd2f..a242fba7 100644 --- a/dvs-rpkg/src/rust/Cargo.lock +++ b/dvs-rpkg/src/rust/Cargo.lock @@ -160,7 +160,7 @@ dependencies = [ [[package]] name = "dvs" version = "0.3.0" -source = "git+https://github.com/A2-ai/dvs2?branch=main#c6e8988288979fd29141f3a2019f782f399c5d7c" +source = "git+https://github.com/A2-ai/dvs2?branch=feat%2Fget-recursive#17603ab5703ea83f69323a65b0c50513dbdd2d5e" dependencies = [ "anyhow", "blake3", diff --git a/dvs-rpkg/src/rust/Cargo.toml b/dvs-rpkg/src/rust/Cargo.toml index d9a07ffe..ccf740c0 100644 --- a/dvs-rpkg/src/rust/Cargo.toml +++ b/dvs-rpkg/src/rust/Cargo.toml @@ -34,7 +34,7 @@ connections = ["miniextendr-api/connections"] # block, or rely on `just` recipes that vendor from the local checkout. [dependencies] miniextendr-api = { git = "https://github.com/A2-ai/miniextendr", features = ["serde", "time", "log"] } -dvs = { git = "https://github.com/A2-ai/dvs2", branch = "main", package = "dvs" } +dvs = { git = "https://github.com/A2-ai/dvs2", branch = "feat/get-recursive", package = "dvs" } serde = { version = "1", features = ["derive"] } serde_json = "1" anyhow = "1" diff --git a/dvs-rpkg/src/rust/lib.rs b/dvs-rpkg/src/rust/lib.rs index 8ab6a374..ce40cb22 100644 --- a/dvs-rpkg/src/rust/lib.rs +++ b/dvs-rpkg/src/rust/lib.rs @@ -285,7 +285,11 @@ impl From for Status { /// of status names (e.g. `c("current", "absent")`) to restrict output. /// /// @param paths Character vector of file or directory paths to check status for. -/// @param recursive If `TRUE`, recursively include files in subdirectories. +/// @param recursive If `TRUE`, directory inputs include all descendants; +/// if `FALSE` or `NULL` (default), only direct children of the directory +/// are returned. The flag only constrains descendants of paths passed +/// explicitly — when `paths` is empty, every tracked file is returned +/// regardless of nesting depth, and `recursive` has no effect. /// @param status Character vector of statuses to include. Valid values are /// `"current"`, `"absent"`, and `"unsynced"`. When empty (default), all /// statuses are shown. @@ -422,13 +426,20 @@ impl<'a> From<&'a FileMetadata> for FileMetadataView<'a> { /// Retrieve files from DVS storage into the working directory. /// /// Fetches the specified files from DVS storage and writes them -/// to their original paths in the working directory. +/// to their original paths in the working directory. Calling +/// `dvs_get()` with no arguments restores every tracked file in +/// the repository. /// /// @param paths Character vector of file paths to retrieve from DVS storage. /// @param glob Optional glob pattern to select files (e.g. `"data/*.csv"`). /// Globs use a literal path separator: `*.csv` only matches files in the /// target directory and will not match `subdir/file.csv`. Use `**/*.csv` to /// match recursively across subdirectories. +/// @param recursive If `TRUE`, directory inputs include all descendants; +/// if `FALSE` or `NULL` (default), only direct children of the directory +/// are returned. The flag only constrains descendants of paths passed +/// explicitly — when `paths` is empty, every tracked file is returned +/// regardless of nesting depth, and `recursive` has no effect. /// @param dry_run If `TRUE`, report what would be retrieved without writing files. /// @param progress_callback Optional handle to enable progress bar display. /// @keywords internal @@ -436,6 +447,7 @@ impl<'a> From<&'a FileMetadata> for FileMetadataView<'a> { pub(crate) fn dvs_get( #[miniextendr(default = "character(0)")] paths: Vec, #[miniextendr(default = "NULL")] glob: Option, + #[miniextendr(default = "NULL")] recursive: Option, #[miniextendr(default = "NULL")] dry_run: Option, #[miniextendr(default = "NULL")] progress_callback: Option>, ) -> Result>> { @@ -444,9 +456,14 @@ pub(crate) fn dvs_get( let config = Config::find(¤t_dir).ok_or_else(|| anyhow!("Not in a DVS repository"))??; let dvs_paths = DvsPaths::from_cwd(&config)?; - let all_paths: Vec<_> = resolve_paths_for_get(paths, glob.as_deref(), &dvs_paths)? - .into_iter() - .collect(); + let all_paths: Vec<_> = resolve_paths_for_get( + paths, + glob.as_deref(), + &dvs_paths, + recursive.unwrap_or(false), + )? + .into_iter() + .collect(); if all_paths.is_empty() { return Err(anyhow!("No files to get")); } diff --git a/dvs-rpkg/tests/testthat/test-get.R b/dvs-rpkg/tests/testthat/test-get.R index b3e918bb..515c06bb 100644 --- a/dvs-rpkg/tests/testthat/test-get.R +++ b/dvs-rpkg/tests/testthat/test-get.R @@ -49,3 +49,16 @@ test_that("dvs_get on never-added path errors", { new_dvs_test_repo() expect_error(dvs_get(paths = "never-added.csv")) }) + +test_that("dvs_get() with no args restores every tracked file at every depth", { + new_dvs_test_repo() + write_theoph("top.csv") + write_theoph_shuffled("data/raw/deep.csv", seed = 1) + dvs_add(paths = file.path(getwd(), c("top.csv", "data/raw/deep.csv"))) + file.remove(c("top.csv", "data/raw/deep.csv")) + + result <- dvs_get() + expect_s3_class(result, "tbl_df") + expect_setequal(result$path, c("top.csv", "data/raw/deep.csv")) + expect_true(all(file.exists(c("top.csv", "data/raw/deep.csv")))) +}) diff --git a/dvs/src/files/status.rs b/dvs/src/files/status.rs index 7db43765..14a7986c 100644 --- a/dvs/src/files/status.rs +++ b/dvs/src/files/status.rs @@ -44,7 +44,7 @@ pub struct StatusFilter { /// We need to handle `.`, `./` etc but we can't canonicalize because /// the path might not exist and we want the path relative to the directory so no symlink resolution -fn normalize_path(p: PathBuf) -> Option { +pub(crate) fn normalize_path(p: PathBuf) -> Option { let mut out = PathBuf::new(); for c in p.components() { match c { diff --git a/dvs/src/globbing.rs b/dvs/src/globbing.rs index c8267c95..d2dc6dcf 100644 --- a/dvs/src/globbing.rs +++ b/dvs/src/globbing.rs @@ -97,33 +97,39 @@ pub fn resolve_paths_for_get( paths: Vec, glob_pattern: Option<&str>, dvs_paths: &DvsPaths, + recursive: bool, ) -> Result> { let mut out = HashSet::new(); let glob_matcher = build_glob_matcher(glob_pattern)?; let metadata_root = dvs_paths.metadata_folder().canonicalize()?; - // Get cwd-relative prefix for converting user paths to repo-root-relative let cwd_prefix = dvs_paths.cwd_relative_to_root(); - // Convert user paths to repo-relative directory filters - // If no paths given, default to cwd (or repo root if at root) - let dir_filters: Vec = if paths.is_empty() { - vec![cwd_prefix.map(|p| p.to_path_buf()).unwrap_or_default()] + // None means no path restriction — all tracked files under cwd are candidates. + // The `recursive` flag only applies when explicit paths are given. + let dir_filters: Option> = if paths.is_empty() { + None } else { - paths - .into_iter() - .map(|p| { - if p.is_absolute() { - match p.strip_prefix(dvs_paths.repo_root()) { - Ok(r) => r.to_path_buf(), - Err(_) => p, - } - } else if let Some(prefix) = cwd_prefix { - prefix.join(&p) - } else { - p - } - }) - .collect() + Some( + paths + .into_iter() + .filter_map(|p| { + let raw = if p.is_absolute() { + match p.strip_prefix(dvs_paths.repo_root()) { + Ok(r) => r.to_path_buf(), + Err(_) => p, + } + } else if let Some(prefix) = cwd_prefix { + prefix.join(&p) + } else { + p + }; + // Strip `Component::CurDir` (`.`) so e.g. `dvs get .` becomes + // an empty PathBuf, which `Path::starts_with` treats as a + // prefix of every tracked path. + crate::files::status::normalize_path(raw) + }) + .collect(), + ) }; // Walk all metadata files @@ -144,12 +150,19 @@ pub fn resolve_paths_for_get( }; let tracked_path = relative_to_metadata.with_extension(""); - // Filter: must be under one of user's directories (or exact match) - let under_filter = dir_filters - .iter() - .any(|dir| tracked_path.starts_with(dir) || &tracked_path == dir); - if !under_filter { - continue; + // When explicit paths were given, the tracked file must be under one of them + if let Some(filters) = &dir_filters { + let under_filter = filters.iter().any(|filter_path| { + // Exact match (user passed a file path) + tracked_path == filter_path.as_path() + // Recursive: any descendant + || (recursive && tracked_path.starts_with(filter_path)) + // Non-recursive: direct child only + || (!recursive && tracked_path.parent() == Some(filter_path.as_path())) + }); + if !under_filter { + continue; + } } // Get cwd-relative path for glob matching @@ -254,16 +267,17 @@ mod tests { fn get_exact_file_match() { let (_temp, dvs_paths) = setup_test_repo(); let result = - resolve_paths_for_get(vec![PathBuf::from("foo.txt")], None, &dvs_paths).unwrap(); + resolve_paths_for_get(vec![PathBuf::from("foo.txt")], None, &dvs_paths, false).unwrap(); assert_eq!(result.len(), 1); assert!(result.contains(&PathBuf::from("foo.txt"))); } #[test] - fn get_directory_returns_all_tracked() { + fn get_directory_recursive_returns_all_tracked() { let (_temp, dvs_paths) = setup_test_repo(); - let result = resolve_paths_for_get(vec![PathBuf::from("data")], None, &dvs_paths).unwrap(); + let result = + resolve_paths_for_get(vec![PathBuf::from("data")], None, &dvs_paths, true).unwrap(); assert!(result.contains(&PathBuf::from("data/a.csv"))); assert!(result.contains(&PathBuf::from("data/subdir/c.csv"))); @@ -271,23 +285,46 @@ mod tests { assert!(!result.contains(&PathBuf::from("data/b.txt"))); } + #[test] + fn get_directory_non_recursive_excludes_subdirs() { + let (_temp, dvs_paths) = setup_test_repo(); + let result = + resolve_paths_for_get(vec![PathBuf::from("data")], None, &dvs_paths, false).unwrap(); + + assert!(result.contains(&PathBuf::from("data/a.csv"))); + assert!(!result.contains(&PathBuf::from("data/subdir/c.csv"))); + assert!(!result.contains(&PathBuf::from("data/b.txt"))); + } + #[test] fn get_with_glob_filters() { let (_temp, dvs_paths) = setup_test_repo(); // Empty paths defaults to cwd, then glob filters - let result = resolve_paths_for_get(vec![], Some("*.txt"), &dvs_paths).unwrap(); + let result = resolve_paths_for_get(vec![], Some("*.txt"), &dvs_paths, false).unwrap(); assert!(result.contains(&PathBuf::from("foo.txt"))); assert!(!result.contains(&PathBuf::from("data/a.csv"))); } - // Do we want that behaviour? #[test] - fn get_no_paths_defaults_to_cwd() { + fn get_no_paths_returns_all_under_cwd() { let (_temp, dvs_paths) = setup_test_repo(); - let result = resolve_paths_for_get(vec![], None, &dvs_paths).unwrap(); + // Without explicit paths the recursive flag has no target to restrict; + // all tracked files under cwd are returned regardless of its value. + let result = resolve_paths_for_get(vec![], None, &dvs_paths, false).unwrap(); + assert!(result.contains(&PathBuf::from("foo.txt"))); + assert!(result.contains(&PathBuf::from("data/a.csv"))); + assert!(result.contains(&PathBuf::from("data/subdir/c.csv"))); + } - // Should return all tracked files + #[test] + fn get_dot_recursive_returns_all_tracked() { + let (_temp, dvs_paths) = setup_test_repo(); + // `dvs get . --recursive` should match every tracked path: normalize_path + // strips the CurDir component, leaving an empty PathBuf that + // Path::starts_with treats as a prefix of any path. + let result = + resolve_paths_for_get(vec![PathBuf::from(".")], None, &dvs_paths, true).unwrap(); assert!(result.contains(&PathBuf::from("foo.txt"))); assert!(result.contains(&PathBuf::from("data/a.csv"))); assert!(result.contains(&PathBuf::from("data/subdir/c.csv"))); @@ -297,7 +334,7 @@ mod tests { fn get_absolute_file_path() { let (temp, dvs_paths) = setup_test_repo(); let abs_path = temp.path().canonicalize().unwrap().join("foo.txt"); - let result = resolve_paths_for_get(vec![abs_path], None, &dvs_paths).unwrap(); + let result = resolve_paths_for_get(vec![abs_path], None, &dvs_paths, false).unwrap(); assert_eq!(result.len(), 1); assert!(result.contains(&PathBuf::from("foo.txt"))); @@ -307,13 +344,60 @@ mod tests { fn get_absolute_directory_path() { let (temp, dvs_paths) = setup_test_repo(); let abs_path = temp.path().canonicalize().unwrap().join("data"); - let result = resolve_paths_for_get(vec![abs_path], None, &dvs_paths).unwrap(); + let result = resolve_paths_for_get(vec![abs_path], None, &dvs_paths, true).unwrap(); assert!(result.contains(&PathBuf::from("data/a.csv"))); assert!(result.contains(&PathBuf::from("data/subdir/c.csv"))); assert!(!result.contains(&PathBuf::from("foo.txt"))); } + #[test] + fn get_absolute_directory_path_non_recursive() { + let (temp, dvs_paths) = setup_test_repo(); + let abs_path = temp.path().canonicalize().unwrap().join("data"); + let result = resolve_paths_for_get(vec![abs_path], None, &dvs_paths, false).unwrap(); + + assert!(result.contains(&PathBuf::from("data/a.csv"))); + assert!(!result.contains(&PathBuf::from("data/subdir/c.csv"))); + assert!(!result.contains(&PathBuf::from("foo.txt"))); + } + + #[test] + fn get_directory_recursive_with_glob() { + // Recursive directory + glob: glob is evaluated cwd-relative on + // descendants. With cwd at repo root and dir filter `data`, the glob + // pattern must include the `data/` prefix to match anything. + let (_temp, dvs_paths) = setup_test_repo(); + let result = resolve_paths_for_get( + vec![PathBuf::from("data")], + Some("data/**/*.csv"), + &dvs_paths, + true, + ) + .unwrap(); + + assert!(result.contains(&PathBuf::from("data/a.csv"))); + assert!(result.contains(&PathBuf::from("data/subdir/c.csv"))); + assert!(!result.contains(&PathBuf::from("foo.txt"))); + } + + #[test] + fn get_directory_non_recursive_with_glob_excludes_subdirs() { + // Non-recursive + glob: even a recursive-style glob shouldn't reach + // subdirectories because the dir filter rejects them first. + let (_temp, dvs_paths) = setup_test_repo(); + let result = resolve_paths_for_get( + vec![PathBuf::from("data")], + Some("data/**/*.csv"), + &dvs_paths, + false, + ) + .unwrap(); + + assert!(result.contains(&PathBuf::from("data/a.csv"))); + assert!(!result.contains(&PathBuf::from("data/subdir/c.csv"))); + } + #[test] fn add_absolute_file_path() { let (temp, dvs_paths) = setup_test_repo(); diff --git a/ui/CLAUDE.md b/ui/CLAUDE.md index 9c18dc09..66c07017 100644 --- a/ui/CLAUDE.md +++ b/ui/CLAUDE.md @@ -3,11 +3,12 @@ ## Running UI tests ```bash -bash ui/main_status.sh # Status filtering, path filtering, recursive -bash ui/main_progress.sh # Progress bars: 100x1MB + 1x500MB -bash ui/main_parallel.sh # Thread control benchmarks -bash ui/main_log.sh # Logging: off/info/debug/warn/error, threads, transitions -bash ui/cleanup.sh # Remove temp dirs left by tests +bash ui/main_status.sh # Status filtering, path filtering, recursive +bash ui/main_recursive.sh # Recursive flag across add/get/status +bash ui/main_progress.sh # Progress bars: 100x1MB + 1x500MB +bash ui/main_parallel.sh # Thread control benchmarks +bash ui/main_log.sh # Logging: off/info/debug/warn/error, threads, transitions +bash ui/cleanup.sh # Remove temp dirs left by tests ``` All scripts use `set -euox pipefail` and trap on ERR, so a nonzero exit code means something failed. But **exit code 0 is not sufficient** — always redirect output to a log file, then read the log and verify: diff --git a/ui/helpers.sh b/ui/helpers.sh index 8381e963..a8943c7d 100755 --- a/ui/helpers.sh +++ b/ui/helpers.sh @@ -9,7 +9,7 @@ PS4='> ' # suppress that first line. Filtering on the xtrace file-descriptor is the # only reliable mechanism. The filter is a one-time setup: all subsequent # xtrace output for the sourcing script passes through it. -exec {_say_xtfd}> >(grep --line-buffered -v '^> say ' >&2) +exec {_say_xtfd}> >(grep -E --line-buffered -v '^> say($| )|^> set [+-]x$|^>>+' >&2) BASH_XTRACEFD=$_say_xtfd # Echo without leaving an xtrace line for the echo itself. diff --git a/ui/main.sh b/ui/main.sh index e106654c..66f7633f 100755 --- a/ui/main.sh +++ b/ui/main.sh @@ -1,17 +1,17 @@ #!/usr/bin/env bash -# note: the -x shows the script command in output -set -euox pipefail +set -eu # prints the line in script that errors trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR -echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" # shellcheck source=ui/helpers.sh source "${SCRIPT_DIR}/helpers.sh" +set -xo pipefail + +say "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." DVS_REPO_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)" RUN_SUFFIX="${DVS_REPO_CLI##*_}" @@ -96,5 +96,4 @@ EOF -w printf '\nCleanup: bash %s/cleanup.sh\n' "$SCRIPT_DIR" diff --git a/ui/main_parallel.sh b/ui/main_parallel.sh index 0309fbde..e256ced6 100755 --- a/ui/main_parallel.sh +++ b/ui/main_parallel.sh @@ -10,16 +10,17 @@ # overhead. CLI is timed wall-clock around just the dvs add call. # Results are written to a log file in the ui/ directory. -set -euox pipefail +set -eu trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR -echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" # shellcheck source=ui/helpers.sh source "${SCRIPT_DIR}/helpers.sh" +set -xo pipefail + +say "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." N_FILES="${1:-20}" FILE_SIZE="${2:-50M}" diff --git a/ui/main_progress.sh b/ui/main_progress.sh index ab112fec..161485b4 100755 --- a/ui/main_progress.sh +++ b/ui/main_progress.sh @@ -5,15 +5,16 @@ # 1. 100 x 1MB files (many small files) # 2. 1 x 500MB file (single large file) -set -euox pipefail +set -eu trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR -echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" source "${SCRIPT_DIR}/helpers.sh" +set -xo pipefail + +say "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." # ── Scenario 1: 100 x 1MB files ──────────────────────────────────── diff --git a/ui/main_recursive.sh b/ui/main_recursive.sh new file mode 100755 index 00000000..c85cc177 --- /dev/null +++ b/ui/main_recursive.sh @@ -0,0 +1,551 @@ +#!/usr/bin/env bash + +# Demonstrate recursive vs. non-recursive behaviour across dvs add, dvs get, +# and dvs status — both CLI and R package — over a nested file tree. + +set -eu +trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + +# shellcheck source=ui/helpers.sh +source "${SCRIPT_DIR}/helpers.sh" +set -xo pipefail + +say "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." + +# ── Setup: two repos (CLI + R), each with its own storage ── +# All four dirs share one mktemp-generated suffix so it's obvious they belong +# to the same run: dvs_repo_cli_AbC / dvs_storage_cli_AbC / dvs_repo_rpkg_AbC / +# dvs_storage_rpkg_AbC. + +DVS_REPO_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)" +RUN_SUFFIX="${DVS_REPO_CLI##*_}" +DVS_STORAGE_CLI="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX" +DVS_REPO_RPKG="$SCRIPT_DIR/dvs_repo_rpkg_$RUN_SUFFIX" +DVS_STORAGE_RPKG="$SCRIPT_DIR/dvs_storage_rpkg_$RUN_SUFFIX" +mkdir "$DVS_STORAGE_CLI" "$DVS_REPO_RPKG" "$DVS_STORAGE_RPKG" + +# ── Init ── + +cd "$DVS_REPO_CLI" +dvs init "$DVS_STORAGE_CLI" + +cd "$DVS_REPO_RPKG" +print_eval_rscript <&2' ERR -echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" # shellcheck source=ui/helpers.sh source "${SCRIPT_DIR}/helpers.sh" +set -xo pipefail + +say "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." # ── Setup: two repos (CLI + R), nested directory structure ──