diff --git a/justfile b/justfile index d4f474dc..5badde73 100644 --- a/justfile +++ b/justfile @@ -164,3 +164,58 @@ rpkg-bindgen-cli: ci: fmt-check clippy check-std-fs test @echo "All CI checks passed!" + +# ============================================================================ +# UI test outputs (publish to alx project in .alx/config.yaml) +# ============================================================================ + +# Names correspond to ui/main_.sh (and ui/main.sh for "main"). +# Each name has matching ui/output/ui-.html and alx topic ui-. +ui_names := "main status progress parallel recursive" + +# Run all ui/main*.sh scripts and capture each log into /tmp/ui-.log +ui-run: + #!/usr/bin/env bash + set -uo pipefail + bash ui/cleanup.sh >/dev/null 2>&1 || true + for name in {{ui_names}}; do + if [[ "$name" == "main" ]]; then script="ui/main.sh"; else script="ui/main_${name}.sh"; fi + log="/tmp/ui-${name}.log" + echo "Running ${script} → ${log}" + bash "$script" > "$log" 2>&1 || echo " WARN: ${script} exited nonzero (log captured anyway)" + done + +# Wrap each /tmp/ui-.log into ui/output/ui-.html +ui-render: + #!/usr/bin/env bash + set -euo pipefail + mkdir -p ui/output + for name in {{ui_names}}; do + log="/tmp/ui-${name}.log" + out="ui/output/ui-${name}.html" + if [[ ! -f "$log" ]]; then echo "skipping ${name} (no ${log})"; continue; fi + { + printf '\nui-%s\n' "$name" + printf '\n' + printf '

ui-%s

\n
' "$name"
+            sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g' "$log"  # HTML-escape the log; & goes first so the entities added for < and > are not escaped again
+            printf '
\n' + } > "$out" + echo "wrote ${out}" + done + +# Publish ui/output/ui-.html to alx with each script as source attachment +ui-publish-only: + #!/usr/bin/env bash + set -uo pipefail + for name in {{ui_names}}; do + if [[ "$name" == "main" ]]; then script="ui/main.sh"; else script="ui/main_${name}.sh"; fi + out="ui/output/ui-${name}.html" + if [[ ! -f "$out" ]]; then echo "skipping ${name} (no ${out})"; continue; fi + echo "--- alx publish ui-${name} ---" + alx publish "$out" -S "$script" -S ui/helpers.sh -t "ui-${name}" \ + --overwrite --skip-warnings --no-prompt 2>&1 | tail -8 + done + +# Full pipeline: run scripts → render HTML → publish to alx +ui-publish: ui-run ui-render ui-publish-only diff --git a/ui/helpers.sh b/ui/helpers.sh index 623f3015..5336b70f 100755 --- a/ui/helpers.sh +++ b/ui/helpers.sh @@ -1,5 +1,12 @@ #!/usr/bin/env bash +# Use '> ' as the xtrace prefix instead of bash's default '+ '. +PS4='> ' + +# Echo without leaving an xtrace line for the echo itself. +# Use for blank-line separators and "=== section ===" headers. +say() { { set +x; } 2>/dev/null; echo "$@"; { set -x; } 2>/dev/null; } + print_eval_rscript() { tee /dev/stderr | Rscript - } @@ -64,6 +71,20 @@ mkfiles() { set -x } +# Create a single random-content file at PATH (default 1KB). +# Wraps `head -c … /dev/urandom > path` so the path shows up in `set -x` traces +# rather than ten identical "head -c 1024 /dev/urandom" lines. 
+mkrandfile() { + { set +x; } 2>/dev/null + local path="${1:?path is required}" + local size="${2:-1K}" + local bytes + bytes="$(size_to_bytes "$size")" + mkdir -p "$(dirname "$path")" + head -c "$bytes" /dev/urandom > "$path" + set -x +} + resolve_dataset_archetype() { local archetype="${1:?dataset archetype is required}" local candidate diff --git a/ui/main.sh b/ui/main.sh index aef2dcca..8411f50e 100755 --- a/ui/main.sh +++ b/ui/main.sh @@ -5,6 +5,8 @@ set -euox pipefail # prints the line in script that errors trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR +echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" @@ -12,7 +14,9 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" source "${SCRIPT_DIR}/helpers.sh" DVS_REPO_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)" -DVS_STORAGE_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)" +RUN_SUFFIX="${DVS_REPO_CLI##*_}" +DVS_STORAGE_CLI="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX" +mkdir "$DVS_STORAGE_CLI" # region: INIT @@ -22,13 +26,13 @@ dvs init "$DVS_STORAGE_CLI" ls -a "$DVS_REPO_CLI" "$DVS_STORAGE_CLI" -DVS_REPO_RPKG="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)" -DVS_STORAGE_RPKG="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)" +DVS_REPO_RPKG="$SCRIPT_DIR/dvs_repo_rpkg_$RUN_SUFFIX" +DVS_STORAGE_RPKG="$SCRIPT_DIR/dvs_storage_rpkg_$RUN_SUFFIX" +mkdir "$DVS_REPO_RPKG" "$DVS_STORAGE_RPKG" cd "$DVS_REPO_RPKG" -# this `tee` prints the R-script being executed -tee /dev/stderr < print(width = Inf) # WORKS # conclusion: the data-frame does not contain the absolute paths even if we give it absolute paths of the files # data_derived_files <- c($(find "$DVS_REPO_RPKG"/data/derived -type f | sed 's/.*/"&"/' | paste -sd, -)) @@ -74,14 +78,10 @@ cd "$DVS_REPO_RPKG" print_eval_rscript < 
print(width = Inf) EOF -# TODO: -# [ ] make tibble a Suggests, and _impl post-fix the dvs_* from Rust stuff -# [ ] truncate the hash - # # Compare dvs.toml (created by init) # diff "${DVS_REPO_CLI}"/dvs.toml "${DVS_REPO_RPKG}"/dvs.toml @@ -93,6 +93,5 @@ EOF - -# This will delete everything. -# bash ${SCRIPT_DIR}/cleanup.sh +w +printf '\nCleanup: bash %s/cleanup.sh\n' "$SCRIPT_DIR" diff --git a/ui/main_parallel.sh b/ui/main_parallel.sh index 589e4139..0309fbde 100755 --- a/ui/main_parallel.sh +++ b/ui/main_parallel.sh @@ -13,6 +13,8 @@ set -euox pipefail trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR +echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" @@ -31,14 +33,19 @@ printf '\n=== Parallel test: %d files × %s, threads=%s ===\n\n' \ # ── Generate files once ───────────────────────────────────────────── +# All dirs in this run share one mktemp suffix so it's obvious they belong +# together. The R variants R0..R3 use a `_R_` infix to stay distinct while +# keeping the same trailing run-suffix. 
FIXTURES="$(mktemp -d "$SCRIPT_DIR"/dvs_fixture_XXX)" +RUN_SUFFIX="${FIXTURES##*_}" cd "$FIXTURES" mkfiles "$N_FILES" "$FILE_SIZE" data/derived # ── CLI: DVS_NUM_THREADS env var ──────────────────────────────────── -DVS_REPO_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)" -DVS_STORAGE_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)" +DVS_REPO_CLI="$SCRIPT_DIR/dvs_repo_cli_$RUN_SUFFIX" +DVS_STORAGE_CLI="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX" +mkdir "$DVS_REPO_CLI" "$DVS_STORAGE_CLI" cd "$DVS_REPO_CLI" dvs init "$DVS_STORAGE_CLI" cp -r "$FIXTURES/data" "$DVS_REPO_CLI/data" @@ -49,14 +56,16 @@ CLI_ELAPSED="$( { time DVS_NUM_THREADS="$THREADS" dvs add data/derived/file_*.bi # ── R: all three methods in one process ───────────────────────────── -DVS_REPO_R0="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)" -DVS_STORAGE_R0="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)" -DVS_REPO_R1="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)" -DVS_STORAGE_R1="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)" -DVS_REPO_R2="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)" -DVS_STORAGE_R2="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)" -DVS_REPO_R3="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)" -DVS_STORAGE_R3="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)" +DVS_REPO_R0="$SCRIPT_DIR/dvs_repo_rpkg_R0_$RUN_SUFFIX" +DVS_STORAGE_R0="$SCRIPT_DIR/dvs_storage_rpkg_R0_$RUN_SUFFIX" +DVS_REPO_R1="$SCRIPT_DIR/dvs_repo_rpkg_R1_$RUN_SUFFIX" +DVS_STORAGE_R1="$SCRIPT_DIR/dvs_storage_rpkg_R1_$RUN_SUFFIX" +DVS_REPO_R2="$SCRIPT_DIR/dvs_repo_rpkg_R2_$RUN_SUFFIX" +DVS_STORAGE_R2="$SCRIPT_DIR/dvs_storage_rpkg_R2_$RUN_SUFFIX" +DVS_REPO_R3="$SCRIPT_DIR/dvs_repo_rpkg_R3_$RUN_SUFFIX" +DVS_STORAGE_R3="$SCRIPT_DIR/dvs_storage_rpkg_R3_$RUN_SUFFIX" +mkdir "$DVS_REPO_R0" "$DVS_STORAGE_R0" "$DVS_REPO_R1" "$DVS_STORAGE_R1" \ + "$DVS_REPO_R2" "$DVS_STORAGE_R2" "$DVS_REPO_R3" "$DVS_STORAGE_R3" for d in "$DVS_REPO_R0" "$DVS_REPO_R1" "$DVS_REPO_R2" "$DVS_REPO_R3"; do cd "$d" && cp -r "$FIXTURES/data" "$d/data" @@ 
-110,7 +119,7 @@ dvs_init("$DVS_STORAGE_R3") Sys.setenv(DVS_NUM_THREADS = "${THREADS}") start <- proc.time() -dvs_add(glob = "data/derived/*") +dvs_add(glob = "data/derived/*") |> print(width = Inf) r_env_elapsed <- (proc.time() - start)[["elapsed"]] Sys.unsetenv("DVS_NUM_THREADS") @@ -175,3 +184,5 @@ done if [ "$_anomalies" -gt 0 ]; then printf 'NOTE: %d anomaly detected — likely OS cache pressure, not a real regression\n' "$_anomalies" >&2 fi + +printf '\nCleanup: bash %s/cleanup.sh\n' "$SCRIPT_DIR" diff --git a/ui/main_progress.sh b/ui/main_progress.sh index 20f1834f..ab112fec 100755 --- a/ui/main_progress.sh +++ b/ui/main_progress.sh @@ -8,6 +8,8 @@ set -euox pipefail trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR +echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" @@ -19,13 +21,19 @@ printf '\n\n========================================\n' printf ' SCENARIO 1: 100 x 1MB files\n' printf '========================================\n\n' +# All scenario-1 dirs share one mktemp suffix so it's obvious they belong +# together: dvs_fixture_AbC, dvs_repo_cli_AbC, dvs_storage_cli_AbC, etc. 
FIXTURES_1="$(mktemp -d "$SCRIPT_DIR"/dvs_fixture_XXX)" +RUN_SUFFIX_1="${FIXTURES_1##*_}" +DVS_REPO_CLI_1="$SCRIPT_DIR/dvs_repo_cli_$RUN_SUFFIX_1" +DVS_STORAGE_CLI_1="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX_1" +DVS_REPO_R_1="$SCRIPT_DIR/dvs_repo_rpkg_$RUN_SUFFIX_1" +DVS_STORAGE_R_1="$SCRIPT_DIR/dvs_storage_rpkg_$RUN_SUFFIX_1" +mkdir "$DVS_REPO_CLI_1" "$DVS_STORAGE_CLI_1" "$DVS_REPO_R_1" "$DVS_STORAGE_R_1" cd "$FIXTURES_1" mkfiles 100 1M data/derived printf '\n--- CLI ADD: 100 x 1MB ---\n' -DVS_REPO_CLI_1="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)" -DVS_STORAGE_CLI_1="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)" cd "$DVS_REPO_CLI_1" dvs init "$DVS_STORAGE_CLI_1" cp -r "$FIXTURES_1/data" "$DVS_REPO_CLI_1/data" @@ -35,8 +43,6 @@ printf '\n--- CLI GET: 100 x 1MB ---\n' time dvs get data/derived/* printf '\n--- RPKG ADD: 100 x 1MB ---\n' -DVS_REPO_R_1="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)" -DVS_STORAGE_R_1="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)" cd "$DVS_REPO_R_1" cp -r "$FIXTURES_1/data" "$DVS_REPO_R_1/data" @@ -62,13 +68,18 @@ printf '\n\n========================================\n' printf ' SCENARIO 2: 1 x 500MB file\n' printf '========================================\n\n' +# Scenario-2 dirs share their own suffix, distinct from scenario-1's. 
FIXTURES_2="$(mktemp -d "$SCRIPT_DIR"/dvs_fixture_XXX)" +RUN_SUFFIX_2="${FIXTURES_2##*_}" +DVS_REPO_CLI_2="$SCRIPT_DIR/dvs_repo_cli_$RUN_SUFFIX_2" +DVS_STORAGE_CLI_2="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX_2" +DVS_REPO_R_2="$SCRIPT_DIR/dvs_repo_rpkg_$RUN_SUFFIX_2" +DVS_STORAGE_R_2="$SCRIPT_DIR/dvs_storage_rpkg_$RUN_SUFFIX_2" +mkdir "$DVS_REPO_CLI_2" "$DVS_STORAGE_CLI_2" "$DVS_REPO_R_2" "$DVS_STORAGE_R_2" cd "$FIXTURES_2" mkfiles 1 500M data/derived printf '\n--- CLI ADD: 1 x 500MB ---\n' -DVS_REPO_CLI_2="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)" -DVS_STORAGE_CLI_2="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)" cd "$DVS_REPO_CLI_2" dvs init "$DVS_STORAGE_CLI_2" cp -r "$FIXTURES_2/data" "$DVS_REPO_CLI_2/data" @@ -78,8 +89,6 @@ printf '\n--- CLI GET: 1 x 500MB ---\n' time dvs get data/derived/file_1.bin printf '\n--- RPKG ADD: 1 x 500MB ---\n' -DVS_REPO_R_2="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)" -DVS_STORAGE_R_2="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)" cd "$DVS_REPO_R_2" cp -r "$FIXTURES_2/data" "$DVS_REPO_R_2/data" diff --git a/ui/main_status.sh b/ui/main_status.sh index e61f198c..457551e6 100755 --- a/ui/main_status.sh +++ b/ui/main_status.sh @@ -6,6 +6,8 @@ set -euox pipefail trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR +echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch." + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" @@ -15,10 +17,11 @@ source "${SCRIPT_DIR}/helpers.sh" # ── Setup: two repos (CLI + R), nested directory structure ── DVS_REPO_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)" -DVS_STORAGE_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)" - -DVS_REPO_RPKG="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)" -DVS_STORAGE_RPKG="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)" +RUN_SUFFIX="${DVS_REPO_CLI##*_}" +DVS_STORAGE_CLI="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX" +DVS_REPO_RPKG="$SCRIPT_DIR/dvs_repo_rpkg_$RUN_SUFFIX" +DVS_STORAGE_RPKG="$SCRIPT_DIR/dvs_storage_rpkg_$RUN_SUFFIX" +mkdir "$DVS_STORAGE_CLI" "$DVS_REPO_RPKG" "$DVS_STORAGE_RPKG" # ── Init ── @@ -48,73 +51,83 @@ mkfiles 2 1K models/v1 print_eval_rscript < print(width = Inf) +dvs_add(glob = "models/**/*.bin") |> print(width = Inf) EOF # ── 1. Status: all files (default) ── -echo "=== CLI: dvs status (all files) ===" +say +say "=== CLI: dvs status (all files) ===" cd "$DVS_REPO_CLI" dvs status -echo "=== R: dvs_status() (all files) ===" +say +say "=== R: dvs_status() (all files) ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF # ── 2. Status: filter to a single file ── -echo "=== CLI: dvs status data/raw/file_1.bin ===" +say +say "=== CLI: dvs status data/raw/file_1.bin ===" cd "$DVS_REPO_CLI" dvs status data/raw/file_1.bin -echo "=== R: dvs_status(paths = 'data/raw/file_1.bin') ===" +say +say "=== R: dvs_status(paths = 'data/raw/file_1.bin') ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF # ── 3. Status: filter to a directory (non-recursive) ── -echo "=== CLI: dvs status data/ (non-recursive — direct children only) ===" +say +say "=== CLI: dvs status data/ (non-recursive — direct children only) ===" cd "$DVS_REPO_CLI" dvs status data/ -echo "=== R: dvs_status(paths = 'data/') (non-recursive) ===" +say +say "=== R: dvs_status(paths = 'data/') (non-recursive) ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF # ── 4. 
Status: filter to a directory (recursive) ── -echo "=== CLI: dvs status -r data/ (recursive — all descendants) ===" +say +say "=== CLI: dvs status -r data/ (recursive — all descendants) ===" cd "$DVS_REPO_CLI" dvs status -r data/ -echo "=== R: dvs_status(paths = 'data/', recursive = TRUE) ===" +say +say "=== R: dvs_status(paths = 'data/', recursive = TRUE) ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF # ── 5. Status: filter with status flags ── -echo "=== CLI: dvs status --absent (show only absent files) ===" +say +say "=== CLI: dvs status --absent (show only absent files) ===" cd "$DVS_REPO_CLI" dvs status --absent -echo "=== R: dvs_status(status = 'absent') ===" +say +say "=== R: dvs_status(status = 'absent') ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF # ── 6. Retrieve some files, then show mixed status ── @@ -122,64 +135,75 @@ EOF cd "$DVS_REPO_CLI" dvs get data/raw/file_1.bin -echo "=== CLI: dvs status (mixed: 1 current, rest absent) ===" +say +say "=== CLI: dvs status (mixed: 1 current, rest absent) ===" dvs status cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF -echo "=== R: dvs_status() (mixed: 1 current, rest absent) ===" +say +say "=== R: dvs_status() (mixed: 1 current, rest absent) ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF # ── 7. 
Combined: path filter + status flag ── -echo "=== CLI: dvs status -r data/ --current ===" +say +say "=== CLI: dvs status -r data/ --current ===" cd "$DVS_REPO_CLI" dvs status -r data/ --current -echo "=== R: dvs_status(paths = 'data/', recursive = TRUE, status = 'current') ===" +say +say "=== R: dvs_status(paths = 'data/', recursive = TRUE, status = 'current') ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF -echo "=== CLI: dvs status -r data/ --absent ===" +say +say "=== CLI: dvs status -r data/ --absent ===" cd "$DVS_REPO_CLI" dvs status -r data/ --absent -echo "=== R: dvs_status(paths = 'data/', recursive = TRUE, status = 'absent') ===" +say +say "=== R: dvs_status(paths = 'data/', recursive = TRUE, status = 'absent') ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF # ── 8. Status: multiple status values (several.ok) ── -echo "=== R: dvs_status(status = c('current', 'absent')) ===" +say +say "=== R: dvs_status(status = c('current', 'absent')) ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF # ── 9. Multiple paths ── -echo "=== CLI: dvs status data/raw/file_1.bin models/ ===" +say +say "=== CLI: dvs status data/raw/file_1.bin models/ ===" cd "$DVS_REPO_CLI" dvs status data/raw/file_1.bin models/ -echo "=== R: dvs_status(paths = c('data/raw/file_1.bin', 'models/')) ===" +say +say "=== R: dvs_status(paths = c('data/raw/file_1.bin', 'models/')) ===" cd "$DVS_REPO_RPKG" print_eval_rscript < print(width = Inf) EOF + +printf '\nCleanup: bash %s/cleanup.sh\n' "$SCRIPT_DIR"