Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,58 @@ rpkg-bindgen-cli:

# Aggregate CI recipe: depends on fmt-check, clippy, check-std-fs and test,
# then prints a confirmation line (the leading `@` keeps just from echoing the command).
ci: fmt-check clippy check-std-fs test
@echo "All CI checks passed!"

# ============================================================================
# UI test outputs (publish to alx project in .alx/config.yaml)
# ============================================================================

# Space-separated list of UI scenario names. The ui-* recipes interpolate it
# unquoted into a bash `for` loop, so each word becomes one iteration.
# Names correspond to ui/main_<NAME>.sh (and ui/main.sh for "main").
# Each name has matching ui/output/ui-<NAME>.html and alx topic ui-<NAME>.
ui_names := "main status progress parallel recursive"

# Run every ui/main*.sh script, capturing stdout+stderr into /tmp/ui-<NAME>.log
ui-run:
    #!/usr/bin/env bash
    # No `-e` on purpose: one failing script must not stop the remaining ones.
    set -uo pipefail
    # Best-effort cleanup of earlier runs; its output and exit status are ignored.
    bash ui/cleanup.sh >/dev/null 2>&1 || true
    for ui in {{ui_names}}; do
        # "main" maps to ui/main.sh; every other name maps to ui/main_<NAME>.sh.
        case "$ui" in
            main) runner="ui/main.sh" ;;
            *)    runner="ui/main_${ui}.sh" ;;
        esac
        capture="/tmp/ui-${ui}.log"
        echo "Running ${runner} → ${capture}"
        if ! bash "$runner" > "$capture" 2>&1; then
            echo " WARN: ${runner} exited nonzero (log captured anyway)"
        fi
    done

# Convert each /tmp/ui-<NAME>.log into a standalone page at ui/output/ui-<NAME>.html
ui-render:
    #!/usr/bin/env bash
    set -euo pipefail
    mkdir -p ui/output
    for ui in {{ui_names}}; do
        src="/tmp/ui-${ui}.log"
        page="ui/output/ui-${ui}.html"
        # A missing log is not an error: report it and move on to the next name.
        [[ -f "$src" ]] || { echo "skipping ${ui} (no ${src})"; continue; }
        # Everything printed inside the group lands in the page: header, styles,
        # the HTML-escaped log inside <pre>, then the closing tags.
        {
            printf '<!DOCTYPE html>\n<html><head><meta charset="utf-8"><title>ui-%s</title>\n' "$ui"
            printf '<style>body{font-family:ui-monospace,Menlo,Consolas,monospace;background:#1e1e1e;color:#e6e6e6;padding:1rem;margin:0}pre{white-space:pre-wrap;word-wrap:break-word;font-size:20px;line-height:1.45}h1{color:#9ecbff;font-family:system-ui,sans-serif;font-size:1.5rem;margin:0 0 1rem}</style></head><body>\n'
            printf '<h1>ui-%s</h1>\n<pre>' "$ui"
            # `&` is escaped first so the entities produced for `<` and `>` stay intact.
            sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g' "$src"
            printf '</pre></body></html>\n'
        } > "$page"
        echo "wrote ${page}"
    done

# Publish ui/output/ui-<NAME>.html to alx with each script as source attachment
ui-publish-only:
    #!/usr/bin/env bash
    # No `-e`: every name is attempted even if an earlier publish fails.
    # `pipefail` makes the `alx publish | tail` pipeline report alx's failure
    # instead of tail's success, so the check below can see it.
    set -uo pipefail
    failed=0
    for name in {{ui_names}}; do
        # "main" maps to ui/main.sh; every other name maps to ui/main_<NAME>.sh.
        if [[ "$name" == "main" ]]; then script="ui/main.sh"; else script="ui/main_${name}.sh"; fi
        out="ui/output/ui-${name}.html"
        # Nothing rendered for this name: skip it rather than fail.
        if [[ ! -f "$out" ]]; then echo "skipping ${name} (no ${out})"; continue; fi
        echo "--- alx publish ui-${name} ---"
        # Only the last 8 lines of alx output are shown to keep the log short.
        if ! alx publish "$out" -S "$script" -S ui/helpers.sh -t "ui-${name}" \
            --overwrite --skip-warnings --no-prompt 2>&1 | tail -8; then
            echo " WARN: alx publish ui-${name} failed" >&2
            failed=$((failed + 1))
        fi
    done
    # Surface failures in the exit status instead of silently reporting success.
    if (( failed > 0 )); then
        echo "ERROR: ${failed} publish(es) failed" >&2
        exit 1
    fi

# No body of its own: all work happens in the ui-run, ui-render and
# ui-publish-only dependencies named on the recipe line.
# Full pipeline: run scripts → render HTML → publish to alx
ui-publish: ui-run ui-render ui-publish-only
21 changes: 21 additions & 0 deletions ui/helpers.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
#!/usr/bin/env bash

# Use '> ' as the xtrace prefix instead of bash's default '+ '.
PS4='> '

# Echo without leaving an xtrace line for the echo itself.
# Use for blank-line separators and "=== section ===" headers.
#
# Arguments: passed through to `echo` unchanged.
# The caller's xtrace setting is restored on exit: tracing comes back on only
# if it was on when say was called.
say() {
  # The group's stderr goes to /dev/null so neither the bookkeeping nor the
  # `set +x` itself shows up in the trace.
  { local _say_xtrace=0; case "$-" in *x*) _say_xtrace=1 ;; esac; set +x; } 2>/dev/null
  echo "$@"
  { if [[ "$_say_xtrace" == 1 ]]; then set -x; fi; } 2>/dev/null
}

# Run an R script supplied on stdin (callers feed it a here-document).
# `tee /dev/stderr` copies the script text to stderr so the code being executed
# is visible in the output, while the same text is piped on to `Rscript -`.
print_eval_rscript() {
tee /dev/stderr | Rscript -
}
Expand Down Expand Up @@ -64,6 +71,20 @@ mkfiles() {
set -x
}

# Create a single random-content file at PATH (default 1KB).
# Wraps `head -c … /dev/urandom > path` so the path shows up in `set -x` traces
# rather than ten identical "head -c 1024 /dev/urandom" lines.
#
# Arguments:
#   $1  path of the file to create (required); parent directories are created.
#   $2  size, passed to size_to_bytes for conversion (default "1K").
# The caller's xtrace setting is restored on exit: tracing comes back on only
# if it was on when mkrandfile was called.
mkrandfile() {
  # Record whether tracing is on, then silence it; stderr of the group is
  # discarded so this bookkeeping leaves no trace lines of its own.
  { local _mkrandfile_xtrace=0; case "$-" in *x*) _mkrandfile_xtrace=1 ;; esac; set +x; } 2>/dev/null
  local path="${1:?path is required}"
  local size="${2:-1K}"
  local bytes
  # Assigned separately from `local` so a size_to_bytes failure is not masked
  # by the exit status of `local`.
  bytes="$(size_to_bytes "$size")"
  mkdir -p "$(dirname "$path")"
  head -c "$bytes" /dev/urandom > "$path"
  if [[ "$_mkrandfile_xtrace" == 1 ]]; then set -x; fi
}

resolve_dataset_archetype() {
local archetype="${1:?dataset archetype is required}"
local candidate
Expand Down
29 changes: 14 additions & 15 deletions ui/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@ set -euox pipefail
# prints the line in script that errors
trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR

echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch."

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

# shellcheck source=ui/helpers.sh
source "${SCRIPT_DIR}/helpers.sh"

DVS_REPO_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)"
DVS_STORAGE_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)"
RUN_SUFFIX="${DVS_REPO_CLI##*_}"
DVS_STORAGE_CLI="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX"
mkdir "$DVS_STORAGE_CLI"

# region: INIT

Expand All @@ -22,13 +26,13 @@ dvs init "$DVS_STORAGE_CLI"

ls -a "$DVS_REPO_CLI" "$DVS_STORAGE_CLI"

DVS_REPO_RPKG="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)"
DVS_STORAGE_RPKG="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)"
DVS_REPO_RPKG="$SCRIPT_DIR/dvs_repo_rpkg_$RUN_SUFFIX"
DVS_STORAGE_RPKG="$SCRIPT_DIR/dvs_storage_rpkg_$RUN_SUFFIX"
mkdir "$DVS_REPO_RPKG" "$DVS_STORAGE_RPKG"

cd "$DVS_REPO_RPKG"

# this `tee` prints the R-script being executed
tee /dev/stderr <<EOF | Rscript -
print_eval_rscript <<EOF
library(dvs)

dvs_init("$DVS_STORAGE_RPKG")
Expand All @@ -48,12 +52,12 @@ cd "$DVS_REPO_RPKG"
mkfiles 5 10M data/derived
mkdatasetfiles 5 10M data/derived chickweight

tee /dev/stderr <<EOF | Rscript -
print_eval_rscript <<EOF
library(dvs)

# dvs_add("data/derived") # ERROR

dvs_add("$DVS_REPO_RPKG/data/derived", glob = "*") # WORKS
dvs_add("$DVS_REPO_RPKG/data/derived", glob = "*") |> print(width = Inf) # WORKS

# conclusion: the data-frame does not contain the absolute paths even if we give it absolute paths of the files
# data_derived_files <- c($(find "$DVS_REPO_RPKG"/data/derived -type f | sed 's/.*/"&"/' | paste -sd, -))
Expand All @@ -74,14 +78,10 @@ cd "$DVS_REPO_RPKG"
print_eval_rscript <<EOF
library(dvs)

dvs_status()
dvs_status() |> print(width = Inf)

EOF

# TODO:
# [ ] make tibble a Suggests, and _impl post-fix the dvs_* from Rust stuff
# [ ] truncate the hash

# # Compare dvs.toml (created by init)
# diff "${DVS_REPO_CLI}"/dvs.toml "${DVS_REPO_RPKG}"/dvs.toml

Expand All @@ -93,6 +93,5 @@ EOF




# This will delete everything.
# bash ${SCRIPT_DIR}/cleanup.sh
w
printf '\nCleanup: bash %s/cleanup.sh\n' "$SCRIPT_DIR"
33 changes: 22 additions & 11 deletions ui/main_parallel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
set -euox pipefail
trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR

echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch."

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

Expand All @@ -31,14 +33,19 @@ printf '\n=== Parallel test: %d files × %s, threads=%s ===\n\n' \

# ── Generate files once ─────────────────────────────────────────────

# All dirs in this run share one mktemp suffix so it's obvious they belong
# together. The R variants R0..R3 use a `_R<n>_` infix to stay distinct while
# keeping the same trailing run-suffix.
FIXTURES="$(mktemp -d "$SCRIPT_DIR"/dvs_fixture_XXX)"
RUN_SUFFIX="${FIXTURES##*_}"
cd "$FIXTURES"
mkfiles "$N_FILES" "$FILE_SIZE" data/derived

# ── CLI: DVS_NUM_THREADS env var ────────────────────────────────────

DVS_REPO_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)"
DVS_STORAGE_CLI="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)"
DVS_REPO_CLI="$SCRIPT_DIR/dvs_repo_cli_$RUN_SUFFIX"
DVS_STORAGE_CLI="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX"
mkdir "$DVS_REPO_CLI" "$DVS_STORAGE_CLI"
cd "$DVS_REPO_CLI"
dvs init "$DVS_STORAGE_CLI"
cp -r "$FIXTURES/data" "$DVS_REPO_CLI/data"
Expand All @@ -49,14 +56,16 @@ CLI_ELAPSED="$( { time DVS_NUM_THREADS="$THREADS" dvs add data/derived/file_*.bi

# ── R: all three methods in one process ─────────────────────────────

DVS_REPO_R0="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)"
DVS_STORAGE_R0="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)"
DVS_REPO_R1="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)"
DVS_STORAGE_R1="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)"
DVS_REPO_R2="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)"
DVS_STORAGE_R2="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)"
DVS_REPO_R3="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)"
DVS_STORAGE_R3="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)"
DVS_REPO_R0="$SCRIPT_DIR/dvs_repo_rpkg_R0_$RUN_SUFFIX"
DVS_STORAGE_R0="$SCRIPT_DIR/dvs_storage_rpkg_R0_$RUN_SUFFIX"
DVS_REPO_R1="$SCRIPT_DIR/dvs_repo_rpkg_R1_$RUN_SUFFIX"
DVS_STORAGE_R1="$SCRIPT_DIR/dvs_storage_rpkg_R1_$RUN_SUFFIX"
DVS_REPO_R2="$SCRIPT_DIR/dvs_repo_rpkg_R2_$RUN_SUFFIX"
DVS_STORAGE_R2="$SCRIPT_DIR/dvs_storage_rpkg_R2_$RUN_SUFFIX"
DVS_REPO_R3="$SCRIPT_DIR/dvs_repo_rpkg_R3_$RUN_SUFFIX"
DVS_STORAGE_R3="$SCRIPT_DIR/dvs_storage_rpkg_R3_$RUN_SUFFIX"
mkdir "$DVS_REPO_R0" "$DVS_STORAGE_R0" "$DVS_REPO_R1" "$DVS_STORAGE_R1" \
"$DVS_REPO_R2" "$DVS_STORAGE_R2" "$DVS_REPO_R3" "$DVS_STORAGE_R3"

for d in "$DVS_REPO_R0" "$DVS_REPO_R1" "$DVS_REPO_R2" "$DVS_REPO_R3"; do
cd "$d" && cp -r "$FIXTURES/data" "$d/data"
Expand Down Expand Up @@ -110,7 +119,7 @@ dvs_init("$DVS_STORAGE_R3")
Sys.setenv(DVS_NUM_THREADS = "${THREADS}")

start <- proc.time()
dvs_add(glob = "data/derived/*")
dvs_add(glob = "data/derived/*") |> print(width = Inf)
r_env_elapsed <- (proc.time() - start)[["elapsed"]]

Sys.unsetenv("DVS_NUM_THREADS")
Expand Down Expand Up @@ -175,3 +184,5 @@ done
if [ "$_anomalies" -gt 0 ]; then
printf 'NOTE: %d anomaly detected — likely OS cache pressure, not a real regression\n' "$_anomalies" >&2
fi

printf '\nCleanup: bash %s/cleanup.sh\n' "$SCRIPT_DIR"
25 changes: 17 additions & 8 deletions ui/main_progress.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
set -euox pipefail
trap 'printf "ERROR at %s:%d\n" "${BASH_SOURCE[0]}" "$LINENO" >&2' ERR

echo "NOTE: \`just install-all\` should have been called prior to this so the dvs CLI binary on PATH and the installed dvs R package both reflect the current branch."

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

Expand All @@ -19,13 +21,19 @@ printf '\n\n========================================\n'
printf ' SCENARIO 1: 100 x 1MB files\n'
printf '========================================\n\n'

# All scenario-1 dirs share one mktemp suffix so it's obvious they belong
# together: dvs_fixture_AbC, dvs_repo_cli_AbC, dvs_storage_cli_AbC, etc.
FIXTURES_1="$(mktemp -d "$SCRIPT_DIR"/dvs_fixture_XXX)"
RUN_SUFFIX_1="${FIXTURES_1##*_}"
DVS_REPO_CLI_1="$SCRIPT_DIR/dvs_repo_cli_$RUN_SUFFIX_1"
DVS_STORAGE_CLI_1="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX_1"
DVS_REPO_R_1="$SCRIPT_DIR/dvs_repo_rpkg_$RUN_SUFFIX_1"
DVS_STORAGE_R_1="$SCRIPT_DIR/dvs_storage_rpkg_$RUN_SUFFIX_1"
mkdir "$DVS_REPO_CLI_1" "$DVS_STORAGE_CLI_1" "$DVS_REPO_R_1" "$DVS_STORAGE_R_1"
cd "$FIXTURES_1"
mkfiles 100 1M data/derived

printf '\n--- CLI ADD: 100 x 1MB ---\n'
DVS_REPO_CLI_1="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)"
DVS_STORAGE_CLI_1="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)"
cd "$DVS_REPO_CLI_1"
dvs init "$DVS_STORAGE_CLI_1"
cp -r "$FIXTURES_1/data" "$DVS_REPO_CLI_1/data"
Expand All @@ -35,8 +43,6 @@ printf '\n--- CLI GET: 100 x 1MB ---\n'
time dvs get data/derived/*

printf '\n--- RPKG ADD: 100 x 1MB ---\n'
DVS_REPO_R_1="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)"
DVS_STORAGE_R_1="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)"
cd "$DVS_REPO_R_1"
cp -r "$FIXTURES_1/data" "$DVS_REPO_R_1/data"

Expand All @@ -62,13 +68,18 @@ printf '\n\n========================================\n'
printf ' SCENARIO 2: 1 x 500MB file\n'
printf '========================================\n\n'

# Scenario-2 dirs share their own suffix, distinct from scenario-1's.
FIXTURES_2="$(mktemp -d "$SCRIPT_DIR"/dvs_fixture_XXX)"
RUN_SUFFIX_2="${FIXTURES_2##*_}"
DVS_REPO_CLI_2="$SCRIPT_DIR/dvs_repo_cli_$RUN_SUFFIX_2"
DVS_STORAGE_CLI_2="$SCRIPT_DIR/dvs_storage_cli_$RUN_SUFFIX_2"
DVS_REPO_R_2="$SCRIPT_DIR/dvs_repo_rpkg_$RUN_SUFFIX_2"
DVS_STORAGE_R_2="$SCRIPT_DIR/dvs_storage_rpkg_$RUN_SUFFIX_2"
mkdir "$DVS_REPO_CLI_2" "$DVS_STORAGE_CLI_2" "$DVS_REPO_R_2" "$DVS_STORAGE_R_2"
cd "$FIXTURES_2"
mkfiles 1 500M data/derived

printf '\n--- CLI ADD: 1 x 500MB ---\n'
DVS_REPO_CLI_2="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_cli_XXX)"
DVS_STORAGE_CLI_2="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_cli_XXX)"
cd "$DVS_REPO_CLI_2"
dvs init "$DVS_STORAGE_CLI_2"
cp -r "$FIXTURES_2/data" "$DVS_REPO_CLI_2/data"
Expand All @@ -78,8 +89,6 @@ printf '\n--- CLI GET: 1 x 500MB ---\n'
time dvs get data/derived/file_1.bin

printf '\n--- RPKG ADD: 1 x 500MB ---\n'
DVS_REPO_R_2="$(mktemp -d "$SCRIPT_DIR"/dvs_repo_rpkg_XXX)"
DVS_STORAGE_R_2="$(mktemp -d "$SCRIPT_DIR"/dvs_storage_rpkg_XXX)"
cd "$DVS_REPO_R_2"
cp -r "$FIXTURES_2/data" "$DVS_REPO_R_2/data"

Expand Down
Loading
Loading