From db8e15b1536e55a5818f9af7559da3f9acfff000 Mon Sep 17 00:00:00 2001
From: Rob Patro <rob.patro@gmail.com>
Date: Sat, 21 Feb 2026 00:36:22 -0500
Subject: [PATCH 1/4] Refactor phases 0-7 (allowed paths only)

---
 Cargo.toml                                    |   1 +
 docs/architecture-map.md                      |  77 ++
 scripts/refactor_safety_suite.sh              |   8 +
 scripts/test_simpleaf_local_integration.sh    |  14 +
 src/atac/index.rs                             |  48 +-
 src/atac/process.rs                           | 412 +++++-----
 src/core.rs                                   |   5 +
 src/core/context.rs                           |  54 ++
 src/core/exec.rs                              |  54 ++
 src/core/index_meta.rs                        | 163 ++++
 src/core/io.rs                                |  63 ++
 src/core/runtime.rs                           |  36 +
 src/main.rs                                   |  10 +-
 src/simpleaf_commands/chemistry.rs            | 455 ++++++++---
 src/simpleaf_commands/indexing.rs             | 281 ++++---
 src/simpleaf_commands/quant.rs                | 712 ++++++++--------
 src/simpleaf_commands/workflow.rs             | 155 ++--
 src/utils/workflow_utils.rs                   | 757 ++++++++++++++----
 tests/cli_help_snapshots.rs                   | 116 +++
 tests/cli_smoke.rs                            |  69 ++
 tests/phase1_regressions.rs                   |  59 ++
 tests/snapshots/cli-help/simpleaf___help.txt  |  18 +
 .../cli-help/simpleaf_atac___help.txt         |  13 +
 .../cli-help/simpleaf_atac_index___help.txt   |  26 +
 .../cli-help/simpleaf_atac_process___help.txt |  86 ++
 .../cli-help/simpleaf_chemistry___help.txt    |  16 +
 .../simpleaf_chemistry_add___help.txt         |  28 +
 .../simpleaf_chemistry_clean___help.txt       |   8 +
 .../simpleaf_chemistry_fetch___help.txt       |  11 +
 .../simpleaf_chemistry_lookup___help.txt      |   9 +
 .../simpleaf_chemistry_refresh___help.txt     |   9 +
 .../simpleaf_chemistry_remove___help.txt      |  10 +
 .../cli-help/simpleaf_index___help.txt        |  61 ++
 .../cli-help/simpleaf_inspect___help.txt      |   7 +
 .../cli-help/simpleaf_quant___help.txt        |  78 ++
 .../simpleaf_refresh_prog_info___help.txt     |   7 +
 .../cli-help/simpleaf_set_paths___help.txt    |  11 +
 .../cli-help/simpleaf_workflow___help.txt     |  18 +
 .../cli-help/simpleaf_workflow_get___help.txt |   9 +
 .../simpleaf_workflow_list___help.txt         |   7 +
 .../simpleaf_workflow_patch___help.txt        |  22 +
 .../simpleaf_workflow_refresh___help.txt      |   7 +
 .../cli-help/simpleaf_workflow_run___help.txt |  28 +
 43 files changed, 2971 insertions(+), 1067 deletions(-)
 create mode 100644 docs/architecture-map.md
 create mode 100755 scripts/refactor_safety_suite.sh
 create mode 100755 scripts/test_simpleaf_local_integration.sh
 create mode 100644 src/core.rs
 create mode 100644 src/core/context.rs
 create mode 100644 src/core/exec.rs
 create mode 100644 src/core/index_meta.rs
 create mode 100644 src/core/io.rs
 create mode 100644 src/core/runtime.rs
 create mode 100644 tests/cli_help_snapshots.rs
 create mode 100644 tests/cli_smoke.rs
 create mode 100644 tests/phase1_regressions.rs
 create mode 100644 tests/snapshots/cli-help/simpleaf___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_atac___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_atac_index___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_atac_process___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_chemistry___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_chemistry_add___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_chemistry_clean___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_chemistry_fetch___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_chemistry_lookup___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_chemistry_refresh___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_chemistry_remove___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_index___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_inspect___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_quant___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_refresh_prog_info___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_set_paths___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_workflow___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_workflow_get___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_workflow_list___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_workflow_patch___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_workflow_refresh___help.txt
 create mode 100644 tests/snapshots/cli-help/simpleaf_workflow_run___help.txt

diff --git a/Cargo.toml b/Cargo.toml
index 7744979..abeac5e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,6 +15,7 @@ homepage = "https://simpleaf.readthedocs.io"
 #documentation = "https://fry.readthedocs.io/en/latest/"
 include = [
   "/src/*.rs",
+  "/src/core/*.rs",
   "/src/utils/*.rs",
   "/src/utils/af_utils/*.rs",
   "/src/atac/*.rs",
diff --git a/docs/architecture-map.md b/docs/architecture-map.md
new file mode 100644
index 0000000..5c2e110
--- /dev/null
+++ b/docs/architecture-map.md
@@ -0,0 +1,77 @@
+# simpleaf Architecture Map
+
+This document summarizes the post-refactor module boundaries and where new code should live.
+
+## CLI Entry and Dispatch
+- `src/main.rs`
+  - Parses CLI arguments and dispatches to command handlers.
+  - Should remain thin: argument parsing, high-level routing, and top-level logging only.
+- `src/simpleaf_commands.rs`
+  - Defines CLI option structs and subcommands.
+  - Avoid business logic here; keep it as schema/contract for command-line interfaces.
+
+## Command Implementations
+- `src/simpleaf_commands/indexing.rs`
+  - RNA reference/index orchestration.
+  - Uses staged outputs for readability and testability.
+- `src/simpleaf_commands/quant.rs`
+  - RNA mapping/quantification orchestration.
+  - Stage-level helpers produce typed intermediate outputs.
+- `src/atac/process.rs`
+  - ATAC processing pipeline (map/gpl/sort/macs).
+  - Stage decomposition mirrors RNA command structure.
+- `src/simpleaf_commands/workflow.rs`
+  - Workflow command front-end (run/list/get/patch/refresh).
+  - Handles run-input resolution before delegating planning/execution.
+- `src/simpleaf_commands/chemistry.rs`
+  - Chemistry registry and permit-list lifecycle operations.
+  - Contains registry merge/update logic and dry-run-safe file operations.
+
+## Core Shared Infrastructure
+- `src/core/context.rs`
+  - Runtime context loading/validation (e.g., AF home, tool info).
+- `src/core/exec.rs`
+  - Checked command execution helpers.
+- `src/core/index_meta.rs`
+  - Shared index metadata discovery and parsing.
+- `src/core/runtime.rs`
+  - Runtime helpers (e.g., thread capping).
+- `src/core/io.rs`
+  - JSON file read/write and atomic write helpers.
+
+## Utility Layer
+- `src/utils/workflow_utils.rs`
+  - Workflow manifest validation, planning, command queue building, execution, and logging.
+  - Contains typed workflow execution errors and log schema construction.
+- `src/utils/chem_utils.rs`
+  - Chemistry model definitions and registry parsing helpers.
+- `src/utils/prog_utils.rs`
+  - Program/tool discovery and generic process/IO utilities.
+- `src/utils/af_utils.rs`
+  - Domain utilities for geometry and command-level support logic.
+
+## Testing Layout
+- `tests/cli_help_snapshots.rs`
+  - Guards user-facing CLI help contract.
+- `tests/cli_smoke.rs`
+  - Lightweight argument/dispatch integration checks.
+- `tests/phase1_regressions.rs`
+  - High-value regression checks for prior production bugs.
+- Module-local unit tests in `src/**`
+  - Stage/helper-focused tests close to implementation.
+
+## CI and Validation Paths
+- Strict quality gate:
+  - `cargo fmt --check`
+  - `cargo clippy --all-targets -- -D warnings`
+  - `cargo test --all-targets`
+- Deterministic local integration path:
+  - `scripts/test_simpleaf_local_integration.sh`
+- Full e2e toy dataset path (heavier):
+  - `scripts/test_simpleaf.sh`
+
+## Extension Guidelines
+- Add reusable primitives under `src/core/` before duplicating orchestration logic.
+- Keep command modules focused on stage orchestration and user-facing flow.
+- Prefer typed errors (`thiserror`) where multiple internal failure variants matter.
+- Use atomic writes for registry/log/state files that are critical for resume/recovery flows.
diff --git a/scripts/refactor_safety_suite.sh b/scripts/refactor_safety_suite.sh
new file mode 100755
index 0000000..50e6fb6
--- /dev/null
+++ b/scripts/refactor_safety_suite.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "${ROOT_DIR}"
+
+echo "[refactor-safety] running CLI help snapshot and parse-smoke tests"
+cargo test --test cli_help_snapshots --test cli_smoke
diff --git a/scripts/test_simpleaf_local_integration.sh b/scripts/test_simpleaf_local_integration.sh
new file mode 100755
index 0000000..418e99d
--- /dev/null
+++ b/scripts/test_simpleaf_local_integration.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "${ROOT_DIR}"
+
+echo "[local-integration] running deterministic integration tests"
+
+# These tests avoid remote/networked resources and external large datasets.
+cargo test --test cli_help_snapshots --test cli_smoke --test phase1_regressions
+
+# Targeted workflow and chemistry regressions that cover refactor-critical paths.
+cargo test --all-targets test_execute_commands_external_failure_reports_status_and_stderr
+cargo test --all-targets clean_chemistries_dry_run_is_non_destructive_then_removes_unused
diff --git a/src/atac/index.rs b/src/atac/index.rs
index 92614b7..1f108e2 100644
--- a/src/atac/index.rs
+++ b/src/atac/index.rs
@@ -1,25 +1,21 @@
 use crate::atac::commands::IndexOpts;
-use crate::utils::{
-    prog_utils,
-    prog_utils::{CommandVerbosityLevel, ReqProgs},
-};
+use crate::core::{context, exec, io, runtime};
+use crate::utils::{prog_utils, prog_utils::ReqProgs};
 use anyhow;
-use anyhow::{bail, Context};
+use anyhow::Context;
 use cmd_lib::run_fun;
-use serde_json::{json, Value};
+use serde_json::json;
 use std::path::Path;
 use std::time::Instant;
 use tracing::{info, warn};
 
 pub(crate) fn piscem_index(af_home_path: &Path, opts: &IndexOpts) -> anyhow::Result<()> {
-    // Read the JSON contents of the file as an instance of `User`.
-    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
-    let rp: ReqProgs = serde_json::from_value(v["prog_info"].clone())?;
+    let rp: ReqProgs = context::load_required_programs(af_home_path)?;
 
     let piscem_prog_info = rp
         .piscem
         .as_ref()
-        .expect("piscem program info should be properly set.");
+        .context("piscem program info is missing; please run `simpleaf set-paths`.")?;
 
     match prog_utils::check_version_constraints(
         "piscem",
@@ -60,18 +56,13 @@ pub(crate) fn piscem_index(af_home_path: &Path, opts: &IndexOpts) -> anyhow::Res
         piscem_index_cmd.arg("--overwrite");
     }
 
-    let mut threads = opts.threads;
-    // if the user requested more threads than can be used
-    if let Ok(max_threads_usize) = std::thread::available_parallelism() {
-        let max_threads = max_threads_usize.get() as u32;
-        if threads > max_threads {
-            warn!(
-                "The maximum available parallelism is {}, but {} threads were requested.",
-                max_threads, threads
-            );
-            warn!("setting number of threads to {}", max_threads);
-            threads = max_threads;
-        }
+    let (threads, capped_at) = runtime::cap_threads(opts.threads);
+    if let Some(max_threads) = capped_at {
+        warn!(
+            "The maximum available parallelism is {}, but {} threads were requested.",
+            max_threads, opts.threads
+        );
+        warn!("setting number of threads to {}", max_threads);
     }
 
     piscem_index_cmd
@@ -94,14 +85,9 @@ pub(crate) fn piscem_index(af_home_path: &Path, opts: &IndexOpts) -> anyhow::Res
     info!("piscem build cmd : {}", index_cmd_string);
 
     let index_start = Instant::now();
-    let cres = prog_utils::execute_command(&mut piscem_index_cmd, CommandVerbosityLevel::Quiet)
-        .expect("failed to invoke piscem index command");
+    exec::run_checked(&mut piscem_index_cmd, "atac piscem index command")?;
     let index_duration = index_start.elapsed();
 
-    if !cres.status.success() {
-        bail!("piscem index failed to build succesfully {:?}", cres.status);
-    }
-
     let index_json_file = output_index_dir.join("simpleaf_index.json");
     let index_json = json!({
             "cmd" : index_cmd_string,
@@ -117,11 +103,7 @@ pub(crate) fn piscem_index(af_home_path: &Path, opts: &IndexOpts) -> anyhow::Res
                 "ref" : &opts.input
             }
     });
-    std::fs::write(
-        &index_json_file,
-        serde_json::to_string_pretty(&index_json).unwrap(),
-    )
-    .with_context(|| format!("could not write {}", index_json_file.display()))?;
+    io::write_json_pretty_atomic(&index_json_file, &index_json)?;
 
     Ok(())
 }
diff --git a/src/atac/process.rs b/src/atac/process.rs
index 47b4ec1..118c059 100644
--- a/src/atac/process.rs
+++ b/src/atac/process.rs
@@ -1,20 +1,39 @@
 use crate::atac::commands::ProcessOpts;
+use crate::core::{context, exec, index_meta, io, runtime};
 use crate::utils::chem_utils::get_single_custom_chem_from_file;
 use crate::utils::chem_utils::ExpectedOri;
 use crate::utils::chem_utils::QueryInRegistry;
 use crate::utils::constants::CHEMISTRIES_PATH;
-use crate::utils::{
-    prog_utils,
-    prog_utils::{CommandVerbosityLevel, ReqProgs},
-};
+use crate::utils::{prog_utils, prog_utils::ReqProgs};
 use anyhow;
 use anyhow::{bail, Context};
-use serde_json::{json, Value};
+use serde_json::json;
 use std::io::BufReader;
 use std::path::{Path, PathBuf};
 use std::time::Instant;
 use tracing::{info, warn};
 
+pub(crate) struct MapStageOutput {
+    pub map_output: PathBuf,
+    pub map_duration_secs: f64,
+    pub map_cmd: String,
+}
+
+struct GplStageOutput {
+    gpl_duration_secs: f64,
+    gpl_cmd: String,
+}
+
+struct SortStageOutput {
+    sort_duration_secs: f64,
+    sort_cmd: String,
+}
+
+struct MacsStageOutput {
+    macs_duration_secs: f64,
+    macs_cmd: String,
+}
+
 fn push_advanced_piscem_options(
     piscem_map_cmd: &mut std::process::Command,
     opts: &ProcessOpts,
@@ -63,7 +82,7 @@ fn add_read_args(map_cmd: &mut std::process::Command, opts: &ProcessOpts) -> any
         let reads2 = opts
             .reads2
             .as_ref()
-            .expect("since reads1 files is given, read2 files must be provided.");
+            .context("read2 files must be provided when read1 files are provided.")?;
         let barcode_reads: &Vec<PathBuf> = &opts.barcode_reads;
         if reads1.len() != reads2.len() || reads1.len() != barcode_reads.len() {
             bail!(
@@ -99,9 +118,9 @@ fn add_read_args(map_cmd: &mut std::process::Command, opts: &ProcessOpts) -> any
             .join(",");
         map_cmd.arg("--barcode").arg(bc_str);
     } else {
-        let reads = opts.reads.as_ref().expect(
-            "since reads1 and reads2 are not provided, the single-end reads must be provided.",
-        );
+        let reads = opts.reads.as_ref().context(
+            "single-end reads must be provided when read1/read2 files are not provided.",
+        )?;
         let barcode_reads: &Vec<PathBuf> = &opts.barcode_reads;
         if reads.len() != barcode_reads.len() {
             bail!(
@@ -136,14 +155,12 @@ pub(crate) fn check_progs<P: AsRef<Path>>(
     opts: &ProcessOpts,
 ) -> anyhow::Result<()> {
     let af_home_path = af_home_path.as_ref();
-    // Read the JSON contents of the file as an instance of `User`.
-    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
-    let rp: ReqProgs = serde_json::from_value(v["prog_info"].clone())?;
+    let rp: ReqProgs = context::load_required_programs(af_home_path)?;
 
     let af_prog_info = rp
         .alevin_fry
         .as_ref()
-        .expect("alevin-fry program info should be properly set.");
+        .context("alevin-fry program info is missing; please run `simpleaf set-paths`.")?;
 
     match prog_utils::check_version_constraints(
         "alevin-fry",
@@ -157,7 +174,7 @@ pub(crate) fn check_progs<P: AsRef<Path>>(
     let piscem_prog_info = rp
         .piscem
         .as_ref()
-        .expect("piscem program info should be properly set.");
+        .context("piscem program info is missing; please run `simpleaf set-paths`.")?;
 
     match prog_utils::check_version_constraints(
         "piscem",
@@ -172,7 +189,9 @@ pub(crate) fn check_progs<P: AsRef<Path>>(
         let macs_prog_info = rp
             .macs
             .as_ref()
-            .expect("macs3 program should be properly set if using the `--call-peaks` option");
+            .context(
+                "macs3 program info is missing; please run `simpleaf set-paths` before using `--call-peaks`.",
+            )?;
         match prog_utils::check_version_constraints(
             "macs3",
             ">=3.0.2, <4.0.0",
@@ -188,86 +207,15 @@ pub(crate) fn check_progs<P: AsRef<Path>>(
 
 // NOTE: we assume that check_progs has already been called and so version constraints have
 // already been checked.
-pub(crate) fn map_reads(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
-    // Read the JSON contents of the file as an instance of `User`.
-    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
-    let rp: ReqProgs = serde_json::from_value(v["prog_info"].clone())?;
+pub(crate) fn map_reads(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<MapStageOutput> {
+    let rp: ReqProgs = context::load_required_programs(af_home_path)?;
 
     let piscem_prog_info = rp
         .piscem
         .as_ref()
-        .expect("piscem program info should be properly set.");
-
-    // figure out what type of index we expect
-    let index_base;
-
-    let mut index = opts.index.clone();
-    // If the user built the index using simpleaf, there are
-    // 2 possibilities here:
-    //  1. They are passing in the directory containing the index
-    //  2. They are passing in the prefix stem of the index files
-    // The code below is to check, in both cases, if we can automatically
-    // detect if the index was constructed with simpleaf.
-
-    // If we are in case 1., the passed in path is a directory and
-    // we can check for the simpleaf_index.json file directly,
-    // Otherwise if the path is not a directory, we check if it
-    // ends in piscem_idx (the suffix that simpleaf uses when
-    // making a piscem index). Then we test the directory we
-    // get after stripping off this suffix.
-    let removed_piscem_idx_suffix = if !index.is_dir() && index.ends_with("piscem_idx") {
-        // remove the piscem_idx part
-        index.pop();
-        true
-    } else {
-        false
-    };
+        .context("piscem program info is missing; please run `simpleaf set-paths`.")?;
 
-    let index_json_path = index.join("simpleaf_index.json");
-    match index_json_path.try_exists() {
-        Ok(true) => {
-            // we have the simpleaf_index.json file, so parse it.
-            let index_json_file = std::fs::File::open(&index_json_path).with_context({
-                || format!("Could not open file {}", index_json_path.display())
-            })?;
-
-            let index_json_reader = BufReader::new(&index_json_file);
-            let v: Value = serde_json::from_reader(index_json_reader)?;
-
-            let index_type_str: String = serde_json::from_value(v["index_type"].clone())?;
-
-            // here, set the index type based on what we found as the
-            // value for the `index_type` key.
-            match index_type_str.as_ref() {
-                "piscem" => {
-                    // here, either the user has provided us with just
-                    // the directory containing the piscem index, or
-                    // we have "popped" off the "piscem_idx" suffix, so
-                    // add it (back).
-                    index_base = index.join("piscem_idx");
-                }
-                _ => {
-                    bail!(
-                        "unknown index type {} present in simpleaf_index.json",
-                        index_type_str,
-                    );
-                }
-            }
-        }
-        Ok(false) => {
-            // at this point, we have inferred that simpleaf wasn't
-            // used to construct the index, so fall back to what the user
-            // requested directly.
-            // if we have previously removed the piscem_idx suffix, add it back
-            if removed_piscem_idx_suffix {
-                index.push("piscem_idx");
-            }
-            index_base = index;
-        }
-        Err(e) => {
-            bail!(e);
-        }
-    }
+    let index_base = index_meta::resolve_atac_piscem_index_base(opts.index.clone())?;
 
     let input_files = vec![
         index_base.with_extension("ctab"),
@@ -292,17 +240,13 @@ pub(crate) fn map_reads(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Resu
         .arg(opts.bin_overlap.to_string());
 
     // if the user requested more threads than can be used
-    let mut threads = opts.threads;
-    if let Ok(max_threads_usize) = std::thread::available_parallelism() {
-        let max_threads = max_threads_usize.get() as u32;
-        if threads > max_threads {
-            warn!(
-                "The maximum available parallelism is {}, but {} threads were requested.",
-                max_threads, threads
-            );
-            warn!("setting number of threads to {}", max_threads);
-            threads = max_threads;
-        }
+    let (threads, capped_at) = runtime::cap_threads(opts.threads);
+    if let Some(max_threads) = capped_at {
+        warn!(
+            "The maximum available parallelism is {}, but {} threads were requested.",
+            max_threads, opts.threads
+        );
+        warn!("setting number of threads to {}", max_threads);
     }
 
     // location of output directory, number of threads
@@ -339,19 +283,9 @@ being used by simpleaf"#,
     info!("map command : {}", map_cmd_string);
 
     let map_start = Instant::now();
-    let map_proc_out =
-        prog_utils::execute_command(&mut piscem_map_cmd, CommandVerbosityLevel::Quiet)
-            .expect("could not execute [atac::map]");
+    exec::run_checked(&mut piscem_map_cmd, "[atac::map]")?;
     let map_duration = map_start.elapsed();
-
-    if !map_proc_out.status.success() {
-        bail!(
-            "atac::map failed with exit status {:?}",
-            map_proc_out.status
-        );
-    } else {
-        info!("mapping completed successfully in {:#?}", map_duration);
-    }
+    info!("mapping completed successfully in {:#?}", map_duration);
 
     let af_process_info_file = opts.output.join("simpleaf_process_log.json");
     let af_process_info = json!({
@@ -370,25 +304,23 @@ being used by simpleaf"#,
 
     // write the relevant info about
     // our run to file.
-    std::fs::write(
-        &af_process_info_file,
-        serde_json::to_string_pretty(&af_process_info).unwrap(),
-    )
-    .with_context(|| format!("could not write {}", af_process_info_file.display()))?;
+    io::write_json_pretty_atomic(&af_process_info_file, &af_process_info)?;
 
     info!("successfully mapped reads and generated output RAD file.");
-    Ok(())
+    Ok(MapStageOutput {
+        map_output,
+        map_duration_secs: map_duration.as_secs_f64(),
+        map_cmd: map_cmd_string,
+    })
 }
 
-fn macs_call_peaks(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
-    // Read the JSON contents of the file as an instance of `User`.
-    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
-    let rp: ReqProgs = serde_json::from_value(v["prog_info"].clone())?;
+fn macs_call_peaks(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<MacsStageOutput> {
+    let rp: ReqProgs = context::load_required_programs(af_home_path)?;
 
     let macs_prog_info = rp
         .macs
         .as_ref()
-        .expect("macs program info should be properly set.");
+        .context("macs program info is missing; please run `simpleaf set-paths`.")?;
 
     let gpl_dir = opts.output.join("af_process");
     let bedsuf = if opts.compress { ".bed.gz" } else { ".bed" };
@@ -418,18 +350,9 @@ fn macs_call_peaks(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()
     info!("macs3 command : {}", macs_cmd_string);
 
     let macs_start = Instant::now();
-    let macs_proc_out = prog_utils::execute_command(&mut macs_cmd, CommandVerbosityLevel::Quiet)
-        .expect("could not execute [atac::macs]");
+    exec::run_checked(&mut macs_cmd, "[atac::macs]")?;
     let macs_duration = macs_start.elapsed();
-
-    if !macs_proc_out.status.success() {
-        bail!(
-            "atac::macs failed with exit status {:?}",
-            macs_proc_out.status
-        );
-    } else {
-        info!("macs completed successfully in {:#?}", macs_duration);
-    }
+    info!("macs completed successfully in {:#?}", macs_duration);
 
     let af_process_info_file = opts.output.join("simpleaf_process_log.json");
     let json_file = std::fs::File::open(af_process_info_file.clone())
@@ -448,35 +371,40 @@ fn macs_call_peaks(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()
 
     // write the relevant info about
     // our run to file.
-    std::fs::write(
-        &af_process_info_file,
-        serde_json::to_string_pretty(&af_process_info).unwrap(),
-    )
-    .with_context(|| format!("could not write {}", af_process_info_file.display()))?;
+    io::write_json_pretty_atomic(&af_process_info_file, &af_process_info)?;
 
     info!("successfully called peaks using macs3.");
 
-    Ok(())
+    Ok(MacsStageOutput {
+        macs_duration_secs: macs_duration.as_secs_f64(),
+        macs_cmd: macs_cmd_string,
+    })
 }
 
 pub(crate) fn gen_bed(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
-    af_gpl(af_home_path, opts)?;
-    af_sort(af_home_path, opts)?;
-    macs_call_peaks(af_home_path, opts)?;
+    let gpl = af_gpl(af_home_path, opts)?;
+    let sort = af_sort(af_home_path, opts)?;
+    let macs = macs_call_peaks(af_home_path, opts)?;
+    info!(
+        "ATAC downstream stages completed (gpl: {:.2}s, sort: {:.2}s, macs: {:.2}s).",
+        gpl.gpl_duration_secs, sort.sort_duration_secs, macs.macs_duration_secs
+    );
+    info!(
+        "ATAC commands: gpl=`{}`, sort=`{}`, macs=`{}`",
+        gpl.gpl_cmd, sort.sort_cmd, macs.macs_cmd
+    );
     Ok(())
 }
 
 // NOTE: we assume that check_progs has already been called and so version constraints have
 // already been checked.
-fn af_sort(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
-    // Read the JSON contents of the file as an instance of `User`.
-    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
-    let rp: ReqProgs = serde_json::from_value(v["prog_info"].clone())?;
+fn af_sort(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<SortStageOutput> {
+    let rp: ReqProgs = context::load_required_programs(af_home_path)?;
 
     let af_prog_info = rp
         .alevin_fry
         .as_ref()
-        .expect("alevin-fry program info should be properly set.");
+        .context("alevin-fry program info is missing; please run `simpleaf set-paths`.")?;
 
     let gpl_dir = opts.output.join("af_process");
     let rad_dir = opts.output.join("af_map");
@@ -490,17 +418,13 @@ fn af_sort(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
         .arg(rad_dir);
 
     // if the user requested more threads than can be used
-    let mut threads = opts.threads;
-    if let Ok(max_threads_usize) = std::thread::available_parallelism() {
-        let max_threads = max_threads_usize.get() as u32;
-        if threads > max_threads {
-            warn!(
-                "The maximum available parallelism is {}, but {} threads were requested.",
-                max_threads, threads
-            );
-            warn!("setting number of threads to {}", max_threads);
-            threads = max_threads;
-        }
+    let (threads, capped_at) = runtime::cap_threads(opts.threads);
+    if let Some(max_threads) = capped_at {
+        warn!(
+            "The maximum available parallelism is {}, but {} threads were requested.",
+            max_threads, opts.threads
+        );
+        warn!("setting number of threads to {}", max_threads);
     }
     af_sort.arg("--threads").arg(threads.to_string());
 
@@ -512,18 +436,9 @@ fn af_sort(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
     info!("sort command : {}", sort_cmd_string);
 
     let af_sort_start = Instant::now();
-    let af_sort_proc_out = prog_utils::execute_command(&mut af_sort, CommandVerbosityLevel::Quiet)
-        .expect("could not execute [atac::af_sort]");
+    exec::run_checked(&mut af_sort, "[atac::af_sort]")?;
     let af_sort_duration = af_sort_start.elapsed();
-
-    if !af_sort_proc_out.status.success() {
-        bail!(
-            "atac::sort failed with exit status {:?}",
-            af_sort_proc_out.status
-        );
-    } else {
-        info!("sort completed successfully in {:#?}", af_sort_duration);
-    }
+    info!("sort completed successfully in {:#?}", af_sort_duration);
 
     let af_process_info_file = opts.output.join("simpleaf_process_log.json");
     let json_file = std::fs::File::open(af_process_info_file.clone())
@@ -542,27 +457,24 @@ fn af_sort(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
 
     // write the relevant info about
     // our run to file.
-    std::fs::write(
-        &af_process_info_file,
-        serde_json::to_string_pretty(&af_process_info).unwrap(),
-    )
-    .with_context(|| format!("could not write {}", af_process_info_file.display()))?;
+    io::write_json_pretty_atomic(&af_process_info_file, &af_process_info)?;
 
     info!("successfully sorted and deduplicated records and created the output BED file.");
-    Ok(())
+    Ok(SortStageOutput {
+        sort_duration_secs: af_sort_duration.as_secs_f64(),
+        sort_cmd: sort_cmd_string,
+    })
 }
 
 // NOTE: we assume that check_progs has already been called and so version constraints have
 // already been checked.
-fn af_gpl(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
-    // Read the JSON contents of the file as an instance of `User`.
-    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
-    let rp: ReqProgs = serde_json::from_value(v["prog_info"].clone())?;
+fn af_gpl(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<GplStageOutput> {
+    let rp: ReqProgs = context::load_required_programs(af_home_path)?;
 
     let af_prog_info = rp
         .alevin_fry
         .as_ref()
-        .expect("alevin-fry program info should be properly set.");
+        .context("alevin-fry program info is missing; please run `simpleaf set-paths`.")?;
 
     let filter_meth_opt;
 
@@ -693,17 +605,13 @@ fn af_gpl(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
     }
 
     // if the user requested more threads than can be used
-    let mut threads = opts.threads;
-    if let Ok(max_threads_usize) = std::thread::available_parallelism() {
-        let max_threads = max_threads_usize.get() as u32;
-        if threads > max_threads {
-            warn!(
-                "The maximum available parallelism is {}, but {} threads were requested.",
-                max_threads, threads
-            );
-            warn!("setting number of threads to {}", max_threads);
-            threads = max_threads;
-        }
+    let (threads, capped_at) = runtime::cap_threads(opts.threads);
+    if let Some(max_threads) = capped_at {
+        warn!(
+            "The maximum available parallelism is {}, but {} threads were requested.",
+            max_threads, opts.threads
+        );
+        warn!("setting number of threads to {}", max_threads);
     }
     af_gpl.arg("--threads").arg(format!("{}", threads));
 
@@ -711,21 +619,12 @@ fn af_gpl(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
     info!("gpl command : {}", gpl_cmd_string);
 
     let af_gpl_start = Instant::now();
-    let af_gpl_proc_out = prog_utils::execute_command(&mut af_gpl, CommandVerbosityLevel::Quiet)
-        .expect("could not execute [atac::af_gpl]");
+    exec::run_checked(&mut af_gpl, "[atac::af_gpl]")?;
     let af_gpl_duration = af_gpl_start.elapsed();
-
-    if !af_gpl_proc_out.status.success() {
-        bail!(
-            "atac::gpl failed with exit status {:?}",
-            af_gpl_proc_out.status
-        );
-    } else {
-        info!(
-            "permit list generation completed successfully in {:#?}",
-            af_gpl_duration
-        );
-    }
+    info!(
+        "permit list generation completed successfully in {:#?}",
+        af_gpl_duration
+    );
 
     let af_process_info_file = opts.output.join("simpleaf_process_log.json");
     let json_file = std::fs::File::open(af_process_info_file.clone())
@@ -744,12 +643,99 @@ fn af_gpl(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<()> {
 
     // write the relevant info about
     // our run to file.
-    std::fs::write(
-        &af_process_info_file,
-        serde_json::to_string_pretty(&af_process_info).unwrap(),
-    )
-    .with_context(|| format!("could not write {}", af_process_info_file.display()))?;
+    io::write_json_pretty_atomic(&af_process_info_file, &af_process_info)?;
 
     info!("successfully performed cell barcode detection and correction.");
-    Ok(())
+    Ok(GplStageOutput {
+        gpl_duration_secs: af_gpl_duration.as_secs_f64(),
+        gpl_cmd: gpl_cmd_string,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+
+    use crate::atac::commands::{AtacChemistry, Macs3GenomeSize};
+
+    use super::*;
+
+    fn base_process_opts() -> ProcessOpts {
+        ProcessOpts {
+            index: PathBuf::from("/tmp/index"),
+            reads1: None,
+            reads2: None,
+            reads: None,
+            barcode_reads: vec![],
+            chemistry: AtacChemistry::TenxV2,
+            barcode_length: 16,
+            output: PathBuf::from("/tmp/out"),
+            threads: 1,
+            call_peaks: false,
+            permit_barcode_ori: None,
+            unfiltered_pl: None,
+            min_reads: 10,
+            compress: false,
+            ignore_ambig_hits: false,
+            no_poison: false,
+            use_chr: false,
+            thr: 0.8,
+            bin_size: 50,
+            bin_overlap: 2,
+            no_tn5_shift: false,
+            check_kmer_orphan: false,
+            max_ec_card: 4096,
+            max_hit_occ: 64,
+            max_hit_occ_recover: 1024,
+            max_read_occ: 250,
+            gsize: Macs3GenomeSize::KnownOpt("hs"),
+            qvalue: 0.1,
+            extsize: 50,
+        }
+    }
+
+    #[test]
+    fn add_read_args_succeeds_for_paired_end_inputs() {
+        let td = tempfile::tempdir().expect("failed to create tempdir");
+        let r1 = td.path().join("r1.fastq");
+        let r2 = td.path().join("r2.fastq");
+        let bc = td.path().join("bc.fastq");
+        fs::write(&r1, "").expect("failed to write r1");
+        fs::write(&r2, "").expect("failed to write r2");
+        fs::write(&bc, "").expect("failed to write bc");
+
+        let mut opts = base_process_opts();
+        opts.reads1 = Some(vec![r1]);
+        opts.reads2 = Some(vec![r2]);
+        opts.barcode_reads = vec![bc];
+
+        let mut cmd = std::process::Command::new("echo");
+        add_read_args(&mut cmd, &opts).expect("expected add_read_args to succeed");
+    }
+
+    #[test]
+    fn add_read_args_fails_for_mismatched_read_counts() {
+        let td = tempfile::tempdir().expect("failed to create tempdir");
+        let r1 = td.path().join("r1.fastq");
+        let r2a = td.path().join("r2a.fastq");
+        let r2b = td.path().join("r2b.fastq");
+        let bc = td.path().join("bc.fastq");
+        fs::write(&r1, "").expect("failed to write r1");
+        fs::write(&r2a, "").expect("failed to write r2a");
+        fs::write(&r2b, "").expect("failed to write r2b");
+        fs::write(&bc, "").expect("failed to write bc");
+
+        let mut opts = base_process_opts();
+        opts.reads1 = Some(vec![r1]);
+        opts.reads2 = Some(vec![r2a, r2b]);
+        opts.barcode_reads = vec![bc];
+
+        let mut cmd = std::process::Command::new("echo");
+        let err = add_read_args(&mut cmd, &opts).expect_err("expected mismatch to fail");
+        assert!(
+            format!("{:#}", err).contains("Cannot proceed"),
+            "unexpected error: {:#}",
+            err
+        );
+    }
 }
diff --git a/src/core.rs b/src/core.rs
new file mode 100644
index 0000000..eaeed1f
--- /dev/null
+++ b/src/core.rs
@@ -0,0 +1,5 @@
+pub mod context;
+pub mod exec;
+pub mod index_meta;
+pub mod io;
+pub mod runtime;
diff --git a/src/core/context.rs b/src/core/context.rs
new file mode 100644
index 0000000..e14024b
--- /dev/null
+++ b/src/core/context.rs
@@ -0,0 +1,54 @@
+use std::path::Path;
+
+use anyhow::Context;
+use serde_json::Value;
+
+use crate::utils::prog_utils::{self, ReqProgs};
+
+pub struct RuntimeContext {
+    pub progs: ReqProgs,
+}
+
+pub fn load_required_programs(af_home_path: &Path) -> anyhow::Result<ReqProgs> {
+    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
+    serde_json::from_value(v["prog_info"].clone())
+        .with_context(|| "Could not deserialize required program metadata from simpleaf_info.json")
+}
+
+pub fn load_runtime_context(af_home_path: &Path) -> anyhow::Result<RuntimeContext> {
+    Ok(RuntimeContext {
+        progs: load_required_programs(af_home_path)?,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+
+    use serde_json::json;
+    use tempfile::tempdir;
+
+    use super::load_runtime_context;
+
+    #[test]
+    fn load_runtime_context_reads_prog_info() {
+        let td = tempdir().expect("failed to create tempdir");
+        let af_info = json!({
+            "prog_info": {
+                "salmon": null,
+                "piscem": {"exe_path": "/bin/echo", "version": "0.12.2"},
+                "alevin_fry": {"exe_path": "/bin/echo", "version": "0.11.2"},
+                "macs": null
+            }
+        });
+        fs::write(
+            td.path().join("simpleaf_info.json"),
+            serde_json::to_string_pretty(&af_info).expect("failed to serialize json"),
+        )
+        .expect("failed to write simpleaf_info.json");
+
+        let ctx = load_runtime_context(td.path()).expect("failed to load runtime context");
+        assert!(ctx.progs.piscem.is_some());
+        assert!(ctx.progs.alevin_fry.is_some());
+    }
+}
diff --git a/src/core/exec.rs b/src/core/exec.rs
new file mode 100644
index 0000000..b550f7c
--- /dev/null
+++ b/src/core/exec.rs
@@ -0,0 +1,54 @@
+use anyhow::{bail, Context};
+
+use crate::utils::prog_utils::{self, CommandVerbosityLevel};
+
+pub fn run_capture(
+    cmd: &mut std::process::Command,
+    context: &str,
+) -> anyhow::Result<std::process::Output> {
+    prog_utils::execute_command(cmd, CommandVerbosityLevel::Quiet)
+        .with_context(|| format!("failed to execute {}", context))
+}
+
+pub fn ensure_success(output: &std::process::Output, context: &str) -> anyhow::Result<()> {
+    if output.status.success() {
+        return Ok(());
+    }
+
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    bail!(
+        "{} failed with exit status {}. stderr: {}",
+        context,
+        output.status,
+        stderr.trim()
+    );
+}
+
+pub fn run_checked(
+    cmd: &mut std::process::Command,
+    context: &str,
+) -> anyhow::Result<std::process::Output> {
+    let output = run_capture(cmd, context)?;
+    ensure_success(&output, context)?;
+    Ok(output)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::run_checked;
+
+    #[test]
+    fn run_checked_succeeds_for_true_command() {
+        let mut cmd = std::process::Command::new("sh");
+        cmd.arg("-c").arg("true");
+        run_checked(&mut cmd, "sh true").expect("expected true command to succeed");
+    }
+
+    #[test]
+    fn run_checked_errors_for_false_command() {
+        let mut cmd = std::process::Command::new("sh");
+        cmd.arg("-c").arg("false");
+        let err = run_checked(&mut cmd, "sh false").expect_err("expected false command to fail");
+        assert!(format!("{:#}", err).contains("failed with exit status"));
+    }
+}
diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs
new file mode 100644
index 0000000..af53247
--- /dev/null
+++ b/src/core/index_meta.rs
@@ -0,0 +1,163 @@
+use std::path::PathBuf;
+
+use anyhow::bail;
+use serde_json::Value;
+
+use crate::core::io;
+use crate::utils::af_utils::IndexType;
+
+pub struct QuantIndexMetadata {
+    pub index_type: IndexType,
+    pub inferred_t2g: Option<PathBuf>,
+    pub inferred_gene_id_to_name: Option<PathBuf>,
+}
+
+pub fn resolve_quant_index(
+    index: Option<PathBuf>,
+    use_piscem: bool,
+) -> anyhow::Result<QuantIndexMetadata> {
+    let mut inferred_t2g = None;
+    let mut inferred_gene_id_to_name = None;
+    let index_type;
+
+    if let Some(mut index) = index {
+        let removed_piscem_idx_suffix = if !index.is_dir() && index.ends_with("piscem_idx") {
+            index.pop();
+            true
+        } else {
+            false
+        };
+
+        let index_json_path = index.join("simpleaf_index.json");
+        match index_json_path.try_exists() {
+            Ok(true) => {
+                let v: Value = io::read_json_file(&index_json_path)?;
+
+                let index_type_str: String = serde_json::from_value(v["index_type"].clone())?;
+                index_type = match index_type_str.as_ref() {
+                    "salmon" => IndexType::Salmon(index.clone()),
+                    "piscem" => IndexType::Piscem(index.join("piscem_idx")),
+                    _ => {
+                        bail!(
+                            "unknown index type {} present in simpleaf_index.json",
+                            index_type_str
+                        );
+                    }
+                };
+
+                let t2g_opt: Option<PathBuf> = serde_json::from_value(v["t2g_file"].clone())?;
+                if let Some(t2g_rel) = t2g_opt {
+                    inferred_t2g = Some(index.join(t2g_rel));
+                }
+
+                if index.join("gene_id_to_name.tsv").exists() {
+                    inferred_gene_id_to_name = Some(index.join("gene_id_to_name.tsv"));
+                } else if let Some(index_parent) = index.parent() {
+                    let gene_name_path = index_parent.join("ref").join("gene_id_to_name.tsv");
+                    if gene_name_path.exists() && gene_name_path.is_file() {
+                        inferred_gene_id_to_name = Some(gene_name_path);
+                    }
+                }
+            }
+            Ok(false) => {
+                if removed_piscem_idx_suffix {
+                    index.push("piscem_idx");
+                }
+                index_type = if use_piscem {
+                    IndexType::Piscem(index)
+                } else {
+                    IndexType::Salmon(index)
+                };
+            }
+            Err(e) => bail!(e),
+        }
+    } else {
+        index_type = IndexType::NoIndex;
+    }
+
+    Ok(QuantIndexMetadata {
+        index_type,
+        inferred_t2g,
+        inferred_gene_id_to_name,
+    })
+}
+
+pub fn resolve_atac_piscem_index_base(mut index: PathBuf) -> anyhow::Result<PathBuf> {
+    let removed_piscem_idx_suffix = if !index.is_dir() && index.ends_with("piscem_idx") {
+        index.pop();
+        true
+    } else {
+        false
+    };
+
+    let index_json_path = index.join("simpleaf_index.json");
+    match index_json_path.try_exists() {
+        Ok(true) => {
+            let v: Value = io::read_json_file(&index_json_path)?;
+
+            let index_type_str: String = serde_json::from_value(v["index_type"].clone())?;
+            match index_type_str.as_ref() {
+                "piscem" => Ok(index.join("piscem_idx")),
+                _ => bail!(
+                    "unknown index type {} present in simpleaf_index.json",
+                    index_type_str
+                ),
+            }
+        }
+        Ok(false) => {
+            if removed_piscem_idx_suffix {
+                index.push("piscem_idx");
+            }
+            Ok(index)
+        }
+        Err(e) => bail!(e),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+
+    use serde_json::json;
+    use tempfile::tempdir;
+
+    use crate::utils::af_utils::IndexType;
+
+    use super::{resolve_atac_piscem_index_base, resolve_quant_index};
+
+    #[test]
+    fn resolve_quant_index_reads_simpleaf_index_metadata() {
+        let td = tempdir().expect("failed to create tempdir");
+        let idx_dir = td.path().join("index");
+        fs::create_dir_all(&idx_dir).expect("failed to create index dir");
+        fs::write(
+            idx_dir.join("simpleaf_index.json"),
+            serde_json::to_string_pretty(&json!({
+                "index_type":"salmon",
+                "t2g_file":"t2g_3col.tsv"
+            }))
+            .expect("failed to serialize json"),
+        )
+        .expect("failed to write simpleaf_index.json");
+        fs::write(idx_dir.join("gene_id_to_name.tsv"), "g1\tn1\n")
+            .expect("failed to write gene_id_to_name.tsv");
+
+        let meta = resolve_quant_index(Some(idx_dir.clone()), false)
+            .expect("failed to resolve quant index");
+        assert!(matches!(meta.index_type, IndexType::Salmon(_)));
+        assert_eq!(meta.inferred_t2g, Some(idx_dir.join("t2g_3col.tsv")));
+        assert_eq!(
+            meta.inferred_gene_id_to_name,
+            Some(idx_dir.join("gene_id_to_name.tsv"))
+        );
+    }
+
+    #[test]
+    fn resolve_atac_index_base_accepts_plain_prefix() {
+        let td = tempdir().expect("failed to create tempdir");
+        let idx = td.path().join("foo").join("piscem_idx");
+        let resolved =
+            resolve_atac_piscem_index_base(idx.clone()).expect("failed to resolve atac index");
+        assert_eq!(resolved, idx);
+    }
+}
diff --git a/src/core/io.rs b/src/core/io.rs
new file mode 100644
index 0000000..88b6279
--- /dev/null
+++ b/src/core/io.rs
@@ -0,0 +1,63 @@
+use std::fs;
+use std::path::Path;
+
+use anyhow::Context;
+use serde::Serialize;
+use serde_json::Value;
+
+pub fn read_json_file(path: &Path) -> anyhow::Result<Value> {
+    let json_file = std::fs::File::open(path)
+        .with_context(|| format!("Could not open JSON file {}.", path.display()))?;
+    let v: Value = serde_json::from_reader(json_file)
+        .with_context(|| format!("Could not parse JSON file {}.", path.display()))?;
+    Ok(v)
+}
+
+pub fn write_json_pretty<T: Serialize>(path: &Path, value: &T) -> anyhow::Result<()> {
+    let payload = serde_json::to_string_pretty(value)
+        .with_context(|| format!("Could not serialize JSON for {}.", path.display()))?;
+    fs::write(path, payload).with_context(|| format!("could not write {}", path.display()))
+}
+
+pub fn write_json_pretty_atomic<T: Serialize>(path: &Path, value: &T) -> anyhow::Result<()> {
+    let payload = serde_json::to_string_pretty(value)
+        .with_context(|| format!("Could not serialize JSON for {}.", path.display()))?;
+    let tmp_path = path.with_extension("tmp");
+    fs::write(&tmp_path, payload)
+        .with_context(|| format!("could not write temporary file {}", tmp_path.display()))?;
+    fs::rename(&tmp_path, path).with_context(|| {
+        format!(
+            "could not atomically rename {} to {}",
+            tmp_path.display(),
+            path.display()
+        )
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::json;
+    use tempfile::tempdir;
+
+    use super::{read_json_file, write_json_pretty, write_json_pretty_atomic};
+
+    #[test]
+    fn write_and_read_json_roundtrip() {
+        let td = tempdir().expect("failed to create tempdir");
+        let path = td.path().join("a.json");
+        let v = json!({"key":"value","n":1});
+        write_json_pretty(&path, &v).expect("failed to write json");
+        let read_back = read_json_file(&path).expect("failed to read json");
+        assert_eq!(read_back, v);
+    }
+
+    #[test]
+    fn write_json_pretty_atomic_persists_content() {
+        let td = tempdir().expect("failed to create tempdir");
+        let path = td.path().join("b.json");
+        let v = json!({"ok":true});
+        write_json_pretty_atomic(&path, &v).expect("failed to write json atomically");
+        let read_back = read_json_file(&path).expect("failed to read json");
+        assert_eq!(read_back, v);
+    }
+}
diff --git a/src/core/runtime.rs b/src/core/runtime.rs
new file mode 100644
index 0000000..3329337
--- /dev/null
+++ b/src/core/runtime.rs
@@ -0,0 +1,36 @@
+pub fn cap_threads(requested: u32) -> (u32, Option<u32>) {
+    cap_threads_with_limit(
+        requested,
+        std::thread::available_parallelism()
+            .ok()
+            .map(|n| n.get() as u32),
+    )
+}
+
+fn cap_threads_with_limit(requested: u32, limit: Option<u32>) -> (u32, Option<u32>) {
+    if let Some(max_threads) = limit {
+        if requested > max_threads {
+            return (max_threads, Some(max_threads));
+        }
+    }
+    (requested, None)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::cap_threads_with_limit;
+
+    #[test]
+    fn caps_requested_threads_when_over_limit() {
+        let (effective, capped_at) = cap_threads_with_limit(32, Some(8));
+        assert_eq!(effective, 8);
+        assert_eq!(capped_at, Some(8));
+    }
+
+    #[test]
+    fn keeps_requested_threads_when_within_limit() {
+        let (effective, capped_at) = cap_threads_with_limit(8, Some(32));
+        assert_eq!(effective, 8);
+        assert_eq!(capped_at, None);
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 64efefe..c144bbe 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,6 +2,7 @@ use chemistry::{
     add_chemistry, clean_chemistries, fetch_chemistries, lookup_chemistry, refresh_chemistries,
     remove_chemistry,
 };
+use tracing::info;
 use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter};
 
 use anyhow::bail;
@@ -10,6 +11,7 @@ use clap::{crate_version, Parser};
 use std::env;
 use std::path::PathBuf;
 
+mod core;
 mod defaults;
 mod utils;
 
@@ -109,7 +111,13 @@ fn main() -> anyhow::Result<()> {
             // validate versions
             atac::process::check_progs(&af_home_path, &process_opts)?;
             // first we map the reads
-            atac::process::map_reads(af_home_path.as_path(), &process_opts)?;
+            let map_stage = atac::process::map_reads(af_home_path.as_path(), &process_opts)?;
+            info!(
+                "ATAC map stage complete: output={} duration={:.2}s cmd={}",
+                map_stage.map_output.display(),
+                map_stage.map_duration_secs,
+                map_stage.map_cmd
+            );
             // then we generate the permit list and sort the file
             atac::process::gen_bed(af_home_path.as_path(), &process_opts)
         }
diff --git a/src/simpleaf_commands/chemistry.rs b/src/simpleaf_commands/chemistry.rs
index 22d6918..418b551 100644
--- a/src/simpleaf_commands/chemistry.rs
+++ b/src/simpleaf_commands/chemistry.rs
@@ -1,3 +1,4 @@
+use crate::core::io::write_json_pretty_atomic;
 use crate::utils::chem_utils::{
     custom_chem_hm_into_json, get_custom_chem_hm, get_single_custom_chem_from_file,
     CustomChemistry, CustomChemistryMap, ExpectedOri, LOCAL_PL_PATH_KEY, REMOTE_PL_URL_KEY,
@@ -10,15 +11,128 @@ use regex::Regex;
 use anyhow::{bail, Context, Result};
 use semver::Version;
 use serde_json::json;
+use serde_json::{Map, Value};
 use std::collections::HashSet;
 use std::fs;
-use std::io::{Seek, Write};
 use std::os::unix::fs::MetadataExt;
 use std::path::{Path, PathBuf};
 use tracing::{debug, info, warn};
 use utils::prog_utils::read_json_from_remote_url;
 use utils::remote::is_remote_url;
 
+fn removable_permit_lists(
+    used_pls: &HashSet<PathBuf>,
+    present_pls: &HashSet<PathBuf>,
+) -> HashSet<PathBuf> {
+    present_pls - used_pls
+}
+
+/// Parse a chemistry version string with additional operation context.
+fn parse_chemistry_version(version: &str, context: &str) -> Result<Version> {
+    Version::parse(version).with_context(|| {
+        format!(
+            "Could not parse version {} while {}. Please follow https://semver.org/ (e.g. 0.1.0).",
+            version, context
+        )
+    })
+}
+
+/// Return `true` if a new chemistry entry should replace an existing one.
+///
+/// Replacement happens when `force` is set or when the incoming version is
+/// strictly newer than the existing version.
+fn should_replace_registry_entry(existing: &Value, incoming: &Value, force: bool) -> Result<bool> {
+    if force {
+        return Ok(true);
+    }
+    let current_version = existing
+        .get("version")
+        .and_then(Value::as_str)
+        .with_context(|| "Chemistry should have a string `version` field.")?;
+    let new_version = incoming
+        .get("version")
+        .and_then(Value::as_str)
+        .with_context(|| "Chemistry should have a string `version` field.")?;
+    Ok(
+        parse_chemistry_version(new_version, "comparing incoming chemistry")?
+            > parse_chemistry_version(current_version, "comparing existing chemistry")?,
+    )
+}
+
+/// Merge incoming registry entries into an existing registry object.
+///
+/// Missing keys are inserted. Existing keys are replaced only when
+/// `should_replace_registry_entry` permits it.
+fn merge_registry_entries(
+    existing: &mut Map<String, Value>,
+    incoming: &Map<String, Value>,
+    force: bool,
+    dry_run_pref: &str,
+) -> Result<()> {
+    for (k, v) in incoming {
+        match existing.get_mut(k) {
+            None => {
+                existing.insert(k.clone(), v.clone());
+            }
+            Some(curr) => {
+                if should_replace_registry_entry(curr, v, force)? {
+                    info!("{}updating {}", dry_run_pref, k);
+                    existing.insert(k.clone(), v.clone());
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Merge entries from deprecated `custom_chemistries.json` into the main registry.
+///
+/// Only missing entries are inserted, and only when the deprecated value is a
+/// valid geometry string.
+fn merge_deprecated_registry_entries(
+    existing: &mut Map<String, Value>,
+    deprecated: &Map<String, Value>,
+    dry_run_pref: &str,
+) {
+    for (k, v) in deprecated {
+        if existing.contains_key(k) {
+            warn!(
+                "{}The main registry already contained the chemistry \"{}\"; Ignored the one from the deprecated registry.",
+                dry_run_pref, k
+            );
+        } else if let Value::String(geom) = v {
+            if validate_geometry(geom).is_ok() {
+                let new_ent = json!({
+                    "geometry": geom,
+                    "expected_ori": "both",
+                    "version" : CustomChemistry::default_version(),
+                });
+                existing.insert(k.to_owned(), new_ent);
+                info!(
+                    "{}Successfully inserted chemistry \"{}\" from the deprecated registry into the main registry.",
+                    dry_run_pref, k
+                );
+            } else {
+                warn!(
+                    "{}The chemistry \"{}\" in the deprecated registry is not a valid geometry string; Skipped.",
+                    dry_run_pref, k
+                );
+            }
+        } else {
+            warn!(
+                "{}The chemistry \"{}\" in the deprecated registry is not a string; Skipped.",
+                dry_run_pref, k
+            );
+        }
+    }
+}
+
+/// Persist a JSON value as pretty-printed text to disk.
+fn write_json_pretty(path: &Path, value: &Value) -> Result<()> {
+    write_json_pretty_atomic(path, value)
+        .with_context(|| format!("Could not write {}", path.display()))
+}
+
 /// Attempt to get the chemistry definition from the provided JSON file
 /// Check if the JSON file is local or remote. If remote, fetch the file first.
 /// Parse the JSON file, and look for the specific chemistry with the requested name.
@@ -160,7 +274,7 @@ pub fn add_chemistry(
     let version = add_opts
         .version
         .unwrap_or(CustomChemistry::default_version());
-    let add_ver = Version::parse(version.as_ref()).with_context(|| format!("Could not parse version {}. Please follow https://semver.org/. A valid example is 0.1.0", version))?;
+    let add_ver = parse_chemistry_version(version.as_ref(), "adding chemistry")?;
 
     let name = add_opts.name;
 
@@ -169,7 +283,10 @@ pub fn add_chemistry(
 
     if let Some(existing_entry) = get_single_custom_chem_from_file(&chem_p, &name)? {
         let existing_ver_str = existing_entry.version();
-        let existing_ver = Version::parse(existing_ver_str).with_context( || format!("Could not parse version {} found in existing chemistries.json file. Please correct this entry", existing_ver_str))?;
+        let existing_ver = parse_chemistry_version(
+            existing_ver_str,
+            "reading existing chemistry version from chemistries.json",
+        )?;
         if add_ver <= existing_ver {
             info!("Attempting to add chemistry with version {:#} which is <= than the existing version ({:#}) for this chemistry; Skipping addition.", add_ver, existing_ver);
             return Ok(());
@@ -302,15 +419,7 @@ pub fn add_chemistry(
 
     // convert the custom chemistry hashmap to json
     let v = custom_chem_hm_into_json(chem_hm)?;
-
-    // write out the new custom chemistry file
-    let mut custom_chem_file = std::fs::File::create(&chem_p)
-        .with_context(|| format!("Could not create {}", chem_p.display()))?;
-    custom_chem_file.rewind()?;
-
-    custom_chem_file
-        .write_all(serde_json::to_string_pretty(&v).unwrap().as_bytes())
-        .with_context(|| format!("Could not write {}", chem_p.display()))?;
+    write_json_pretty(&chem_p, &v)?;
 
     Ok(())
 }
@@ -377,44 +486,8 @@ pub fn refresh_chemistries(
         prog_utils::download_to_file(CHEMISTRIES_URL, &tmp_chem_path)?;
         if let Some(existing_chem) = parse_resource_json_file(&chem_path, None)?.as_object_mut() {
             if let Some(new_chem) = parse_resource_json_file(&tmp_chem_path, None)?.as_object() {
-                for (k, v) in new_chem.iter() {
-                    match existing_chem.get_mut(k) {
-                        None => {
-                            existing_chem.insert(k.clone(), v.clone());
-                        }
-                        Some(ev) => {
-                            let curr_ver = Version::parse(
-                                ev.get("version")
-                                    .expect("Chemistry should have a version field")
-                                    .as_str()
-                                    .expect("Version should be a string"),
-                            )?;
-                            let new_ver = Version::parse(
-                                v.get("version")
-                                    .expect("Chemistry should have a version field")
-                                    .as_str()
-                                    .expect("Version should be a string"),
-                            )?;
-                            if refresh_opts.force || new_ver > curr_ver {
-                                info!("{}updating {}", dry_run_pref, k);
-                                existing_chem.insert(k.clone(), v.clone());
-                            }
-                        }
-                    }
-                }
-
-                // write out the merged chemistry file
-                let mut chem_file = std::fs::File::create(&chem_path)
-                    .with_context(|| format!("could not create {}", chem_path.display()))?;
-                chem_file.rewind()?;
-
-                chem_file
-                    .write_all(
-                        serde_json::to_string_pretty(&existing_chem)
-                            .unwrap()
-                            .as_bytes(),
-                    )
-                    .with_context(|| format!("could not write {}", chem_path.display()))?;
+                merge_registry_entries(existing_chem, new_chem, refresh_opts.force, dry_run_pref)?;
+                write_json_pretty(&chem_path, &Value::Object(existing_chem.clone()))?;
 
                 // remove the temp file
                 std::fs::remove_file(tmp_chem_path)?;
@@ -434,34 +507,8 @@ pub fn refresh_chemistries(
             if let Some(old_custom_chem) =
                 parse_resource_json_file(&custom_chem_file, None)?.as_object()
             {
-                for (k, v) in old_custom_chem.iter() {
-                    if new_chem.contains_key(k) {
-                        warn!("{}The main registry already contained the chemistry \"{}\"; Ignored the one from the deprecated registry.", dry_run_pref, k);
-                    } else if let serde_json::Value::String(v) = v {
-                        if validate_geometry(v).is_ok() {
-                            let new_ent = json!({
-                            "geometry": v,
-                            "expected_ori": "both",
-                            "version" : CustomChemistry::default_version(),
-                            });
-                            new_chem.insert(k.to_owned(), new_ent);
-                            info!("{}Successfully inserted chemistry \"{}\" from the deprecated registry into the main registry.", dry_run_pref, k);
-                        } else {
-                            warn!("{}The chemistry \"{}\" in the deprecated registry is not a valid geometry string; Skipped.", dry_run_pref, k);
-                        }
-                    } else {
-                        warn!("{}The chemistry \"{}\" in the deprecated registry is not a string; Skipped.", dry_run_pref, k);
-                    }
-                }
-
-                // write out the merged chemistry file
-                let mut chem_file = std::fs::File::create(&chem_path)
-                    .with_context(|| format!("could not create {}", chem_path.display()))?;
-                chem_file.rewind()?;
-
-                chem_file
-                    .write_all(serde_json::to_string_pretty(&new_chem).unwrap().as_bytes())
-                    .with_context(|| format!("could not write {}", chem_path.display()))?;
+                merge_deprecated_registry_entries(new_chem, old_custom_chem, dry_run_pref);
+                write_json_pretty(&chem_path, &Value::Object(new_chem.clone()))?;
 
                 let backup = custom_chem_file.with_extension("json.bak");
                 std::fs::rename(custom_chem_file, backup)?;
@@ -531,17 +578,17 @@ pub fn clean_chemistries(
         })
         .collect::<HashSet<PathBuf>>();
 
-    let rem_pls = &present_pls - &used_pls;
+    let rem_pls = removable_permit_lists(&used_pls, &present_pls);
 
     // check if the chemistry already exists and log
     if dry_run {
         if rem_pls.is_empty() {
             info!("[dry_run] : No permit list files in the cache directory are currently unused; Nothing to clean.");
         } else {
-            info!("[dry_run] : The following files in the permit list directory do not match any registered chemistries and would be removed: {:#?}", present_pls);
+            info!("[dry_run] : The following files in the permit list directory do not match any registered chemistries and would be removed: {:#?}", rem_pls);
         }
     } else {
-        for pl in rem_pls {
+        for pl in &rem_pls {
             info!("removing file from {}", pl.display());
             std::fs::remove_file(pl)?;
         }
@@ -593,15 +640,7 @@ pub fn remove_chemistry(
     } else if !remove_opts.dry_run {
         // convert the custom chemistry hashmap to json
         let v = custom_chem_hm_into_json(chem_hm)?;
-
-        // write out the new custom chemistry file
-        let mut custom_chem_file = std::fs::File::create(&chem_p)
-            .with_context(|| format!("Could not create {}", chem_p.display()))?;
-        custom_chem_file.rewind()?;
-
-        custom_chem_file
-            .write_all(serde_json::to_string_pretty(&v).unwrap().as_bytes())
-            .with_context(|| format!("Could not write {}", chem_p.display()))?;
+        write_json_pretty(&chem_p, &v)?;
     }
 
     Ok(())
@@ -760,3 +799,239 @@ pub fn fetch_chemistries(
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{
+        add_chemistry, clean_chemistries, merge_deprecated_registry_entries,
+        merge_registry_entries, parse_chemistry_version, removable_permit_lists, remove_chemistry,
+    };
+    use crate::simpleaf_commands::{ChemistryAddOpts, ChemistryCleanOpts, ChemistryRemoveOpts};
+    use crate::utils::constants::CHEMISTRIES_PATH;
+    use serde_json::{json, Map, Value};
+    use std::collections::HashSet;
+    use std::fs;
+    use std::path::PathBuf;
+    use tempfile::tempdir;
+
+    /// Write a JSON value to `<af_home>/chemistries.json` for registry tests.
+    fn write_registry(af_home: &std::path::Path, value: &Value) {
+        fs::write(
+            af_home.join(CHEMISTRIES_PATH),
+            serde_json::to_string_pretty(value).unwrap(),
+        )
+        .unwrap();
+    }
+
+    /// Read `<af_home>/chemistries.json` as JSON.
+    fn read_registry(af_home: &std::path::Path) -> Value {
+        serde_json::from_str(&fs::read_to_string(af_home.join(CHEMISTRIES_PATH)).unwrap()).unwrap()
+    }
+
+    #[test]
+    fn removable_permit_list_set_only_contains_unused_files() {
+        let used = HashSet::from([PathBuf::from("plist/a"), PathBuf::from("plist/b")]);
+        let present = HashSet::from([
+            PathBuf::from("plist/a"),
+            PathBuf::from("plist/b"),
+            PathBuf::from("plist/c"),
+        ]);
+
+        let removable = removable_permit_lists(&used, &present);
+        assert_eq!(removable, HashSet::from([PathBuf::from("plist/c")]));
+    }
+
+    #[test]
+    fn merge_registry_entries_prefers_newer_versions_unless_forced() {
+        let mut existing = Map::new();
+        existing.insert(
+            "chem_a".to_string(),
+            json!({"geometry":"1{b[16]u[12]x:}2{r:}","expected_ori":"both","version":"1.0.0"}),
+        );
+        let mut incoming = Map::new();
+        incoming.insert(
+            "chem_a".to_string(),
+            json!({"geometry":"1{b[16]u[10]x:}2{r:}","expected_ori":"both","version":"0.9.0"}),
+        );
+        incoming.insert(
+            "chem_b".to_string(),
+            json!({"geometry":"1{b[16]u[12]x:}2{r[50]x:}","expected_ori":"both","version":"0.1.0"}),
+        );
+
+        merge_registry_entries(&mut existing, &incoming, false, "").unwrap();
+        assert_eq!(existing["chem_a"]["version"], json!("1.0.0"));
+        assert_eq!(existing["chem_b"]["version"], json!("0.1.0"));
+
+        merge_registry_entries(&mut existing, &incoming, true, "").unwrap();
+        assert_eq!(existing["chem_a"]["version"], json!("0.9.0"));
+    }
+
+    #[test]
+    fn merge_deprecated_registry_entries_inserts_only_valid_missing_entries() {
+        let mut existing = Map::new();
+        existing.insert(
+            "already".to_string(),
+            json!({"geometry":"1{b[16]u[12]x:}2{r:}","expected_ori":"both","version":"1.0.0"}),
+        );
+        let deprecated = Map::from_iter([
+            (
+                "already".to_string(),
+                Value::String("1{b[10]}2{r:}".to_string()),
+            ),
+            (
+                "valid_new".to_string(),
+                Value::String("1{b[16]u[12]x:}2{r:}".to_string()),
+            ),
+            (
+                "invalid_new".to_string(),
+                Value::String("bad-geom".to_string()),
+            ),
+        ]);
+
+        merge_deprecated_registry_entries(&mut existing, &deprecated, "");
+
+        assert!(existing.contains_key("already"));
+        assert!(existing.contains_key("valid_new"));
+        assert!(!existing.contains_key("invalid_new"));
+    }
+
+    #[test]
+    fn add_chemistry_only_updates_when_version_increases() {
+        let tmp = tempdir().unwrap();
+        write_registry(
+            tmp.path(),
+            &json!({
+                "mychem": {
+                    "geometry": "1{b[16]u[12]x:}2{r:}",
+                    "expected_ori": "both",
+                    "version": "1.2.0"
+                }
+            }),
+        );
+
+        add_chemistry(
+            tmp.path().to_path_buf(),
+            ChemistryAddOpts {
+                name: "mychem".to_string(),
+                geometry: Some("1{b[16]u[10]x:}2{r:}".to_string()),
+                expected_ori: Some("both".to_string()),
+                local_url: None,
+                remote_url: None,
+                version: Some("1.1.0".to_string()),
+                from_json: None,
+            },
+        )
+        .unwrap();
+        let registry_after_older = read_registry(tmp.path());
+        assert_eq!(registry_after_older["mychem"]["version"], json!("1.2.0"));
+        assert_eq!(
+            registry_after_older["mychem"]["geometry"],
+            json!("1{b[16]u[12]x:}2{r:}")
+        );
+
+        add_chemistry(
+            tmp.path().to_path_buf(),
+            ChemistryAddOpts {
+                name: "mychem".to_string(),
+                geometry: Some("1{b[16]u[10]x:}2{r:}".to_string()),
+                expected_ori: Some("both".to_string()),
+                local_url: None,
+                remote_url: None,
+                version: Some("1.3.0".to_string()),
+                from_json: None,
+            },
+        )
+        .unwrap();
+        let registry_after_newer = read_registry(tmp.path());
+        assert_eq!(registry_after_newer["mychem"]["version"], json!("1.3.0"));
+        assert_eq!(
+            registry_after_newer["mychem"]["geometry"],
+            json!("1{b[16]u[10]x:}2{r:}")
+        );
+    }
+
+    #[test]
+    fn remove_chemistry_respects_dry_run_and_regex_removal() {
+        let tmp = tempdir().unwrap();
+        write_registry(
+            tmp.path(),
+            &json!({
+                "foo_chem": {"geometry":"1{b[16]u[12]x:}2{r:}","expected_ori":"both","version":"0.1.0"},
+                "bar_chem": {"geometry":"1{b[16]u[12]x:}2{r:}","expected_ori":"both","version":"0.1.0"}
+            }),
+        );
+
+        remove_chemistry(
+            tmp.path().to_path_buf(),
+            ChemistryRemoveOpts {
+                name: "foo_.*".to_string(),
+                dry_run: true,
+            },
+        )
+        .unwrap();
+        let after_dry_run = read_registry(tmp.path());
+        assert!(after_dry_run.get("foo_chem").is_some());
+        assert!(after_dry_run.get("bar_chem").is_some());
+
+        remove_chemistry(
+            tmp.path().to_path_buf(),
+            ChemistryRemoveOpts {
+                name: "foo_.*".to_string(),
+                dry_run: false,
+            },
+        )
+        .unwrap();
+        let after_remove = read_registry(tmp.path());
+        assert!(after_remove.get("foo_chem").is_none());
+        assert!(after_remove.get("bar_chem").is_some());
+    }
+
+    #[test]
+    fn clean_chemistries_dry_run_is_non_destructive_then_removes_unused() {
+        let tmp = tempdir().unwrap();
+        write_registry(
+            tmp.path(),
+            &json!({
+                "chem": {
+                    "geometry":"1{b[16]u[12]x:}2{r:}",
+                    "expected_ori":"both",
+                    "version":"0.1.0",
+                    "plist_name":"keep_pl"
+                }
+            }),
+        );
+
+        let plist_dir = tmp.path().join("plist");
+        fs::create_dir_all(&plist_dir).unwrap();
+        let keep = plist_dir.join("keep_pl");
+        let remove = plist_dir.join("remove_pl");
+        fs::write(&keep, "keep").unwrap();
+        fs::write(&remove, "remove").unwrap();
+
+        clean_chemistries(
+            tmp.path().to_path_buf(),
+            ChemistryCleanOpts { dry_run: true },
+        )
+        .unwrap();
+        assert!(keep.exists());
+        assert!(remove.exists());
+
+        clean_chemistries(
+            tmp.path().to_path_buf(),
+            ChemistryCleanOpts { dry_run: false },
+        )
+        .unwrap();
+        assert!(keep.exists());
+        assert!(!remove.exists());
+    }
+
+    #[test]
+    fn parse_chemistry_version_rejects_invalid_versions() {
+        let err = parse_chemistry_version("not-a-version", "unit test").unwrap_err();
+        assert!(
+            format!("{:#}", err).contains("Could not parse version"),
+            "unexpected error: {:#}",
+            err
+        );
+    }
+}
diff --git a/src/simpleaf_commands/indexing.rs b/src/simpleaf_commands/indexing.rs
index 3556db1..98dd124 100644
--- a/src/simpleaf_commands/indexing.rs
+++ b/src/simpleaf_commands/indexing.rs
@@ -1,12 +1,12 @@
+use crate::core::{context, exec, io, runtime};
 use crate::utils::af_utils::create_dir_if_absent;
 use crate::utils::prog_utils;
-use crate::utils::prog_utils::{CommandVerbosityLevel, ReqProgs};
+use crate::utils::prog_utils::ReqProgs;
 
 use anyhow::{anyhow, bail, Context};
 use roers;
 use serde::Deserialize;
 use serde_json::json;
-use serde_json::Value;
 use std::collections::HashSet;
 use std::fs::File;
 use std::io::{BufWriter, Write};
@@ -16,6 +16,100 @@ use tracing::{error, info, warn};
 
 use super::{IndexOpts, ReferenceType};
 
+struct ReferenceStageOutput {
+    ref_seq: PathBuf,
+    min_seq_len: Option<u32>,
+    t2g: Option<PathBuf>,
+    gene_id_to_name: Option<PathBuf>,
+    roers_duration: Option<std::time::Duration>,
+    roers_cmd: Option<roers::AugRefOpts>,
+}
+
+struct IndexBuildStageOutput {
+    index_duration: std::time::Duration,
+    index_cmd_string: String,
+}
+
+fn derive_kmer_and_minimizer(
+    min_seq_len: Option<u32>,
+    default_kmer_length: u32,
+    default_minimizer_length: u32,
+) -> anyhow::Result<(u32, u32)> {
+    if let Some(msl) = min_seq_len {
+        if msl < 10 {
+            bail!("The reference sequences are too short for indexing. Please provide sequences with a minimum length of at least 10 bases.");
+        }
+        if (msl / 2) < default_kmer_length && default_kmer_length == 31 {
+            let kmer_length = msl / 2;
+            let minimizer_length = (kmer_length as f32 / 1.8).ceil() as u32 + 1;
+            info!(
+                "Using kmer_length = {} and minimizer_length = {} because the default values are too big for the reference sequences.",
+                kmer_length, minimizer_length
+            );
+            Ok((kmer_length, minimizer_length))
+        } else {
+            Ok((default_kmer_length, default_minimizer_length))
+        }
+    } else {
+        Ok((default_kmer_length, default_minimizer_length))
+    }
+}
+
+fn write_index_log_stage(
+    output: &Path,
+    reference_stage: &ReferenceStageOutput,
+    index_stage: &IndexBuildStageOutput,
+) -> anyhow::Result<()> {
+    let index_log_file = output.join("simpleaf_index_log.json");
+    let index_log_info = if let Some(roers_duration) = reference_stage.roers_duration {
+        json!({
+            "time_info" : {
+                "roers_time" : roers_duration,
+                "index_time" : index_stage.index_duration
+            },
+            "cmd_info" : {
+                "roers_cmd" : reference_stage.roers_cmd,
+                "index_cmd" : index_stage.index_cmd_string,
+            }
+        })
+    } else {
+        json!({
+            "time_info" : {
+                "index_time" : index_stage.index_duration
+            },
+            "cmd_info" : {
+                "index_cmd" : index_stage.index_cmd_string,
+            }
+        })
+    };
+
+    io::write_json_pretty_atomic(&index_log_file, &index_log_info)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::derive_kmer_and_minimizer;
+
+    #[test]
+    fn derive_kmer_and_minimizer_fails_for_short_reference() {
+        let err = derive_kmer_and_minimizer(Some(9), 31, 19)
+            .expect_err("expected short sequence length to fail");
+        assert!(
+            format!("{:#}", err).contains("minimum length of at least 10"),
+            "unexpected error: {:#}",
+            err
+        );
+    }
+
+    #[test]
+    fn derive_kmer_and_minimizer_adjusts_defaults_for_short_sequences() {
+        let (k, m) =
+            derive_kmer_and_minimizer(Some(20), 31, 19).expect("expected adjusted k/m values");
+        assert_eq!(k, 10);
+        assert_eq!(m, 7);
+    }
+}
+
 fn validate_index_type_opts(opts: &IndexOpts) -> anyhow::Result<()> {
     let mut bail = false;
     if opts.use_piscem && opts.sparse {
@@ -230,9 +324,7 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
     validate_index_type_opts(&opts)?;
     let mut threads = opts.threads;
     let output = opts.output;
-    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
-    // Read the JSON contents of the file as an instance of `User`.
-    let rp: ReqProgs = serde_json::from_value(v["prog_info"].clone())?;
+    let rp: ReqProgs = context::load_required_programs(af_home_path)?;
 
     rp.issue_recommended_version_messages();
     // we are building a custom spliced+intronic reference
@@ -435,40 +527,29 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
         // _gene_id_to_name = Some(id_to_name_path);
     }
 
-    std::fs::write(
-        &info_file,
-        serde_json::to_string_pretty(&index_info).unwrap(),
-    )
-    .with_context(|| format!("could not write {}", info_file.display()))?;
-
-    let ref_seq = reference_sequence.with_context(||
-                "Reference sequence should either be generated from --fasta with reftype spliced+intronic / spliced+unspliced or set with --ref-seq",
-            )?;
+    io::write_json_pretty(&info_file, &index_info)?;
+
+    let ref_seq = reference_sequence.with_context(
+        || {
+            "Reference sequence should either be generated from --fasta with reftype spliced+intronic / spliced+unspliced or set with --ref-seq"
+        },
+    )?;
+    let reference_stage = ReferenceStageOutput {
+        ref_seq: ref_seq.clone(),
+        min_seq_len,
+        t2g,
+        gene_id_to_name,
+        roers_duration,
+        roers_cmd: roers_aug_ref_opt,
+    };
 
-    let input_files = vec![ref_seq.clone()];
+    let input_files = vec![reference_stage.ref_seq.clone()];
     prog_utils::check_files_exist(&input_files)?;
-
-    let kmer_length: u32;
-    let minimizer_length: u32;
-    if let Some(msl) = min_seq_len {
-        if msl < 10 {
-            bail!("The reference sequences are too short for indexing. Please provide sequences with a minimum length of at least 10 bases.");
-        }
-        // only if the value is not the default value
-        if (msl / 2) < opts.kmer_length && opts.kmer_length == 31 {
-            kmer_length = msl / 2;
-            // https://github.com/COMBINE-lab/protocol-estuary/blob/2ecc65f1891ebfafff2a4a17460550e4dd1f4bb6/utils/simpleaf_workflow_utils.libsonnet#L232
-            minimizer_length = (kmer_length as f32 / 1.8).ceil() as u32 + 1;
-
-            info!("Using kmer_length = {} and minimizer_length = {} because the default values are too big for the reference sequences.", kmer_length, minimizer_length);
-        } else {
-            kmer_length = opts.kmer_length;
-            minimizer_length = opts.minimizer_length;
-        };
-    } else {
-        kmer_length = opts.kmer_length;
-        minimizer_length = opts.minimizer_length;
-    }
+    let (kmer_length, minimizer_length) = derive_kmer_and_minimizer(
+        reference_stage.min_seq_len,
+        opts.kmer_length,
+        opts.minimizer_length,
+    )?;
 
     let output_index_dir = output.join("index");
     let index_duration;
@@ -481,10 +562,9 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
                             Please either set a path using the `set-paths` command, or ensure the `PISCEM` environment variable is set properly.");
         }
 
-        let piscem_prog_info = rp
-            .piscem
-            .as_ref()
-            .expect("piscem program info should be properly set.");
+        let piscem_prog_info = rp.piscem.as_ref().context(
+            "The construction of a piscem index was requested, but piscem program info is missing.",
+        )?;
 
         let mut piscem_index_cmd =
             std::process::Command::new(format!("{}", piscem_prog_info.exe_path.display()));
@@ -501,7 +581,7 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
             .arg("-o")
             .arg(&output_index_stem)
             .arg("-s")
-            .arg(&ref_seq)
+            .arg(&reference_stage.ref_seq)
             .arg("--seed")
             .arg(opts.hash_seed.to_string())
             .arg("-w")
@@ -513,18 +593,15 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
             piscem_index_cmd.arg("--overwrite");
         }
 
-        // if the user requested more threads than can be used
-        if let Ok(max_threads_usize) = std::thread::available_parallelism() {
-            let max_threads = max_threads_usize.get() as u32;
-            if threads > max_threads {
-                warn!(
-                    "The maximum available parallelism is {}, but {} threads were requested.",
-                    max_threads, threads
-                );
-                warn!("setting number of threads to {}", max_threads);
-                threads = max_threads;
-            }
+        let (capped_threads, capped_at) = runtime::cap_threads(threads);
+        if let Some(max_threads) = capped_at {
+            warn!(
+                "The maximum available parallelism is {}, but {} threads were requested.",
+                max_threads, threads
+            );
+            warn!("setting number of threads to {}", max_threads);
         }
+        threads = capped_threads;
 
         piscem_index_cmd
             .arg("--threads")
@@ -561,17 +638,12 @@ simpleaf"#,
         info!("piscem build cmd : {}", index_cmd_string);
 
         let index_start = Instant::now();
-        let cres = prog_utils::execute_command(&mut piscem_index_cmd, CommandVerbosityLevel::Quiet)
-            .expect("failed to invoke piscem index command");
+        let _cres = exec::run_checked(&mut piscem_index_cmd, "piscem index command")?;
         index_duration = index_start.elapsed();
 
-        if !cres.status.success() {
-            bail!("piscem index failed to build succesfully {:?}", cres.status);
-        }
-
         // copy over the t2g file to the index
         let mut t2g_out_path: Option<PathBuf> = None;
-        if let Some(t2g_file) = t2g {
+        if let Some(t2g_file) = reference_stage.t2g.clone() {
             let index_t2g_path = output_index_dir.join("t2g_3col.tsv");
             t2g_out_path = Some(PathBuf::from("t2g_3col.tsv"));
             std::fs::copy(t2g_file, index_t2g_path)?;
@@ -579,7 +651,7 @@ simpleaf"#,
 
         // copy over the gene_id_to_name.tsv file to the index
         let mut gene_id_to_name_out_path: Option<PathBuf> = None;
-        if let Some(gene_id_to_name_file) = gene_id_to_name {
+        if let Some(gene_id_to_name_file) = reference_stage.gene_id_to_name.clone() {
             let index_id2name_path = output_index_dir.join("gene_id_to_name.tsv");
             gene_id_to_name_out_path = Some(PathBuf::from("gene_id_to_name.tsv"));
             std::fs::copy(gene_id_to_name_file, index_id2name_path)?;
@@ -596,14 +668,10 @@ simpleaf"#,
                     "m" : minimizer_length,
                     "overwrite" : opts.overwrite,
                     "threads" : threads,
-                    "ref" : ref_seq
+                    "ref" : reference_stage.ref_seq
                 }
         });
-        std::fs::write(
-            &index_json_file,
-            serde_json::to_string_pretty(&index_json).unwrap(),
-        )
-        .with_context(|| format!("could not write {}", index_json_file.display()))?;
+        io::write_json_pretty_atomic(&index_json_file, &index_json)?;
     } else {
         // ensure we have piscem
         if rp.salmon.is_none() {
@@ -611,8 +679,11 @@ simpleaf"#,
                             Please either set a path using the `simpleaf set-paths` command, or ensure the `SALMON` environment variable is set properly.");
         }
 
+        let salmon_prog_info = rp.salmon.as_ref().context(
+            "The construction of a salmon index was requested, but salmon program info is missing.",
+        )?;
         let mut salmon_index_cmd =
-            std::process::Command::new(format!("{}", rp.salmon.unwrap().exe_path.display()));
+            std::process::Command::new(format!("{}", salmon_prog_info.exe_path.display()));
 
         salmon_index_cmd
             .arg("index")
@@ -621,7 +692,7 @@ simpleaf"#,
             .arg("-i")
             .arg(&output_index_dir)
             .arg("-t")
-            .arg(&ref_seq);
+            .arg(&reference_stage.ref_seq);
 
         // overwrite doesn't do anything special for the salmon index, so mention this to
         // the user.
@@ -640,18 +711,15 @@ simpleaf"#,
             salmon_index_cmd.arg("--keepDuplicates");
         }
 
-        // if the user requested more threads than can be used
-        if let Ok(max_threads_usize) = std::thread::available_parallelism() {
-            let max_threads = max_threads_usize.get() as u32;
-            if threads > max_threads {
-                warn!(
-                    "The maximum available parallelism is {}, but {} threads were requested.",
-                    max_threads, threads
-                );
-                warn!("setting number of threads to {}", max_threads);
-                threads = max_threads;
-            }
+        let (capped_threads, capped_at) = runtime::cap_threads(threads);
+        if let Some(max_threads) = capped_at {
+            warn!(
+                "The maximum available parallelism is {}, but {} threads were requested.",
+                max_threads, threads
+            );
+            warn!("setting number of threads to {}", max_threads);
         }
+        threads = capped_threads;
 
         salmon_index_cmd
             .arg("--threads")
@@ -662,17 +730,12 @@ simpleaf"#,
         info!("salmon index cmd : {}", index_cmd_string);
 
         let index_start = Instant::now();
-        let cres = prog_utils::execute_command(&mut salmon_index_cmd, CommandVerbosityLevel::Quiet)
-            .expect("failed to invoke salmon index command");
+        let _cres = exec::run_checked(&mut salmon_index_cmd, "salmon index command")?;
         index_duration = index_start.elapsed();
 
-        if !cres.status.success() {
-            bail!("salmon index failed to build succesfully {:?}", cres.status);
-        }
-
         // copy over the t2g file to the index
         let mut t2g_out_path: Option<PathBuf> = None;
-        if let Some(t2g_file) = t2g {
+        if let Some(t2g_file) = reference_stage.t2g.clone() {
             let index_t2g_path = output_index_dir.join("t2g_3col.tsv");
             t2g_out_path = Some(PathBuf::from("t2g_3col.tsv"));
             std::fs::copy(t2g_file, index_t2g_path)?;
@@ -680,8 +743,8 @@ simpleaf"#,
 
         // copy over the gene_id_to_name.tsv file to the index
         let mut gene_id_to_name_out_path: Option<PathBuf> = None;
-        info!("{:?}", gene_id_to_name);
-        if let Some(gene_id_to_name_file) = gene_id_to_name {
+        info!("{:?}", reference_stage.gene_id_to_name);
+        if let Some(gene_id_to_name_file) = reference_stage.gene_id_to_name.clone() {
             let index_id2name_path = output_index_dir.join("gene_id_to_name.tsv");
             gene_id_to_name_out_path = Some(PathBuf::from("gene_id_to_name.tsv"));
             std::fs::copy(gene_id_to_name_file, index_id2name_path)?;
@@ -699,43 +762,15 @@ simpleaf"#,
                     "sparse" : opts.sparse,
                     "keep_duplicates" : opts.keep_duplicates,
                     "threads" : threads,
-                    "ref" : ref_seq
+                    "ref" : reference_stage.ref_seq
                 }
         });
-        std::fs::write(
-            &index_json_file,
-            serde_json::to_string_pretty(&index_json).unwrap(),
-        )
-        .with_context(|| format!("could not write {}", index_json_file.display()))?;
+        io::write_json_pretty_atomic(&index_json_file, &index_json)?;
     }
-
-    let index_log_file = output.join("simpleaf_index_log.json");
-    let index_log_info = if let Some(roers_duration) = roers_duration {
-        // if we ran make-splici
-        json!({
-            "time_info" : {
-                "roers_time" : roers_duration,
-                "index_time" : index_duration
-            },
-            "cmd_info" : {
-                "roers_cmd" : roers_aug_ref_opt,
-                "index_cmd" : index_cmd_string,                    }
-        })
-    } else {
-        // if we indexed provided sequences directly
-        json!({
-            "time_info" : {
-                "index_time" : index_duration
-            },
-            "cmd_info" : {
-                "index_cmd" : index_cmd_string,                    }
-        })
+    let index_stage = IndexBuildStageOutput {
+        index_duration,
+        index_cmd_string,
     };
-
-    std::fs::write(
-        &index_log_file,
-        serde_json::to_string_pretty(&index_log_info).unwrap(),
-    )
-    .with_context(|| format!("could not write {}", index_log_file.display()))?;
+    write_index_log_stage(&output, &reference_stage, &index_stage)?;
     Ok(())
 }
diff --git a/src/simpleaf_commands/quant.rs b/src/simpleaf_commands/quant.rs
index 7cd8896..1a1a1ca 100644
--- a/src/simpleaf_commands/quant.rs
+++ b/src/simpleaf_commands/quant.rs
@@ -1,12 +1,12 @@
 use crate::utils::af_utils::*;
 
+use crate::core::{context, exec, index_meta, io, runtime};
 use crate::utils::prog_parsing_utils;
 use crate::utils::prog_utils;
-use crate::utils::prog_utils::{CommandVerbosityLevel, ReqProgs};
+use crate::utils::prog_utils::ReqProgs;
 
 use anyhow::{bail, Context};
 use serde_json::json;
-use serde_json::Value;
 use std::collections::HashMap;
 use std::io::{BufRead, BufReader, BufWriter, Write};
 use std::path::{Path, PathBuf};
@@ -52,8 +52,10 @@ impl CBListInfo {
             .lines()
             .take(NUM_SAMPLE_LINES) // don't read the whole file in the single-coumn case
             .map(|l| {
-                l.unwrap_or_else(|_| panic!("Could not open permitlist file {}", pl_file.display()))
+                l.with_context(|| format!("Could not open permitlist file {}", pl_file.display()))
             })
+            .collect::<anyhow::Result<Vec<_>>>()?
+            .into_iter()
             .any(|l| !l.contains('\t'));
 
         // if single column, we are good. Otherwise, we need to write the first column to the final file
@@ -236,140 +238,65 @@ fn validate_map_and_quant_opts(opts: &MapQuantOpts) -> anyhow::Result<()> {
     Ok(())
 }
 
-pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result<()> {
-    validate_map_and_quant_opts(&opts)?;
-
-    let mut t2g_map = opts.t2g_map.clone();
-    // Read the JSON contents of the file as an instance of `User`.
-    let v: Value = prog_utils::inspect_af_home(af_home_path)?;
-    let rp: ReqProgs = serde_json::from_value(v["prog_info"].clone())?;
-
-    rp.issue_recommended_version_messages();
-
-    let mut gene_id_to_name_opt: Option<PathBuf> = None;
-
-    // figure out what type of index we expect
-    let index_type;
-
-    if let Some(mut index) = opts.index.clone() {
-        // If the user built the index using simpleaf, and they are using
-        // piscem, then they are not required to pass the --use-piscem flag
-        // to the quant step (though they *can* pass it if they wish).
-        // Thus, if they built the piscem index using simpleaf, there are
-        // 2 possibilities here:
-        //  1. They are passing in the directory containing the index
-        //  2. They are passing in the prefix stem of the index files
-        // The code below is to check, in both cases, if we can automatically
-        // detect if the index was constructed with simpleaf, so that we can
-        // then automatically infer other files, like the t2g files.
-
-        // If we are in case 1., the passed in path is a directory and
-        // we can check for the simpleaf_index.json file directly,
-        // Otherwise if the path is not a directory, we check if it
-        // ends in piscem_idx (the suffix that simpleaf uses when
-        // making a piscem index). Then we test the directory we
-        // get after stripping off this suffix.
-        let removed_piscem_idx_suffix = if !index.is_dir() && index.ends_with("piscem_idx") {
-            // remove the piscem_idx part
-            index.pop();
-            true
-        } else {
-            false
-        };
-
-        let index_json_path = index.join("simpleaf_index.json");
-        match index_json_path.try_exists() {
-            Ok(true) => {
-                // we have the simpleaf_index.json file, so parse it.
-                let index_json_file = std::fs::File::open(&index_json_path).with_context({
-                    || format!("Could not open file {}", index_json_path.display())
-                })?;
+#[derive(Debug)]
+struct QuantSetup {
+    rp: ReqProgs,
+    index_type: IndexType,
+    t2g_map_file: PathBuf,
+    gene_id_to_name_opt: Option<PathBuf>,
+    chem: Chemistry,
+    ori: ExpectedOri,
+    filter_meth: CellFilterMethod,
+    threads: u32,
+}
 
-                let index_json_reader = BufReader::new(&index_json_file);
-                let v: Value = serde_json::from_reader(index_json_reader)?;
+#[derive(Debug)]
+struct MappingStageOutput {
+    sc_mapper: String,
+    map_cmd_string: String,
+    map_output: PathBuf,
+    map_duration: Duration,
+}
 
-                let index_type_str: String = serde_json::from_value(v["index_type"].clone())?;
+#[derive(Debug)]
+struct QuantStageOutput {
+    gpl_output: PathBuf,
+    gpl_cmd_string: String,
+    collate_cmd_string: String,
+    quant_cmd_string: String,
+    gpl_duration: Duration,
+    collate_duration: Duration,
+    quant_duration: Duration,
+}
 
-                // here, set the index type based on what we found as the
-                // value for the `index_type` key.
-                match index_type_str.as_ref() {
-                    "salmon" => {
-                        index_type = IndexType::Salmon(index.clone());
-                    }
-                    "piscem" => {
-                        // here, either the user has provided us with just
-                        // the directory containing the piscem index, or
-                        // we have "popped" off the "piscem_idx" suffix, so
-                        // add it (back).
-                        index_type = IndexType::Piscem(index.join("piscem_idx"));
-                    }
-                    _ => {
-                        bail!(
-                            "unknown index type {} present in simpleaf_index.json",
-                            index_type_str,
-                        );
-                    }
-                }
-                // if the user didn't pass in a t2g_map, try and populate it
-                // automatically here
-                if t2g_map.is_none() {
-                    let t2g_opt: Option<PathBuf> = serde_json::from_value(v["t2g_file"].clone())?;
-                    if let Some(t2g_val) = t2g_opt {
-                        let t2g_loc = index.join(t2g_val);
-                        info!("found local t2g file at {}, will attempt to use this since none was provided explicitly", t2g_loc.display());
-                        t2g_map = Some(t2g_loc);
-                    }
-                }
+fn resolve_quant_setup(
+    af_home_path: &Path,
+    opts: &MapQuantOpts,
+) -> anyhow::Result<(QuantSetup, CBListInfo)> {
+    let mut t2g_map = opts.t2g_map.clone();
+    let ctx = context::load_runtime_context(af_home_path)?;
+    let rp: ReqProgs = ctx.progs;
+    rp.issue_recommended_version_messages();
 
-                // if the user used simpleaf for index construction, then we also built the
-                // reference and populated the gene_id_to_name.tsv file.  See if we can grab
-                // that as well.
-                if index.join("gene_id_to_name.tsv").exists() {
-                    gene_id_to_name_opt = Some(index.join("gene_id_to_name.tsv"));
-                } else if let Some(index_parent) = index.parent() {
-                    // we are doing index_dir/../ref/gene_id_to_name.tsv
-                    let gene_name_path = index_parent.join("ref").join("gene_id_to_name.tsv");
-                    if gene_name_path.exists() && gene_name_path.is_file() {
-                        gene_id_to_name_opt = Some(gene_name_path);
-                    }
-                }
-            }
-            Ok(false) => {
-                // at this point, we have inferred that simpleaf wasn't
-                // used to construct the index, so fall back to what the user
-                // requested directly.
-                // if we have previously removed the piscem_idx suffix, add it back
-                if removed_piscem_idx_suffix {
-                    index.push("piscem_idx");
-                }
-                if opts.use_piscem {
-                    // the user passed the use-piscem flag, so treat the provided
-                    // path as the *prefix stem* to the piscem index
-                    index_type = IndexType::Piscem(index);
-                } else {
-                    // if the user didn't pass use-piscem and there
-                    // is no simpleaf index json file to check, then
-                    // it's assumed they are using a salmon index.
-                    index_type = IndexType::Salmon(index);
-                }
-            }
-            Err(e) => {
-                bail!(e);
-            }
+    let index_meta = index_meta::resolve_quant_index(opts.index.clone(), opts.use_piscem)?;
+    if t2g_map.is_none() {
+        if let Some(t2g_loc) = index_meta.inferred_t2g.clone() {
+            info!(
+                "found local t2g file at {}, will attempt to use this since none was provided explicitly",
+                t2g_loc.display()
+            );
+            t2g_map = Some(t2g_loc);
         }
-    } else {
-        index_type = IndexType::NoIndex;
     }
+    let index_type = index_meta.index_type;
+    let gene_id_to_name_opt = index_meta.inferred_gene_id_to_name;
 
-    // at this point make sure we have a t2g value
     let t2g_map_file = t2g_map.context(
         "A transcript-to-gene map (t2g) file was not provided via `--t2g-map`|`-m` and could \
         not be inferred from the index. Please provide a t2g map explicitly to the quant command.",
     )?;
-    prog_utils::check_files_exist(&[t2g_map_file.clone()])?;
+    prog_utils::check_files_exist(std::slice::from_ref(&t2g_map_file))?;
 
-    // make sure we have an program matching the
-    // appropriate index type
     match index_type {
         IndexType::Piscem(_) => {
             if rp.piscem.is_none() {
@@ -378,54 +305,35 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result<
         }
         IndexType::Salmon(_) => {
             if rp.salmon.is_none() {
-                bail!("A salmon index is being used, but no piscem executable is provided. Please set one with `simpleaf set-paths`.");
+                bail!("A salmon index is being used, but no salmon executable is provided. Please set one with `simpleaf set-paths`.");
             }
         }
         IndexType::NoIndex => {}
     }
 
-    // the chemistries file
     let custom_chem_p = af_home_path.join(CHEMISTRIES_PATH);
-
     let chem = Chemistry::from_str(&index_type, &custom_chem_p, &opts.chemistry)?;
-
-    let ori: ExpectedOri;
-    // if the user set the orientation, then
-    // use that explicitly
-    if let Some(o) = &opts.expected_ori {
-        ori = ExpectedOri::from_str(o).with_context(|| {
+    let ori = if let Some(o) = &opts.expected_ori {
+        ExpectedOri::from_str(o).with_context(|| {
             format!(
                 "Could not parse orientation {}. It must be one of the following: {:?}",
                 o,
                 ExpectedOri::all_to_str().join(", ")
             )
-        })?;
+        })?
     } else {
-        ori = chem.expected_ori();
-    }
+        chem.expected_ori()
+    };
 
     let mut filter_meth_opt = None;
     let mut pl_info = CBListInfo::new();
-
-    // based on the filtering method
     if let Some(ref pl_file) = opts.unfiltered_pl {
-        // NOTE: unfiltered_pl is of type Option<Option<PathBuf>> so being in here
-        // tells us nothing about the inner option.  We handle that now.
-
-        // if the -u flag is passed and some file is provided, then the inner
-        // Option is Some(PathBuf)
         if let Some(pl_file) = pl_file {
-            // the user has explicily passed a file along, so try
-            // to use that
             if pl_file.is_file() {
-                // we read the file to see if there is additional columns separated by \t.
-                // unwrap is safe here cuz we defined it above
                 pl_info.init(pl_file, &opts.output)?;
-
-                let min_cells = opts.min_reads;
                 filter_meth_opt = Some(CellFilterMethod::UnfilteredExternalList(
                     pl_info.final_file.to_string_lossy().into_owned(),
-                    min_cells,
+                    opts.min_reads,
                 ));
             } else {
                 bail!(
@@ -434,20 +342,13 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result<
                 );
             }
         } else {
-            // here, the -u flag is provided
-            // but no file is provided, then the
-            // inner option is None and we will try to get the permit list automatically if
-            // using 10xv2, 10xv3, or 10xv4
-
-            // check the chemistry
             let pl_res = get_permit_if_absent(af_home_path, &chem)?;
-            let min_cells = opts.min_reads;
             match pl_res {
                 PermitListResult::DownloadSuccessful(p) | PermitListResult::AlreadyPresent(p) => {
                     pl_info.init(&p, &opts.output)?;
                     filter_meth_opt = Some(CellFilterMethod::UnfilteredExternalList(
                         pl_info.final_file.to_string_lossy().into_owned(),
-                        min_cells,
+                        opts.min_reads,
                     ));
                 }
                 PermitListResult::UnregisteredChemistry => {
@@ -472,63 +373,57 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result<
             filter_meth_opt = Some(CellFilterMethod::ExpectCells(*num_expected));
         };
     }
-    // otherwise it must have been knee;
     if opts.knee {
         filter_meth_opt = Some(CellFilterMethod::KneeFinding);
     }
 
-    if filter_meth_opt.is_none() {
-        bail!("No valid filtering strategy was provided!");
-    }
-
-    // if the user requested more threads than can be used
-    let mut threads = opts.threads;
-    if let Ok(max_threads_usize) = std::thread::available_parallelism() {
-        let max_threads = max_threads_usize.get() as u32;
-        if threads > max_threads {
-            warn!(
-                "The maximum available parallelism is {}, but {} threads were requested.",
-                max_threads, threads
-            );
-            warn!("setting number of threads to {}", max_threads);
-            threads = max_threads;
-        }
+    let (threads, capped_at) = runtime::cap_threads(opts.threads);
+    if let Some(max_threads) = capped_at {
+        warn!(
+            "The maximum available parallelism is {}, but {} threads were requested.",
+            max_threads, opts.threads
+        );
+        warn!("setting number of threads to {}", max_threads);
     }
 
-    // here we must be safe to unwrap
-    let filter_meth = filter_meth_opt.unwrap();
-
-    let sc_mapper: String;
-    let map_cmd_string: String;
-    let map_output: PathBuf;
-    let map_duration: Duration;
+    let setup = QuantSetup {
+        rp,
+        index_type,
+        t2g_map_file,
+        gene_id_to_name_opt,
+        chem,
+        ori,
+        filter_meth: filter_meth_opt.context("No valid filtering strategy was provided!")?,
+        threads,
+    };
+    Ok((setup, pl_info))
+}
 
-    // if we are mapping against an index
+fn run_mapping_stage(
+    opts: &MapQuantOpts,
+    setup: &QuantSetup,
+) -> anyhow::Result<MappingStageOutput> {
     if let Some(index) = opts.index.clone() {
-        let reads1 = opts
-            .reads1
-            .as_ref()
-            .expect("since mapping against an index is requested, read1 files must be provided.");
-        let reads2 = opts
-            .reads2
-            .as_ref()
-            .expect("since mapping against an index is requested, read2 files must be provided.");
-        assert_eq!(
-            reads1.len(),
-            reads2.len(),
-            "{} read1 files and {} read2 files were given; Cannot proceed!",
-            reads1.len(),
-            reads2.len()
-        );
-
-        match index_type {
-            IndexType::Piscem(ref index_base) => {
-                let piscem_prog_info = rp
-                    .piscem
-                    .as_ref()
-                    .expect("piscem program info should be properly set.");
+        let reads1 = opts.reads1.as_ref().context(
+            "Mapping against an index was requested, but read1 files were not provided.",
+        )?;
+        let reads2 = opts.reads2.as_ref().context(
+            "Mapping against an index was requested, but read2 files were not provided.",
+        )?;
+        if reads1.len() != reads2.len() {
+            bail!(
+                "{} read1 files and {} read2 files were given; Cannot proceed!",
+                reads1.len(),
+                reads2.len()
+            );
+        }
 
-                // using a piscem index
+        match &setup.index_type {
+            IndexType::Piscem(index_base) => {
+                let piscem_prog_info =
+                    setup.rp.piscem.as_ref().context(
+                        "A piscem index is being used, but piscem program info is missing.",
+                    )?;
                 let mut piscem_quant_cmd =
                     std::process::Command::new(format!("{}", &piscem_prog_info.exe_path.display()));
                 let index_path = format!("{}", index_base.display());
@@ -537,22 +432,19 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result<
                     .arg("--index")
                     .arg(index_path);
 
-                // location of output directory, number of threads
-                map_output = opts.output.join("af_map");
+                let map_output = opts.output.join("af_map");
                 piscem_quant_cmd
                     .arg("--threads")
-                    .arg(format!("{}", threads))
+                    .arg(format!("{}", setup.threads))
                     .arg("-o")
                     .arg(&map_output);
 
-                // if the user is requesting a mapping option that required
-                // piscem version >= 0.7.0, ensure we have that
                 if let Ok(_piscem_ver) = prog_utils::check_version_constraints(
                     "piscem",
                     ">=0.7.0, <1.0.0",
                     &piscem_prog_info.version,
                 ) {
-                    push_advanced_piscem_options(&mut piscem_quant_cmd, &opts)?;
+                    push_advanced_piscem_options(&mut piscem_quant_cmd, opts)?;
                 } else {
                     info!(
                         r#"
@@ -564,21 +456,16 @@ being used by simpleaf"#,
                     );
                 }
 
-                // we get the final geometry we want to pass to piscem
-                // check if we can parse the geometry directly, or if we are dealing with a
-                // "complex" geometry.
                 let frag_lib_xform = add_or_transform_fragment_library(
                     MapperType::Piscem,
-                    chem.fragment_geometry_str(),
+                    setup.chem.fragment_geometry_str(),
                     reads1,
                     reads2,
                     &mut piscem_quant_cmd,
                 )?;
 
-                map_cmd_string = prog_utils::get_cmd_line_string(&piscem_quant_cmd);
+                let map_cmd_string = prog_utils::get_cmd_line_string(&piscem_quant_cmd);
                 info!("piscem map-sc cmd : {}", map_cmd_string);
-                sc_mapper = String::from("piscem");
-
                 let mut input_files = vec![
                     index_base.with_extension("ctab"),
                     index_base.with_extension("refinfo"),
@@ -586,21 +473,12 @@ being used by simpleaf"#,
                 ];
                 input_files.extend_from_slice(reads1);
                 input_files.extend_from_slice(reads2);
-
                 prog_utils::check_files_exist(&input_files)?;
 
                 let map_start = Instant::now();
-                let cres = prog_utils::execute_command(
-                    &mut piscem_quant_cmd,
-                    CommandVerbosityLevel::Quiet,
-                )
-                .expect("failed to execute piscem [mapping phase]");
-
-                // if we had to filter the reads through a fifo
-                // wait for the thread feeding the fifo to finish
+                exec::run_checked(&mut piscem_quant_cmd, "piscem [mapping phase]")?;
                 match frag_lib_xform {
                     FragmentTransformationType::TransformedIntoFifo(xform_data) => {
-                        // wait for it to join
                         match xform_data.join_handle.join() {
                             Ok(join_res) => {
                                 let xform_stats = join_res?;
@@ -616,25 +494,23 @@ being used by simpleaf"#,
                             }
                         }
                     }
-                    FragmentTransformationType::Identity => {
-                        // nothing to do.
-                    }
+                    FragmentTransformationType::Identity => {}
                 }
 
-                map_duration = map_start.elapsed();
-
-                if !cres.status.success() {
-                    bail!("piscem mapping failed with exit status {:?}", cres.status);
-                }
+                Ok(MappingStageOutput {
+                    sc_mapper: String::from("piscem"),
+                    map_cmd_string,
+                    map_output,
+                    map_duration: map_start.elapsed(),
+                })
             }
-            IndexType::Salmon(ref index_base) => {
-                // using a salmon index
-                let mut salmon_quant_cmd = std::process::Command::new(format!(
-                    "{}",
-                    rp.salmon.unwrap().exe_path.display()
-                ));
-
-                // set the input index and library type
+            IndexType::Salmon(index_base) => {
+                let salmon_prog_info =
+                    setup.rp.salmon.as_ref().context(
+                        "A salmon index is being used, but salmon program info is missing.",
+                    )?;
+                let mut salmon_quant_cmd =
+                    std::process::Command::new(format!("{}", salmon_prog_info.exe_path.display()));
                 let index_path = format!("{}", index_base.display());
                 salmon_quant_cmd
                     .arg("alevin")
@@ -643,55 +519,37 @@ being used by simpleaf"#,
                     .arg("-l")
                     .arg("A");
 
-                // check if we can parse the geometry directly, or if we are dealing with a
-                // "complex" geometry.
                 let frag_lib_xform = add_or_transform_fragment_library(
                     MapperType::Salmon,
-                    chem.fragment_geometry_str(),
+                    setup.chem.fragment_geometry_str(),
                     reads1,
                     reads2,
                     &mut salmon_quant_cmd,
                 )?;
 
-                // location of output directory, number of threads
-                map_output = opts.output.join("af_map");
+                let map_output = opts.output.join("af_map");
                 salmon_quant_cmd
                     .arg("--threads")
-                    .arg(format!("{}", threads))
+                    .arg(format!("{}", setup.threads))
                     .arg("-o")
                     .arg(&map_output);
-
-                // if the user explicitly requested to use selective-alignment
-                // then enable that
                 if opts.use_selective_alignment {
                     salmon_quant_cmd.arg("--rad");
                 } else {
-                    // otherwise default to sketch mode
                     salmon_quant_cmd.arg("--sketch");
                 }
 
-                map_cmd_string = prog_utils::get_cmd_line_string(&salmon_quant_cmd);
+                let map_cmd_string = prog_utils::get_cmd_line_string(&salmon_quant_cmd);
                 info!("salmon alevin cmd : {}", map_cmd_string);
-                sc_mapper = String::from("salmon");
-
                 let mut input_files = vec![index];
                 input_files.extend_from_slice(reads1);
                 input_files.extend_from_slice(reads2);
-
                 prog_utils::check_files_exist(&input_files)?;
 
                 let map_start = Instant::now();
-                let cres = prog_utils::execute_command(
-                    &mut salmon_quant_cmd,
-                    CommandVerbosityLevel::Quiet,
-                )
-                .expect("failed to execute salmon [mapping phase]");
-
-                // if we had to filter the reads through a fifo
-                // wait for the thread feeding the fifo to finish
+                exec::run_checked(&mut salmon_quant_cmd, "salmon [mapping phase]")?;
                 match frag_lib_xform {
                     FragmentTransformationType::TransformedIntoFifo(xform_data) => {
-                        // wait for it to join
                         match xform_data.join_handle.join() {
                             Ok(join_res) => {
                                 let xform_stats = join_res?;
@@ -707,35 +565,40 @@ being used by simpleaf"#,
                             }
                         }
                     }
-                    FragmentTransformationType::Identity => {
-                        // nothing to do.
-                    }
+                    FragmentTransformationType::Identity => {}
                 }
 
-                map_duration = map_start.elapsed();
-
-                if !cres.status.success() {
-                    bail!("salmon mapping failed with exit status {:?}", cres.status);
-                }
+                Ok(MappingStageOutput {
+                    sc_mapper: String::from("salmon"),
+                    map_cmd_string,
+                    map_output,
+                    map_duration: map_start.elapsed(),
+                })
             }
             IndexType::NoIndex => {
                 bail!("Cannot perform mapping an quantification without known (piscem or salmon) index!");
             }
         }
     } else {
-        map_cmd_string = String::from("");
-        sc_mapper = String::from("");
-        map_output = opts
-            .map_dir
-            .expect("map-dir must be provided, since index, read1 and read2 were not.");
-        map_duration = Duration::new(0, 0);
+        Ok(MappingStageOutput {
+            sc_mapper: String::new(),
+            map_cmd_string: String::new(),
+            map_output: opts
+                .map_dir
+                .clone()
+                .context("map-dir must be provided, since index, read1 and read2 were not.")?,
+            map_duration: Duration::new(0, 0),
+        })
     }
+}
 
-    let map_output_string = map_output.display().to_string();
-
-    // make the quant directory
+fn run_quant_stage(
+    opts: &MapQuantOpts,
+    setup: &QuantSetup,
+    mapping: &MappingStageOutput,
+    pl_info: &mut CBListInfo,
+) -> anyhow::Result<QuantStageOutput> {
     let gpl_output = opts.output.join("af_quant");
-
     std::fs::create_dir_all(&gpl_output).with_context(|| {
         format!(
             "Failed to create quantification output directory {}",
@@ -743,16 +606,13 @@ being used by simpleaf"#,
         )
     })?;
 
-    // attempt to get the relevant information from map_info to propagate forward to the
-    // quantification directory
-    //get_mapping_info
-    let mapping_log = match index_type {
+    let mapping_log = match &setup.index_type {
         IndexType::Piscem(_) => {
-            let piscem_map_log_path = map_output.join("map_info.json");
+            let piscem_map_log_path = mapping.map_output.join("map_info.json");
             prog_parsing_utils::construct_json_from_piscem_log(piscem_map_log_path)?
         }
         IndexType::Salmon(_) => {
-            let salmon_log_path = map_output.join("logs").join("salmon_quant.log");
+            let salmon_log_path = mapping.map_output.join("logs").join("salmon_quant.log");
             prog_parsing_utils::construct_json_from_salmon_log(salmon_log_path)?
         }
         IndexType::NoIndex => {
@@ -765,153 +625,119 @@ being used by simpleaf"#,
             })
         }
     };
-
     let map_info_path = gpl_output.join("simpleaf_map_info.json");
     let map_info_file = std::fs::File::create(map_info_path)?;
     serde_json::to_writer(map_info_file, &mapping_log)?;
 
-    let alevin_fry = rp.alevin_fry.unwrap().exe_path;
-    // alevin-fry generate permit list
+    let alevin_fry = setup
+        .rp
+        .alevin_fry
+        .as_ref()
+        .context("Alevin-fry program info is missing; please run `simpleaf set-paths`.")?
+        .exe_path
+        .clone();
     let mut alevin_gpl_cmd = std::process::Command::new(format!("{}", &alevin_fry.display()));
-
-    let gpl_threads = threads.min(8);
+    let gpl_threads = setup.threads.min(8);
     alevin_gpl_cmd.arg("generate-permit-list");
-    alevin_gpl_cmd.arg("-i").arg(&map_output);
-    alevin_gpl_cmd.arg("-d").arg(ori.as_str());
+    alevin_gpl_cmd.arg("-i").arg(&mapping.map_output);
+    alevin_gpl_cmd.arg("-d").arg(setup.ori.as_str());
     alevin_gpl_cmd.arg("-t").arg(format!("{}", gpl_threads));
-
-    // add the filter mode
-    filter_meth.add_to_args(&mut alevin_gpl_cmd);
-
+    setup.filter_meth.add_to_args(&mut alevin_gpl_cmd);
     alevin_gpl_cmd.arg("-o").arg(&gpl_output);
-
-    info!(
-        "alevin-fry generate-permit-list cmd : {}",
-        prog_utils::get_cmd_line_string(&alevin_gpl_cmd)
-    );
-    let input_files = vec![map_output.clone()];
+    let gpl_cmd_string = prog_utils::get_cmd_line_string(&alevin_gpl_cmd);
+    info!("alevin-fry generate-permit-list cmd : {}", gpl_cmd_string);
+    let input_files = vec![mapping.map_output.clone()];
     prog_utils::check_files_exist(&input_files)?;
-
     let gpl_start = Instant::now();
-    let gpl_proc_out =
-        prog_utils::execute_command(&mut alevin_gpl_cmd, CommandVerbosityLevel::Quiet)
-            .expect("could not execute [generate permit list]");
+    exec::run_checked(&mut alevin_gpl_cmd, "[generate permit list]")?;
     let gpl_duration = gpl_start.elapsed();
 
-    if !gpl_proc_out.status.success() {
-        bail!(
-            "alevin-fry generate-permit-list failed with exit status {:?}",
-            gpl_proc_out.status
-        );
-    }
-
-    //
-    // collate
-    //
     let mut alevin_collate_cmd = std::process::Command::new(format!("{}", &alevin_fry.display()));
-
     alevin_collate_cmd.arg("collate");
     alevin_collate_cmd.arg("-i").arg(&gpl_output);
-    alevin_collate_cmd.arg("-r").arg(&map_output);
-    alevin_collate_cmd.arg("-t").arg(format!("{}", threads));
-
-    info!(
-        "alevin-fry collate cmd : {}",
-        prog_utils::get_cmd_line_string(&alevin_collate_cmd)
-    );
-    let input_files = vec![gpl_output.clone(), map_output];
+    alevin_collate_cmd.arg("-r").arg(&mapping.map_output);
+    alevin_collate_cmd
+        .arg("-t")
+        .arg(format!("{}", setup.threads));
+    let collate_cmd_string = prog_utils::get_cmd_line_string(&alevin_collate_cmd);
+    info!("alevin-fry collate cmd : {}", collate_cmd_string);
+    let input_files = vec![gpl_output.clone(), mapping.map_output.clone()];
     prog_utils::check_files_exist(&input_files)?;
-
     let collate_start = Instant::now();
-    let collate_proc_out =
-        prog_utils::execute_command(&mut alevin_collate_cmd, CommandVerbosityLevel::Quiet)
-            .expect("could not execute [collate]");
+    exec::run_checked(&mut alevin_collate_cmd, "[collate]")?;
     let collate_duration = collate_start.elapsed();
 
-    if !collate_proc_out.status.success() {
-        bail!(
-            "alevin-fry collate failed with exit status {:?}",
-            collate_proc_out.status
-        );
-    }
-
-    //
-    // quant
-    //
     let mut alevin_quant_cmd = std::process::Command::new(format!("{}", &alevin_fry.display()));
-
     alevin_quant_cmd
         .arg("quant")
         .arg("-i")
         .arg(&gpl_output)
         .arg("-o")
         .arg(&gpl_output);
-    alevin_quant_cmd.arg("-t").arg(format!("{}", threads));
-    alevin_quant_cmd.arg("-m").arg(t2g_map_file.clone());
-    alevin_quant_cmd.arg("-r").arg(opts.resolution);
-
+    alevin_quant_cmd.arg("-t").arg(format!("{}", setup.threads));
+    alevin_quant_cmd.arg("-m").arg(setup.t2g_map_file.clone());
+    alevin_quant_cmd.arg("-r").arg(&opts.resolution);
+    let quant_cmd_string = prog_utils::get_cmd_line_string(&alevin_quant_cmd);
     info!("cmd : {:?}", alevin_quant_cmd);
-
-    let input_files = vec![gpl_output.clone(), t2g_map_file];
+    let input_files = vec![gpl_output.clone(), setup.t2g_map_file.clone()];
     prog_utils::check_files_exist(&input_files)?;
-
     let quant_start = Instant::now();
-    let quant_proc_out =
-        prog_utils::execute_command(&mut alevin_quant_cmd, CommandVerbosityLevel::Quiet)
-            .expect("could not execute [quant]");
+    exec::run_checked(&mut alevin_quant_cmd, "[quant]")?;
     let quant_duration = quant_start.elapsed();
 
-    if !quant_proc_out.status.success() {
-        bail!("quant failed with exit status {:?}", quant_proc_out.status);
-    }
-
-    // If we had a gene_id_to_name.tsv file handy, copy it over into the
-    // quantification directory.
-    if let Some(gene_name_path) = gene_id_to_name_opt {
+    if let Some(gene_name_path) = &setup.gene_id_to_name_opt {
         let target_path = gpl_output.join("gene_id_to_name.tsv");
-        match std::fs::copy(&gene_name_path, &target_path) {
+        match std::fs::copy(gene_name_path, &target_path) {
             Ok(_) => {
                 info!("successfully copied the gene_name_to_id.tsv file into the quantification directory.");
             }
             Err(err) => {
-                warn!("could not successfully copy gene_id_to_name file from {:?} to {:?} because of {:?}",
-                gene_name_path, target_path, err
-            );
+                warn!(
+                    "could not successfully copy gene_id_to_name file from {:?} to {:?} because of {:?}",
+                    gene_name_path, target_path, err
+                );
             }
         }
     }
 
-    // If a permit/explit list with auxilary info was provided,
-    // we add the auxilary info to the barcodes.tsv file.
     let quants_mat_rows_p = gpl_output.join("alevin").join("quants_mat_rows.txt");
     pl_info.update_af_quant_barcodes_tsv(&quants_mat_rows_p)?;
 
-    let mut convert_duration = None;
-    if opts.anndata_out {
-        let convert_start = Instant::now();
-        let opath = gpl_output.join("alevin").join("quants.h5ad");
-        af_anndata::convert_csr_to_anndata(&gpl_output, &opath)?;
-        convert_duration = Some(convert_start.elapsed());
-    }
+    Ok(QuantStageOutput {
+        gpl_output,
+        gpl_cmd_string,
+        collate_cmd_string,
+        quant_cmd_string,
+        gpl_duration,
+        collate_duration,
+        quant_duration,
+    })
+}
 
+fn write_quant_log(
+    opts: &MapQuantOpts,
+    mapping: &MappingStageOutput,
+    quant_stage: &QuantStageOutput,
+    convert_duration: Option<Duration>,
+) -> anyhow::Result<()> {
     let af_quant_info_file = opts.output.join("simpleaf_quant_log.json");
     let mut af_quant_info = json!({
         "time_info" : {
-        "map_time" : map_duration,
-        "gpl_time" : gpl_duration,
-        "collate_time" : collate_duration,
-        "quant_time" : quant_duration
+        "map_time" : mapping.map_duration,
+        "gpl_time" : quant_stage.gpl_duration,
+        "collate_time" : quant_stage.collate_duration,
+        "quant_time" : quant_stage.quant_duration
     },
         "cmd_info" : {
-        "map_cmd" : map_cmd_string,
-        "gpl_cmd" : prog_utils::get_cmd_line_string(&alevin_gpl_cmd),
-        "collate_cmd" : prog_utils::get_cmd_line_string(&alevin_collate_cmd),
-        "quant_cmd" : prog_utils::get_cmd_line_string(&alevin_quant_cmd)
+        "map_cmd" : mapping.map_cmd_string,
+        "gpl_cmd" : quant_stage.gpl_cmd_string,
+        "collate_cmd" : quant_stage.collate_cmd_string,
+        "quant_cmd" : quant_stage.quant_cmd_string
     },
         "map_info" : {
-        "mapper" : sc_mapper,
-        "map_cmd" : map_cmd_string,
-        "map_outdir": map_output_string
+        "mapper" : mapping.sc_mapper,
+        "map_cmd" : mapping.map_cmd_string,
+        "map_outdir": mapping.map_output.display().to_string()
     }
     });
 
@@ -919,12 +745,106 @@ being used by simpleaf"#,
         af_quant_info["time_info"]["conversion_time"] = json!(ctime);
     }
 
-    // write the relevant info about
-    // our run to file.
-    std::fs::write(
-        &af_quant_info_file,
-        serde_json::to_string_pretty(&af_quant_info).unwrap(),
-    )
-    .with_context(|| format!("could not write {}", af_quant_info_file.display()))?;
+    io::write_json_pretty_atomic(&af_quant_info_file, &af_quant_info)?;
     Ok(())
 }
+
+pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result<()> {
+    validate_map_and_quant_opts(&opts)?;
+    let (setup, mut pl_info) = resolve_quant_setup(af_home_path, &opts)?;
+    let mapping = run_mapping_stage(&opts, &setup)?;
+    let quant_stage = run_quant_stage(&opts, &setup, &mapping, &mut pl_info)?;
+
+    let mut convert_duration = None;
+    if opts.anndata_out {
+        let convert_start = Instant::now();
+        let opath = quant_stage.gpl_output.join("alevin").join("quants.h5ad");
+        af_anndata::convert_csr_to_anndata(&quant_stage.gpl_output, &opath)?;
+        convert_duration = Some(convert_start.elapsed());
+    }
+
+    write_quant_log(&opts, &mapping, &quant_stage, convert_duration)
+}
+
+#[cfg(test)]
+mod tests {
+    use clap::Parser;
+
+    use crate::utils::af_utils::RnaChemistry;
+    use crate::{Cli, Commands};
+
+    use super::*;
+
+    fn parse_quant_opts(args: &[&str]) -> MapQuantOpts {
+        let mut cli_args = vec!["simpleaf"];
+        cli_args.extend_from_slice(args);
+        match Cli::parse_from(cli_args).command {
+            Commands::Quant(opts) => opts,
+            cmd => panic!("expected quant command, found {:?}", cmd),
+        }
+    }
+
+    fn minimal_no_index_setup() -> QuantSetup {
+        QuantSetup {
+            rp: ReqProgs {
+                salmon: None,
+                piscem: None,
+                alevin_fry: None,
+                macs: None,
+            },
+            index_type: IndexType::NoIndex,
+            t2g_map_file: PathBuf::from("/tmp/t2g.tsv"),
+            gene_id_to_name_opt: None,
+            chem: Chemistry::Rna(RnaChemistry::TenxV3),
+            ori: ExpectedOri::Forward,
+            filter_meth: CellFilterMethod::KneeFinding,
+            threads: 1,
+        }
+    }
+
+    #[test]
+    fn mapping_stage_no_index_uses_map_dir() {
+        let opts = parse_quant_opts(&[
+            "quant",
+            "-c",
+            "10xv3",
+            "-o",
+            "/tmp/out",
+            "-r",
+            "cr-like",
+            "--knee",
+            "--map-dir",
+            "/tmp/mapped",
+        ]);
+        let setup = minimal_no_index_setup();
+        let stage = run_mapping_stage(&opts, &setup).expect("mapping stage should succeed");
+        assert_eq!(stage.map_output, PathBuf::from("/tmp/mapped"));
+        assert_eq!(stage.sc_mapper, "");
+    }
+
+    #[test]
+    fn mapping_stage_no_index_fails_without_map_dir() {
+        let mut opts = parse_quant_opts(&[
+            "quant",
+            "-c",
+            "10xv3",
+            "-o",
+            "/tmp/out",
+            "-r",
+            "cr-like",
+            "--knee",
+            "--map-dir",
+            "/tmp/mapped",
+        ]);
+        opts.map_dir = None;
+        opts.index = None;
+
+        let setup = minimal_no_index_setup();
+        let err = run_mapping_stage(&opts, &setup).expect_err("expected missing map-dir to fail");
+        assert!(
+            format!("{:#}", err).contains("map-dir must be provided"),
+            "unexpected error: {:#}",
+            err
+        );
+    }
+}
diff --git a/src/simpleaf_commands/workflow.rs b/src/simpleaf_commands/workflow.rs
index 02ec4a2..9dad7a8 100644
--- a/src/simpleaf_commands/workflow.rs
+++ b/src/simpleaf_commands/workflow.rs
@@ -21,6 +21,85 @@ struct WorkflowTemplate {
     version: String,
 }
 
+/// Fully resolved workflow run inputs after parsing either a manifest or template.
+struct ResolvedWorkflowRun {
+    /// Parsed executable manifest.
+    instantiated_manifest: serde_json::Value,
+    /// Source path of the provided manifest/template (for logging context).
+    source_path: std::path::PathBuf,
+    /// Resolved output path extracted from the instantiated manifest.
+    output_path: std::path::PathBuf,
+}
+
+/// Resolve `workflow run` inputs into an instantiated manifest and paths.
+///
+/// This stage is responsible for parse/instantiate + output-path resolution.
+fn resolve_workflow_run_inputs<T: AsRef<Path>>(
+    af_home_path: T,
+    template: Option<std::path::PathBuf>,
+    manifest: Option<std::path::PathBuf>,
+    output_opt: Option<std::path::PathBuf>,
+    jpaths: &Option<Vec<std::path::PathBuf>>,
+    ext_codes: &Option<Vec<String>>,
+) -> anyhow::Result<ResolvedWorkflowRun> {
+    if let Some(manifest) = manifest {
+        info!("Loading and executing manifest.");
+        let instantiated_manifest = workflow_utils::parse_manifest(&manifest)?;
+        let output_path = workflow_utils::get_output_path(&instantiated_manifest)?;
+        return Ok(ResolvedWorkflowRun {
+            instantiated_manifest,
+            source_path: manifest,
+            output_path,
+        });
+    }
+
+    if let Some(template) = template {
+        if !template.exists() || !template.is_file() {
+            bail!("the path of the given workflow template file doesn't exist; Cannot proceed.")
+        }
+
+        info!("Processing simpleaf template to produce and execute manifest.");
+        let workflow_json_string = workflow_utils::instantiate_workflow_template(
+            af_home_path.as_ref(),
+            template.as_path(),
+            output_opt.clone(),
+            jpaths,
+            ext_codes,
+        )?;
+
+        let instantiated_manifest: Value = serde_json::from_str(workflow_json_string.as_str())?;
+        let output_path = workflow_utils::get_output_path(&instantiated_manifest)?;
+
+        // If both command-line and template outputs are set, warn if template wins.
+        if let Some(requested_output_path) = output_opt {
+            if requested_output_path != output_path {
+                warn!(
+                    r#"The output path {} was requested via the command line, but 
+                            the output path {} was resolved from the workflow template.
+                            In this case, since the output variable is not used when instantiating 
+                            the template, the value ({}) present in the template must be used.
+                            Please be aware that {} will not be used for output!"#,
+                    requested_output_path.display(),
+                    output_path.display(),
+                    output_path.display(),
+                    requested_output_path.display()
+                );
+            }
+        }
+
+        return Ok(ResolvedWorkflowRun {
+            instantiated_manifest,
+            source_path: template,
+            output_path,
+        });
+    }
+
+    bail!(concat!(
+        "You must have one of a manifest or template, ",
+        "but provided neither; this shouldn't happen"
+    ));
+}
+
 pub fn refresh_protocol_estuary<T: AsRef<Path>>(af_home_path: T) -> anyhow::Result<()> {
     workflow_utils::get_protocol_estuary(
         af_home_path.as_ref(),
@@ -384,71 +463,17 @@ pub fn run_workflow<T: AsRef<Path>>(
             skip_step,
             ext_codes,
         } => {
-            // designate that output is optional
-            let output_opt = output;
-
-            let instantiated_manifest: serde_json::Value;
-            let source_path: std::path::PathBuf;
-            let output_path: std::path::PathBuf;
-            if let Some(manifest) = manifest {
-                // If the user passed a fully-instantiated
-                // manifest to execute
-                info!("Loading and executing manifest.");
-                // iterate json files and parse records to commands
-                // convert files into json string vector
-                instantiated_manifest = workflow_utils::parse_manifest(&manifest)?;
-                output_path = workflow_utils::get_output_path(&instantiated_manifest)?;
-                source_path = manifest.clone();
-            } else if let Some(template) = template {
-                //  check the validity of the file
-                if !template.exists() || !template.is_file() {
-                    bail!("the path of the given workflow template file doesn't exist; Cannot proceed.")
-                }
-
-                info!("Processing simpleaf template to produce and execute manifest.");
-
-                // iterate json files and parse records to commands
-                // convert files into json string vector
-                let workflow_json_string = workflow_utils::instantiate_workflow_template(
-                    af_home_path.as_ref(),
-                    template.as_path(),
-                    output_opt.clone(),
-                    &jpaths,
-                    &ext_codes,
-                )?;
-
-                // write complete workflow (i.e. the manifest) JSON to output folder
-                instantiated_manifest = serde_json::from_str(workflow_json_string.as_str())?;
-                output_path = workflow_utils::get_output_path(&instantiated_manifest)?;
-
-                // check if the output path we read from the instantiated template matches
-                // the output path requested by the user (if the user passed one in). If
-                // they do not match, issue an obnoxious warning.
-                // @DongzeHe : We should also probably log this warning to the output
-                // log for subsequent inspection.
-                if let Some(requested_output_path) = output_opt {
-                    if requested_output_path != output_path {
-                        warn!(
-                            r#"The output path {} was requested via the command line, but 
-                            the output path {} was resolved from the workflow template.
-                            In this case, since the output variable is not used when instantiating 
-                            the template, the value ({}) present in the template must be used.
-                            Please be aware that {} will not be used for output!"#,
-                            requested_output_path.display(),
-                            output_path.display(),
-                            output_path.display(),
-                            requested_output_path.display()
-                        );
-                    }
-                }
-
-                source_path = template.clone();
-            } else {
-                bail!(concat!(
-                    "You must have one of a manifest or template, ",
-                    "but provided neither; this shouldn't happen"
-                ));
-            }
+            let resolved = resolve_workflow_run_inputs(
+                af_home_path.as_ref(),
+                template,
+                manifest,
+                output,
+                &jpaths,
+                &ext_codes,
+            )?;
+            let instantiated_manifest = resolved.instantiated_manifest;
+            let source_path = resolved.source_path;
+            let output_path = resolved.output_path;
 
             // recursively make the output directory, which at this point
             // has been resolved as the one used in the template or manifest
diff --git a/src/utils/workflow_utils.rs b/src/utils/workflow_utils.rs
index 7be22e8..3212c35 100644
--- a/src/utils/workflow_utils.rs
+++ b/src/utils/workflow_utils.rs
@@ -6,6 +6,7 @@ use anyhow::{anyhow, bail, Context};
 use chrono::{DateTime, Local};
 use clap::Parser;
 // use cmd_lib::run_cmd;
+use crate::core::io::write_json_pretty_atomic;
 use serde_json::{json, Map, Value};
 use std::boxed::Box;
 use std::collections::HashMap;
@@ -33,6 +34,42 @@ pub enum WFCommand {
     ExternalCommand(std::process::Command),
 }
 
+/// Structured errors returned when executing workflow commands.
+#[derive(Debug, thiserror::Error)]
+pub enum WorkflowExecutionError {
+    /// A `simpleaf` command in the workflow failed.
+    #[error("Workflow step {step} ({program}) failed while executing `{command}`: {source}")]
+    SimpleafStepFailed {
+        step: u64,
+        program: String,
+        command: String,
+        #[source]
+        source: anyhow::Error,
+    },
+    /// An external workflow command exited with a non-zero status.
+    #[error(
+        "Workflow step {step} ({program}) failed executing external command `{command}` with exit status {status}. stderr: {stderr}"
+    )]
+    ExternalCommandNonZero {
+        step: u64,
+        program: String,
+        command: String,
+        status: String,
+        stderr: String,
+    },
+    /// An external workflow command could not be launched.
+    #[error(
+        "Workflow step {step} ({program}) could not launch external command `{command}`: {source}"
+    )]
+    ExternalCommandSpawnFailed {
+        step: u64,
+        program: String,
+        command: String,
+        #[source]
+        source: std::io::Error,
+    },
+}
+
 #[allow(dead_code)]
 enum ColumnTypeTag {
     String,
@@ -66,7 +103,7 @@ impl PatchCollection {
         self.patches.push(p);
     }
 
-    pub fn iter(&self) -> std::slice::Iter<JsonPatch> {
+    pub fn iter(&self) -> std::slice::Iter<'_, JsonPatch> {
         self.patches.iter()
     }
 }
@@ -285,6 +322,123 @@ be certain you intend to do this.",
     Ok(patches)
 }
 
+/// Validate workflow structure before planning or execution.
+///
+/// This ensures command entries contain both `step` and `program_name`,
+/// verifies expected types for core fields, and rejects unsupported
+/// `simpleaf` command variants with actionable messages.
+pub fn validate_manifest_structure(workflow: &Value) -> anyhow::Result<()> {
+    fn validate_node(node: &Value, path: &str) -> anyhow::Result<()> {
+        match node {
+            Value::Object(obj) => {
+                let has_step = obj.get(SystemFields::Step.as_str()).is_some();
+                let has_program = obj.get(SystemFields::ProgramName.as_str()).is_some();
+                if has_step ^ has_program {
+                    bail!(
+                        "Malformed workflow entry at {}: command records must contain both `step` and `program_name`.",
+                        path
+                    );
+                }
+
+                if has_step {
+                    let step_val = obj.get(SystemFields::Step.as_str()).with_context(|| {
+                        format!(
+                            "Malformed workflow entry at {}: missing `step` value.",
+                            path
+                        )
+                    })?;
+                    let step = step_val.as_u64().with_context(|| {
+                        format!(
+                            "Malformed workflow entry at {}: `step` must be a positive integer, found {:?}.",
+                            path, step_val
+                        )
+                    })?;
+                    if step == 0 {
+                        bail!(
+                            "Malformed workflow entry at {}: `step` must be >= 1, found 0.",
+                            path
+                        );
+                    }
+
+                    if let Some(active_val) = obj.get(SystemFields::Active.as_str()) {
+                        if active_val.as_bool().is_none() {
+                            bail!(
+                                "Malformed workflow entry at {}: `active` must be a boolean, found {:?}.",
+                                path,
+                                active_val
+                            );
+                        }
+                    }
+
+                    let program_name_val = obj
+                        .get(SystemFields::ProgramName.as_str())
+                        .with_context(|| {
+                            format!(
+                                "Malformed workflow entry at {}: missing `program_name` value.",
+                                path
+                            )
+                        })?;
+                    let program_name = program_name_val.as_str().with_context(|| {
+                        format!(
+                            "Malformed workflow entry at {}: `program_name` must be a string, found {:?}.",
+                            path, program_name_val
+                        )
+                    })?;
+
+                    if program_name.starts_with("simpleaf")
+                        && !program_name.ends_with("index")
+                        && !program_name.ends_with("quant")
+                    {
+                        bail!(
+                            "Unsupported workflow command `{}` at {}. Only `simpleaf index` and `simpleaf quant` are currently supported.",
+                            program_name,
+                            path
+                        );
+                    }
+
+                    if !program_name.starts_with("simpleaf") {
+                        let args_val = obj
+                            .get(SystemFields::ExternalArguments.as_str())
+                            .with_context(|| {
+                                format!(
+                                    "Malformed external command at {}: missing `arguments` array.",
+                                    path
+                                )
+                            })?;
+                        if args_val.as_array().is_none() {
+                            bail!(
+                                "Malformed external command at {}: `arguments` must be an array, found {:?}.",
+                                path,
+                                args_val
+                            );
+                        }
+                    }
+                }
+
+                // Recurse to validate nested workflow sections.
+                for (k, v) in obj {
+                    let child_path = if path == "$" {
+                        format!("$.{}", k)
+                    } else {
+                        format!("{}.{}", path, k)
+                    };
+                    validate_node(v, &child_path)?;
+                }
+                Ok(())
+            }
+            Value::Array(arr) => {
+                for (idx, v) in arr.iter().enumerate() {
+                    validate_node(v, &format!("{}[{}]", path, idx))?;
+                }
+                Ok(())
+            }
+            _ => Ok(()),
+        }
+    }
+
+    validate_node(workflow, "$")
+}
+
 pub fn get_output_path(manifest: &serde_json::Value) -> anyhow::Result<PathBuf> {
     // we assume that the path we want is /meta_info/output, and it *must* exist
     // as a key!
@@ -366,16 +520,11 @@ pub fn duration_to_dhms(d: chrono::Duration) -> String {
     execution_elapsed_time
 }
 
-/// This function updates the start_at variable
-/// if --resume is provided.\
-/// It finds the workflow_info.json exported by
-/// simpleaf workflow from the previous run and
-/// grab the "Succeed" and "Execution Terminated Step"
-/// fields.\
-/// If the previous run was succeed, then we report an error
-/// saying nothing to resume
-/// If Execution Terminated Step is a negative number, that
-/// means there was no previous execution:
+/// Compute the effective `start_at` for `--resume` runs from a prior workflow log.
+///
+/// This reads `"Succeed"` and `"Latest Run"."Execution Terminated Step"` from the
+/// previous `simpleaf_workflow_log.json` payload and returns the step to resume from.
+/// If the previous run already succeeded, resuming is rejected.
 pub fn update_start_at(v: &Value) -> anyhow::Result<u64> {
     let latest_run = v.get("Latest Run").with_context(|| {
         "Could not get the `Latest Run` field from the `simpleaf_workflow_log.json`; Cannot proceed"
@@ -404,6 +553,9 @@ pub fn update_start_at(v: &Value) -> anyhow::Result<u64> {
     }
 }
 
+/// Load the previous workflow metadata log used by `--resume`.
+///
+/// The expected file is `simpleaf_workflow_log.json` in the provided output directory.
 pub fn get_previous_log<T: AsRef<Path>>(output: T) -> anyhow::Result<Value> {
     // the path to the expected log file
     let exec_log_path = output.as_ref().join("simpleaf_workflow_log.json");
@@ -436,6 +588,10 @@ pub fn get_previous_log<T: AsRef<Path>>(output: T) -> anyhow::Result<Value> {
     }
 }
 
+/// Execute a planned workflow queue and update the workflow log after each successful step.
+///
+/// Commands are run in queue order (already sorted by step during planning).
+/// On the first command failure, this writes a failed log state and returns the error.
 pub fn execute_commands_in_workflow<T: AsRef<Path>>(
     simpleaf_workflow: SimpleafWorkflow,
     af_home_path: T,
@@ -454,6 +610,7 @@ pub fn execute_commands_in_workflow<T: AsRef<Path>>(
 
         match cr.cmd {
             WFCommand::SimpleafCommand(cmd) => {
+                let command_string = pn.to_string();
                 let exec_result = match *cmd {
                     Commands::Index(index_opts) => {
                         crate::indexing::build_ref_and_index(af_home_path.as_ref(), index_opts)
@@ -462,12 +619,23 @@ pub fn execute_commands_in_workflow<T: AsRef<Path>>(
                     Commands::Quant(quant_opts) => {
                         crate::quant::map_and_quant(af_home_path.as_ref(), quant_opts)
                     }
-                    _ => todo!(),
+                    unsupported_cmd => {
+                        bail!(
+                            "Unsupported simpleaf workflow command variant: {:?}. Only `index` and `quant` are currently supported in workflow execution.",
+                            unsupported_cmd
+                        )
+                    }
                 };
                 if let Err(e) = exec_result {
                     workflow_log.write(false)?;
                     info!("Execution terminated at {} command for step {}", pn, step);
-                    return Err(e);
+                    return Err(WorkflowExecutionError::SimpleafStepFailed {
+                        step,
+                        program: pn.to_string(),
+                        command: command_string,
+                        source: e,
+                    }
+                    .into());
                 } else {
                     info!("Successfully ran {} command for step {}", pn, step);
                     workflow_log.update(&cr.field_trajectory_vec[..])?;
@@ -490,23 +658,29 @@ pub fn execute_commands_in_workflow<T: AsRef<Path>>(
                             workflow_log.update(&cr.field_trajectory_vec[..])?;
                         } else {
                             workflow_log.write(false)?;
-                            let cmd_stderr = std::str::from_utf8(&cres.stderr[..])?;
-                            let msg = format!("{} command at step {} failed to exit with code 0 under the shell.\n\
-                            The exit status was: {}.\n\
-                            The stderr of the invocation was: {}.", pn, step, cres.status, cmd_stderr);
-                            warn!(msg);
-                            bail!(msg);
+                            let cmd_stderr =
+                                String::from_utf8_lossy(&cres.stderr).trim().to_string();
+                            let err = WorkflowExecutionError::ExternalCommandNonZero {
+                                step,
+                                program: pn.to_string(),
+                                command: cmd_string,
+                                status: cres.status.to_string(),
+                                stderr: cmd_stderr,
+                            };
+                            warn!("{:#}", err);
+                            return Err(err.into());
                         }
                     }
                     Err(e) => {
                         workflow_log.write(false)?;
-                        let msg = format!(
-                            "{} command at step {} failed to execute under the shell.\n\
-                            The returned error was: {:?}.\n",
-                            pn, step, e
-                        );
-                        warn!(msg);
-                        bail!(msg);
+                        let err = WorkflowExecutionError::ExternalCommandSpawnFailed {
+                            step,
+                            program: pn.to_string(),
+                            command: cmd_string,
+                            source: e,
+                        };
+                        warn!("{:#}", err);
+                        return Err(err.into());
                     } // TODO: use this in the log somewhere.
                 } // invoke external cmd
             }
@@ -527,6 +701,9 @@ pub fn initialize_workflow<T: AsRef<Path>>(
     skip_step: Vec<u64>,
     resume: bool,
 ) -> anyhow::Result<(SimpleafWorkflow, WorkflowLog)> {
+    // Validate manifest shape before planning command execution.
+    validate_manifest_structure(&workflow_json_value)?;
+
     // Instantiate a workflow log struct
     let mut wl = WorkflowLog::new(
         output.as_ref(),
@@ -621,7 +798,7 @@ impl SimpleafWorkflow {
                         .with_context(|| {
                             format!(
                                 "Cannot parse step field value, {:?}, as an integer",
-                                field.get(SystemFields::Step.as_str()).unwrap()
+                                field.get(SystemFields::Step.as_str())
                             )
                         })?;
 
@@ -633,7 +810,7 @@ impl SimpleafWorkflow {
                             v.as_bool().with_context(|| {
                                 format!(
                                     "Cannot parse active field value, {:?}, as a boolean",
-                                    field.get(SystemFields::Active.as_str()).unwrap()
+                                    field.get(SystemFields::Active.as_str())
                                 )
                             })?
                         } else {
@@ -644,37 +821,45 @@ impl SimpleafWorkflow {
                     let cmd_field = workflow_log.get_mut_cmd_field(&curr_field_trajectory_vec)?;
                     cmd_field[SystemFields::Active.as_str()] = json!(active);
 
-                    // The field must contains a program_name
-                    if let Some(program_name) = field.get(SystemFields::ProgramName.as_str()) {
-                        pn = ProgramName::from_str(program_name.as_str().with_context(|| {
+                    // The command record must contain a program_name field.
+                    let program_name = field
+                        .get(SystemFields::ProgramName.as_str())
+                        .with_context(|| {
+                            format!(
+                                "Malformed workflow command at step {}: missing `program_name` field.",
+                                step
+                            )
+                        })?;
+                    pn =
+                        ProgramName::from_str(program_name.as_str().with_context(|| {
                             "Cannot create ProgramName struct from a program name"
                         })?);
-                        // if active, then push to execution queue
-                        if active {
-                            info!("Parsing {} command for step {}", pn, step);
-                            // The `step` will be used for sorting the cmd_queue vector.
-                            // all commands must have a valid `step`.
-                            let cmd = match pn.create_cmd(field) {
-                                Ok(v) => v,
-                                Err(e) => {
-                                    if pn.is_external() {
-                                        bail!("Could not parse external command {} for step {}. The error message was: {}", pn, step, e);
-                                    } else {
-                                        bail!("Could not parse simpleaf command {} for step {}. The error message was: {}", pn, step, e);
-                                    }
+
+                    // if active, then push to execution queue
+                    if active {
+                        info!("Parsing {} command for step {}", pn, step);
+                        // The `step` will be used for sorting the cmd_queue vector.
+                        // all commands must have a valid `step`.
+                        let cmd = match pn.create_cmd(field) {
+                            Ok(v) => v,
+                            Err(e) => {
+                                if pn.is_external() {
+                                    bail!("Could not parse external command {} for step {}. The error message was: {}", pn, step, e);
+                                } else {
+                                    bail!("Could not parse simpleaf command {} for step {}. The error message was: {}", pn, step, e);
                                 }
-                            };
-                            cmd_queue.push(CommandRecord {
-                                step,
-                                active,
-                                program_name: pn,
-                                cmd,
-                                field_trajectory_vec: curr_field_trajectory_vec,
-                            });
-                        } else {
-                            info!("Skipping {} command for step {}", pn, step);
-                        } // if active
-                    } // if have ProgramName
+                            }
+                        };
+                        cmd_queue.push(CommandRecord {
+                            step,
+                            active,
+                            program_name: pn,
+                            cmd,
+                            field_trajectory_vec: curr_field_trajectory_vec,
+                        });
+                    } else {
+                        info!("Skipping {} command for step {}", pn, step);
+                    } // if active
                 } else {
                     // If this is not a command record, we move to the next level
                     // recursively calling this function on the current field.
@@ -729,6 +914,7 @@ pub struct WorkflowLog {
     // this vector records all field names in the complete workflow.
     // This is used for locating the step for each command
     field_id_to_name: Vec<String>,
+    field_name_to_id: HashMap<String, usize>,
     // TODO: the trajectory vector in each CommandRecord can be
     // move here as a long vector, and in each CommandRecord
     // we just need to record the start pos and the length of its trajectory.
@@ -772,12 +958,12 @@ impl WorkflowLog {
         let workflow_name = template
             .as_ref()
             .file_stem()
-            .unwrap_or_else(|| {
-                panic!(
+            .with_context(|| {
+                format!(
                     "Cannot parse file name of file {}",
                     template.as_ref().display()
                 )
-            })
+            })?
             .to_string_lossy()
             .into_owned();
 
@@ -804,6 +990,7 @@ impl WorkflowLog {
             value: workflow_json_value.clone(),
             cmd_runtime_records: Map::new(),
             field_id_to_name: Vec::new(),
+            field_name_to_id: HashMap::new(),
             // cmds_field_id_trajectory: Vec::new()
             previous_log,
         })
@@ -816,13 +1003,8 @@ impl WorkflowLog {
         });
     }
 
-    /// Write log to the output path.
-    /// 1. an execution log file includes the whole workflow,
-    ///    in which succeffully invoked commands have
-    ///    a negative `step`
-    /// 2. an info log file records runtime, workflow name,
-    ///    output path etc.
-    pub fn write(&self, succeed: bool) -> anyhow::Result<()> {
+    /// Build the metadata payload written to `simpleaf_workflow_log.json`.
+    fn build_meta_info_payload(&self, succeed: bool) -> anyhow::Result<Value> {
         // initiate meta_info
         let workflow_meta_info = if let Some(workflow_meta_info) = &self.workflow_meta_info {
             workflow_meta_info.to_owned()
@@ -868,7 +1050,7 @@ impl WorkflowLog {
         let d = Local::now().signed_duration_since(self.workflow_start_time);
         let execution_elapsed_time = duration_to_dhms(d);
 
-        let meta_info = json!(
+        Ok(json!(
             {
                 "Workflow Name": self.workflow_name,
                 "Workflow Meta Info":  workflow_meta_info,
@@ -883,28 +1065,25 @@ impl WorkflowLog {
                     "Command Runtime by Step": Value::from(self.cmd_runtime_records.clone()),
                 },
                 "Previous Runs": previous_runs
-        });
+        }))
+    }
 
-        // execution log
-        std::fs::write(
-            self.meta_info_path.as_path(),
-            serde_json::to_string_pretty(&meta_info)
-                .with_context(|| ("Cannot convert json value to string."))?,
-        )
-        .with_context(|| {
+    /// Write log to the output path.
+    /// 1. an execution log file includes the whole workflow,
+    ///    in which succeffully invoked commands have
+    ///    a negative `step`
+    /// 2. an info log file records runtime, workflow name,
+    ///    output path etc.
+    pub fn write(&self, succeed: bool) -> anyhow::Result<()> {
+        let meta_info = self.build_meta_info_payload(succeed)?;
+        write_json_pretty_atomic(self.meta_info_path.as_path(), &meta_info).with_context(|| {
             format!(
                 "could not write workflow meta info JSON file to {}",
                 self.meta_info_path.display()
             )
         })?;
 
-        // execution log
-        std::fs::write(
-            self.exec_log_path.as_path(),
-            serde_json::to_string_pretty(&self.value)
-                .with_context(|| "Could not convert json value to string.")?,
-        )
-        .with_context(|| {
+        write_json_pretty_atomic(self.exec_log_path.as_path(), &self.value).with_context(|| {
             format!(
                 "could not write complete simpleaf workflow JSON file to {}",
                 self.exec_log_path.display()
@@ -916,11 +1095,13 @@ impl WorkflowLog {
 
     /// Get the index corresponds to the field name in the field_id_to_name vector.
     pub fn get_field_id(&mut self, field_name: &String) -> usize {
-        if let Ok(pos) = self.field_id_to_name.binary_search(field_name) {
-            pos
+        if let Some(pos) = self.field_name_to_id.get(field_name) {
+            *pos
         } else {
+            let pos = self.field_id_to_name.len();
             self.field_id_to_name.push(field_name.to_owned());
-            self.field_id_to_name.len() - 1
+            self.field_name_to_id.insert(field_name.to_owned(), pos);
+            pos
         }
     }
 
@@ -943,7 +1124,7 @@ impl WorkflowLog {
             curr_field_name = self
                 .field_id_to_name
                 .get(*curr_field_id)
-                .expect("Cannot map field ID to name.");
+                .with_context(|| format!("Cannot map field ID {} to name.", curr_field_id))?;
             // let curr_pos = field_trajectory_vec.first().expect("Cannot get the first element");
             curr_field = curr_field
                 .get_mut(curr_field_name)
@@ -1047,6 +1228,13 @@ impl ProgramName {
         matches!(self, &ProgramName::External(_))
     }
 
+    /// Return true if an argument contains shell operators and therefore
+    /// requires shell interpretation.
+    fn has_shell_metachar(arg: &str) -> bool {
+        arg.chars()
+            .any(|c| ['|', '&', ';', '<', '>', '$', '`', '(', ')'].contains(&c))
+    }
+
     /// Create a valid simpleaf command object using the arguments recoreded in the field.
     /// step and program name will be ignored in this procedure
     pub fn create_simpleaf_cmd(&self, value: &Value) -> anyhow::Result<WFCommand> {
@@ -1123,10 +1311,18 @@ impl ProgramName {
                 })?
             );
         }
-        // make Command struct for the command
-        let external_cmd = shell(arg_vec.join(" "));
+        let requires_shell = arg_vec.iter().any(|s| Self::has_shell_metachar(s));
 
-        Ok(WFCommand::ExternalCommand(external_cmd))
+        if requires_shell {
+            // Preserve shell semantics for advanced command forms (e.g. pipes/redirection).
+            let external_cmd = shell(arg_vec.join(" "));
+            Ok(WFCommand::ExternalCommand(external_cmd))
+        } else {
+            // Prefer direct argv invocation for robust argument handling.
+            let mut external_cmd = std::process::Command::new(&arg_vec[0]);
+            external_cmd.args(&arg_vec[1..]);
+            Ok(WFCommand::ExternalCommand(external_cmd))
+        }
     }
 
     pub fn create_cmd(&self, value: &Value) -> anyhow::Result<WFCommand> {
@@ -1367,12 +1563,15 @@ mod tests {
     use crate::{
         utils::{
             prog_utils::{get_cmd_line_string, shell},
-            workflow_utils::{initialize_workflow, WorkflowLog},
+            workflow_utils::{
+                execute_commands_in_workflow, initialize_workflow, validate_manifest_structure,
+                CommandRecord, SimpleafWorkflow, WorkflowLog,
+            },
         },
         Commands,
     };
-    use core::panic;
-    use std::path::PathBuf;
+    use std::path::{Path, PathBuf};
+    use tempfile::tempdir;
 
     #[test]
     fn test_workflow_command() {
@@ -1482,69 +1681,70 @@ mod tests {
         )
         .unwrap();
 
-        match &wl {
-            WorkflowLog {
-                meta_info_path,
+        let WorkflowLog {
+            meta_info_path,
+            exec_log_path,
+            workflow_start_time: _,
+            command_runtime,
+            num_succ,
+            start_at,
+            workflow_name,
+            workflow_meta_info,
+            value,
+            cmd_runtime_records,
+            field_id_to_name,
+            field_name_to_id,
+            skip_step,
+            previous_log: _,
+        } = &wl;
+        {
+            // test wl
+            // check JSON log output json
+            assert_eq!(
                 exec_log_path,
-                workflow_start_time: _,
-                command_runtime,
-                num_succ,
-                start_at,
-                workflow_name,
-                workflow_meta_info,
-                value,
-                cmd_runtime_records,
-                field_id_to_name,
-                skip_step,
-                previous_log: _,
-            } => {
-                // test wl
-                // check JSON log output json
-                assert_eq!(
-                    exec_log_path,
-                    &PathBuf::from("output_dir/workflow_execution_log.json")
-                );
-                assert_eq!(
-                    meta_info_path,
-                    &PathBuf::from("output_dir/simpleaf_workflow_log.json")
-                );
+                &PathBuf::from("output_dir/workflow_execution_log.json")
+            );
+            assert_eq!(
+                meta_info_path,
+                &PathBuf::from("output_dir/simpleaf_workflow_log.json")
+            );
 
-                assert_eq!(workflow_name, &String::from("fake_config"));
+            assert_eq!(workflow_name, &String::from("fake_config"));
 
-                assert_eq!(
-                    workflow_meta_info,
-                    &Some(
-                        workflow_json_value
-                            .get(SystemFields::MetaInfo.as_str())
-                            .unwrap()
-                            .to_owned()
-                    )
-                );
+            assert_eq!(
+                workflow_meta_info,
+                &Some(
+                    workflow_json_value
+                        .get(SystemFields::MetaInfo.as_str())
+                        .unwrap()
+                        .to_owned()
+                )
+            );
 
-                let mut new_value = value.to_owned();
-                new_value["rna"]["simpleaf_index"]["active"] = json!(true);
-                new_value["external-commands"]["HTO ref gunzip"]["active"] = json!(true);
+            let mut new_value = value.to_owned();
+            new_value["rna"]["simpleaf_index"]["active"] = json!(true);
+            new_value["external-commands"]["HTO ref gunzip"]["active"] = json!(true);
 
-                assert_eq!(new_value, workflow_json_value);
+            assert_eq!(new_value, workflow_json_value);
 
-                assert_eq!(cmd_runtime_records, &Map::new());
+            assert_eq!(cmd_runtime_records, &Map::new());
 
-                assert_eq!(start_at, &2u64);
-                assert_eq!(skip_step, &vec![3]);
-                assert!(
-                    field_id_to_name.contains(&"rna".to_string())
-                        && field_id_to_name.contains(&"meta_info".to_string())
-                        && field_id_to_name.contains(&"simpleaf_index".to_string())
-                        && field_id_to_name.contains(&"simpleaf_quant".to_string())
-                        && field_id_to_name.contains(&"external-commands".to_string())
-                        && field_id_to_name.contains(&"HTO ref gunzip".to_string())
-                        && field_id_to_name.contains(&"ADT ref gunzip".to_string())
-                );
+            assert_eq!(start_at, &2u64);
+            assert_eq!(skip_step, &vec![3]);
+            assert!(
+                field_id_to_name.contains(&"rna".to_string())
+                    && field_id_to_name.contains(&"meta_info".to_string())
+                    && field_id_to_name.contains(&"simpleaf_index".to_string())
+                    && field_id_to_name.contains(&"simpleaf_quant".to_string())
+                    && field_id_to_name.contains(&"external-commands".to_string())
+                    && field_id_to_name.contains(&"HTO ref gunzip".to_string())
+                    && field_id_to_name.contains(&"ADT ref gunzip".to_string())
+            );
+            assert_eq!(field_id_to_name.len(), field_name_to_id.len());
 
-                assert!(command_runtime.is_none());
+            assert!(command_runtime.is_none());
 
-                assert_eq!(num_succ, &0);
-            }
+            assert_eq!(num_succ, &0);
         }
 
         // we started at 2, and skipped 3. So there are two commands
@@ -1656,4 +1856,269 @@ mod tests {
             e => panic!("expected SimpleafCommand, found {:?}", e),
         };
     }
+
+    #[test]
+    fn test_get_field_id_is_stable_for_unsorted_insertions() {
+        let tmp = tempdir().unwrap();
+        let mut wl = WorkflowLog::new(
+            tmp.path(),
+            Path::new("fake_template.jsonnet"),
+            &json!({}),
+            1,
+            vec![],
+            false,
+        )
+        .unwrap();
+
+        let id_z_1 = wl.get_field_id(&"z_field".to_string());
+        let _id_a = wl.get_field_id(&"a_field".to_string());
+        let id_z_2 = wl.get_field_id(&"z_field".to_string());
+
+        assert_eq!(id_z_1, id_z_2);
+    }
+
+    #[test]
+    fn test_execute_commands_unsupported_simpleaf_command_returns_error() {
+        let tmp = tempdir().unwrap();
+        let mut wl = WorkflowLog::new(
+            tmp.path(),
+            Path::new("fake_template.jsonnet"),
+            &json!({}),
+            1,
+            vec![],
+            false,
+        )
+        .unwrap();
+
+        let sw = SimpleafWorkflow {
+            af_home_path: tmp.path().to_path_buf(),
+            cmd_queue: vec![CommandRecord {
+                step: 1,
+                active: true,
+                program_name: ProgramName::Index,
+                cmd: WFCommand::SimpleafCommand(Box::new(Commands::Inspect {})),
+                field_trajectory_vec: vec![],
+            }],
+        };
+
+        let err = execute_commands_in_workflow(sw, tmp.path(), &mut wl).unwrap_err();
+        assert!(
+            format!("{:#}", err).contains("Unsupported simpleaf workflow command variant"),
+            "unexpected error: {:#}",
+            err
+        );
+    }
+
+    #[test]
+    fn test_validate_manifest_structure_rejects_missing_program_name() {
+        let manifest = json!({
+            "external": {
+                "broken": {
+                    "step": 1,
+                    "arguments": ["-l"]
+                }
+            }
+        });
+
+        let err = validate_manifest_structure(&manifest).unwrap_err();
+        assert!(
+            format!("{:#}", err).contains("must contain both `step` and `program_name`"),
+            "unexpected error: {:#}",
+            err
+        );
+    }
+
+    #[test]
+    fn test_validate_manifest_structure_rejects_unsupported_simpleaf_command() {
+        let manifest = json!({
+            "bad_simpleaf": {
+                "step": 1,
+                "program_name": "simpleaf inspect",
+                "arguments": []
+            }
+        });
+
+        let err = validate_manifest_structure(&manifest).unwrap_err();
+        assert!(
+            format!("{:#}", err)
+                .contains("Only `simpleaf index` and `simpleaf quant` are currently supported"),
+            "unexpected error: {:#}",
+            err
+        );
+    }
+
+    #[test]
+    fn test_initialize_workflow_resume_uses_previous_terminated_step() {
+        let tmp = tempdir().unwrap();
+        let previous_log = json!({
+            "Succeed": false,
+            "Latest Run": {
+                "Execution Terminated Step": 2
+            }
+        });
+        std::fs::write(
+            tmp.path().join("simpleaf_workflow_log.json"),
+            serde_json::to_string_pretty(&previous_log).unwrap(),
+        )
+        .unwrap();
+
+        let manifest = json!({
+            "external": {
+                "step1": {
+                    "step": 1,
+                    "program_name": "echo",
+                    "arguments": ["one"]
+                },
+                "step2": {
+                    "step": 2,
+                    "program_name": "echo",
+                    "arguments": ["two"]
+                }
+            }
+        });
+
+        let (sw, wl) = initialize_workflow(
+            tmp.path(),
+            Path::new("resume_template.jsonnet"),
+            tmp.path(),
+            manifest,
+            1,
+            vec![],
+            true,
+        )
+        .unwrap();
+
+        assert_eq!(wl.start_at, 2);
+        assert_eq!(sw.cmd_queue.len(), 1);
+        assert_eq!(sw.cmd_queue[0].step, 2);
+    }
+
+    #[test]
+    fn test_initialize_workflow_skip_step_excludes_skipped_command() {
+        let tmp = tempdir().unwrap();
+        let manifest = json!({
+            "external": {
+                "step1": {
+                    "step": 1,
+                    "program_name": "echo",
+                    "arguments": ["one"]
+                },
+                "step2": {
+                    "step": 2,
+                    "program_name": "echo",
+                    "arguments": ["two"]
+                }
+            }
+        });
+
+        let (sw, wl) = initialize_workflow(
+            tmp.path(),
+            Path::new("skip_template.jsonnet"),
+            tmp.path(),
+            manifest,
+            1,
+            vec![1],
+            false,
+        )
+        .unwrap();
+
+        assert_eq!(wl.skip_step, vec![1]);
+        assert_eq!(sw.cmd_queue.len(), 1);
+        assert_eq!(sw.cmd_queue[0].step, 2);
+    }
+
+    #[test]
+    fn test_external_command_builder_uses_shell_only_when_required() {
+        let direct = ProgramName::from_str("echo")
+            .create_external_cmd(&json!({"arguments": ["hello"]}))
+            .unwrap();
+        let shell_required = ProgramName::from_str("echo")
+            .create_external_cmd(&json!({"arguments": ["hello", ">", "out.txt"]}))
+            .unwrap();
+
+        match direct {
+            WFCommand::ExternalCommand(cmd) => {
+                let cmd_line = get_cmd_line_string(&cmd);
+                assert!(
+                    !cmd_line.contains(" -c "),
+                    "expected direct argv invocation, saw: {}",
+                    cmd_line
+                );
+            }
+            _ => panic!("expected external command for direct argv case"),
+        }
+
+        match shell_required {
+            WFCommand::ExternalCommand(cmd) => {
+                let cmd_line = get_cmd_line_string(&cmd);
+                assert!(
+                    cmd_line.contains(" -c "),
+                    "expected shell invocation for redirection case, saw: {}",
+                    cmd_line
+                );
+            }
+            _ => panic!("expected external command for shell case"),
+        }
+    }
+
+    #[test]
+    fn test_execute_commands_external_failure_reports_status_and_stderr() {
+        let tmp = tempdir().unwrap();
+        let mut wl = WorkflowLog::new(
+            tmp.path(),
+            Path::new("fake_template.jsonnet"),
+            &json!({}),
+            1,
+            vec![],
+            false,
+        )
+        .unwrap();
+
+        let sw = SimpleafWorkflow {
+            af_home_path: tmp.path().to_path_buf(),
+            cmd_queue: vec![CommandRecord {
+                step: 1,
+                active: true,
+                program_name: ProgramName::External("sh".to_string()),
+                cmd: WFCommand::ExternalCommand(shell("echo workflow_phase6_err 1>&2; exit 7")),
+                field_trajectory_vec: vec![],
+            }],
+        };
+
+        let err = execute_commands_in_workflow(sw, tmp.path(), &mut wl).unwrap_err();
+        let rendered = format!("{:#}", err);
+        assert!(
+            rendered.contains("failed executing external command"),
+            "unexpected error: {}",
+            rendered
+        );
+        assert!(
+            rendered.contains("exit status"),
+            "unexpected error: {}",
+            rendered
+        );
+        assert!(
+            rendered.contains("workflow_phase6_err"),
+            "unexpected error: {}",
+            rendered
+        );
+    }
+
+    #[test]
+    fn test_workflow_log_write_creates_both_json_logs() {
+        let tmp = tempdir().unwrap();
+        let wl = WorkflowLog::new(
+            tmp.path(),
+            Path::new("fake_template.jsonnet"),
+            &json!({}),
+            1,
+            vec![],
+            false,
+        )
+        .unwrap();
+
+        wl.write(true).unwrap();
+        assert!(tmp.path().join("simpleaf_workflow_log.json").is_file());
+        assert!(tmp.path().join("workflow_execution_log.json").is_file());
+    }
 }
diff --git a/tests/cli_help_snapshots.rs b/tests/cli_help_snapshots.rs
new file mode 100644
index 0000000..6d0c5e3
--- /dev/null
+++ b/tests/cli_help_snapshots.rs
@@ -0,0 +1,116 @@
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use tempfile::tempdir;
+
+fn snapshot_cases() -> Vec<(&'static str, Vec<&'static str>)> {
+    vec![
+        ("simpleaf___help.txt", vec!["--help"]),
+        ("simpleaf_index___help.txt", vec!["index", "--help"]),
+        ("simpleaf_quant___help.txt", vec!["quant", "--help"]),
+        ("simpleaf_chemistry___help.txt", vec!["chemistry", "--help"]),
+        (
+            "simpleaf_chemistry_add___help.txt",
+            vec!["chemistry", "add", "--help"],
+        ),
+        (
+            "simpleaf_chemistry_remove___help.txt",
+            vec!["chemistry", "remove", "--help"],
+        ),
+        (
+            "simpleaf_chemistry_clean___help.txt",
+            vec!["chemistry", "clean", "--help"],
+        ),
+        (
+            "simpleaf_chemistry_lookup___help.txt",
+            vec!["chemistry", "lookup", "--help"],
+        ),
+        (
+            "simpleaf_chemistry_refresh___help.txt",
+            vec!["chemistry", "refresh", "--help"],
+        ),
+        (
+            "simpleaf_chemistry_fetch___help.txt",
+            vec!["chemistry", "fetch", "--help"],
+        ),
+        ("simpleaf_inspect___help.txt", vec!["inspect", "--help"]),
+        ("simpleaf_set_paths___help.txt", vec!["set-paths", "--help"]),
+        (
+            "simpleaf_refresh_prog_info___help.txt",
+            vec!["refresh-prog-info", "--help"],
+        ),
+        ("simpleaf_workflow___help.txt", vec!["workflow", "--help"]),
+        (
+            "simpleaf_workflow_run___help.txt",
+            vec!["workflow", "run", "--help"],
+        ),
+        (
+            "simpleaf_workflow_get___help.txt",
+            vec!["workflow", "get", "--help"],
+        ),
+        (
+            "simpleaf_workflow_patch___help.txt",
+            vec!["workflow", "patch", "--help"],
+        ),
+        (
+            "simpleaf_workflow_list___help.txt",
+            vec!["workflow", "list", "--help"],
+        ),
+        (
+            "simpleaf_workflow_refresh___help.txt",
+            vec!["workflow", "refresh", "--help"],
+        ),
+        ("simpleaf_atac___help.txt", vec!["atac", "--help"]),
+        (
+            "simpleaf_atac_index___help.txt",
+            vec!["atac", "index", "--help"],
+        ),
+        (
+            "simpleaf_atac_process___help.txt",
+            vec!["atac", "process", "--help"],
+        ),
+    ]
+}
+
+fn snapshots_dir() -> PathBuf {
+    Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("tests")
+        .join("snapshots")
+        .join("cli-help")
+}
+
+#[test]
+fn cli_help_outputs_match_snapshots() {
+    let binary = env!("CARGO_BIN_EXE_simpleaf");
+    let af_home = tempdir().expect("unable to create temp ALEVIN_FRY_HOME");
+
+    for (snapshot_file, args) in snapshot_cases() {
+        let output = Command::new(binary)
+            .args(&args)
+            .env("ALEVIN_FRY_HOME", af_home.path())
+            .env("COLUMNS", "100")
+            .output()
+            .expect("failed to execute simpleaf");
+
+        assert!(
+            output.status.success(),
+            "expected success for args {:?}, got status {:?}\nstderr:\n{}",
+            &args,
+            output.status.code(),
+            String::from_utf8_lossy(&output.stderr)
+        );
+
+        let actual = String::from_utf8(output.stdout).expect("stdout was not valid UTF-8");
+        let expected_path = snapshots_dir().join(snapshot_file);
+        let expected = fs::read_to_string(&expected_path).unwrap_or_else(|e| {
+            panic!("failed reading snapshot {}: {}", expected_path.display(), e)
+        });
+
+        assert_eq!(
+            actual, expected,
+            "help output drifted for args {:?} against snapshot {}",
+            &args, snapshot_file
+        );
+    }
+}
diff --git a/tests/cli_smoke.rs b/tests/cli_smoke.rs
new file mode 100644
index 0000000..fa016bd
--- /dev/null
+++ b/tests/cli_smoke.rs
@@ -0,0 +1,69 @@
+use std::process::{Command, Output};
+
+use tempfile::tempdir;
+
+fn run_simpleaf(args: &[&str]) -> Output {
+    let binary = env!("CARGO_BIN_EXE_simpleaf");
+    let af_home = tempdir().expect("unable to create temp ALEVIN_FRY_HOME");
+
+    Command::new(binary)
+        .args(args)
+        .env("ALEVIN_FRY_HOME", af_home.path())
+        .output()
+        .expect("failed to execute simpleaf")
+}
+
+fn assert_parse_error(args: &[&str]) {
+    let output = run_simpleaf(args);
+    assert!(
+        !output.status.success(),
+        "expected parse error for args {:?}",
+        args
+    );
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    assert!(
+        stderr.contains("Usage:"),
+        "expected clap usage in stderr for {:?}, got:\n{}",
+        args,
+        stderr
+    );
+}
+
+#[test]
+fn quant_requires_input_type_group() {
+    assert_parse_error(&[
+        "quant", "-c", "10xv3", "-o", "/tmp/out", "-r", "cr-like", "--knee",
+    ]);
+}
+
+#[test]
+fn quant_rejects_conflicting_map_inputs() {
+    assert_parse_error(&[
+        "quant",
+        "-c",
+        "10xv3",
+        "-o",
+        "/tmp/out",
+        "-r",
+        "cr-like",
+        "--knee",
+        "--map-dir",
+        "/tmp/mapped",
+        "--index",
+        "/tmp/index",
+        "-1",
+        "r1.fastq",
+        "-2",
+        "r2.fastq",
+    ]);
+}
+
+#[test]
+fn index_requires_gtf_when_fasta_is_provided() {
+    assert_parse_error(&["index", "-f", "genome.fa", "-o", "/tmp/index_out"]);
+}
+
+#[test]
+fn atac_command_requires_subcommand() {
+    assert_parse_error(&["atac"]);
+}
diff --git a/tests/phase1_regressions.rs b/tests/phase1_regressions.rs
new file mode 100644
index 0000000..58bce8e
--- /dev/null
+++ b/tests/phase1_regressions.rs
@@ -0,0 +1,59 @@
+use std::fs;
+use std::process::Command;
+
+use serde_json::json;
+use tempfile::tempdir;
+
+#[test]
+fn salmon_missing_error_mentions_salmon_executable() {
+    let binary = env!("CARGO_BIN_EXE_simpleaf");
+    let af_home = tempdir().expect("failed to create temp af home");
+    let t2g = af_home.path().join("t2g.tsv");
+    fs::write(&t2g, "tx1\tgene1\n").expect("failed to write t2g");
+
+    let af_info = json!({
+        "prog_info": {
+            "salmon": null,
+            "piscem": null,
+            "alevin_fry": {"exe_path": "/bin/echo", "version": "0.11.2"},
+            "macs": null
+        }
+    });
+    fs::write(
+        af_home.path().join("simpleaf_info.json"),
+        serde_json::to_string_pretty(&af_info).expect("failed to serialize af info"),
+    )
+    .expect("failed to write simpleaf_info.json");
+
+    let output = Command::new(binary)
+        .env("ALEVIN_FRY_HOME", af_home.path())
+        .args([
+            "quant",
+            "-c",
+            "10xv3",
+            "-o",
+            "/tmp/out",
+            "-r",
+            "cr-like",
+            "--knee",
+            "-i",
+            "/tmp/fake_index",
+            "--no-piscem",
+            "-1",
+            "r1.fastq",
+            "-2",
+            "r2.fastq",
+            "-m",
+            &t2g.to_string_lossy(),
+        ])
+        .output()
+        .expect("failed to run simpleaf");
+
+    assert!(!output.status.success());
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    assert!(
+        stderr.contains("no salmon executable is provided"),
+        "stderr did not mention salmon executable:\n{}",
+        stderr
+    );
+}
diff --git a/tests/snapshots/cli-help/simpleaf___help.txt b/tests/snapshots/cli-help/simpleaf___help.txt
new file mode 100644
index 0000000..d722166
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf___help.txt
@@ -0,0 +1,18 @@
+A rust framework to make using alevin-fry and alevin-fry-ATAC even simpler.
+
+Usage: simpleaf <COMMAND>
+
+Commands:
+  index              build the (expanded) reference index
+  chemistry          operate on or inspect the chemistry registry
+  inspect            inspect the current configuration
+  quant              quantify a sample
+  set-paths          set paths to the programs that simpleaf will use
+  refresh-prog-info  refreshes version information associated with programs used by simpleaf
+  atac               run a sub-command dealing with atac-seq data
+  workflow           simpleaf workflow related command set
+  help               Print this message or the help of the given subcommand(s)
+
+Options:
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_atac___help.txt b/tests/snapshots/cli-help/simpleaf_atac___help.txt
new file mode 100644
index 0000000..b0bb92f
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_atac___help.txt
@@ -0,0 +1,13 @@
+run a sub-command dealing with atac-seq data
+
+Usage: simpleaf atac <COMMAND>
+
+Commands:
+  index    build a piscem index over the genome for scATAC-seq mapping
+  process  process a scATAC-seq sample by performing mapping, barcode correction, and sorted
+           (deduplicated) BED file generation
+  help     Print this message or the help of the given subcommand(s)
+
+Options:
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_atac_index___help.txt b/tests/snapshots/cli-help/simpleaf_atac_index___help.txt
new file mode 100644
index 0000000..e2a9344
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_atac_index___help.txt
@@ -0,0 +1,26 @@
+build a piscem index over the genome for scATAC-seq mapping
+
+Usage: simpleaf atac index [OPTIONS] --input <INPUT> --output <OUTPUT>
+
+Options:
+  -i, --input <INPUT>        target sequences (provide target sequences directly; avoid expanded
+                             reference construction)
+  -o, --output <OUTPUT>      path to output directory (will be created if it doesn't exist)
+  -t, --threads <THREADS>    number of threads to use when running [default: 16]
+      --work-dir <WORK_DIR>  working directory where temporary files should be placed [default:
+                             ./workdir.noindex]
+      --seed <HASH_SEED>     seed value to use in SSHash index construction (try changing this in
+                             the rare event index build fails) [default: 1]
+      --overwrite            overwrite existing files if the output directory is already populated
+  -h, --help                 Print help
+  -V, --version              Print version
+
+Index Configuration Options:
+  -k, --kmer-length <KMER_LENGTH>
+          the value of k to be used to construct the index [default: 25]
+  -m, --minimizer-length <MINIMIZER_LENGTH>
+          the value of m to be used to construct the piscem index (must be < k) [default: 17]
+
+Piscem Index Options:
+      --decoy-paths <DECOY_PATHS>  path to (optional) decoy sequence used to insert poison k-mer
+                                   information into the index (only if using piscem >= 0.7)
diff --git a/tests/snapshots/cli-help/simpleaf_atac_process___help.txt b/tests/snapshots/cli-help/simpleaf_atac_process___help.txt
new file mode 100644
index 0000000..6399d38
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_atac_process___help.txt
@@ -0,0 +1,86 @@
+process a scATAC-seq sample by performing mapping, barcode correction, and sorted (deduplicated) BED
+file generation
+
+Usage: simpleaf atac process [OPTIONS] --index <INDEX> --barcode-reads <BARCODE_READS> --chemistry <CHEMISTRY> --output <OUTPUT>
+
+Options:
+  -c, --chemistry <CHEMISTRY>  chemistry [possible values: 10x-v1, 10x-v2, 10x-multi]
+  -t, --threads <THREADS>      number of threads to use when running [default: 16]
+      --output <OUTPUT>        
+      --call-peaks             do peak calling after generating the bed file
+  -h, --help                   Print help
+  -V, --version                Print version
+
+Mapping Options:
+  -i, --index <INDEX>
+          path to index
+  -1, --reads1 <READS1>
+          comma-separated list of paths to read 1 files
+  -2, --reads2 <READS2>
+          comma-separated list of paths to read 2 files
+  -r, --reads <READS>
+          path to the read files containing single-end reads
+  -b, --barcode-reads <BARCODE_READS>
+          path to the read files containing the cell barcodes
+      --barcode-length <BARCODE_LENGTH>
+          the length of the barcode read from which to extract the barcode (usually this is the
+          length of the entire read, and reads shorter than this will be discarded) [default: 16]
+
+Permit List Generation Options:
+      --permit-barcode-ori <PERMIT_BARCODE_ORI>
+          The expected orientation of the barcodes in the permit list relative to the barcodes
+          extracted from the reads. If this is "fw", it is expected that the sequences will match
+          directly, if "rc" it is expected the reverse complement of the permit-list sequence will
+          match the reads' barcodes. If provided, this value will be used, if not provided, simpleaf
+          will attempt to look up the appropriate orientation in the chemistry registry
+  -u, --unfiltered-pl <UNFILTERED_PL>
+          Use the provided file as the unfiltered permit list (i.e. whitelist). This argument only
+          needs to be provided if you are providing the permit list explicitly, overriding the
+          default permit list for the provided chemistry
+      --min-reads <MIN_READS>
+          minimum read count threshold for a cell to be retained/processed; only used with
+          --unfiltered-pl [default: 10]
+
+Advanced Options:
+      --compress
+          compress the output mapping bed file
+      --ignore-ambig-hits
+          skip checking of the equivalence classes of k-mers that were too ambiguous to be otherwise
+          considered (passing this flag can speed up mapping slightly, but may reduce specificity)
+      --no-poison
+          do not consider poison k-mers, even if the underlying index contains them. In this case,
+          the mapping results will be identical to those obtained as if no poison table was added to
+          the index
+      --use-chr
+          use chromosomes as color
+      --thr <THR>
+          threshold to be considered for pseudoalignment [default: 0.7]
+      --bin-size <BIN_SIZE>
+          size of virtual color intervals [default: 1000]
+      --bin-overlap <BIN_OVERLAP>
+          size for virtual color interval overlap [default: 300]
+      --no-tn5-shift
+          do not apply Tn5 shift to mapped positions
+      --check-kmer-orphan
+          Check if any mapping kmer exist for a mate which is not mapped, but there exists mapping
+          for the other read. If set to true and a mapping kmer exists, then the pair would not be
+          mapped
+      --max-ec-card <MAX_EC_CARD>
+          determines the maximum cardinality equivalence class (number of (txp, orientation status)
+          pairs) to examine (cannot be used with --ignore-ambig-hits) [default: 4096]
+      --max-hit-occ <MAX_HIT_OCC>
+          in the first pass, consider only k-mers having <= --max-hit-occ hits [default: 256]
+      --max-hit-occ-recover <MAX_HIT_OCC_RECOVER>
+          if all k-mers have > --max-hit-occ hits, then make a second pass and consider k-mers
+          having <= --max-hit-occ-recover hits [default: 1024]
+      --max-read-occ <MAX_READ_OCC>
+          reads with more than this number of mappings will not have their mappings reported
+          [default: 2500]
+
+Peak Caller Options:
+      --gsize <GSIZE>      The value to be passed to the `macs3` `--gsize` (genome size) option.
+                           Possible values are "hs", "mm", "ce", "dm" or an unsigned integer
+                           [default: hs]
+      --qvalue <QVALUE>    The value to be passed to the `macs3` `--qvalue` (minimum FDR cutoff)
+                           option [default: 0.1]
+      --extsize <EXTSIZE>  The value to be passed to the `macs3` `--extsize` option [default: 50]
diff --git a/tests/snapshots/cli-help/simpleaf_chemistry___help.txt b/tests/snapshots/cli-help/simpleaf_chemistry___help.txt
new file mode 100644
index 0000000..b0c9a42
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_chemistry___help.txt
@@ -0,0 +1,16 @@
+operate on or inspect the chemistry registry
+
+Usage: simpleaf chemistry <COMMAND>
+
+Commands:
+  refresh  Update the local chemistry registry according to the upstream repository
+  add      Add a new or update an existing chemistry in the local registry
+  remove   Remove chemistries from the local chemistry registry
+  clean    Remove cached permit list files that do not belong to any registered chemistries
+  lookup   Look up chemistries in the local registry and print the details
+  fetch    Download the permit list files for registered chemistries
+  help     Print this message or the help of the given subcommand(s)
+
+Options:
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_chemistry_add___help.txt b/tests/snapshots/cli-help/simpleaf_chemistry_add___help.txt
new file mode 100644
index 0000000..e2c6824
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_chemistry_add___help.txt
@@ -0,0 +1,28 @@
+Add a new or update an existing chemistry in the local registry
+
+Usage: simpleaf chemistry add [OPTIONS] --name <NAME>
+
+Options:
+  -n, --name <NAME>                  The name to give to the chemistry
+  -g, --geometry <GEOMETRY>          A quoted string representing the geometry to which the
+                                     chemistry maps
+  -e, --expected-ori <EXPECTED_ORI>  The direction of the first (most upstream) mappable biological
+                                     sequence [possible values: fw, rc, both]
+      --local-url <LOCAL_URL>        The (fully-qualified) path to a local permit list file that
+                                     will be copied into the ALEVIN_FRY_HOME directory for future
+                                     use
+      --remote-url <REMOTE_URL>      The url of a remote file that will be downloaded (on demand) to
+                                     provide a permit list for use with this chemistry. This file
+                                     should be obtainable with the equivalent of `wget <local-url>`.
+                                     The file will only be downloaded the first time it is needed
+                                     and will be locally cached in ALEVIN_FRY_HOME after that
+      --version <VERSION>            A semver format version tag, e.g., `0.1.0`, indicating the
+                                     version of the chemistry definition. To update a registered
+                                     chemistry, please provide a higher version number, e.g.,
+                                     `0.2.0` [default: 0.0.0]
+      --from-json <FROM_JSON>        Instead of providing the chemistry directly on the command
+                                     line, use the chemistry definition provided in the provided
+                                     JSON file. This JSON file can be local or remote, but it must
+                                     contain a valid JSON object with the provided `--name` as the
+                                     key of the chemistry you wish to add
+  -h, --help                         Print help
diff --git a/tests/snapshots/cli-help/simpleaf_chemistry_clean___help.txt b/tests/snapshots/cli-help/simpleaf_chemistry_clean___help.txt
new file mode 100644
index 0000000..126a384
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_chemistry_clean___help.txt
@@ -0,0 +1,8 @@
+Remove cached permit list files that do not belong to any registered chemistries
+
+Usage: simpleaf chemistry clean [OPTIONS]
+
+Options:
+  -d, --dry-run  Print the permit list file(s) that will be removed without removing them
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_chemistry_fetch___help.txt b/tests/snapshots/cli-help/simpleaf_chemistry_fetch___help.txt
new file mode 100644
index 0000000..5cff394
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_chemistry_fetch___help.txt
@@ -0,0 +1,11 @@
+Download the permit list files for registered chemistries
+
+Usage: simpleaf chemistry fetch [OPTIONS] --name <NAME>
+
+Options:
+  -n, --name <NAME>  A comma-separated list of chemistry names to fetch (or a *single* regex pattern
+                     for matching multiple chemistries). Use '.*' to fetch for all registered
+                     chemistries
+  -d, --dry-run      Print the permit list file(s) that will be downloaded without downloading them
+  -h, --help         Print help
+  -V, --version      Print version
diff --git a/tests/snapshots/cli-help/simpleaf_chemistry_lookup___help.txt b/tests/snapshots/cli-help/simpleaf_chemistry_lookup___help.txt
new file mode 100644
index 0000000..9bbd2fa
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_chemistry_lookup___help.txt
@@ -0,0 +1,9 @@
+Look up chemistries in the local registry and print the details
+
+Usage: simpleaf chemistry lookup --name <NAME>
+
+Options:
+  -n, --name <NAME>  The name of a registered chemistry, or a regex pattern for matching registered
+                     chemistries' names
+  -h, --help         Print help
+  -V, --version      Print version
diff --git a/tests/snapshots/cli-help/simpleaf_chemistry_refresh___help.txt b/tests/snapshots/cli-help/simpleaf_chemistry_refresh___help.txt
new file mode 100644
index 0000000..ef471ac
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_chemistry_refresh___help.txt
@@ -0,0 +1,9 @@
+Update the local chemistry registry according to the upstream repository
+
+Usage: simpleaf chemistry refresh [OPTIONS]
+
+Options:
+  -f, --force    overwrite existing chemistries even if the versions aren't newer
+  -d, --dry-run  print the chemistries that will be added or updated without modifying the local
+                 registry
+  -h, --help     Print help
diff --git a/tests/snapshots/cli-help/simpleaf_chemistry_remove___help.txt b/tests/snapshots/cli-help/simpleaf_chemistry_remove___help.txt
new file mode 100644
index 0000000..b704273
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_chemistry_remove___help.txt
@@ -0,0 +1,10 @@
+Remove chemistries from the local chemistry registry
+
+Usage: simpleaf chemistry remove [OPTIONS] --name <NAME>
+
+Options:
+  -n, --name <NAME>  A chemistry name or a regex pattern matching the names of chemistries in the
+                     registry to remove
+  -d, --dry-run      Print the chemistries that would be removed without removing them
+  -h, --help         Print help
+  -V, --version      Print version
diff --git a/tests/snapshots/cli-help/simpleaf_index___help.txt b/tests/snapshots/cli-help/simpleaf_index___help.txt
new file mode 100644
index 0000000..29c9d44
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_index___help.txt
@@ -0,0 +1,61 @@
+build the (expanded) reference index
+
+Usage: simpleaf index [OPTIONS] --output <OUTPUT> <--fasta <FASTA>|--ref-seq <REF_SEQ>|--probe-csv <PROBE_CSV>|--feature-csv <FEATURE_CSV>>
+
+Options:
+  -o, --output <OUTPUT>            Path to output directory (will be created if it doesn't exist)
+  -t, --threads <THREADS>          Number of threads to use when running [default: 16]
+  -k, --kmer-length <KMER_LENGTH>  The value of k to be used to construct the index [default: 31]
+      --gff3-format                Denotes that the input annotation is a GFF3 (instead of GTF) file
+      --keep-duplicates            Keep duplicated identical sequences when constructing the index
+      --overwrite                  Overwrite existing files if the output directory is already
+                                   populated
+  -h, --help                       Print help
+  -V, --version                    Print version
+
+Expanded Reference Options:
+      --ref-type <REF_TYPE>    Specify whether an expanded reference, spliced+intronic (or splici)
+                               or spliced+unspliced (or spliceu), should be built [default:
+                               spliced+intronic]
+  -f, --fasta <FASTA>          Path to a reference genome to be used for the expanded reference
+                               construction
+  -g, --gtf <GTF>              Path to a reference GTF/GFF3 file to be used for the expanded
+                               reference construction
+  -r, --rlen <RLEN>            The Read length used in roers to add flanking lengths to intronic
+                               sequences
+      --dedup                  Deduplicate identical sequences in roers when building the expanded
+                               reference
+      --spliced <SPLICED>      Path to a FASTA file with extra spliced sequence to add to the index
+      --unspliced <UNSPLICED>  Path to a FASTA file with extra unspliced sequence to add to the
+                               index
+
+Direct Reference Options:
+      --feature-csv <FEATURE_CSV>  Path to a CSV file containing feature barcode sequences to use
+                                   for direct reference indexing. The file must follow the format of
+                                   10x Feature Reference CSV. Currently, only three columns are
+                                   used: id, name, and sequence
+      --probe-csv <PROBE_CSV>      Path to a CSV file containing probe sequences to use for direct
+                                   reference indexing. The file must follow the format of 10x Probe
+                                   Set Reference v2 CSV, containing four mandatory columns: gene_id,
+                                   probe_seq, probe_id, and included (TRUE or FALSE), and an
+                                   optional column: region (spliced or unspliced)
+      --ref-seq <REF_SEQ>          Path to a FASTA file containing reference sequences to directly
+                                   build index on, and avoid expanded reference construction
+
+Piscem Index Options:
+  -m, --minimizer-length <MINIMIZER_LENGTH>
+          Minimizer length to be used to construct the piscem index (must be < k) [default: 19]
+      --decoy-paths <DECOY_PATHS>
+          Paths to decoy sequence FASTA files used to insert poison k-mer information into the index
+          (only if using piscem >= 0.7)
+      --seed <HASH_SEED>
+          The seed value to use in SSHash index construction (try changing this in the rare event
+          index build fails) [default: 1]
+      --work-dir <WORK_DIR>
+          The working directory where temporary files should be placed [default: ./workdir.noindex]
+      --use-piscem
+          Use piscem instead of salmon for indexing and mapping (default)
+
+Alternative salmon-alevin Index Options:
+  -p, --sparse     If this flag is passed, build the sparse rather than dense index for mapping
+      --no-piscem  Don't use the default piscem mapper, instead, use salmon-alevin
diff --git a/tests/snapshots/cli-help/simpleaf_inspect___help.txt b/tests/snapshots/cli-help/simpleaf_inspect___help.txt
new file mode 100644
index 0000000..b4fd152
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_inspect___help.txt
@@ -0,0 +1,7 @@
+inspect the current configuration
+
+Usage: simpleaf inspect
+
+Options:
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_quant___help.txt b/tests/snapshots/cli-help/simpleaf_quant___help.txt
new file mode 100644
index 0000000..da4cc31
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_quant___help.txt
@@ -0,0 +1,78 @@
+quantify a sample
+
+Usage: simpleaf quant [OPTIONS] --chemistry <CHEMISTRY> --output <OUTPUT> --resolution <RESOLUTION> <--expect-cells <EXPECT_CELLS>|--explicit-pl <EXPLICIT_PL>|--forced-cells <FORCED_CELLS>|--knee|--unfiltered-pl [<UNFILTERED_PL>]> <--index <INDEX>|--map-dir <MAP_DIR>>
+
+Options:
+  -c, --chemistry <CHEMISTRY>  The name of a registered chemistry or a quoted string representing a
+                               custom geometry specification
+  -o, --output <OUTPUT>        Path to the output directory
+  -t, --threads <THREADS>      Number of threads to use when running [default: 16]
+  -h, --help                   Print help
+  -V, --version                Print version
+
+Mapping Options:
+  -i, --index <INDEX>            Path to a folder containing the index files
+  -1, --reads1 <READS1>          Comma-separated list of paths to read 1 files. The order must match
+                                 the read 2 files
+  -2, --reads2 <READS2>          Comma-separated list of paths to read 2 files. The order must match
+                                 the read 1 files
+      --no-piscem                Don't use the default piscem mapper, instead, use salmon-alevin
+      --use-piscem               Use piscem for mapping (requires that index points to the piscem
+                                 index)
+  -s, --use-selective-alignment  Use selective-alignment for mapping (only if using salmon alevin as
+                                 the underlying mapper)
+      --map-dir <MAP_DIR>        Path to a mapped output directory containing a RAD file to skip
+                                 mapping
+
+Piscem Mapping Options:
+      --struct-constraints
+          If piscem >= 0.7.0, enable structural constraints
+      --ignore-ambig-hits
+          Skip checking of the equivalence classes of k-mers that were too ambiguous to be otherwise
+          considered (passing this flag can speed up mapping slightly, but may reduce specificity)
+      --no-poison
+          Do not consider poison k-mers, even if the underlying index contains them. In this case,
+          the mapping results will be identical to those obtained as if no poison table was added to
+          the index
+      --skipping-strategy <SKIPPING_STRATEGY>
+          The skipping strategy to use for k-mer collection [default: permissive] [possible values:
+          permissive, strict]
+      --max-ec-card <MAX_EC_CARD>
+          Determines the maximum cardinality equivalence class (number of (txp, orientation status)
+          pairs) to examine (cannot be used with --ignore-ambig-hits) [default: 4096]
+      --max-hit-occ <MAX_HIT_OCC>
+          In the first pass, consider only collected and matched k-mers of a read having <=
+          --max-hit-occ hits [default: 256]
+      --max-hit-occ-recover <MAX_HIT_OCC_RECOVER>
+          If all collected and matched k-mers of a read have > --max-hit-occ hits, then make a
+          second pass and consider k-mers having <= --max-hit-occ-recover hits [default: 1024]
+      --max-read-occ <MAX_READ_OCC>
+          Threshold for discarding reads with too many mappings [default: 2500]
+
+Permit List Generation Options:
+  -k, --knee
+          Use knee filtering mode
+  -u, --unfiltered-pl [<UNFILTERED_PL>]
+          Use unfiltered permit list
+  -f, --forced-cells <FORCED_CELLS>
+          Use forced number of cells
+  -x, --explicit-pl <EXPLICIT_PL>
+          Use a filtered, explicit permit list
+  -e, --expect-cells <EXPECT_CELLS>
+          Use expected number of cells
+  -d, --expected-ori <EXPECTED_ORI>
+          The expected direction/orientation of alignments in the chemistry being processed. If not
+          provided, will default to `fw` for 10xv2/10xv3, otherwise `both` [possible values: fw, rc,
+          both]
+      --min-reads <MIN_READS>
+          Minimum read count threshold for a cell to be retained/processed; only use with
+          --unfiltered-pl [default: 10]
+
+UMI Resolution Options:
+  -m, --t2g-map <T2G_MAP>        Path to a transcript to gene map file
+  -r, --resolution <RESOLUTION>  UMI resolution mode [possible values: cr-like, cr-like-em,
+                                 parsimony, parsimony-em, parsimony-gene, parsimony-gene-em]
+
+Output Options:
+      --anndata-out  Generate an anndata (h5ad format) count matrix from the standard (matrix-market
+                     format) output
diff --git a/tests/snapshots/cli-help/simpleaf_refresh_prog_info___help.txt b/tests/snapshots/cli-help/simpleaf_refresh_prog_info___help.txt
new file mode 100644
index 0000000..5125ed6
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_refresh_prog_info___help.txt
@@ -0,0 +1,7 @@
+refreshes version information associated with programs used by simpleaf
+
+Usage: simpleaf refresh-prog-info
+
+Options:
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_set_paths___help.txt b/tests/snapshots/cli-help/simpleaf_set_paths___help.txt
new file mode 100644
index 0000000..03bc425
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_set_paths___help.txt
@@ -0,0 +1,11 @@
+set paths to the programs that simpleaf will use
+
+Usage: simpleaf set-paths [OPTIONS]
+
+Options:
+  -s, --salmon <SALMON>          path to salmon to use
+  -p, --piscem <PISCEM>          path to piscem to use
+  -a, --alevin-fry <ALEVIN_FRY>  path to alein-fry to use
+  -m, --macs <MACS>              path to macs3 to use
+  -h, --help                     Print help
+  -V, --version                  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_workflow___help.txt b/tests/snapshots/cli-help/simpleaf_workflow___help.txt
new file mode 100644
index 0000000..a769590
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_workflow___help.txt
@@ -0,0 +1,18 @@
+simpleaf workflow related command set
+
+Usage: simpleaf workflow
+       simpleaf workflow <COMMAND>
+
+Commands:
+  list     Print a summary of the currently available workflows
+  refresh  Update the local copy of protocol esturary to the latest version
+  get      Get the workflow template and related files of a registered workflow
+  patch    Patch a workflow template or instantiated manifest with a subset of parameters to produce
+           a series of workflow manifests
+  run      Parse and instantiate a workflow template and invoke the workflow commands, or run an
+           instantiated manifest directly
+  help     Print this message or the help of the given subcommand(s)
+
+Options:
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_workflow_get___help.txt b/tests/snapshots/cli-help/simpleaf_workflow_get___help.txt
new file mode 100644
index 0000000..2546441
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_workflow_get___help.txt
@@ -0,0 +1,9 @@
+Get the workflow template and related files of a registered workflow
+
+Usage: simpleaf workflow get --name <NAME> --output <OUTPUT>
+
+Options:
+  -o, --output <OUTPUT>  path to dump the folder containing the workflow related files
+  -n, --name <NAME>      name of the queried workflow
+  -h, --help             Print help
+  -V, --version          Print version
diff --git a/tests/snapshots/cli-help/simpleaf_workflow_list___help.txt b/tests/snapshots/cli-help/simpleaf_workflow_list___help.txt
new file mode 100644
index 0000000..3d2835c
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_workflow_list___help.txt
@@ -0,0 +1,7 @@
+Print a summary of the currently available workflows
+
+Usage: simpleaf workflow list
+
+Options:
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_workflow_patch___help.txt b/tests/snapshots/cli-help/simpleaf_workflow_patch___help.txt
new file mode 100644
index 0000000..a007158
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_workflow_patch___help.txt
@@ -0,0 +1,22 @@
+Patch a workflow template or instantiated manifest with a subset of parameters to produce a series
+of workflow manifests
+
+Usage: simpleaf workflow patch [OPTIONS] --patch <PATCH> <--manifest <MANIFEST>|--template <TEMPLATE>>
+
+Options:
+  -m, --manifest <MANIFEST>  fully-instantiated manifest (JSON file) to patch. If this argument is
+                             given, the patch is applied directly to the JSON file in a manner akin
+                             to simple key-value replacement. Since the manifest is
+                             fully-instantiated, no derived values will be affected
+  -t, --template <TEMPLATE>  partially-instantiated template (JSONNET file) to patch. If this
+                             argument is given, the patch is applied *before* the template is
+                             instantiated (i.e. if you override a variable used elswhere in the
+                             template, all derived values will be affected)
+  -p, --patch <PATCH>        patch to apply as a ';' separated parameter table with headers declared
+                             as specified in the documentation
+  -o, --output <OUTPUT>      output directory where the patched manifest files (i.e. the output of
+                             applying the patching procedure) should be stored. If no directory is
+                             provided, the patched manifests are stored in the same location as the
+                             input template or manifest to which patching is applied
+  -h, --help                 Print help
+  -V, --version              Print version
diff --git a/tests/snapshots/cli-help/simpleaf_workflow_refresh___help.txt b/tests/snapshots/cli-help/simpleaf_workflow_refresh___help.txt
new file mode 100644
index 0000000..5dd9589
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_workflow_refresh___help.txt
@@ -0,0 +1,7 @@
+Update the local copy of protocol esturary to the latest version
+
+Usage: simpleaf workflow refresh
+
+Options:
+  -h, --help     Print help
+  -V, --version  Print version
diff --git a/tests/snapshots/cli-help/simpleaf_workflow_run___help.txt b/tests/snapshots/cli-help/simpleaf_workflow_run___help.txt
new file mode 100644
index 0000000..a29a692
--- /dev/null
+++ b/tests/snapshots/cli-help/simpleaf_workflow_run___help.txt
@@ -0,0 +1,28 @@
+Parse and instantiate a workflow template and invoke the workflow commands, or run an instantiated
+manifest directly
+
+Usage: simpleaf workflow run [OPTIONS] <--manifest <MANIFEST>|--template <TEMPLATE>>
+
+Options:
+  -t, --template <TEMPLATE>  path to an instantiated simpleaf workflow template
+  -o, --output <OUTPUT>      output directory for log files and the workflow outputs that have no
+                             explicit output directory
+  -m, --manifest <MANIFEST>  path to an instantiated simpleaf workflow template
+  -h, --help                 Print help
+  -V, --version              Print version
+
+Control Flow:
+  -n, --no-execution           return after instantiating the template (JSONNET file) into a
+                               manifest (JSON foramt) without actually executing the resulting
+                               manifest
+  -s, --start-at <START_AT>    Start the execution from a specific Step. All previous steps will be
+                               ignored [default: 1]
+  -r, --resume                 resume execution from the termination step of a previous run. To use
+                               this flag, the output directory must contains the JSON file generated
+                               from a previous run
+      --skip-step <SKIP_STEP>  comma separated integers indicating which steps (commands) will be
+                               skipped during the execution
+
+Jsonnet:
+  -j, --jpaths <JPATHS>  comma separated library search paths passing to internal Jsonnet engine as
+                         --jpath flags

From 304310150fbc1a5487ee0c1d8d960c642091355a Mon Sep 17 00:00:00 2001
From: Rob Patro <rob.patro@gmail.com>
Date: Sat, 21 Feb 2026 00:37:55 -0500
Subject: [PATCH 2/4] add git ignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index e6a88f4..270b9d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,6 @@ TEST_REF
 TEST_REF_CLAPV4
 TEST_REF_ROERS
 blarg
+
+# macOS Finder metadata
+**/.DS_Store

From fcdf3aaa7e0c1a72e1b6b6a73a16486d3a938243 Mon Sep 17 00:00:00 2001
From: Rob Patro <rob.patro@gmail.com>
Date: Sat, 21 Feb 2026 17:17:52 -0500
Subject: [PATCH 3/4] improve file checking move to 2024 edition

---
 Cargo.lock                         |  10 ++
 Cargo.toml                         |   3 +-
 src/atac/commands.rs               |   2 +-
 src/atac/process.rs                |  42 +++---
 src/core/exec.rs                   |   2 +-
 src/core/runtime.rs                |   8 +-
 src/main.rs                        |   4 +-
 src/simpleaf_commands.rs           |   2 +-
 src/simpleaf_commands/chemistry.rs | 207 +++++++++++++++++------------
 src/simpleaf_commands/indexing.rs  |  55 +++++---
 src/simpleaf_commands/inspect.rs   |   2 +-
 src/simpleaf_commands/paths.rs     |   6 +-
 src/simpleaf_commands/quant.rs     | 100 ++++++++------
 src/simpleaf_commands/refresh.rs   |   2 +-
 src/simpleaf_commands/workflow.rs  |  54 +++++---
 src/utils/af_utils.rs              |  85 +++++++++---
 src/utils/chem_utils.rs            |  16 +--
 src/utils/jrsonnet_main.rs         |  12 +-
 src/utils/prog_utils.rs            | 193 ++++++++++++++++++++-------
 src/utils/workflow_utils.rs        | 114 ++++++++++------
 tests/cli_smoke.rs                 |  10 +-
 21 files changed, 603 insertions(+), 326 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5e9fd85..60f74e9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -912,6 +912,15 @@ version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
+[[package]]
+name = "file-requirements"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ab1300e0345c97d6d5ed20b90d53ea83e710e971215b47590ac5c6c69847b65"
+dependencies = [
+ "thiserror 2.0.12",
+]
+
 [[package]]
 name = "flate2"
 version = "1.1.1"
@@ -3577,6 +3586,7 @@ dependencies = [
  "clap",
  "cmd_lib",
  "csv",
+ "file-requirements",
  "flate2",
  "jrsonnet-cli",
  "jrsonnet-evaluator",
diff --git a/Cargo.toml b/Cargo.toml
index abeac5e..a869951 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "simpleaf"
 version = "0.19.5"
-edition = "2021"
+edition = "2024"
 authors = [
   "Rob Patro <rob@cs.umd.edu>",
   "Dongze He <dhe17@umd.edu>",
@@ -86,6 +86,7 @@ regex = { version = "1.11.1", default-features = false, features = [
 tempfile = "3.19.1"
 ureq = { version = "3.0.11", features = ["json"] }
 af-anndata = { version = "0.3.3", git = "https://github.com/COMBINE-lab/af-anndata" }
+file-requirements = "0.1.0"
 
 [profile.release]
 lto = "thin"
diff --git a/src/atac/commands.rs b/src/atac/commands.rs
index 00e9bd6..9f1ee42 100644
--- a/src/atac/commands.rs
+++ b/src/atac/commands.rs
@@ -2,8 +2,8 @@ use crate::atac::defaults::{AtacIndexParams, DefaultAtacParams};
 use crate::defaults::{DefaultMappingParams, DefaultParams};
 use crate::utils::chem_utils::{ExpectedOri, QueryInRegistry};
 use clap::{
-    builder::{ArgPredicate, PossibleValue},
     Args, Subcommand, ValueEnum,
+    builder::{ArgPredicate, PossibleValue},
 };
 use std::fmt;
 use std::path::PathBuf;
diff --git a/src/atac/process.rs b/src/atac/process.rs
index 118c059..069e965 100644
--- a/src/atac/process.rs
+++ b/src/atac/process.rs
@@ -1,12 +1,12 @@
 use crate::atac::commands::ProcessOpts;
 use crate::core::{context, exec, index_meta, io, runtime};
-use crate::utils::chem_utils::get_single_custom_chem_from_file;
 use crate::utils::chem_utils::ExpectedOri;
 use crate::utils::chem_utils::QueryInRegistry;
+use crate::utils::chem_utils::get_single_custom_chem_from_file;
 use crate::utils::constants::CHEMISTRIES_PATH;
 use crate::utils::{prog_utils, prog_utils::ReqProgs};
 use anyhow;
-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 use serde_json::json;
 use std::io::BufReader;
 use std::path::{Path, PathBuf};
@@ -216,13 +216,7 @@ pub(crate) fn map_reads(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Resu
         .context("piscem program info is missing; please run `simpleaf set-paths`.")?;
 
     let index_base = index_meta::resolve_atac_piscem_index_base(opts.index.clone())?;
-
-    let input_files = vec![
-        index_base.with_extension("ctab"),
-        index_base.with_extension("refinfo"),
-        index_base.with_extension("sshash"),
-    ];
-    prog_utils::check_files_exist(&input_files)?;
+    prog_utils::check_piscem_index_files(index_base.as_path())?;
 
     // using a piscem index
     let mut piscem_map_cmd =
@@ -262,21 +256,24 @@ pub(crate) fn map_reads(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Resu
 
     // if the user is requesting a mapping option that required
     // piscem version >= 0.7.0, ensure we have that
-    if let Ok(_piscem_ver) = prog_utils::check_version_constraints(
+    match prog_utils::check_version_constraints(
         "piscem",
         ">=0.11.0, <1.0.0",
         &piscem_prog_info.version,
     ) {
-        push_advanced_piscem_options(&mut piscem_map_cmd, opts)?;
-    } else {
-        info!(
-            r#"
+        Ok(_piscem_ver) => {
+            push_advanced_piscem_options(&mut piscem_map_cmd, opts)?;
+        }
+        Err(_) => {
+            info!(
+                r#"
 Simpleaf is currently using piscem version {}, but you must be using version >= 0.11.0 in order to use the 
 mapping options specific to this, or later versions. If you wish to use these options, please upgrade your 
 piscem version or, if you believe you have a sufficiently new version installed, update the executable 
 being used by simpleaf"#,
-            &piscem_prog_info.version
-        );
+                &piscem_prog_info.version
+            );
+        }
     }
 
     let map_cmd_string = prog_utils::get_cmd_line_string(&piscem_map_cmd);
@@ -566,15 +563,22 @@ fn af_gpl(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<GplStageOut
                             warn!("barcode_ori \"{}\" is unknown; assuming forward.", s);
                         }
                         None => {
-                            warn!("couldn't interpret value associated with \"barcode_ori\" as a string; assuming forward.");
+                            warn!(
+                                "couldn't interpret value associated with \"barcode_ori\" as a string; assuming forward."
+                            );
                         }
                     }
                 } else {
-                    warn!("No meta field present for the chemistry so can't check if barcodes should be reverse complemented.");
+                    warn!(
+                        "No meta field present for the chemistry so can't check if barcodes should be reverse complemented."
+                    );
                 }
             }
         } else {
-            warn!("Couldn't find expected chemistry registry {} so can't check if barcodes should be reverse complemented.", custom_chem_p.display());
+            warn!(
+                "Couldn't find expected chemistry registry {} so can't check if barcodes should be reverse complemented.",
+                custom_chem_p.display()
+            );
         }
         pbco
     };
diff --git a/src/core/exec.rs b/src/core/exec.rs
index b550f7c..1abaee3 100644
--- a/src/core/exec.rs
+++ b/src/core/exec.rs
@@ -1,4 +1,4 @@
-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 
 use crate::utils::prog_utils::{self, CommandVerbosityLevel};
 
diff --git a/src/core/runtime.rs b/src/core/runtime.rs
index 3329337..ac38487 100644
--- a/src/core/runtime.rs
+++ b/src/core/runtime.rs
@@ -8,10 +8,10 @@ pub fn cap_threads(requested: u32) -> (u32, Option<u32>) {
 }
 
 fn cap_threads_with_limit(requested: u32, limit: Option<u32>) -> (u32, Option<u32>) {
-    if let Some(max_threads) = limit {
-        if requested > max_threads {
-            return (max_threads, Some(max_threads));
-        }
+    if let Some(max_threads) = limit
+        && requested > max_threads
+    {
+        return (max_threads, Some(max_threads));
     }
     (requested, None)
 }
diff --git a/src/main.rs b/src/main.rs
index c144bbe..e5bcbc8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,10 +3,10 @@ use chemistry::{
     remove_chemistry,
 };
 use tracing::info;
-use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter};
+use tracing_subscriber::{EnvFilter, filter::LevelFilter, fmt, prelude::*};
 
 use anyhow::bail;
-use clap::{crate_version, Parser};
+use clap::{Parser, crate_version};
 
 use std::env;
 use std::path::PathBuf;
diff --git a/src/simpleaf_commands.rs b/src/simpleaf_commands.rs
index 9bc72f3..b226f6d 100644
--- a/src/simpleaf_commands.rs
+++ b/src/simpleaf_commands.rs
@@ -24,7 +24,7 @@ pub use self::workflow::{
 pub use crate::atac::commands::AtacCommand;
 pub use crate::defaults::{DefaultMappingParams, DefaultParams};
 
-use clap::{builder::ArgPredicate, ArgAction, ArgGroup, Args, Subcommand};
+use clap::{ArgAction, ArgGroup, Args, Subcommand, builder::ArgPredicate};
 use std::path::PathBuf;
 
 /// The type of references we might create
diff --git a/src/simpleaf_commands/chemistry.rs b/src/simpleaf_commands/chemistry.rs
index 418b551..91b7924 100644
--- a/src/simpleaf_commands/chemistry.rs
+++ b/src/simpleaf_commands/chemistry.rs
@@ -1,14 +1,14 @@
 use crate::core::io::write_json_pretty_atomic;
 use crate::utils::chem_utils::{
-    custom_chem_hm_into_json, get_custom_chem_hm, get_single_custom_chem_from_file,
     CustomChemistry, CustomChemistryMap, ExpectedOri, LOCAL_PL_PATH_KEY, REMOTE_PL_URL_KEY,
+    custom_chem_hm_into_json, get_custom_chem_hm, get_single_custom_chem_from_file,
 };
 use crate::utils::constants::*;
 use crate::utils::prog_utils::{self, download_to_file_compute_hash};
 use crate::utils::{self, af_utils::*};
 use regex::Regex;
 
-use anyhow::{bail, Context, Result};
+use anyhow::{Context, Result, bail};
 use semver::Version;
 use serde_json::json;
 use serde_json::{Map, Value};
@@ -253,7 +253,8 @@ pub fn add_chemistry(
             anyhow::Result::Err(e) => {
                 bail!(
                     "failed to obtain the chemistry definition from the provided JSON source : {}. Error :: {:#}",
-                    json_src, e
+                    json_src,
+                    e
                 );
             }
         }
@@ -288,7 +289,10 @@ pub fn add_chemistry(
             "reading existing chemistry version from chemistries.json",
         )?;
         if add_ver <= existing_ver {
-            info!("Attempting to add chemistry with version {:#} which is <= than the existing version ({:#}) for this chemistry; Skipping addition.", add_ver, existing_ver);
+            info!(
+                "Attempting to add chemistry with version {:#} which is <= than the existing version ({:#}) for this chemistry; Skipping addition.",
+                add_ver, existing_ver
+            );
             return Ok(());
         } else {
             info!(
@@ -304,7 +308,11 @@ pub fn add_chemistry(
                 let metadata = std::fs::metadata(&local_url)?;
                 let flen = metadata.size();
                 if flen > 4_294_967_296 {
-                    warn!("The file provided to local-url ({}) is {:.1} GB. This file will be *copied* into the ALEVIN_FRY_HOME directory", local_url.display(), flen / 1_073_741_824);
+                    warn!(
+                        "The file provided to local-url ({}) is {:.1} GB. This file will be *copied* into the ALEVIN_FRY_HOME directory",
+                        local_url.display(),
+                        flen / 1_073_741_824
+                    );
                 }
 
                 let mut hasher = blake3::Hasher::new();
@@ -369,8 +377,8 @@ pub fn add_chemistry(
             // check if the file already exists
             if local_plist_path.is_file() {
                 info!(
-                "Found a cached, content-equivalent permit list file; will use the existing file."
-            );
+                    "Found a cached, content-equivalent permit list file; will use the existing file."
+                );
 
                 // remove what we just downloaded
                 fs::remove_file(tmpfile)?;
@@ -404,7 +412,16 @@ pub fn add_chemistry(
 
     // check if the chemistry already exists and log
     if let Some(cc) = chem_hm.get(custom_chem.name()) {
-        info!("Chemistry {} is already registered, with geometry {} the one recorded: {}; overwriting geometry specification.", custom_chem.name(), if cc.geometry() == custom_chem.geometry() {"same as"} else {"different with"}, cc.geometry());
+        info!(
+            "Chemistry {} is already registered, with geometry {} the one recorded: {}; overwriting geometry specification.",
+            custom_chem.name(),
+            if cc.geometry() == custom_chem.geometry() {
+                "same as"
+            } else {
+                "different with"
+            },
+            cc.geometry()
+        );
         chem_hm
             .entry(custom_chem.name().to_string())
             .and_modify(|e| *e = custom_chem);
@@ -442,7 +459,10 @@ pub fn refresh_chemistries(
     // but read it in and attempt to populate.
     let custom_chem_file = af_home.join(CUSTOM_CHEMISTRIES_PATH);
     let merge_custom_chem = if custom_chem_file.exists() {
-        warn!("{}Found deprecated chemistry registry file \"{}\"; Attempting to merge the chemistries defined in this file into the main registry.", dry_run_pref, CUSTOM_CHEMISTRIES_PATH);
+        warn!(
+            "{}Found deprecated chemistry registry file \"{}\"; Attempting to merge the chemistries defined in this file into the main registry.",
+            dry_run_pref, CUSTOM_CHEMISTRIES_PATH
+        );
         true
     } else {
         false
@@ -492,7 +512,10 @@ pub fn refresh_chemistries(
                 // remove the temp file
                 std::fs::remove_file(tmp_chem_path)?;
             } else {
-                bail!("Could not parse the main registry from \"{}\" file. Please report this on GitHub.", chem_path.display());
+                bail!(
+                    "Could not parse the main registry from \"{}\" file. Please report this on GitHub.",
+                    chem_path.display()
+                );
             }
         } else {
             bail!(
@@ -513,10 +536,16 @@ pub fn refresh_chemistries(
                 let backup = custom_chem_file.with_extension("json.bak");
                 std::fs::rename(custom_chem_file, backup)?;
             } else {
-                bail!("Could not parse the deprecated registry file as a JSON object; it may be corrupted. Consider deleting this file from {}.", custom_chem_file.display());
+                bail!(
+                    "Could not parse the deprecated registry file as a JSON object; it may be corrupted. Consider deleting this file from {}.",
+                    custom_chem_file.display()
+                );
             }
         } else {
-            bail!("Could not parse the main chemistry registry file, \"{}\", as a JSON object. Please report this on GitHub.", chem_path.display());
+            bail!(
+                "Could not parse the main chemistry registry file, \"{}\", as a JSON object. Please report this on GitHub.",
+                chem_path.display()
+            );
         }
     }
 
@@ -567,11 +596,7 @@ pub fn clean_chemistries(
         .filter_map(|de| {
             if let Ok(entry) = de {
                 let path = entry.path();
-                if path.is_file() {
-                    Some(path)
-                } else {
-                    None
-                }
+                if path.is_file() { Some(path) } else { None }
             } else {
                 None
             }
@@ -583,9 +608,14 @@ pub fn clean_chemistries(
     // check if the chemistry already exists and log
     if dry_run {
         if rem_pls.is_empty() {
-            info!("[dry_run] : No permit list files in the cache directory are currently unused; Nothing to clean.");
+            info!(
+                "[dry_run] : No permit list files in the cache directory are currently unused; Nothing to clean."
+            );
         } else {
-            info!("[dry_run] : The following files in the permit list directory do not match any registered chemistries and would be removed: {:#?}", rem_pls);
+            info!(
+                "[dry_run] : The following files in the permit list directory do not match any registered chemistries and would be removed: {:#?}",
+                rem_pls
+            );
         }
     } else {
         for pl in &rem_pls {
@@ -610,26 +640,29 @@ pub fn remove_chemistry(
     let mut num_matched = 0;
     let keys = chem_hm.keys().cloned().collect::<Vec<String>>();
 
-    if let Ok(name_re) = regex::Regex::new(&name) {
-        for k in keys {
-            if name_re.is_match(&k) {
-                num_matched += 1;
-                if remove_opts.dry_run {
-                    info!(
-                        "[dry_run] : Would remove chemistry \"{}\" from the registry.",
-                        k
-                    );
-                } else {
-                    info!("Chemistry \"{}\" found in the registry; Removing it!", k);
-                    chem_hm.remove(&k);
+    match regex::Regex::new(&name) {
+        Ok(name_re) => {
+            for k in keys {
+                if name_re.is_match(&k) {
+                    num_matched += 1;
+                    if remove_opts.dry_run {
+                        info!(
+                            "[dry_run] : Would remove chemistry \"{}\" from the registry.",
+                            k
+                        );
+                    } else {
+                        info!("Chemistry \"{}\" found in the registry; Removing it!", k);
+                        chem_hm.remove(&k);
+                    }
                 }
             }
         }
-    } else {
-        bail!(
-            "The provided chemistry name {} was neither a valid chemistry name nor a valid regex.",
-            name
-        );
+        Err(_) => {
+            bail!(
+                "The provided chemistry name {} was neither a valid chemistry name nor a valid regex.",
+                name
+            );
+        }
     }
 
     if num_matched == 0 {
@@ -669,19 +702,22 @@ pub fn lookup_chemistry(
         );
         let chem_hm = get_custom_chem_hm(&chem_p)?;
 
-        if let Ok(re) = Regex::new(&name) {
-            println!("=================");
-            for (cname, cval) in chem_hm.iter() {
-                if re.is_match(cname) {
-                    print!("{}", cval);
-                    println!("=================");
+        match Regex::new(&name) {
+            Ok(re) => {
+                println!("=================");
+                for (cname, cval) in chem_hm.iter() {
+                    if re.is_match(cname) {
+                        print!("{}", cval);
+                        println!("=================");
+                    }
                 }
             }
-        } else {
-            info!(
-                "No chemistry matching regex pattern {} was found in the registry!",
-                name
-            );
+            Err(_) => {
+                info!(
+                    "No chemistry matching regex pattern {} was found in the registry!",
+                    name
+                );
+            }
         }
     }
 
@@ -695,13 +731,14 @@ struct FetchSet<'a> {
 
 impl<'a> FetchSet<'a> {
     pub fn from_re(s: &str) -> Result<Self> {
-        if let Ok(re) = regex::Regex::new(s) {
-            Ok(Self {
+        match regex::Regex::new(s) {
+            Ok(re) => Ok(Self {
                 m: HashSet::new(),
                 re: Some(re),
-            })
-        } else {
-            bail!("Could not compile regex : [{}]", s)
+            }),
+            Err(_) => {
+                bail!("Could not compile regex : [{}]", s)
+            }
         }
     }
 
@@ -755,43 +792,45 @@ pub fn fetch_chemistries(
 
         for (k, v) in chem_obj.iter() {
             // if we want to fetch this chem
-            if fetch_chems.contains(k) {
-                if let Some(serde_json::Value::String(pfile)) = v.get(LOCAL_PL_PATH_KEY) {
-                    let fpath = plist_path.join(pfile);
-
-                    // if it doesn't exist
-                    if !fpath.is_file() {
-                        //check for a remote path
-                        if let Some(serde_json::Value::String(rpath)) = v.get(REMOTE_PL_URL_KEY) {
-                            if fetch_opts.dry_run {
-                                info!(
-                                    "[dry_run] : Fetch would fetch missing file {} for {} from {}",
-                                    pfile, k, rpath
+            if fetch_chems.contains(k)
+                && let Some(serde_json::Value::String(pfile)) = v.get(LOCAL_PL_PATH_KEY)
+            {
+                let fpath = plist_path.join(pfile);
+
+                // if it doesn't exist
+                if !fpath.is_file() {
+                    //check for a remote path
+                    if let Some(serde_json::Value::String(rpath)) = v.get(REMOTE_PL_URL_KEY) {
+                        if fetch_opts.dry_run {
+                            info!(
+                                "[dry_run] : Fetch would fetch missing file {} for {} from {}",
+                                pfile, k, rpath
+                            );
+                        } else {
+                            let hash = download_to_file_compute_hash(rpath, &fpath)?;
+                            let expected_hash = pfile.to_string();
+                            let observed_hash = hash.to_string();
+                            if expected_hash != observed_hash {
+                                warn!(
+                                    "Downloaded the file for chemistry {} from {}, but the observed hash {} was not equal to the expcted hash {}",
+                                    k, rpath, observed_hash, expected_hash
                                 );
-                            } else {
-                                let hash = download_to_file_compute_hash(rpath, &fpath)?;
-                                let expected_hash = pfile.to_string();
-                                let observed_hash = hash.to_string();
-                                if expected_hash != observed_hash {
-                                    warn!("Downloaded the file for chemistry {} from {}, but the observed hash {} was not equal to the expcted hash {}",
-                                    k, rpath, observed_hash, expected_hash);
-                                }
-                                info!("Fetched permit list file for {} to {}", k, fpath.display());
                             }
-                        } else {
-                            warn!(
-                                "{}Requested to obtain chemistry {}, but it has no remote URL!",
-                                dry_run_str, k
-                            );
+                            info!("Fetched permit list file for {} to {}", k, fpath.display());
                         }
                     } else {
-                        info!(
-                            "{}File for requested chemistry {} already exists ({}).",
-                            dry_run_str,
-                            k,
-                            fpath.display()
+                        warn!(
+                            "{}Requested to obtain chemistry {}, but it has no remote URL!",
+                            dry_run_str, k
                         );
                     }
+                } else {
+                    info!(
+                        "{}File for requested chemistry {} already exists ({}).",
+                        dry_run_str,
+                        k,
+                        fpath.display()
+                    );
                 }
             }
         }
@@ -808,7 +847,7 @@ mod tests {
     };
     use crate::simpleaf_commands::{ChemistryAddOpts, ChemistryCleanOpts, ChemistryRemoveOpts};
     use crate::utils::constants::CHEMISTRIES_PATH;
-    use serde_json::{json, Map, Value};
+    use serde_json::{Map, Value, json};
     use std::collections::HashSet;
     use std::fs;
     use std::path::PathBuf;
diff --git a/src/simpleaf_commands/indexing.rs b/src/simpleaf_commands/indexing.rs
index 98dd124..6f38b3a 100644
--- a/src/simpleaf_commands/indexing.rs
+++ b/src/simpleaf_commands/indexing.rs
@@ -3,7 +3,7 @@ use crate::utils::af_utils::create_dir_if_absent;
 use crate::utils::prog_utils;
 use crate::utils::prog_utils::ReqProgs;
 
-use anyhow::{anyhow, bail, Context};
+use anyhow::{Context, anyhow, bail};
 use roers;
 use serde::Deserialize;
 use serde_json::json;
@@ -37,7 +37,9 @@ fn derive_kmer_and_minimizer(
 ) -> anyhow::Result<(u32, u32)> {
     if let Some(msl) = min_seq_len {
         if msl < 10 {
-            bail!("The reference sequences are too short for indexing. Please provide sequences with a minimum length of at least 10 bases.");
+            bail!(
+                "The reference sequences are too short for indexing. Please provide sequences with a minimum length of at least 10 bases."
+            );
         }
         if (msl / 2) < default_kmer_length && default_kmer_length == 31 {
             let kmer_length = msl / 2;
@@ -452,7 +454,9 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
 
             CsvReader::Feature(rdr)
         } else {
-            bail!("No reference sequence provided. It should not happen, please report this issue on GitHub.");
+            bail!(
+                "No reference sequence provided. It should not happen, please report this issue on GitHub."
+            );
         };
 
         // determine the format of t2g file
@@ -558,8 +562,10 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
     if opts.use_piscem {
         // ensure we have piscem
         if rp.piscem.is_none() {
-            bail!("The construction of a piscem index was requested, but a valid piscem executable was not available. \n\
-                            Please either set a path using the `set-paths` command, or ensure the `PISCEM` environment variable is set properly.");
+            bail!(
+                "The construction of a piscem index was requested, but a valid piscem executable was not available. \n\
+                            Please either set a path using the `set-paths` command, or ensure the `PISCEM` environment variable is set properly."
+            );
         }
 
         let piscem_prog_info = rp.piscem.as_ref().context(
@@ -610,26 +616,29 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
         // if the user is requesting a poison k-mer table, ensure the
         // piscem version is at least 0.7.0
         if let Some(decoy_paths) = opts.decoy_paths {
-            if let Ok(_piscem_ver) = prog_utils::check_version_constraints(
+            match prog_utils::check_version_constraints(
                 "piscem",
                 ">=0.7.0, <1.0.0",
                 &piscem_prog_info.version,
             ) {
-                let path_args = decoy_paths
-                    .into_iter()
-                    .map(|x| x.to_string_lossy().into_owned())
-                    .collect::<Vec<String>>()
-                    .join(",");
-                piscem_index_cmd.arg("--decoy-paths").arg(path_args);
-            } else {
-                warn!(
-                    r#"
+                Ok(_piscem_ver) => {
+                    let path_args = decoy_paths
+                        .into_iter()
+                        .map(|x| x.to_string_lossy().into_owned())
+                        .collect::<Vec<String>>()
+                        .join(",");
+                    piscem_index_cmd.arg("--decoy-paths").arg(path_args);
+                }
+                Err(_) => {
+                    warn!(
+                        r#"
 You requested to build a poison k-mer table with {:?}, but you must be using piscem version >= 0.7.0 
 to use this feature. Simpleaf is currently using version {}. Please upgrade your piscem version or, 
 if you believe you have a sufficiently new version installed, update the executable being used by 
 simpleaf"#,
-                    decoy_paths, &piscem_prog_info.version
-                );
+                        decoy_paths, &piscem_prog_info.version
+                    );
+                }
             }
         }
 
@@ -675,8 +684,10 @@ simpleaf"#,
     } else {
         // ensure we have piscem
         if rp.salmon.is_none() {
-            bail!("The construction of a salmon index was requested, but a valid salmon executable was not available. \n\
-                            Please either set a path using the `simpleaf set-paths` command, or ensure the `SALMON` environment variable is set properly.");
+            bail!(
+                "The construction of a salmon index was requested, but a valid salmon executable was not available. \n\
+                            Please either set a path using the `simpleaf set-paths` command, or ensure the `SALMON` environment variable is set properly."
+            );
         }
 
         let salmon_prog_info = rp.salmon.as_ref().context(
@@ -697,8 +708,10 @@ simpleaf"#,
         // overwrite doesn't do anything special for the salmon index, so mention this to
         // the user.
         if opts.overwrite {
-            info!("As the default salmon behavior is to overwrite an existing index if the same directory is provided, \n\
-                        the --overwrite flag will have no additional effect.");
+            info!(
+                "As the default salmon behavior is to overwrite an existing index if the same directory is provided, \n\
+                        the --overwrite flag will have no additional effect."
+            );
         }
 
         // if the user requested a sparse index.
diff --git a/src/simpleaf_commands/inspect.rs b/src/simpleaf_commands/inspect.rs
index c5e5988..067c74e 100644
--- a/src/simpleaf_commands/inspect.rs
+++ b/src/simpleaf_commands/inspect.rs
@@ -6,7 +6,7 @@ use crate::utils::{
     prog_utils::*,
 };
 use anyhow::Result;
-use serde_json::{json, Value};
+use serde_json::{Value, json};
 use std::path::PathBuf;
 use strum::IntoEnumIterator;
 use tracing::warn;
diff --git a/src/simpleaf_commands/paths.rs b/src/simpleaf_commands/paths.rs
index 9db27b4..1fc5be6 100644
--- a/src/simpleaf_commands/paths.rs
+++ b/src/simpleaf_commands/paths.rs
@@ -1,6 +1,6 @@
 use crate::utils::prog_utils::*;
 
-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 use serde_json::json;
 use std::fs;
 use std::path::PathBuf;
@@ -30,7 +30,9 @@ pub fn set_paths(af_home_path: PathBuf, set_path_args: SetPathOpts) -> anyhow::R
 
     let have_mapper = rp.salmon.is_some() || rp.piscem.is_some();
     if !have_mapper {
-        bail!("Suitable executable for piscem or salmon not found — at least one of these must be available.");
+        bail!(
+            "Suitable executable for piscem or salmon not found — at least one of these must be available."
+        );
     }
     if rp.alevin_fry.is_none() {
         bail!("Suitable alevin_fry executable not found.");
diff --git a/src/simpleaf_commands/quant.rs b/src/simpleaf_commands/quant.rs
index 1a1a1ca..e2e3ecd 100644
--- a/src/simpleaf_commands/quant.rs
+++ b/src/simpleaf_commands/quant.rs
@@ -5,10 +5,10 @@ use crate::utils::prog_parsing_utils;
 use crate::utils::prog_utils;
 use crate::utils::prog_utils::ReqProgs;
 
-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 use serde_json::json;
 use std::collections::HashMap;
-use std::io::{BufRead, BufReader, BufWriter, Write};
+use std::io::{BufRead, BufReader, BufWriter, Read, Write};
 use std::path::{Path, PathBuf};
 use std::time::{Duration, Instant};
 use tracing::{error, info, warn};
@@ -17,7 +17,9 @@ use super::MapQuantOpts;
 use crate::utils::chem_utils::ExpectedOri;
 use crate::utils::constants::{CHEMISTRIES_PATH, NUM_SAMPLE_LINES};
 
-fn get_generic_buf_reader(ipath: &PathBuf) -> anyhow::Result<impl BufRead> {
+/// Open a permit-list file with transparent compression handling and return a
+/// buffered reader over the resulting stream.
+fn get_generic_buf_reader(ipath: &Path) -> anyhow::Result<BufReader<Box<dyn Read>>> {
     let (reader, compression) = niffler::from_path(ipath)
         .with_context(|| format!("Could not open requsted file {}", ipath.display()))?;
     match compression {
@@ -42,7 +44,7 @@ impl CBListInfo {
         }
     }
     // we iterate the file to see if it only has cb or with affiliated info (by separator \t).
-    fn init(&mut self, pl_file: &PathBuf, output: &PathBuf) -> anyhow::Result<()> {
+    fn init(&mut self, pl_file: &Path, output: &Path) -> anyhow::Result<()> {
         // open pl_file
         let br = get_generic_buf_reader(pl_file)
             .with_context(|| "failed to successfully open permit-list file.")?;
@@ -61,9 +63,11 @@ impl CBListInfo {
         // if single column, we are good. Otherwise, we need to write the first column to the final file
         let final_file: PathBuf;
         if is_single_column {
-            final_file = pl_file.clone();
+            final_file = pl_file.to_path_buf();
         } else {
-            info!("found multiple columns in the barcode list tsv file, use the first column as the barcodes.");
+            info!(
+                "found multiple columns in the barcode list tsv file, use the first column as the barcodes."
+            );
 
             // create output dir if doesn't exist
             if !output.exists() {
@@ -93,7 +97,7 @@ impl CBListInfo {
             }
         }
 
-        self.init_file = pl_file.clone();
+        self.init_file = pl_file.to_path_buf();
         self.final_file = final_file;
         self.is_single_column = is_single_column;
         Ok(())
@@ -108,7 +112,9 @@ impl CBListInfo {
 
         // if we are here but the init file doesn't exist, then we have a problem
         if !self.init_file.exists() {
-            bail!("The CBListInfo struct was not properly initialized. Please report this issue on GitHub.");
+            bail!(
+                "The CBListInfo struct was not properly initialized. Please report this issue on GitHub."
+            );
         }
 
         // if we cannot find the count matrix column files, then complain
@@ -279,14 +285,14 @@ fn resolve_quant_setup(
     rp.issue_recommended_version_messages();
 
     let index_meta = index_meta::resolve_quant_index(opts.index.clone(), opts.use_piscem)?;
-    if t2g_map.is_none() {
-        if let Some(t2g_loc) = index_meta.inferred_t2g.clone() {
-            info!(
-                "found local t2g file at {}, will attempt to use this since none was provided explicitly",
-                t2g_loc.display()
-            );
-            t2g_map = Some(t2g_loc);
-        }
+    if t2g_map.is_none()
+        && let Some(t2g_loc) = index_meta.inferred_t2g.clone()
+    {
+        info!(
+            "found local t2g file at {}, will attempt to use this since none was provided explicitly",
+            t2g_loc.display()
+        );
+        t2g_map = Some(t2g_loc);
     }
     let index_type = index_meta.index_type;
     let gene_id_to_name_opt = index_meta.inferred_gene_id_to_name;
@@ -300,12 +306,16 @@ fn resolve_quant_setup(
     match index_type {
         IndexType::Piscem(_) => {
             if rp.piscem.is_none() {
-                bail!("A piscem index is being used, but no piscem executable is provided. Please set one with `simpleaf set-paths`.");
+                bail!(
+                    "A piscem index is being used, but no piscem executable is provided. Please set one with `simpleaf set-paths`."
+                );
             }
         }
         IndexType::Salmon(_) => {
             if rp.salmon.is_none() {
-                bail!("A salmon index is being used, but no salmon executable is provided. Please set one with `simpleaf set-paths`.");
+                bail!(
+                    "A salmon index is being used, but no salmon executable is provided. Please set one with `simpleaf set-paths`."
+                );
             }
         }
         IndexType::NoIndex => {}
@@ -439,21 +449,24 @@ fn run_mapping_stage(
                     .arg("-o")
                     .arg(&map_output);
 
-                if let Ok(_piscem_ver) = prog_utils::check_version_constraints(
+                match prog_utils::check_version_constraints(
                     "piscem",
                     ">=0.7.0, <1.0.0",
                     &piscem_prog_info.version,
                 ) {
-                    push_advanced_piscem_options(&mut piscem_quant_cmd, opts)?;
-                } else {
-                    info!(
-                        r#"
+                    Ok(_piscem_ver) => {
+                        push_advanced_piscem_options(&mut piscem_quant_cmd, opts)?;
+                    }
+                    Err(_) => {
+                        info!(
+                            r#"
 Simpleaf is currently using piscem version {}, but you must be using version >= 0.7.0 in order to use the 
 mapping options specific to this, or later versions. If you wish to use these options, please upgrade your 
 piscem version or, if you believe you have a sufficiently new version installed, update the executable 
 being used by simpleaf"#,
-                        &piscem_prog_info.version
-                    );
+                            &piscem_prog_info.version
+                        );
+                    }
                 }
 
                 let frag_lib_xform = add_or_transform_fragment_library(
@@ -466,14 +479,11 @@ being used by simpleaf"#,
 
                 let map_cmd_string = prog_utils::get_cmd_line_string(&piscem_quant_cmd);
                 info!("piscem map-sc cmd : {}", map_cmd_string);
-                let mut input_files = vec![
-                    index_base.with_extension("ctab"),
-                    index_base.with_extension("refinfo"),
-                    index_base.with_extension("sshash"),
-                ];
-                input_files.extend_from_slice(reads1);
-                input_files.extend_from_slice(reads2);
-                prog_utils::check_files_exist(&input_files)?;
+                prog_utils::check_piscem_index_files(index_base.as_path())?;
+                let mut read_inputs = Vec::new();
+                read_inputs.extend_from_slice(reads1);
+                read_inputs.extend_from_slice(reads2);
+                prog_utils::check_files_exist(&read_inputs)?;
 
                 let map_start = Instant::now();
                 exec::run_checked(&mut piscem_quant_cmd, "piscem [mapping phase]")?;
@@ -486,7 +496,13 @@ being used by simpleaf"#,
                                 let failed = xform_stats.failed_parsing;
                                 info!(
                                     "seq_geom_xform : observed {} input fragments. {} ({:.2}%) of them failed to parse and were not transformed",
-                                    total, failed, if total > 0 { (failed as f64) / (total as f64) } else { 0_f64 } * 100_f64
+                                    total,
+                                    failed,
+                                    if total > 0 {
+                                        (failed as f64) / (total as f64)
+                                    } else {
+                                        0_f64
+                                    } * 100_f64
                                 );
                             }
                             Err(e) => {
@@ -557,7 +573,13 @@ being used by simpleaf"#,
                                 let failed = xform_stats.failed_parsing;
                                 info!(
                                     "seq_geom_xform : observed {} input fragments. {} ({:.2}%) of them failed to parse and were not transformed",
-                                    total, failed, if total > 0 { (failed as f64) / (total as f64) } else { 0_f64 } * 100_f64
+                                    total,
+                                    failed,
+                                    if total > 0 {
+                                        (failed as f64) / (total as f64)
+                                    } else {
+                                        0_f64
+                                    } * 100_f64
                                 );
                             }
                             Err(e) => {
@@ -576,7 +598,9 @@ being used by simpleaf"#,
                 })
             }
             IndexType::NoIndex => {
-                bail!("Cannot perform mapping an quantification without known (piscem or salmon) index!");
+                bail!(
+                    "Cannot perform mapping an quantification without known (piscem or salmon) index!"
+                );
             }
         }
     } else {
@@ -689,7 +713,9 @@ fn run_quant_stage(
         let target_path = gpl_output.join("gene_id_to_name.tsv");
         match std::fs::copy(gene_name_path, &target_path) {
             Ok(_) => {
-                info!("successfully copied the gene_name_to_id.tsv file into the quantification directory.");
+                info!(
+                    "successfully copied the gene_name_to_id.tsv file into the quantification directory."
+                );
             }
             Err(err) => {
                 warn!(
diff --git a/src/simpleaf_commands/refresh.rs b/src/simpleaf_commands/refresh.rs
index 24e21e7..7db8926 100644
--- a/src/simpleaf_commands/refresh.rs
+++ b/src/simpleaf_commands/refresh.rs
@@ -1,7 +1,7 @@
 use crate::utils::prog_utils::*;
 
 use anyhow::Context;
-use serde_json::{json, Value};
+use serde_json::{Value, json};
 use std::path::PathBuf;
 
 pub fn refresh_prog_info(af_home_path: PathBuf) -> anyhow::Result<()> {
diff --git a/src/simpleaf_commands/workflow.rs b/src/simpleaf_commands/workflow.rs
index 9dad7a8..156e40c 100644
--- a/src/simpleaf_commands/workflow.rs
+++ b/src/simpleaf_commands/workflow.rs
@@ -1,15 +1,15 @@
-use crate::utils::jrsonnet_main::{parse_jsonnet, ParseAction};
+use crate::utils::jrsonnet_main::{ParseAction, parse_jsonnet};
 use crate::utils::prog_utils;
 use crate::utils::prog_utils::ReqProgs;
 use crate::utils::workflow_utils;
 
-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 use cmd_lib::run_fun;
-use serde_json::json;
 use serde_json::Value;
+use serde_json::json;
 use std::fs;
 use std::path::Path;
-use tabled::{settings::Style, Table, Tabled};
+use tabled::{Table, Tabled, settings::Style};
 use tracing::{info, warn};
 
 use super::WorkflowCommands;
@@ -71,20 +71,20 @@ fn resolve_workflow_run_inputs<T: AsRef<Path>>(
         let output_path = workflow_utils::get_output_path(&instantiated_manifest)?;
 
         // If both command-line and template outputs are set, warn if template wins.
-        if let Some(requested_output_path) = output_opt {
-            if requested_output_path != output_path {
-                warn!(
-                    r#"The output path {} was requested via the command line, but 
+        if let Some(requested_output_path) = output_opt
+            && requested_output_path != output_path
+        {
+            warn!(
+                r#"The output path {} was requested via the command line, but 
                             the output path {} was resolved from the workflow template.
                             In this case, since the output variable is not used when instantiating 
                             the template, the value ({}) present in the template must be used.
                             Please be aware that {} will not be used for output!"#,
-                    requested_output_path.display(),
-                    output_path.display(),
-                    output_path.display(),
-                    requested_output_path.display()
-                );
-            }
+                requested_output_path.display(),
+                output_path.display(),
+                output_path.display(),
+                requested_output_path.display()
+            );
         }
 
         return Ok(ResolvedWorkflowRun {
@@ -203,7 +203,9 @@ pub fn patch_manifest_or_template<T: AsRef<Path>>(
             }
         }
         _ => {
-            bail!("The patch function received a non-patch command. This should not happen. Please report this issue.");
+            bail!(
+                "The patch function received a non-patch command. This should not happen. Please report this issue."
+            );
         }
     }
     Ok(())
@@ -246,7 +248,9 @@ pub fn list_workflows<T: AsRef<Path>>(af_home_path: T) -> anyhow::Result<()> {
     }
     println!("{}", Table::new(workflow_entries).with(Style::rounded()));
     if print_na_cap {
-        println!("* : could not parse uninstantiated template to attempt extracting the version, please see [shorturl.at/gouB1] for further details");
+        println!(
+            "* : could not parse uninstantiated template to attempt extracting the version, please see [shorturl.at/gouB1] for further details"
+        );
     }
     Ok(())
 }
@@ -304,10 +308,14 @@ pub fn get_workflow<T: AsRef<Path>>(
              */
             match version_str.as_ref() {
                 "N/A*" => {
-                    warn!("couldn't evaluate the requested template to fetch the version number, it may be a deprecated version; consider refreshing.");
+                    warn!(
+                        "couldn't evaluate the requested template to fetch the version number, it may be a deprecated version; consider refreshing."
+                    );
                 }
                 "missing" => {
-                    warn!("the template that was requested to be fetched appeared to be missing a version number, but this field should be present; consider refreshing or further investigating the issue.");
+                    warn!(
+                        "the template that was requested to be fetched appeared to be missing a version number, but this field should be present; consider refreshing or further investigating the issue."
+                    );
                 }
                 ver => {
                     const REQ_VER: &str = "0.1.0";
@@ -316,7 +324,10 @@ pub fn get_workflow<T: AsRef<Path>>(
                             info!("getting workflow {} version {}", name, ver);
                         }
                         Err(_) => {
-                            warn!("the version parsed from the workflow you are attempting to get is {}, but it should be at least {}.", version_str, REQ_VER);
+                            warn!(
+                                "the version parsed from the workflow you are attempting to get is {}, but it should be at least {}.",
+                                version_str, REQ_VER
+                            );
                         }
                     }
                 }
@@ -339,7 +350,10 @@ pub fn get_workflow<T: AsRef<Path>>(
                     ) {
                         Ok(_) => {}
                         Err(e) => {
-                            bail!("Could not copy workflow files to the output folder. The error was: {}", e);
+                            bail!(
+                                "Could not copy workflow files to the output folder. The error was: {}",
+                                e
+                            );
                         }
                     };
                 }
diff --git a/src/utils/af_utils.rs b/src/utils/af_utils.rs
index c973471..e4fae0a 100644
--- a/src/utils/af_utils.rs
+++ b/src/utils/af_utils.rs
@@ -1,4 +1,4 @@
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{Context, Result, anyhow, bail};
 // use cmd_lib::run_fun;
 use phf::phf_map;
 use seq_geom_parser::{AppendToCmdArgs, FragmentGeomDesc, PiscemGeomDesc, SalmonSeparateGeomDesc};
@@ -13,10 +13,10 @@ use strum_macros::EnumIter;
 use tracing::{debug, error, info, warn};
 
 use crate::atac::commands::AtacChemistry;
-use crate::utils::chem_utils::{get_single_custom_chem_from_file, CustomChemistry, ExpectedOri};
+use crate::utils::chem_utils::{CustomChemistry, ExpectedOri, get_single_custom_chem_from_file};
 use crate::utils::{self, prog_utils};
 
-use super::chem_utils::{QueryInRegistry, LOCAL_PL_PATH_KEY, REMOTE_PL_URL_KEY};
+use super::chem_utils::{LOCAL_PL_PATH_KEY, QueryInRegistry, REMOTE_PL_URL_KEY};
 
 /// The map from pre-specified chemistry types that salmon knows
 /// to the corresponding command line flag that salmon should be passed
@@ -71,7 +71,9 @@ impl IndexType {
             IndexType::Salmon(_) => KNOWN_CHEM_MAP_SALMON.contains_key(chem),
             IndexType::Piscem(_) => KNOWN_CHEM_MAP_PISCEM.contains_key(chem),
             IndexType::NoIndex => {
-                info!("Since we are dealing with an already-mapped RAD file, the user is responsible for ensuring that a valid chemistry definition was provided during mapping");
+                info!(
+                    "Since we are dealing with an already-mapped RAD file, the user is responsible for ensuring that a valid chemistry definition was provided during mapping"
+                );
                 KNOWN_CHEM_MAP_SALMON.contains_key(chem) || KNOWN_CHEM_MAP_PISCEM.contains_key(chem)
             }
         }
@@ -81,7 +83,9 @@ impl IndexType {
             IndexType::Salmon(_) => KNOWN_CHEM_MAP_PISCEM.contains_key(chem),
             IndexType::Piscem(_) => KNOWN_CHEM_MAP_SALMON.contains_key(chem),
             IndexType::NoIndex => {
-                info!("Since we are dealing with an already-mapped RAD file, the user is responsible for ensuring that a valid chemistry definition was provided during mapping");
+                info!(
+                    "Since we are dealing with an already-mapped RAD file, the user is responsible for ensuring that a valid chemistry definition was provided during mapping"
+                );
                 !self.is_known_chem(chem)
             }
         }
@@ -269,21 +273,29 @@ impl Chemistry {
             "10xv3-5p" => match index_type {
                 IndexType::Piscem(_) => Chemistry::Rna(RnaChemistry::TenxV35P),
                 IndexType::NoIndex => {
-                    info!("The 10xv3-5p chemistry flag is designed only for the piscem index. Please make sure the RAD file you are provided was mapped using piscem; otherwise the fragment orientations may not be treated correctly");
+                    info!(
+                        "The 10xv3-5p chemistry flag is designed only for the piscem index. Please make sure the RAD file you are provided was mapped using piscem; otherwise the fragment orientations may not be treated correctly"
+                    );
                     Chemistry::Rna(RnaChemistry::TenxV35P)
                 }
                 IndexType::Salmon(_) => {
-                    bail!("The 10xv3-5p chemistry flag is not suppored under the salmon mapper. Instead please use the 10xv3 chemistry (which will treat samples as single-end).");
+                    bail!(
+                        "The 10xv3-5p chemistry flag is not suppored under the salmon mapper. Instead please use the 10xv3 chemistry (which will treat samples as single-end)."
+                    );
                 }
             },
             "10xv2-5p" => match index_type {
                 IndexType::Piscem(_) => Chemistry::Rna(RnaChemistry::TenxV25P),
                 IndexType::NoIndex => {
-                    info!("The 10xv2-5p chemistry flag is designed only for the piscem index. Please make sure the RAD file you are provided was mapped using piscem; otherwise the fragment orientations may not be treated correctly");
+                    info!(
+                        "The 10xv2-5p chemistry flag is designed only for the piscem index. Please make sure the RAD file you are provided was mapped using piscem; otherwise the fragment orientations may not be treated correctly"
+                    );
                     Chemistry::Rna(RnaChemistry::TenxV25P)
                 }
                 IndexType::Salmon(_) => {
-                    bail!("The 10xv2-5p chemistry flag is not suppored under the salmon mapper. Instead please use the 10xv2 chemistry (which will treat samples as single-end).");
+                    bail!(
+                        "The 10xv2-5p chemistry flag is not suppored under the salmon mapper. Instead please use the 10xv2 chemistry (which will treat samples as single-end)."
+                    );
                 }
             },
             s => {
@@ -306,7 +318,12 @@ impl Chemistry {
                     // not supported using this mapper (i.e. if it is a piscem-specific chem
                     // and the mapper is salmon or vice versa).
                     if index_type.is_unsupported_known_chem(s) {
-                        bail!("The chemistry {} is not supported by the given mapper {}. Please switch to {}, provide the explicit geometry, or add this chemistry to the registry with the \"chemistry add\" command.", s, index_type.as_str(), index_type.counterpart().as_str());
+                        bail!(
+                            "The chemistry {} is not supported by the given mapper {}. Please switch to {}, provide the explicit geometry, or add this chemistry to the registry with the \"chemistry add\" command.",
+                            s,
+                            index_type.as_str(),
+                            index_type.counterpart().as_str()
+                        );
                     }
                     Chemistry::Custom(CustomChemistry::simple_custom(s).with_context(|| {
                         format!(
@@ -417,7 +434,11 @@ pub fn validate_geometry(geo: &str) -> Result<()> {
         match fg {
             Ok(_fg) => Ok(()),
             Err(e) => {
-                bail!("Could not parse geometry {}. Please ensure that it is a valid geometry definition wrapped by quotes. The error message was: {:?}", geo, e);
+                bail!(
+                    "Could not parse geometry {}. Please ensure that it is a valid geometry definition wrapped by quotes. The error message was: {:?}",
+                    geo,
+                    e
+                );
             }
         }
     }
@@ -429,13 +450,19 @@ pub fn validate_geometry(geo: &str) -> Result<()> {
 /// will return an error (as there is no corresponding `FragmentGeomDesc` in general).
 pub fn extract_geometry(geo: &str) -> Result<FragmentGeomDesc> {
     if let Some(builtin_kwd) = is_builtin(geo) {
-        bail!("The provided geometry is a builtin keyword [{}] (preceeded by \"__\") and so no attempt was made to parse it", builtin_kwd);
+        bail!(
+            "The provided geometry is a builtin keyword [{}] (preceeded by \"__\") and so no attempt was made to parse it",
+            builtin_kwd
+        );
     } else {
         let fg = FragmentGeomDesc::try_from(geo);
         match fg {
             Ok(fg) => Ok(fg),
             Err(e) => {
-                error!("Could not parse geometry {}. Please ensure that it is a valid geometry definition wrapped by quotes. The error message was: {:?}", geo, e);
+                error!(
+                    "Could not parse geometry {}. Please ensure that it is a valid geometry definition wrapped by quotes. The error message was: {:?}",
+                    geo, e
+                );
                 Err(e)
             }
         }
@@ -524,7 +551,10 @@ pub fn get_permit_if_absent(af_home: &Path, chem: &Chemistry) -> Result<PermitLi
                 // null, then we don't even have a place to put this file
                 // when we download it, so it's an error.
                 None | Some(Value::Null) => {
-                    error!("No permit list is registered for {}, so one cannot be used automatically. You should either provide a permit list directly on the command line, or re-add this chemistry to the registry with a permit list.", chem.registry_key());
+                    error!(
+                        "No permit list is registered for {}, so one cannot be used automatically. You should either provide a permit list directly on the command line, or re-add this chemistry to the registry with a permit list.",
+                        chem.registry_key()
+                    );
                     bail!("No registered permit list available");
                 }
                 Some(Value::String(lpath)) => {
@@ -533,11 +563,17 @@ pub fn get_permit_if_absent(af_home: &Path, chem: &Chemistry) -> Result<PermitLi
                     if expected_file_path.is_file() {
                         return Ok(PermitListResult::AlreadyPresent(expected_file_path));
                     } else {
-                        info!("Expected {} but didn't find it, will try to download it using a remote url.", expected_file_path.display());
+                        info!(
+                            "Expected {} but didn't find it, will try to download it using a remote url.",
+                            expected_file_path.display()
+                        );
                     }
                 }
                 _ => {
-                    error!("Expected a JSON string associated with the {} key, but didn't find one; cannot proceed.", LOCAL_PL_PATH_KEY);
+                    error!(
+                        "Expected a JSON string associated with the {} key, but didn't find one; cannot proceed.",
+                        LOCAL_PL_PATH_KEY
+                    );
                     bail!("Wrong JSON type");
                 }
             }
@@ -568,12 +604,21 @@ Please consider removing and re-adding this chemistry with a valid permit list."
                         .to_str()
                         .ok_or(anyhow!("cannot convert expected filename to proper string"))?;
                     if hash.to_string() != expected_hash {
-                        warn!("The permit list file for {}, obtained from the provided remote url {}, does not match the expected hash {} (the observed hash was {})", chem.registry_key(), rpath, expected_hash, hash.to_string());
+                        warn!(
+                            "The permit list file for {}, obtained from the provided remote url {}, does not match the expected hash {} (the observed hash was {})",
+                            chem.registry_key(),
+                            rpath,
+                            expected_hash,
+                            hash.to_string()
+                        );
                     }
                     Ok(PermitListResult::DownloadSuccessful(expected_file_path))
                 }
                 _ => {
-                    error!("Expected a JSON string associated with the {} key, but didn't find one; cannot proceed.", REMOTE_PL_URL_KEY);
+                    error!(
+                        "Expected a JSON string associated with the {} key, but didn't find one; cannot proceed.",
+                        REMOTE_PL_URL_KEY
+                    );
                     bail!("Wrong JSON type");
                 }
             }
@@ -620,7 +665,9 @@ pub fn add_or_transform_fragment_library(
 ) -> Result<FragmentTransformationType> {
     let known_chem = match mapper_type {
         MapperType::MappedRadFile => {
-            bail!("Cannot add_or_transform_fragment library when dealing with an already-mapped RAD file.");
+            bail!(
+                "Cannot add_or_transform_fragment library when dealing with an already-mapped RAD file."
+            );
         }
         MapperType::Piscem => KNOWN_CHEM_MAP_PISCEM.contains_key(fragment_geometry_str),
         MapperType::Salmon => KNOWN_CHEM_MAP_SALMON.contains_key(fragment_geometry_str),
diff --git a/src/utils/chem_utils.rs b/src/utils/chem_utils.rs
index 5893bbb..ef6790b 100644
--- a/src/utils/chem_utils.rs
+++ b/src/utils/chem_utils.rs
@@ -1,6 +1,6 @@
 use crate::utils::af_utils::{extract_geometry, parse_resource_json_file, validate_geometry};
 use crate::utils::constants::*;
-use anyhow::{anyhow, Result};
+use anyhow::{Result, anyhow};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use std::collections::HashMap;
@@ -162,14 +162,14 @@ impl fmt::Display for CustomChemistry {
             writeln!(f, "{}\t: {}", REMOTE_PL_URL_KEY, remote_pl_url)?;
         }
 
-        if let Some(serde_json::Value::Object(meta)) = self.meta() {
-            if !meta.is_empty() {
-                writeln!(f, "meta\t: {{")?;
-                for (k, v) in meta.iter() {
-                    writeln!(f, "  {}\t: {:#}", k, v)?;
-                }
-                writeln!(f, "}}")?;
+        if let Some(serde_json::Value::Object(meta)) = self.meta()
+            && !meta.is_empty()
+        {
+            writeln!(f, "meta\t: {{")?;
+            for (k, v) in meta.iter() {
+                writeln!(f, "  {}\t: {:#}", k, v)?;
             }
+            writeln!(f, "}}")?;
         }
         Ok(())
     }
diff --git a/src/utils/jrsonnet_main.rs b/src/utils/jrsonnet_main.rs
index 316738b..81d70e3 100644
--- a/src/utils/jrsonnet_main.rs
+++ b/src/utils/jrsonnet_main.rs
@@ -2,13 +2,12 @@
 // https://github.com/CertainLach/jrsonnet/blob/master/cmds/jrsonnet/src/main.rs
 
 use crate::utils::workflow_utils::JsonPatch;
-use anyhow::{anyhow, bail, Context};
+use anyhow::{Context, anyhow, bail};
 use clap::Parser;
 use jrsonnet_cli::{GcOpts, ManifestOpts, MiscOpts, OutputOpts, StdOpts, TlaOpts, TraceOpts};
 use jrsonnet_evaluator::{
-    apply_tla,
+    State, apply_tla,
     error::{Error as JrError, ErrorKind},
-    State,
 };
 use std::path::{Path, PathBuf};
 
@@ -98,7 +97,9 @@ pub fn parse_jsonnet(
     // get main.jsonnet file path
     let main_jsonnet_file_path = utils_dir.join("main.jsonnet");
     if !main_jsonnet_file_path.exists() {
-        bail!("Could not find main.jsonnet file protocol-asturay; Please update it by invoking `simpleaf workflow refresh`")
+        bail!(
+            "Could not find main.jsonnet file protocol-asturay; Please update it by invoking `simpleaf workflow refresh`"
+        )
     }
     let main_jsonnet_file_str = main_jsonnet_file_path.to_str().with_context(|| {
         format!(
@@ -193,7 +194,8 @@ impl From<ErrorKind> for Error {
 fn main_catch(opts: Opts) -> anyhow::Result<String> {
     let s = State::default();
     let trace = opts.trace.trace_format();
-    match main_real(&s, opts) {
+    let eval_result = main_real(&s, opts);
+    match eval_result {
         Ok(js) => Ok(js),
         Err(e) => {
             if let Error::Evaluation(e) = e {
diff --git a/src/utils/prog_utils.rs b/src/utils/prog_utils.rs
index 5528471..91f1d2d 100644
--- a/src/utils/prog_utils.rs
+++ b/src/utils/prog_utils.rs
@@ -1,7 +1,8 @@
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{Context, Result, anyhow, bail};
 use cmd_lib::run_fun;
 use semver::{Version, VersionReq};
 use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
 use std::env;
 use std::ffi::{OsStr, OsString};
 use std::path::{Path, PathBuf};
@@ -11,6 +12,8 @@ use tracing::{debug, error, info, warn};
 use ureq::ResponseExt;
 use which::which;
 
+use file_requirements::{FileRequirementBuildError, FileRequirementBuilder};
+
 // The below functions are taken from the [`execute`](https://crates.io/crates/execute)
 // crate.
 
@@ -235,9 +238,12 @@ impl ReqProgs {
             if desired_ver.matches(&current_ver) {
                 // nothing to do here
             } else {
-                warn!("It is recommended to use piscem version {}, but currently version {} is being used. \
+                warn!(
+                    "It is recommended to use piscem version {}, but currently version {} is being used. \
                        Please consider installing the latest version of piscem and setting simpleaf to use this \
-                       new version by running the `refresh-prog-info` command.", &desired_ver, &current_ver);
+                       new version by running the `refresh-prog-info` command.",
+                    &desired_ver, &current_ver
+                );
             }
         }
     }
@@ -279,7 +285,10 @@ pub fn check_version_constraints_from_output<S1: AsRef<str>>(
             let x = vs.split_whitespace();
             if let Some(version) = x.last() {
                 let ver = if version.split(".").count() > 3 {
-                    warn!("version info {} is not a valid semver (more than 3 dotted version parts; looking only at the major, minor & patch versions).", version);
+                    warn!(
+                        "version info {} is not a valid semver (more than 3 dotted version parts; looking only at the major, minor & patch versions).",
+                        version
+                    );
                     version
                         .split(".")
                         .take(3)
@@ -373,20 +382,20 @@ pub fn get_required_progs_from_paths(
 
     let opt_salmon = match salmon_exe {
         Some(p) => Some(p),
-        None => {
-            match get_which_executable("salmon") {
-                Ok(p) => Some(p),
-                Err(e) => match &opt_piscem {
-                    None => {
-                        return Err(e);
-                    }
-                    Some(_) => {
-                        info!("could not find salmon executable, only piscem will be usable as a mapper.");
-                        None
-                    }
-                },
-            }
-        }
+        None => match get_which_executable("salmon") {
+            Ok(p) => Some(p),
+            Err(e) => match &opt_piscem {
+                None => {
+                    return Err(e);
+                }
+                Some(_) => {
+                    info!(
+                        "could not find salmon executable, only piscem will be usable as a mapper."
+                    );
+                    None
+                }
+            },
+        },
     };
 
     // We should only get to this point if we have at least one of piscem and salmon, sanity
@@ -408,15 +417,15 @@ pub fn get_required_progs_from_paths(
 
     let opt_macs = match macs_exe {
         Some(p) => Some(p),
-        None => {
-            match get_which_executable("macs3") {
-                Ok(p) => Some(p),
-                Err(_e) => {
-                    info!("Could not find macs3 executable, peak calling cannot be peformed by simpleaf");
-                    None
-                }
+        None => match get_which_executable("macs3") {
+            Ok(p) => Some(p),
+            Err(_e) => {
+                info!(
+                    "Could not find macs3 executable, peak calling cannot be peformed by simpleaf"
+                );
+                None
             }
-        }
+        },
     };
 
     if let Some(piscem) = opt_piscem {
@@ -472,34 +481,67 @@ pub fn get_required_progs() -> Result<ReqProgs> {
     get_required_progs_from_paths(salmon_exe, piscem_exe, alevin_fry_exe, macs_exe)
 }
 
+/// Check that all files in `file_vec` exist.
+///
+/// This maintains backwards compatibility for existing call sites by expressing
+/// the check as a conjunction of file terms in the generic file-requirement
+/// engine.
 pub fn check_files_exist(file_vec: &[PathBuf]) -> Result<()> {
-    let mut all_valid = true;
+    let mut builder = FileRequirementBuilder::new();
+    let mut seen = HashSet::new();
     for fb in file_vec {
-        let er = fb.as_path().try_exists();
-        match er {
-            Ok(true) => {
-                // do nothing
-            }
-            Ok(false) => {
-                error!(
-                    "Required input file at path {} was not found.",
-                    fb.display()
-                );
-                all_valid = false;
-            }
-            Err(e) => {
-                error!("{:#?}", e);
-                all_valid = false;
-            }
+        if !seen.insert(fb.clone()) {
+            continue;
         }
+        builder
+            .require_file(fb)
+            .map_err(map_file_requirement_build_err)?;
     }
+    builder
+        .build()
+        .check()
+        .map_err(|e| anyhow!("Required input files were missing; cannot proceed! {}", e))
+}
 
-    if !all_valid {
-        return Err(anyhow!(
-            "Required input files were missing; cannot proceed!"
-        ));
-    }
-    Ok(())
+/// Check that a piscem index base contains all required files for either supported
+/// on-disk layout:
+///
+/// 1. C++ layout: includes `<base>.sshash`
+/// 2. Rust layout: includes both `<base>.ssi` and `<base>.ssi.mphf`
+///
+/// In both cases the common files must exist (`.ctab` and `.refinfo`).
+pub fn check_piscem_index_files(index_base: &Path) -> Result<()> {
+    let mut builder = FileRequirementBuilder::new();
+    builder
+        .require_all(|all| {
+            all.require_file(index_base.with_extension("ctab"))?;
+            all.require_file(index_base.with_extension("refinfo"))?;
+            Ok(())
+        })
+        .map_err(map_file_requirement_build_err)?;
+    builder
+        .require_any(|any| {
+            any.require_file(index_base.with_extension("sshash"))?;
+            any.require_all(|rust_all| {
+                rust_all.require_file(index_base.with_extension("ssi"))?;
+                rust_all.require_file(index_base.with_extension("ssi.mphf"))?;
+                Ok(())
+            })?;
+            Ok(())
+        })
+        .map_err(map_file_requirement_build_err)?;
+
+    builder.build().check().map_err(|e| {
+        anyhow!(
+            "Required piscem index files were missing or incomplete for base {}. {}",
+            index_base.display(),
+            e
+        )
+    })
+}
+
+fn map_file_requirement_build_err(err: FileRequirementBuildError) -> anyhow::Error {
+    anyhow!("Invalid file requirement expression: {}", err)
 }
 
 pub fn read_json(json_path: &Path) -> anyhow::Result<serde_json::Value> {
@@ -525,3 +567,56 @@ pub fn inspect_af_home(af_home_path: &Path) -> anyhow::Result<serde_json::Value>
         )),
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{check_files_exist, check_piscem_index_files};
+    use std::fs;
+    use tempfile::tempdir;
+
+    #[test]
+    fn check_files_exist_ignores_duplicate_entries() {
+        let td = tempdir().expect("failed to create tempdir");
+        let file = td.path().join("a.txt");
+        fs::write(&file, "").expect("failed to write test file");
+        let files = vec![file.clone(), file];
+        check_files_exist(&files).expect("duplicate terms should not fail legacy file checks");
+    }
+
+    #[test]
+    fn check_piscem_index_files_accepts_cpp_layout() {
+        let td = tempdir().expect("failed to create tempdir");
+        let base = td.path().join("piscem_idx");
+        fs::write(base.with_extension("ctab"), "").expect("failed to write ctab");
+        fs::write(base.with_extension("refinfo"), "").expect("failed to write refinfo");
+        fs::write(base.with_extension("sshash"), "").expect("failed to write sshash");
+        check_piscem_index_files(base.as_path()).expect("cpp index layout should be accepted");
+    }
+
+    #[test]
+    fn check_piscem_index_files_accepts_rust_layout() {
+        let td = tempdir().expect("failed to create tempdir");
+        let base = td.path().join("piscem_idx");
+        fs::write(base.with_extension("ctab"), "").expect("failed to write ctab");
+        fs::write(base.with_extension("refinfo"), "").expect("failed to write refinfo");
+        fs::write(base.with_extension("ssi"), "").expect("failed to write ssi");
+        fs::write(base.with_extension("ssi.mphf"), "").expect("failed to write ssi.mphf");
+        check_piscem_index_files(base.as_path()).expect("rust index layout should be accepted");
+    }
+
+    #[test]
+    fn check_piscem_index_files_rejects_partial_layout() {
+        let td = tempdir().expect("failed to create tempdir");
+        let base = td.path().join("piscem_idx");
+        fs::write(base.with_extension("ctab"), "").expect("failed to write ctab");
+        fs::write(base.with_extension("refinfo"), "").expect("failed to write refinfo");
+        fs::write(base.with_extension("ssi"), "").expect("failed to write ssi");
+        let err =
+            check_piscem_index_files(base.as_path()).expect_err("partial rust layout must fail");
+        assert!(
+            format!("{:#}", err).contains("Required piscem index files were missing or incomplete"),
+            "unexpected error: {:#}",
+            err
+        );
+    }
+}
diff --git a/src/utils/workflow_utils.rs b/src/utils/workflow_utils.rs
index 3212c35..248a305 100644
--- a/src/utils/workflow_utils.rs
+++ b/src/utils/workflow_utils.rs
@@ -2,12 +2,12 @@
 // allow multiple registry, just like conda envs
 // find a way to pull files from github directly instead of using local copy of protocol estuary
 
-use anyhow::{anyhow, bail, Context};
+use anyhow::{Context, anyhow, bail};
 use chrono::{DateTime, Local};
 use clap::Parser;
 // use cmd_lib::run_cmd;
 use crate::core::io::write_json_pretty_atomic;
-use serde_json::{json, Map, Value};
+use serde_json::{Map, Value, json};
 use std::boxed::Box;
 use std::collections::HashMap;
 use std::fs;
@@ -269,44 +269,56 @@ be certain you intend to do this.",
                                                 } else {
                                                     v.insert(json!(rec));
                                                 }
-                                            },
+                                            }
                                             ColumnTypeTag::Number => {
                                                 if let Ok(n) = rec.parse::<i64>() {
                                                     v.insert(json!(n));
                                                 } else if let Ok(n) = rec.parse::<f64>() {
                                                     v.insert(json!(n));
                                                 } else {
-                                                    bail!("could not parse {}, which is expected to be a number, as such", rec);
+                                                    bail!(
+                                                        "could not parse {}, which is expected to be a number, as such",
+                                                        rec
+                                                    );
                                                 }
-                                            },
+                                            }
                                             ColumnTypeTag::Boolean => {
                                                 if let Ok(b) = rec.parse::<bool>() {
                                                     v.insert(json!(b));
                                                 } else {
-                                                    bail!("could not parse {}, which is expected to be boolean, as such", rec);
+                                                    bail!(
+                                                        "could not parse {}, which is expected to be boolean, as such",
+                                                        rec
+                                                    );
                                                 }
-                                            },
+                                            }
                                             ColumnTypeTag::Array => {
                                                 let no_pref = rec.strip_prefix('[').with_context(
                                                     || format!("In record {}, array type must begin with [", rec))?;
                                                 let no_suffix = no_pref.strip_suffix(']').with_context(
                                                     || format!("In record {}, array type must end with ]", rec))?;
                                                 let inner = no_suffix.trim();
-                                                let rdr = csv::ReaderBuilder::new().
-                                                    has_headers(false).from_reader(inner.as_bytes());
+                                                let rdr = csv::ReaderBuilder::new()
+                                                    .has_headers(false)
+                                                    .from_reader(inner.as_bytes());
                                                 let array_elems_result = rdr.into_records().next();
-                                                if let Some(Ok(ok_array_elems)) = array_elems_result {
-                                                    let array_elems = ok_array_elems.into_iter()
-                                                        .map( |s| json!(s)).collect::<Vec<serde_json::Value>>();
+                                                if let Some(Ok(ok_array_elems)) = array_elems_result
+                                                {
+                                                    let array_elems = ok_array_elems
+                                                        .into_iter()
+                                                        .map(|s| json!(s))
+                                                        .collect::<Vec<serde_json::Value>>();
                                                     v.insert(serde_json::Value::Array(array_elems));
                                                 }
-                                            },
-                                            _ => bail!("type not supported")
+                                            }
+                                            _ => bail!("type not supported"),
                                         }
                                     }
                                 }
-                            },
-                            _ => bail!("encountered a path that led to a non-object entry; this shouldn't happen")
+                            }
+                            _ => bail!(
+                                "encountered a path that led to a non-object entry; this shouldn't happen"
+                            ),
                         };
                     } else {
                         bail!("Should never query a non-existent path!");
@@ -360,14 +372,14 @@ pub fn validate_manifest_structure(workflow: &Value) -> anyhow::Result<()> {
                         );
                     }
 
-                    if let Some(active_val) = obj.get(SystemFields::Active.as_str()) {
-                        if active_val.as_bool().is_none() {
-                            bail!(
-                                "Malformed workflow entry at {}: `active` must be a boolean, found {:?}.",
-                                path,
-                                active_val
-                            );
-                        }
+                    if let Some(active_val) = obj.get(SystemFields::Active.as_str())
+                        && active_val.as_bool().is_none()
+                    {
+                        bail!(
+                            "Malformed workflow entry at {}: `active` must be a boolean, found {:?}.",
+                            path,
+                            active_val
+                        );
                     }
 
                     let program_name_val = obj
@@ -446,7 +458,9 @@ pub fn get_output_path(manifest: &serde_json::Value) -> anyhow::Result<PathBuf>
         match output {
             Value::String(s) => Ok(std::path::PathBuf::from(s)),
             _ => {
-                bail!("/meta_info/output must have JSON string type, int he manifest, but it did not.")
+                bail!(
+                    "/meta_info/output must have JSON string type, int he manifest, but it did not."
+                )
             }
         }
     } else {
@@ -532,17 +546,17 @@ pub fn update_start_at(v: &Value) -> anyhow::Result<u64> {
     // Check if the previous run was succeed. If yes, then no need to resume
     let succeed = v
         .get("Succeed")
-        .with_context(|| {
-            "Could not get `Execution Terminated Step` from the log file; Cannot resume."
-        })?
+        .with_context(
+            || "Could not get `Execution Terminated Step` from the log file; Cannot resume.",
+        )?
         .as_bool()
         .with_context(|| "cannot parse `Succeed` as bool; Cannot resume.")?;
 
     let start_at = latest_run
         .get("Execution Terminated Step")
-        .with_context(|| {
-            "Could not get `Execution Terminated Step` from the log file; Cannot resume."
-        })?
+        .with_context(
+            || "Could not get `Execution Terminated Step` from the log file; Cannot resume.",
+        )?
         .as_u64()
         .with_context(|| "cannot parse `Execution Terminated Step` as str; Cannot resume.")?;
 
@@ -578,9 +592,9 @@ pub fn get_previous_log<T: AsRef<Path>>(output: T) -> anyhow::Result<Value> {
         }
         Ok(false) => {
             bail!(
-                    "Could not find `simpleaf_workflow_log.json` in the output directory {:?}; Cannot resume.",
-                    output.as_ref()
-                )
+                "Could not find `simpleaf_workflow_log.json` in the output directory {:?}; Cannot resume.",
+                output.as_ref()
+            )
         }
         Err(e) => {
             bail!(e)
@@ -831,9 +845,9 @@ impl SimpleafWorkflow {
                             )
                         })?;
                     pn =
-                        ProgramName::from_str(program_name.as_str().with_context(|| {
-                            "Cannot create ProgramName struct from a program name"
-                        })?);
+                        ProgramName::from_str(program_name.as_str().with_context(
+                            || "Cannot create ProgramName struct from a program name",
+                        )?);
 
                     // if active, then push to execution queue
                     if active {
@@ -844,9 +858,19 @@ impl SimpleafWorkflow {
                             Ok(v) => v,
                             Err(e) => {
                                 if pn.is_external() {
-                                    bail!("Could not parse external command {} for step {}. The error message was: {}", pn, step, e);
+                                    bail!(
+                                        "Could not parse external command {} for step {}. The error message was: {}",
+                                        pn,
+                                        step,
+                                        e
+                                    );
                                 } else {
-                                    bail!("Could not parse simpleaf command {} for step {}. The error message was: {}", pn, step, e);
+                                    bail!(
+                                        "Could not parse simpleaf command {} for step {}. The error message was: {}",
+                                        pn,
+                                        step,
+                                        e
+                                    );
                                 }
                             }
                         };
@@ -1268,7 +1292,9 @@ impl ProgramName {
                 }
             }
         } else {
-            warn!("Found an invalid root layer; Ignored. All root layers must represent a valid simpleaf command.");
+            warn!(
+                "Found an invalid root layer; Ignored. All root layers must represent a valid simpleaf command."
+            );
         };
 
         // check if empty
@@ -1556,19 +1582,19 @@ mod tests {
     use super::ProgramName;
     use super::WFCommand;
     use crate::utils::workflow_utils::SystemFields;
-    use serde_json::{json, Map, Value};
+    use serde_json::{Map, Value, json};
     // use crate::Cli;
     // use crate::Commands;
     // use crate::SimpleafCmdRecord;
     use crate::{
+        Commands,
         utils::{
             prog_utils::{get_cmd_line_string, shell},
             workflow_utils::{
-                execute_commands_in_workflow, initialize_workflow, validate_manifest_structure,
-                CommandRecord, SimpleafWorkflow, WorkflowLog,
+                CommandRecord, SimpleafWorkflow, WorkflowLog, execute_commands_in_workflow,
+                initialize_workflow, validate_manifest_structure,
             },
         },
-        Commands,
     };
     use std::path::{Path, PathBuf};
     use tempfile::tempdir;
diff --git a/tests/cli_smoke.rs b/tests/cli_smoke.rs
index fa016bd..ca465b8 100644
--- a/tests/cli_smoke.rs
+++ b/tests/cli_smoke.rs
@@ -5,12 +5,10 @@ use tempfile::tempdir;
 fn run_simpleaf(args: &[&str]) -> Output {
     let binary = env!("CARGO_BIN_EXE_simpleaf");
     let af_home = tempdir().expect("unable to create temp ALEVIN_FRY_HOME");
-
-    Command::new(binary)
-        .args(args)
-        .env("ALEVIN_FRY_HOME", af_home.path())
-        .output()
-        .expect("failed to execute simpleaf")
+    let mut cmd = Command::new(binary);
+    cmd.args(args);
+    cmd.env("ALEVIN_FRY_HOME", af_home.path());
+    cmd.output().expect("failed to execute simpleaf")
 }
 
 fn assert_parse_error(args: &[&str]) {

From 99d35768578b3b9a195e361ec964ef8eb91f569c Mon Sep 17 00:00:00 2001
From: Rob Patro <rob.patro@gmail.com>
Date: Tue, 24 Feb 2026 00:26:44 -0500
Subject: [PATCH 4/4] add release script, update dist

---
 .github/workflows/release.yml | 109 +++++++------
 Cargo.toml                    |  24 ---
 dist-workspace.toml           |  22 +++
 release.sh                    | 284 ++++++++++++++++++++++++++++++++++
 4 files changed, 367 insertions(+), 72 deletions(-)
 create mode 100644 dist-workspace.toml
 create mode 100755 release.sh

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 314331e..d9aa406 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,4 +1,4 @@
-# This file was autogenerated by cargo-dist: https://opensource.axo.dev/cargo-dist/
+# This file was autogenerated by dist: https://axodotdev.github.io/cargo-dist
 #
 # Copyright 2022-2024, axodotdev
 # SPDX-License-Identifier: MIT or Apache-2.0
@@ -6,7 +6,7 @@
 # CI that:
 #
 # * checks for a Git Tag that looks like a release
-# * builds artifacts with cargo-dist (archives, installers, hashes)
+# * builds artifacts with dist (archives, installers, hashes)
 # * uploads those artifacts to temporary workflow zip
 # * on success, uploads the artifacts to a GitHub Release
 #
@@ -24,10 +24,10 @@ permissions:
 # must be a Cargo-style SemVer Version (must have at least major.minor.patch).
 #
 # If PACKAGE_NAME is specified, then the announcement will be for that
-# package (erroring out if it doesn't have the given version or isn't cargo-dist-able).
+# package (erroring out if it doesn't have the given version or isn't dist-able).
 #
 # If PACKAGE_NAME isn't specified, then the announcement will be for all
-# (cargo-dist-able) packages in the workspace with that version (this mode is
+# (dist-able) packages in the workspace with that version (this mode is
 # intended for workspaces with only one dist-able package, or with all dist-able
 # packages versioned/released in lockstep).
 #
@@ -45,9 +45,9 @@ on:
       - '**[0-9]+.[0-9]+.[0-9]+*'
 
 jobs:
-  # Run 'cargo dist plan' (or host) to determine what tasks we need to do
+  # Run 'dist plan' (or host) to determine what tasks we need to do
   plan:
-    runs-on: "ubuntu-20.04"
+    runs-on: "ubuntu-22.04"
     outputs:
       val: ${{ steps.plan.outputs.manifest }}
       tag: ${{ !github.event.pull_request && github.ref_name || '' }}
@@ -56,19 +56,20 @@ jobs:
     env:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
+          persist-credentials: false
           submodules: recursive
-      - name: Install cargo-dist
+      - name: Install dist
         # we specify bash to get pipefail; it guards against the `curl` command
         # failing. otherwise `sh` won't catch that `curl` returned non-0
         shell: bash
-        run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.21.1/cargo-dist-installer.sh | sh"
-      - name: Cache cargo-dist
-        uses: actions/upload-artifact@v4
+        run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.31.0/cargo-dist-installer.sh | sh"
+      - name: Cache dist
+        uses: actions/upload-artifact@v6
         with:
           name: cargo-dist-cache
-          path: ~/.cargo/bin/cargo-dist
+          path: ~/.cargo/bin/dist
       # sure would be cool if github gave us proper conditionals...
       # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
       # functionality based on whether this is a pull_request, and whether it's from a fork.
@@ -76,12 +77,12 @@ jobs:
       # but also really annoying to build CI around when it needs secrets to work right.)
       - id: plan
         run: |
-          cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
-          echo "cargo dist ran successfully"
+          dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
+          echo "dist ran successfully"
           cat plan-dist-manifest.json
           echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
       - name: "Upload dist-manifest.json"
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v6
         with:
           name: artifacts-plan-dist-manifest
           path: plan-dist-manifest.json
@@ -95,18 +96,19 @@ jobs:
     if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }}
     strategy:
       fail-fast: false
-      # Target platforms/runners are computed by cargo-dist in create-release.
+      # Target platforms/runners are computed by dist in create-release.
       # Each member of the matrix has the following arguments:
       #
       # - runner: the github runner
-      # - dist-args: cli flags to pass to cargo dist
-      # - install-dist: expression to run to install cargo-dist on the runner
+      # - dist-args: cli flags to pass to dist
+      # - install-dist: expression to run to install dist on the runner
       #
       # Typically there will be:
       # - 1 "global" task that builds universal installers
       # - N "local" tasks that build each platform's binaries and platform-specific installers
       matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }}
     runs-on: ${{ matrix.runner }}
+    container: ${{ matrix.container && matrix.container.image || null }}
     env:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json
@@ -114,14 +116,22 @@ jobs:
       - name: enable windows longpaths
         run: |
           git config --global core.longpaths true
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
+          persist-credentials: false
           submodules: recursive
-      - name: Install cargo-dist
-        run: ${{ matrix.install_dist }}
+      - name: Install Rust non-interactively if not already installed
+        if: ${{ matrix.container }}
+        run: |
+          if ! command -v cargo > /dev/null 2>&1; then
+            curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+            echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+          fi
+      - name: Install dist
+        run: ${{ matrix.install_dist.run }}
       # Get the dist-manifest
       - name: Fetch local artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v7
         with:
           pattern: artifacts-*
           path: target/distrib/
@@ -132,8 +142,8 @@ jobs:
       - name: Build artifacts
         run: |
           # Actually do builds and make zips and whatnot
-          cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
-          echo "cargo dist ran successfully"
+          dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
+          echo "dist ran successfully"
       - id: cargo-dist
         name: Post-build
         # We force bash here just because github makes it really hard to get values up
@@ -143,12 +153,12 @@ jobs:
         run: |
           # Parse out what we just built and upload it to scratch storage
           echo "paths<<EOF" >> "$GITHUB_OUTPUT"
-          jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT"
+          dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT"
           echo "EOF" >> "$GITHUB_OUTPUT"
 
           cp dist-manifest.json "$BUILD_MANIFEST_NAME"
       - name: "Upload artifacts"
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v6
         with:
           name: artifacts-build-local-${{ join(matrix.targets, '_') }}
           path: |
@@ -160,23 +170,24 @@ jobs:
     needs:
       - plan
       - build-local-artifacts
-    runs-on: "ubuntu-20.04"
+    runs-on: "ubuntu-22.04"
     env:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
+          persist-credentials: false
           submodules: recursive
-      - name: Install cached cargo-dist
-        uses: actions/download-artifact@v4
+      - name: Install cached dist
+        uses: actions/download-artifact@v7
         with:
           name: cargo-dist-cache
           path: ~/.cargo/bin/
-      - run: chmod +x ~/.cargo/bin/cargo-dist
+      - run: chmod +x ~/.cargo/bin/dist
       # Get all the local artifacts for the global tasks to use (for e.g. checksums)
       - name: Fetch local artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v7
         with:
           pattern: artifacts-*
           path: target/distrib/
@@ -184,8 +195,8 @@ jobs:
       - id: cargo-dist
         shell: bash
         run: |
-          cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
-          echo "cargo dist ran successfully"
+          dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
+          echo "dist ran successfully"
 
           # Parse out what we just built and upload it to scratch storage
           echo "paths<<EOF" >> "$GITHUB_OUTPUT"
@@ -194,7 +205,7 @@ jobs:
 
           cp dist-manifest.json "$BUILD_MANIFEST_NAME"
       - name: "Upload artifacts"
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v6
         with:
           name: artifacts-build-global
           path: |
@@ -206,26 +217,27 @@ jobs:
       - plan
       - build-local-artifacts
       - build-global-artifacts
-    # Only run if we're "publishing", and only if local and global didn't fail (skipped is fine)
-    if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
+    # Only run if we're "publishing", and only if plan, local and global didn't fail (skipped is fine)
+    if: ${{ always() && needs.plan.result == 'success' && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
     env:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-    runs-on: "ubuntu-20.04"
+    runs-on: "ubuntu-22.04"
     outputs:
       val: ${{ steps.host.outputs.manifest }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
+          persist-credentials: false
           submodules: recursive
-      - name: Install cached cargo-dist
-        uses: actions/download-artifact@v4
+      - name: Install cached dist
+        uses: actions/download-artifact@v7
         with:
           name: cargo-dist-cache
           path: ~/.cargo/bin/
-      - run: chmod +x ~/.cargo/bin/cargo-dist
+      - run: chmod +x ~/.cargo/bin/dist
       # Fetch artifacts from scratch-storage
       - name: Fetch artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v7
         with:
           pattern: artifacts-*
           path: target/distrib/
@@ -233,19 +245,19 @@ jobs:
       - id: host
         shell: bash
         run: |
-          cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
+          dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
           echo "artifacts uploaded and released successfully"
           cat dist-manifest.json
           echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
       - name: "Upload dist-manifest.json"
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v6
         with:
           # Overwrite the previous copy
           name: artifacts-dist-manifest
           path: dist-manifest.json
       # Create a GitHub Release while uploading all files to it
       - name: "Download GitHub Artifacts"
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v7
         with:
           pattern: artifacts-*
           path: artifacts
@@ -274,10 +286,11 @@ jobs:
     # still allowing individual publish jobs to skip themselves (for prereleases).
     # "host" however must run to completion, no skipping allowed!
     if: ${{ always() && needs.host.result == 'success' }}
-    runs-on: "ubuntu-20.04"
+    runs-on: "ubuntu-22.04"
     env:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
+          persist-credentials: false
           submodules: recursive
diff --git a/Cargo.toml b/Cargo.toml
index a869951..70acc7b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -96,27 +96,3 @@ opt-level = 3
 [profile.dist]
 inherits = "release"
 lto = "thin"
-
-# Config for 'cargo dist'
-[workspace.metadata.dist]
-# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax)
-cargo-dist-version = "0.21.1"
-# CI backends to support
-ci = "github"
-# The installers to generate for each app
-installers = ["shell"]
-# Target platforms to build apps for (Rust target-triple syntax)
-targets = [
-  "aarch64-apple-darwin",
-  "x86_64-apple-darwin",
-  "x86_64-unknown-linux-gnu",
-]
-# Which actions to run on pull requests
-pr-run-mode = "plan"
-# Whether to install an updater program
-install-updater = true
-# Path that installers should place binaries in
-install-path = "CARGO_HOME"
-
-[workspace.metadata.dist.github-custom-runners]
-aarch64-apple-darwin = "macos-14"
diff --git a/dist-workspace.toml b/dist-workspace.toml
new file mode 100644
index 0000000..6d6b111
--- /dev/null
+++ b/dist-workspace.toml
@@ -0,0 +1,22 @@
+[workspace]
+members = ["cargo:."]
+
+# Config for 'dist'
+[dist]
+# The preferred dist version to use in CI (Cargo.toml SemVer syntax)
+cargo-dist-version = "0.31.0"
+# CI backends to support
+ci = "github"
+# The installers to generate for each app
+installers = ["shell"]
+# Target platforms to build apps for (Rust target-triple syntax)
+targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu"]
+# Which actions to run on pull requests
+pr-run-mode = "plan"
+# Whether to install an updater program
+install-updater = true
+# Path that installers should place binaries in
+install-path = "CARGO_HOME"
+
+[dist.github-custom-runners]
+aarch64-apple-darwin = "macos-14"
diff --git a/release.sh b/release.sh
new file mode 100755
index 0000000..b7442a1
--- /dev/null
+++ b/release.sh
@@ -0,0 +1,284 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+usage() {
+  cat <<'USAGE'
+Usage: release.sh <version> [--push] [--dry-run]
+
+Creates a release by:
+1. Validating <version> as semantic versioning (SemVer 2.0.0)
+2. Ensuring <version> is greater than the current Cargo.toml package version
+3. Ensuring tag v<version> does not already exist
+4. Updating Cargo.toml [package].version
+5. Committing Cargo.toml and creating tag v<version>
+6. Optionally pushing the tag when --push is provided
+7. With --dry-run, print planned actions without changing git state/files
+USAGE
+}
+
+err() {
+  echo "Error: $*" >&2
+  exit 1
+}
+
+is_valid_semver() {
+  local version="$1"
+  local semver_regex='^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-((0|[1-9][0-9]*|[0-9A-Za-z-]*[A-Za-z-][0-9A-Za-z-]*)(\.(0|[1-9][0-9]*|[0-9A-Za-z-]*[A-Za-z-][0-9A-Za-z-]*))*))?(\+([0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*))?$'
+  printf '%s\n' "$version" | grep -Eq "$semver_regex"
+}
+
+split_semver() {
+  local version="$1"
+  local core="$version"
+  local pre=""
+  local major minor patch
+
+  if [[ "$core" == *+* ]]; then
+    core="${core%%+*}"
+  fi
+
+  if [[ "$core" == *-* ]]; then
+    pre="${core#*-}"
+    core="${core%%-*}"
+  fi
+
+  IFS='.' read -r major minor patch <<<"$core"
+  printf '%s\t%s\t%s\t%s\n' "$major" "$minor" "$patch" "$pre"
+}
+
+compare_prerelease() {
+  local left="$1"
+  local right="$2"
+
+  if [[ -z "$left" && -z "$right" ]]; then
+    echo 0
+    return
+  fi
+  if [[ -z "$left" ]]; then
+    echo 1
+    return
+  fi
+  if [[ -z "$right" ]]; then
+    echo -1
+    return
+  fi
+
+  local -a lparts=()
+  local -a rparts=()
+  IFS='.' read -r -a lparts <<<"$left"
+  IFS='.' read -r -a rparts <<<"$right"
+
+  local i
+  local lnum rnum
+  for ((i = 0; i < ${#lparts[@]} || i < ${#rparts[@]}; i++)); do
+    if ((i >= ${#lparts[@]})); then
+      echo -1
+      return
+    fi
+    if ((i >= ${#rparts[@]})); then
+      echo 1
+      return
+    fi
+
+    if [[ "${lparts[$i]}" == "${rparts[$i]}" ]]; then
+      continue
+    fi
+
+    if [[ "${lparts[$i]}" =~ ^(0|[1-9][0-9]*)$ ]]; then
+      lnum=1
+    else
+      lnum=0
+    fi
+    if [[ "${rparts[$i]}" =~ ^(0|[1-9][0-9]*)$ ]]; then
+      rnum=1
+    else
+      rnum=0
+    fi
+
+    if ((lnum == 1 && rnum == 1)); then
+      if ((10#${lparts[$i]} > 10#${rparts[$i]})); then
+        echo 1
+      else
+        echo -1
+      fi
+      return
+    fi
+    if ((lnum == 1 && rnum == 0)); then
+      echo -1
+      return
+    fi
+    if ((lnum == 0 && rnum == 1)); then
+      echo 1
+      return
+    fi
+
+    if [[ "${lparts[$i]}" > "${rparts[$i]}" ]]; then
+      echo 1
+    else
+      echo -1
+    fi
+    return
+  done
+
+  echo 0
+}
+
+compare_semver() {
+  local left="$1"
+  local right="$2"
+
+  local lmaj lmin lpat lpre
+  local rmaj rmin rpat rpre
+  IFS=$'\t' read -r lmaj lmin lpat lpre <<<"$(split_semver "$left")"
+  IFS=$'\t' read -r rmaj rmin rpat rpre <<<"$(split_semver "$right")"
+
+  if ((10#$lmaj > 10#$rmaj)); then
+    echo 1
+    return
+  fi
+  if ((10#$lmaj < 10#$rmaj)); then
+    echo -1
+    return
+  fi
+
+  if ((10#$lmin > 10#$rmin)); then
+    echo 1
+    return
+  fi
+  if ((10#$lmin < 10#$rmin)); then
+    echo -1
+    return
+  fi
+
+  if ((10#$lpat > 10#$rpat)); then
+    echo 1
+    return
+  fi
+  if ((10#$lpat < 10#$rpat)); then
+    echo -1
+    return
+  fi
+
+  compare_prerelease "$lpre" "$rpre"
+}
+
+push_tag=false
+dry_run=false
+version=""
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --push)
+      push_tag=true
+      shift
+      ;;
+    --dry-run)
+      dry_run=true
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    -*)
+      err "Unknown option: $1"
+      ;;
+    *)
+      if [[ -n "$version" ]]; then
+        err "Provide exactly one version argument."
+      fi
+      version="$1"
+      shift
+      ;;
+  esac
+done
+
+[[ -n "$version" ]] || { usage >&2; exit 1; }
+
+[[ -f "Cargo.toml" ]] || err "Cargo.toml not found in current directory."
+git rev-parse --is-inside-work-tree >/dev/null 2>&1 || err "Current directory is not a git repository."
+
+is_valid_semver "$version" || err "Version '$version' is not valid SemVer 2.0.0."
+
+current_version="$(
+  awk '
+    /^\[package\]/ { in_package=1; next }
+    in_package && /^\[/ { in_package=0 }
+    in_package && /^[[:space:]]*version[[:space:]]*=/ {
+      match($0, /"[^"]+"/)
+      if (RSTART > 0) {
+        print substr($0, RSTART + 1, RLENGTH - 2)
+        exit
+      }
+    }
+  ' Cargo.toml
+)"
+
+[[ -n "$current_version" ]] || err "Could not find [package].version in Cargo.toml."
+is_valid_semver "$current_version" || err "Current Cargo.toml version '$current_version' is not valid SemVer."
+
+cmp="$(compare_semver "$version" "$current_version")"
+if [[ "$cmp" != "1" ]]; then
+  err "Version '$version' must be greater than current version '$current_version'."
+fi
+
+tag="v${version}"
+if git rev-parse -q --verify "refs/tags/${tag}" >/dev/null 2>&1; then
+  err "Tag '${tag}' already exists locally."
+fi
+if git remote get-url origin >/dev/null 2>&1; then
+  if git ls-remote --exit-code --tags origin "refs/tags/${tag}" >/dev/null 2>&1; then
+    err "Tag '${tag}' already exists on remote 'origin'."
+  fi
+fi
+
+tmp_file="$(mktemp)"
+cleanup() {
+  rm -f "$tmp_file"
+}
+trap cleanup EXIT
+
+awk -v new_version="$version" '
+  BEGIN { in_package=0; updated=0 }
+  /^\[package\]/ { in_package=1; print; next }
+  in_package && /^\[/ { in_package=0 }
+  in_package && !updated && /^[[:space:]]*version[[:space:]]*=/ {
+    print "version = \"" new_version "\""
+    updated=1
+    next
+  }
+  { print }
+  END {
+    if (!updated) {
+      exit 2
+    }
+  }
+' Cargo.toml > "$tmp_file" || err "Failed to update Cargo.toml."
+
+if [[ "$dry_run" == true ]]; then
+  echo "Dry run successful:"
+  echo "- Current version: ${current_version}"
+  echo "- New version: ${version}"
+  echo "- Tag to create: ${tag}"
+  echo "- Would update Cargo.toml [package].version to ${version}"
+  echo "- Would commit: Release ${tag}"
+  echo "- Would create tag: ${tag}"
+  if [[ "$push_tag" == true ]]; then
+    git remote get-url origin >/dev/null 2>&1 || err "No 'origin' remote configured, cannot push tag."
+    echo "- Would push tag to origin: ${tag}"
+  fi
+  exit 0
+fi
+
+mv "$tmp_file" Cargo.toml
+
+git add Cargo.toml
+git commit -m "Release ${tag}" -- Cargo.toml
+git tag "${tag}"
+
+if [[ "$push_tag" == true ]]; then
+  git remote get-url origin >/dev/null 2>&1 || err "No 'origin' remote configured, cannot push tag."
+  git push origin "${tag}"
+fi
+
+echo "Released ${tag} (updated Cargo.toml from ${current_version} to ${version})."