From 2c780313de72fcf56f23a1b622aaacdbcdf6b188 Mon Sep 17 00:00:00 2001
From: Bastian Havers-Zulka <bastian.havers-zulka@ri.se>
Date: Wed, 8 Apr 2026 16:56:40 +0200
Subject: [PATCH 1/3] Add dataprov-add CLI tool for recording processing steps

Expose ProvenanceChain.add() as a CLI tool so users can record
processing steps in shell scripts and pipelines without writing Python.

Changes:
- dataprov/cli/addstep.py: new CLI module with full argument coverage
  of add()'s required and optional parameters, including --capture-agent,
  --capture-environment, --input-provenance-files (with 'none' sentinel),
  --drl, and -o / --overwrite for output control
- pyproject.toml: register dataprov-add entry point
- README.md: add dataprov-add to CLI Tools section and table of contents
- tests/test_dataprov.py: add TestCLIAddStep with 15 tests covering
  basic usage, optional fields, multiple I/O, provenance file linking,
  output redirect, overwrite, agent/env capture, and error cases

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 README.md               |  57 ++++++
 dataprov/cli/addstep.py | 410 ++++++++++++++++++++++++++++++++++++++++
 pyproject.toml          |   1 +
 tests/test_dataprov.py  | 373 ++++++++++++++++++++++++++++++++++++
 4 files changed, 841 insertions(+)
 create mode 100644 dataprov/cli/addstep.py

diff --git a/README.md b/README.md
index b2299e4..c9bb143 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,7 @@ A lightweight Python library for tracking data provenance through processing pip
     - [Visualization](#visualization)
   - [CLI Tools](#cli-tools)
     - [dataprov-new](#dataprov-new)
+    - [dataprov-add](#dataprov-add)
     - [dataprov-visualize](#dataprov-visualize)
     - [dataprov-add-attribution](#dataprov-add-attribution)
     - [dataprov-report](#dataprov-report)
@@ -791,6 +792,62 @@ dataprov-new \
     --tags video,processing,2024
 ```
 
+### dataprov-add
+
+Add a processing step to an existing provenance chain. This is the CLI equivalent of `ProvenanceChain.add()`:
+
+```bash
+# Minimal usage
+dataprov-add -p provenance.json \
+    --started-at "2024-10-15T11:00:00Z" --ended-at "2024-10-15T11:05:30Z" \
+    --tool-name drone_stabilizer --tool-version 2.0 --operation stabilization \
+    -i raw.mp4 --input-formats MP4 \
+    --outputs stabilized.mp4 --output-formats MP4
+
+# Multiple inputs and outputs
+dataprov-add -p provenance.json \
+    --started-at "2024-10-15T12:00:00Z" --ended-at "2024-10-15T12:10:00Z" \
+    --tool-name video_combiner --tool-version 1.5 --operation concatenation \
+    -i video1.mp4 video2.mp4 video3.mp4 \
+    --input-formats MP4 MP4 MP4 \
+    --outputs combined.mp4 --output-formats MP4
+
+# With linked input provenance files (use "none" for inputs with no provenance)
+dataprov-add -p provenance.json \
+    --started-at "2024-10-15T12:00:00Z" --ended-at "2024-10-15T12:10:00Z" \
+    --tool-name video_combiner --tool-version 1.5 --operation concatenation \
+    -i video1.mp4 video2.mp4 --input-formats MP4 MP4 \
+    --outputs combined.mp4 --output-formats MP4 \
+    --input-provenance-files video1_prov.json none
+
+# With DRL, agent capture, and execution log
+dataprov-add -p provenance.json \
+    --started-at "2024-10-15T10:00:00Z" --ended-at "2024-10-15T10:05:30Z" \
+    --tool-name processor --tool-version 1.0 --operation processing \
+    -i input.txt --input-formats TXT \
+    --outputs output.txt --output-formats TXT \
+    --drl 3 --capture-agent \
+    --output-log "Processing completed successfully"
+
+# Capture execution environment
+dataprov-add -p provenance.json \
+    --started-at "2024-10-15T10:00:00Z" --ended-at "2024-10-15T10:05:30Z" \
+    --tool-name processor --tool-version 1.0 --operation processing \
+    -i input.txt --input-formats TXT \
+    --outputs output.txt --output-formats TXT \
+    --capture-environment
+
+# Write to new file instead of modifying in place
+dataprov-add -p provenance.json \
+    --started-at "2024-10-15T11:00:00Z" --ended-at "2024-10-15T11:05:30Z" \
+    --tool-name my_tool --tool-version 1.0 --operation processing \
+    -i input.csv --input-formats CSV \
+    --outputs output.csv --output-formats CSV \
+    -o provenance_updated.json
+```
+
+Omit `--ended-at` for steps that are still ongoing or whose end time is not yet known.
+
 ### dataprov-visualize
 
 Generate GraphViz DOT visualization:
diff --git a/dataprov/cli/addstep.py b/dataprov/cli/addstep.py
new file mode 100644
index 0000000..5a9f8be
--- /dev/null
+++ b/dataprov/cli/addstep.py
@@ -0,0 +1,410 @@
+#!/usr/bin/env python3
+"""
+Add Processing Step to Provenance Chain
+
+Command-line tool to record a processing step in an existing provenance chain.
+Wraps the ProvenanceChain.add() method for use in shell pipelines and scripts.
+
+Usage:
+    dataprov-add -p provenance.json \\
+        --started-at "2024-10-15T11:00:00Z" --ended-at "2024-10-15T11:05:30Z" \\
+        --tool-name my_tool --tool-version 1.0 --operation processing \\
+        -i input.csv --input-formats CSV \\
+        --outputs output.csv --output-formats CSV
+
+Examples:
+    # Minimal usage
+    dataprov-add -p provenance.json \\
+        --started-at "2024-10-15T11:00:00Z" --ended-at "2024-10-15T11:05:30Z" \\
+        --tool-name drone_stabilizer --tool-version 2.0 --operation stabilization \\
+        -i raw.mp4 --input-formats MP4 \\
+        --outputs stabilized.mp4 --output-formats MP4
+
+    # Multiple inputs and outputs with provenance references
+    dataprov-add -p provenance.json \\
+        --started-at "2024-10-15T12:00:00Z" --ended-at "2024-10-15T12:10:00Z" \\
+        --tool-name video_combiner --tool-version 1.5 --operation concatenation \\
+        -i video1.mp4 video2.mp4 --input-formats MP4 MP4 \\
+        --outputs combined.mp4 --output-formats MP4 \\
+        --input-provenance-files video1_prov.json none
+
+    # With agent capture, DRL, and execution log
+    dataprov-add -p provenance.json \\
+        --started-at "2024-10-15T10:00:00Z" \\
+        --tool-name processor --tool-version 1.0 --operation processing \\
+        -i input.txt --input-formats TXT \\
+        --outputs output.txt --output-formats TXT \\
+        --drl 3 --capture-agent --output-log "Processing completed successfully"
+
+    # Capture runtime environment
+    dataprov-add -p provenance.json \\
+        --started-at "2024-10-15T10:00:00Z" \\
+        --tool-name processor --tool-version 1.0 --operation processing \\
+        -i input.txt --input-formats TXT \\
+        --outputs output.txt --output-formats TXT \\
+        --capture-environment
+
+    # Output to new file instead of in-place modification
+    dataprov-add -p provenance.json \\
+        --started-at "2024-10-15T11:00:00Z" \\
+        --tool-name my_tool --tool-version 1.0 --operation processing \\
+        -i input.csv --input-formats CSV \\
+        --outputs output.csv --output-formats CSV \\
+        -o provenance_updated.json
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+from dataprov import ProvenanceChain
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Add a processing step to an existing provenance chain",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Basic step
+  dataprov-add -p provenance.json \\
+    --started-at "2024-10-15T11:00:00Z" --ended-at "2024-10-15T11:05:30Z" \\
+    --tool-name my_tool --tool-version 1.0 --operation processing \\
+    -i input.csv --input-formats CSV \\
+    --outputs output.csv --output-formats CSV
+
+  # Multiple inputs/outputs
+  dataprov-add -p provenance.json \\
+    --started-at "2024-10-15T12:00:00Z" --ended-at "2024-10-15T12:10:00Z" \\
+    --tool-name combiner --tool-version 1.0 --operation merge \\
+    -i a.csv b.csv --input-formats CSV CSV \\
+    --outputs merged.csv --output-formats CSV
+
+  # With provenance references (use "none" for missing entries)
+  dataprov-add -p provenance.json \\
+    --started-at "2024-10-15T12:00:00Z" --ended-at "2024-10-15T12:10:00Z" \\
+    --tool-name combiner --tool-version 1.0 --operation merge \\
+    -i a.csv b.csv --input-formats CSV CSV \\
+    --outputs merged.csv --output-formats CSV \\
+    --input-provenance-files a_prov.json none
+        """,
+    )
+
+    # Required arguments
+    parser.add_argument(
+        "-p",
+        "--provenance-file",
+        required=True,
+        help="Path to existing provenance JSON file",
+    )
+
+    parser.add_argument(
+        "--started-at",
+        required=True,
+        help="ISO 8601 timestamp when processing started (e.g. 2024-10-15T11:00:00Z)",
+    )
+
+    parser.add_argument(
+        "--tool-name",
+        required=True,
+        help="Name of the tool that performed this step",
+    )
+
+    parser.add_argument(
+        "--tool-version",
+        required=True,
+        help="Version of the tool",
+    )
+
+    parser.add_argument(
+        "--operation",
+        required=True,
+        help="Semantic description of the operation (e.g. stabilization, merge, calibration)",
+    )
+
+    parser.add_argument(
+        "-i",
+        "--inputs",
+        required=True,
+        nargs="+",
+        metavar="FILE",
+        help="One or more input file paths",
+    )
+
+    parser.add_argument(
+        "--input-formats",
+        required=True,
+        nargs="+",
+        metavar="FORMAT",
+        help="File formats for each input (e.g. MP4 CSV TXT) — must match number of inputs",
+    )
+
+    parser.add_argument(
+        "--outputs",
+        required=True,
+        nargs="+",
+        metavar="FILE",
+        help="One or more output file paths",
+    )
+
+    parser.add_argument(
+        "--output-formats",
+        required=True,
+        nargs="+",
+        metavar="FORMAT",
+        help="File formats for each output — must match number of outputs",
+    )
+
+    # Optional step metadata
+    parser.add_argument(
+        "--ended-at",
+        default=None,
+        help="ISO 8601 timestamp when processing ended (omit for ongoing/incomplete steps)",
+    )
+
+    parser.add_argument(
+        "--source",
+        default="",
+        help="Tool source or organization",
+    )
+
+    parser.add_argument(
+        "--arguments",
+        default="",
+        help="Command-line arguments string used for this step",
+    )
+
+    parser.add_argument(
+        "--output-log",
+        default="",
+        help="Free-text output log from tool execution",
+    )
+
+    parser.add_argument(
+        "--warnings",
+        default="",
+        help="Warning messages or issues encountered",
+    )
+
+    parser.add_argument(
+        "--input-provenance-files",
+        nargs="+",
+        metavar="FILE",
+        default=None,
+        help=(
+            "Provenance JSON files for each input (must match number of inputs). "
+            'Use "none" for inputs that have no provenance file.'
+        ),
+    )
+
+    parser.add_argument(
+        "--drl",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Data Readiness Level after this step (0–9)",
+    )
+
+    # Agent tracking
+    parser.add_argument(
+        "--capture-agent",
+        action="store_true",
+        default=False,
+        help="Capture agent/user information automatically",
+    )
+
+    parser.add_argument(
+        "--user",
+        default=None,
+        help="Override username for agent capture (only with --capture-agent)",
+    )
+
+    parser.add_argument(
+        "--hostname",
+        default=None,
+        help="Override hostname for agent capture (only with --capture-agent)",
+    )
+
+    # Environment tracking
+    parser.add_argument(
+        "--capture-environment",
+        action="store_true",
+        default=False,
+        help="Capture execution environment information (Python version, platform, etc.)",
+    )
+
+    parser.add_argument(
+        "--runtime",
+        default=None,
+        help="Override runtime name for environment capture (default: auto-detect)",
+    )
+
+    parser.add_argument(
+        "--runtime-version",
+        default=None,
+        help="Override runtime version for environment capture (default: auto-detect)",
+    )
+
+    # Output options
+    parser.add_argument(
+        "-o",
+        "--output",
+        default=None,
+        help="Write updated chain to a new file instead of modifying in place",
+    )
+
+    parser.add_argument(
+        "--overwrite",
+        action="store_true",
+        help="Overwrite output file if it exists (only with -o)",
+    )
+
+    args = parser.parse_args()
+
+    # Validate provenance file exists
+    prov_path = Path(args.provenance_file)
+    if not prov_path.exists():
+        print(
+            f"Error: Provenance file not found: {args.provenance_file}", file=sys.stderr
+        )
+        return 1
+
+    # Validate format list lengths
+    if len(args.inputs) != len(args.input_formats):
+        print(
+            f"Error: Number of --input-formats ({len(args.input_formats)}) must match"
+            f" number of --inputs ({len(args.inputs)})",
+            file=sys.stderr,
+        )
+        return 1
+
+    if len(args.outputs) != len(args.output_formats):
+        print(
+            f"Error: Number of --output-formats ({len(args.output_formats)}) must match"
+            f" number of --outputs ({len(args.outputs)})",
+            file=sys.stderr,
+        )
+        return 1
+
+    # Validate input provenance files list length
+    if args.input_provenance_files is not None and len(
+        args.input_provenance_files
+    ) != len(args.inputs):
+        print(
+            f"Error: Number of --input-provenance-files ({len(args.input_provenance_files)})"
+            f" must match number of --inputs ({len(args.inputs)})",
+            file=sys.stderr,
+        )
+        return 1
+
+    # Validate agent-only flags
+    if not args.capture_agent and (args.user or args.hostname):
+        print(
+            "Error: --user and --hostname can only be used with --capture-agent",
+            file=sys.stderr,
+        )
+        return 1
+
+    # Validate environment-only flags
+    if not args.capture_environment and (args.runtime or args.runtime_version):
+        print(
+            "Error: --runtime and --runtime-version can only be used with --capture-environment",
+            file=sys.stderr,
+        )
+        return 1
+
+    # Validate output file
+    output_path = Path(args.output) if args.output else prov_path
+    if args.output and output_path.exists() and not args.overwrite:
+        print(
+            f"Error: Output file already exists: {args.output}", file=sys.stderr
+        )
+        print("Use --overwrite to replace existing file", file=sys.stderr)
+        return 1
+
+    # Convert "none" sentinels to Python None in provenance files list
+    input_provenance_files = None
+    if args.input_provenance_files is not None:
+        input_provenance_files = [
+            None if entry.lower() == "none" else entry
+            for entry in args.input_provenance_files
+        ]
+
+    # Load provenance chain
+    try:
+        chain = ProvenanceChain.load(args.provenance_file)
+    except Exception as e:
+        print(f"Error: Failed to load provenance chain: {e}", file=sys.stderr)
+        return 1
+
+    # Add the processing step
+    try:
+        success = chain.add(
+            started_at=args.started_at,
+            ended_at=args.ended_at,
+            tool_name=args.tool_name,
+            tool_version=args.tool_version,
+            operation=args.operation,
+            inputs=args.inputs,
+            input_formats=args.input_formats,
+            outputs=args.outputs,
+            output_formats=args.output_formats,
+            source=args.source,
+            arguments=args.arguments,
+            output_log=args.output_log,
+            warnings=args.warnings,
+            input_provenance_files=input_provenance_files,
+            drl=args.drl,
+            capture_agent=args.capture_agent,
+            user=args.user,
+            hostname=args.hostname,
+            capture_environment=args.capture_environment,
+            runtime=args.runtime,
+            runtime_version=args.runtime_version,
+        )
+
+        if not success:
+            print(
+                "Error: Failed to add processing step (see error messages above)",
+                file=sys.stderr,
+            )
+            return 1
+
+    except Exception as e:
+        print(f"Error: Failed to add processing step: {e}", file=sys.stderr)
+        return 1
+
+    # Save the updated chain
+    try:
+        chain.save(str(output_path))
+    except Exception as e:
+        print(f"Error: Failed to save provenance chain: {e}", file=sys.stderr)
+        return 1
+
+    # Success message
+    input_list = (
+        ", ".join(args.inputs)
+        if len(args.inputs) <= 3
+        else f"{args.inputs[0]}, ... ({len(args.inputs)} files)"
+    )
+    output_list = (
+        ", ".join(args.outputs)
+        if len(args.outputs) <= 3
+        else f"{args.outputs[0]}, ... ({len(args.outputs)} files)"
+    )
+
+    print(f"Added processing step to {output_path}:", file=sys.stderr)
+    print(f"  Tool: {args.tool_name} v{args.tool_version}", file=sys.stderr)
+    print(f"  Operation: {args.operation}", file=sys.stderr)
+    print(f"  Inputs:  {input_list}", file=sys.stderr)
+    print(f"  Outputs: {output_list}", file=sys.stderr)
+    if args.drl is not None:
+        print(f"  DRL: {args.drl}", file=sys.stderr)
+
+    print("\nProvenance chain updated successfully.", file=sys.stderr)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # not the site-injected exit(): that helper is absent under `python -S` / frozen builds
diff --git a/pyproject.toml b/pyproject.toml
index 2188fcb..d4de664 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ dev = [
 
 [project.scripts]
 dataprov-new = "dataprov.cli.newprovchain:main"
+dataprov-add = "dataprov.cli.addstep:main"
 dataprov-add-attribution = "dataprov.cli.addattribution:main"
 dataprov-visualize = "dataprov.cli.visualize:main"
 dataprov-report = "dataprov.cli.report:main"
diff --git a/tests/test_dataprov.py b/tests/test_dataprov.py
index 80a79af..994350b 100644
--- a/tests/test_dataprov.py
+++ b/tests/test_dataprov.py
@@ -2875,3 +2875,376 @@ def test_custom_metadata_validation(self):
                     "invalidkey": "value"  # missing namespace prefix
                 },
             )
+
+
+class TestCLIAddStep:
+    """Tests for the dataprov-add CLI tool."""
+
+    @pytest.fixture
+    def prov_file(self, tmp_path):
+        """Create a minimal provenance chain file for CLI tests."""
+        chain = ProvenanceChain.create(
+            entity_id="cli_test_entity",
+            initial_source="/test/source/",
+        )
+        filepath = tmp_path / "prov.json"
+        chain.save(str(filepath))
+        return filepath
+
+    def _run_main(self, argv):
+        """Run dataprov-add main() with given argv list."""
+        import sys
+
+        from dataprov.cli.addstep import main
+
+        old_argv = sys.argv
+        sys.argv = ["dataprov-add"] + argv
+        try:
+            return main()
+        finally:
+            sys.argv = old_argv
+
+    def test_add_basic_step(self, prov_file, tmp_path, sample_file):
+        """Test adding a basic processing step via CLI."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--ended-at", "2024-10-15T11:05:30Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+            ]
+        )
+
+        assert result == 0
+        chain = ProvenanceChain.load(str(prov_file))
+        assert len(chain.get_steps()) == 1
+        step = chain.get_steps()[0]
+        assert step["tool"]["name"] == "my_tool"
+        assert step["tool"]["version"] == "1.0"
+        assert step["operation"] == "processing"
+
+    def test_add_step_with_optional_fields(self, prov_file, sample_file):
+        """Test adding a step with optional metadata fields."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--ended-at", "2024-10-15T11:05:30Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "2.0",
+                "--operation", "calibration",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+                "--source", "ACME Corp",
+                "--arguments", "--mode fast",
+                "--output-log", "Done",
+                "--warnings", "None",
+                "--drl", "5",
+            ]
+        )
+
+        assert result == 0
+        chain = ProvenanceChain.load(str(prov_file))
+        step = chain.get_steps()[0]
+        assert step["arguments"] == "--mode fast"
+        assert step["output_log"] == "Done"
+        assert step["warnings"] == "None"
+        assert step["drl"] == 5
+
+    def test_add_step_no_ended_at(self, prov_file, sample_file):
+        """Test adding an ongoing step without --ended-at."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+            ]
+        )
+
+        assert result == 0
+        chain = ProvenanceChain.load(str(prov_file))
+        assert len(chain.get_steps()) == 1
+
+    def test_add_step_multiple_inputs_outputs(self, prov_file, sample_files):
+        """Test adding a step with multiple inputs and outputs."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--ended-at", "2024-10-15T11:05:30Z",
+                "--tool-name", "combiner",
+                "--tool-version", "1.0",
+                "--operation", "merge",
+                "-i", str(sample_files[0]), str(sample_files[1]),
+                "--input-formats", "TXT", "TXT",
+                "--outputs", str(sample_files[2]),
+                "--output-formats", "TXT",
+            ]
+        )
+
+        assert result == 0
+        chain = ProvenanceChain.load(str(prov_file))
+        assert len(chain.get_steps()) == 1
+
+    def test_add_step_with_none_provenance_files(self, prov_file, sample_files):
+        """Test --input-provenance-files with 'none' sentinel values."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--ended-at", "2024-10-15T11:05:30Z",
+                "--tool-name", "combiner",
+                "--tool-version", "1.0",
+                "--operation", "merge",
+                "-i", str(sample_files[0]), str(sample_files[1]),
+                "--input-formats", "TXT", "TXT",
+                "--outputs", str(sample_files[2]),
+                "--output-formats", "TXT",
+                "--input-provenance-files", "none", "none",
+            ]
+        )
+
+        assert result == 0
+
+    def test_add_step_output_to_new_file(self, prov_file, tmp_path, sample_file):
+        """Test writing the updated chain to a new output file."""
+        output_file = tmp_path / "prov_updated.json"
+
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--ended-at", "2024-10-15T11:05:30Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+                "-o", str(output_file),
+            ]
+        )
+
+        assert result == 0
+        assert output_file.exists()
+        # Original file should be unchanged
+        original_chain = ProvenanceChain.load(str(prov_file))
+        assert len(original_chain.get_steps()) == 0
+        # Updated file should have the step
+        updated_chain = ProvenanceChain.load(str(output_file))
+        assert len(updated_chain.get_steps()) == 1
+
+    def test_missing_provenance_file(self, tmp_path, sample_file):
+        """Test error when provenance file does not exist."""
+        result = self._run_main(
+            [
+                "-p", str(tmp_path / "nonexistent.json"),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+            ]
+        )
+
+        assert result == 1
+
+    def test_mismatched_input_formats(self, prov_file, sample_files):
+        """Test error when --input-formats count does not match --inputs."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_files[0]), str(sample_files[1]),
+                "--input-formats", "TXT",  # only 1 format for 2 inputs
+                "--outputs", str(sample_files[2]),
+                "--output-formats", "TXT",
+            ]
+        )
+
+        assert result == 1
+
+    def test_mismatched_output_formats(self, prov_file, sample_files):
+        """Test error when --output-formats count does not match --outputs."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_files[0]),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_files[1]), str(sample_files[2]),
+                "--output-formats", "TXT",  # only 1 format for 2 outputs
+            ]
+        )
+
+        assert result == 1
+
+    def test_overwrite_existing_output(self, prov_file, tmp_path, sample_file):
+        """Test --overwrite flag for output file."""
+        output_file = tmp_path / "output.json"
+        output_file.write_text("{}")
+
+        # Without --overwrite should fail
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+                "-o", str(output_file),
+            ]
+        )
+        assert result == 1
+
+        # With --overwrite should succeed
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+                "-o", str(output_file),
+                "--overwrite",
+            ]
+        )
+        assert result == 0
+
+    def test_capture_agent(self, prov_file, sample_file):
+        """Test --capture-agent flag records user info."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--ended-at", "2024-10-15T11:05:30Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+                "--capture-agent",
+            ]
+        )
+
+        assert result == 0
+        chain = ProvenanceChain.load(str(prov_file))
+        step = chain.get_steps()[0]
+        assert step.get("agent") is not None
+
+    def test_capture_environment(self, prov_file, sample_file):
+        """Test --capture-environment flag records runtime info."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--ended-at", "2024-10-15T11:05:30Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+                "--capture-environment",
+            ]
+        )
+
+        assert result == 0
+        chain = ProvenanceChain.load(str(prov_file))
+        step = chain.get_steps()[0]
+        assert step.get("environment") is not None
+
+    def test_user_without_capture_agent_fails(self, prov_file, sample_file):
+        """Test that --user without --capture-agent returns error."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+                "--user", "alice",
+            ]
+        )
+
+        assert result == 1
+
+    def test_runtime_without_capture_environment_fails(self, prov_file, sample_file):
+        """Test that --runtime without --capture-environment returns error."""
+        result = self._run_main(
+            [
+                "-p", str(prov_file),
+                "--started-at", "2024-10-15T11:00:00Z",
+                "--tool-name", "my_tool",
+                "--tool-version", "1.0",
+                "--operation", "processing",
+                "-i", str(sample_file),
+                "--input-formats", "TXT",
+                "--outputs", str(sample_file),
+                "--output-formats", "TXT",
+                "--runtime", "Node.js",
+            ]
+        )
+
+        assert result == 1
+
+    def test_add_multiple_steps(self, prov_file, sample_file):
+        """Test adding multiple steps sequentially via CLI."""
+        for i in range(3):
+            result = self._run_main(
+                [
+                    "-p", str(prov_file),
+                    "--started-at", f"2024-10-15T1{i}:00:00Z",
+                    "--ended-at", f"2024-10-15T1{i}:05:00Z",
+                    "--tool-name", f"tool_{i}",
+                    "--tool-version", "1.0",
+                    "--operation", f"step_{i}",
+                    "-i", str(sample_file),
+                    "--input-formats", "TXT",
+                    "--outputs", str(sample_file),
+                    "--output-formats", "TXT",
+                ]
+            )
+            assert result == 0
+
+        chain = ProvenanceChain.load(str(prov_file))
+        assert len(chain.get_steps()) == 3

From 721306353c66747569b315ea30f8115152a37351 Mon Sep 17 00:00:00 2001
From: Bastian Havers-Zulka <bastian.havers-zulka@ri.se>
Date: Tue, 14 Apr 2026 13:49:08 +0200
Subject: [PATCH 2/3] reformat to pass ruff format

---
 dataprov/cli/addstep.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dataprov/cli/addstep.py b/dataprov/cli/addstep.py
index 5a9f8be..4c18041 100644
--- a/dataprov/cli/addstep.py
+++ b/dataprov/cli/addstep.py
@@ -316,9 +316,7 @@ def main():
     # Validate output file
     output_path = Path(args.output) if args.output else prov_path
     if args.output and output_path.exists() and not args.overwrite:
-        print(
-            f"Error: Output file already exists: {args.output}", file=sys.stderr
-        )
+        print(f"Error: Output file already exists: {args.output}", file=sys.stderr)
         print("Use --overwrite to replace existing file", file=sys.stderr)
         return 1
 

From 0c542b61547eec85ef2b255c947ac0a48f9d38d0 Mon Sep 17 00:00:00 2001
From: Bastian Havers-Zulka <bastian.havers-zulka@ri.se>
Date: Tue, 14 Apr 2026 13:51:12 +0200
Subject: [PATCH 3/3] reformat tests/test_dataprov.py to pass ruff format

---
 tests/test_dataprov.py | 496 +++++++++++++++++++++++++++--------------
 1 file changed, 333 insertions(+), 163 deletions(-)

diff --git a/tests/test_dataprov.py b/tests/test_dataprov.py
index 994350b..0391261 100644
--- a/tests/test_dataprov.py
+++ b/tests/test_dataprov.py
@@ -2908,16 +2908,26 @@ def test_add_basic_step(self, prov_file, tmp_path, sample_file):
         """Test adding a basic processing step via CLI."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--ended-at", "2024-10-15T11:05:30Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--ended-at",
+                "2024-10-15T11:05:30Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
             ]
         )
 
@@ -2933,21 +2943,36 @@ def test_add_step_with_optional_fields(self, prov_file, sample_file):
         """Test adding a step with optional metadata fields."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--ended-at", "2024-10-15T11:05:30Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "2.0",
-                "--operation", "calibration",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
-                "--source", "ACME Corp",
-                "--arguments", "--mode fast",
-                "--output-log", "Done",
-                "--warnings", "None",
-                "--drl", "5",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--ended-at",
+                "2024-10-15T11:05:30Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "2.0",
+                "--operation",
+                "calibration",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
+                "--source",
+                "ACME Corp",
+                "--arguments",
+                "--mode fast",
+                "--output-log",
+                "Done",
+                "--warnings",
+                "None",
+                "--drl",
+                "5",
             ]
         )
 
@@ -2963,15 +2988,24 @@ def test_add_step_no_ended_at(self, prov_file, sample_file):
         """Test adding an ongoing step without --ended-at."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
             ]
         )
 
@@ -2983,16 +3017,28 @@ def test_add_step_multiple_inputs_outputs(self, prov_file, sample_files):
         """Test adding a step with multiple inputs and outputs."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--ended-at", "2024-10-15T11:05:30Z",
-                "--tool-name", "combiner",
-                "--tool-version", "1.0",
-                "--operation", "merge",
-                "-i", str(sample_files[0]), str(sample_files[1]),
-                "--input-formats", "TXT", "TXT",
-                "--outputs", str(sample_files[2]),
-                "--output-formats", "TXT",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--ended-at",
+                "2024-10-15T11:05:30Z",
+                "--tool-name",
+                "combiner",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "merge",
+                "-i",
+                str(sample_files[0]),
+                str(sample_files[1]),
+                "--input-formats",
+                "TXT",
+                "TXT",
+                "--outputs",
+                str(sample_files[2]),
+                "--output-formats",
+                "TXT",
             ]
         )
 
@@ -3004,17 +3050,31 @@ def test_add_step_with_none_provenance_files(self, prov_file, sample_files):
         """Test --input-provenance-files with 'none' sentinel values."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--ended-at", "2024-10-15T11:05:30Z",
-                "--tool-name", "combiner",
-                "--tool-version", "1.0",
-                "--operation", "merge",
-                "-i", str(sample_files[0]), str(sample_files[1]),
-                "--input-formats", "TXT", "TXT",
-                "--outputs", str(sample_files[2]),
-                "--output-formats", "TXT",
-                "--input-provenance-files", "none", "none",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--ended-at",
+                "2024-10-15T11:05:30Z",
+                "--tool-name",
+                "combiner",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "merge",
+                "-i",
+                str(sample_files[0]),
+                str(sample_files[1]),
+                "--input-formats",
+                "TXT",
+                "TXT",
+                "--outputs",
+                str(sample_files[2]),
+                "--output-formats",
+                "TXT",
+                "--input-provenance-files",
+                "none",
+                "none",
             ]
         )
 
@@ -3026,17 +3086,28 @@ def test_add_step_output_to_new_file(self, prov_file, tmp_path, sample_file):
 
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--ended-at", "2024-10-15T11:05:30Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
-                "-o", str(output_file),
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--ended-at",
+                "2024-10-15T11:05:30Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
+                "-o",
+                str(output_file),
             ]
         )
 
@@ -3053,15 +3124,24 @@ def test_missing_provenance_file(self, tmp_path, sample_file):
         """Test error when provenance file does not exist."""
         result = self._run_main(
             [
-                "-p", str(tmp_path / "nonexistent.json"),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
+                "-p",
+                str(tmp_path / "nonexistent.json"),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
             ]
         )
 
@@ -3071,15 +3151,25 @@ def test_mismatched_input_formats(self, prov_file, sample_files):
         """Test error when --input-formats count does not match --inputs."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_files[0]), str(sample_files[1]),
-                "--input-formats", "TXT",  # only 1 format for 2 inputs
-                "--outputs", str(sample_files[2]),
-                "--output-formats", "TXT",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_files[0]),
+                str(sample_files[1]),
+                "--input-formats",
+                "TXT",  # only 1 format for 2 inputs
+                "--outputs",
+                str(sample_files[2]),
+                "--output-formats",
+                "TXT",
             ]
         )
 
@@ -3089,15 +3179,25 @@ def test_mismatched_output_formats(self, prov_file, sample_files):
         """Test error when --output-formats count does not match --outputs."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_files[0]),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_files[1]), str(sample_files[2]),
-                "--output-formats", "TXT",  # only 1 format for 2 outputs
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_files[0]),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_files[1]),
+                str(sample_files[2]),
+                "--output-formats",
+                "TXT",  # only 1 format for 2 outputs
             ]
         )
 
@@ -3111,16 +3211,26 @@ def test_overwrite_existing_output(self, prov_file, tmp_path, sample_file):
         # Without --overwrite should fail
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
-                "-o", str(output_file),
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
+                "-o",
+                str(output_file),
             ]
         )
         assert result == 1
@@ -3128,16 +3238,26 @@ def test_overwrite_existing_output(self, prov_file, tmp_path, sample_file):
         # With --overwrite should succeed
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
-                "-o", str(output_file),
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
+                "-o",
+                str(output_file),
                 "--overwrite",
             ]
         )
@@ -3147,16 +3267,26 @@ def test_capture_agent(self, prov_file, sample_file):
         """Test --capture-agent flag records user info."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--ended-at", "2024-10-15T11:05:30Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--ended-at",
+                "2024-10-15T11:05:30Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
                 "--capture-agent",
             ]
         )
@@ -3170,16 +3300,26 @@ def test_capture_environment(self, prov_file, sample_file):
         """Test --capture-environment flag records runtime info."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--ended-at", "2024-10-15T11:05:30Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--ended-at",
+                "2024-10-15T11:05:30Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
                 "--capture-environment",
             ]
         )
@@ -3193,16 +3333,26 @@ def test_user_without_capture_agent_fails(self, prov_file, sample_file):
         """Test that --user without --capture-agent returns error."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
-                "--user", "alice",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
+                "--user",
+                "alice",
             ]
         )
 
@@ -3212,16 +3362,26 @@ def test_runtime_without_capture_environment_fails(self, prov_file, sample_file)
         """Test that --runtime without --capture-environment returns error."""
         result = self._run_main(
             [
-                "-p", str(prov_file),
-                "--started-at", "2024-10-15T11:00:00Z",
-                "--tool-name", "my_tool",
-                "--tool-version", "1.0",
-                "--operation", "processing",
-                "-i", str(sample_file),
-                "--input-formats", "TXT",
-                "--outputs", str(sample_file),
-                "--output-formats", "TXT",
-                "--runtime", "Node.js",
+                "-p",
+                str(prov_file),
+                "--started-at",
+                "2024-10-15T11:00:00Z",
+                "--tool-name",
+                "my_tool",
+                "--tool-version",
+                "1.0",
+                "--operation",
+                "processing",
+                "-i",
+                str(sample_file),
+                "--input-formats",
+                "TXT",
+                "--outputs",
+                str(sample_file),
+                "--output-formats",
+                "TXT",
+                "--runtime",
+                "Node.js",
             ]
         )
 
@@ -3232,16 +3392,26 @@ def test_add_multiple_steps(self, prov_file, sample_file):
         for i in range(3):
             result = self._run_main(
                 [
-                    "-p", str(prov_file),
-                    "--started-at", f"2024-10-15T1{i}:00:00Z",
-                    "--ended-at", f"2024-10-15T1{i}:05:00Z",
-                    "--tool-name", f"tool_{i}",
-                    "--tool-version", "1.0",
-                    "--operation", f"step_{i}",
-                    "-i", str(sample_file),
-                    "--input-formats", "TXT",
-                    "--outputs", str(sample_file),
-                    "--output-formats", "TXT",
+                    "-p",
+                    str(prov_file),
+                    "--started-at",
+                    f"2024-10-15T1{i}:00:00Z",
+                    "--ended-at",
+                    f"2024-10-15T1{i}:05:00Z",
+                    "--tool-name",
+                    f"tool_{i}",
+                    "--tool-version",
+                    "1.0",
+                    "--operation",
+                    f"step_{i}",
+                    "-i",
+                    str(sample_file),
+                    "--input-formats",
+                    "TXT",
+                    "--outputs",
+                    str(sample_file),
+                    "--output-formats",
+                    "TXT",
                 ]
             )
             assert result == 0