From 369f472e5a42075661d69f43a5f67dc80bfd8bed Mon Sep 17 00:00:00 2001
From: boringethan <ethan@openwater.cc>
Date: Sat, 2 May 2026 21:59:02 -0700
Subject: [PATCH] feat: drop _corrected suffix on merged CSV; mark raw
 histogram CSVs with _raw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per OpenwaterHealth/openmotion-bloodflow-app#44, the SDK now writes the merged
dark-baseline-corrected CSV as `<timestamp>_<subject>.csv` (was
`<timestamp>_<subject>_corrected.csv`) and tags per-side raw histogram CSVs
with a `_raw` suffix (`<timestamp>_<subject>_<side>_mask##_raw.csv`, was
`<timestamp>_<subject>_<side>_mask##.csv`).  Internal Python symbols
(`corrected_path`, `corrected_columns`, `write_corrected_csv`,
`ScanResult.corrected_path`) keep their names — only the on-disk filename
changed; the file content is still dark-baseline-corrected.

The bloodflow-app reader already tolerates both naming conventions
(commit 44d08b2 on feature/44-output-file-naming).  SDK-bundled consumers of
SDK output (`scripts/view_corrected_scan.py`, `stream-db/importer.py`) gain
the same back-compat tolerance so historical scan_data folders keep loading.
`data-processing/{plot_corrected_scan,compare_pipelines}.py` accept user-
supplied paths unchanged; only their docstrings, `--help` text, and one
error message were refreshed to describe the new convention while noting
the legacy form is still accepted.

Docs (`docs/PipelineComparison.md`, `docs/SciencePipeline.md`) updated to
match.  No test fixtures renamed — those are archived real captures whose
filenames are part of the recorded artefact.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 data-processing/compare_pipelines.py   |  9 +++--
 data-processing/plot_corrected_scan.py | 10 +++---
 docs/PipelineComparison.md             | 12 ++++---
 docs/SciencePipeline.md                |  2 +-
 omotion/ScanWorkflow.py                |  4 +--
 scripts/view_corrected_scan.py         | 48 +++++++++++++++++++++-----
 stream-db/importer.py                  |  5 ++-
 7 files changed, 66 insertions(+), 24 deletions(-)
diff --git a/data-processing/compare_pipelines.py b/data-processing/compare_pipelines.py
index 41fed8c..b63434f 100644
--- a/data-processing/compare_pipelines.py
+++ b/data-processing/compare_pipelines.py
@@ -12,7 +12,8 @@
   Precomputed mode (compares already-generated output files):
     python data-processing/compare_pipelines.py \\
         --bfi-results path/to/_bfi_results.csv \\
-        --corrected   path/to/_corrected.csv   [--save]
+        --corrected   path/to/<timestamp>_<subject>.csv   [--save]
+    (Legacy <timestamp>_<subject>_corrected.csv files are also accepted.)
 
   Defaults (raw mode) use the perf-test fixture CSVs.
 """
@@ -380,7 +381,9 @@ def load_legacy_precomputed(bfi_results_csv: str) -> dict[tuple, dict]:
 
 def load_sdk_precomputed(corrected_csv: str) -> dict[tuple, dict]:
     """
-    Load a _corrected.csv written by the SDK SciencePipeline streaming writer.
+    Load the merged dark-baseline-corrected CSV written by the SDK
+    SciencePipeline streaming writer (``<timestamp>_<subject>.csv``;
+    legacy ``..._corrected.csv`` files are also accepted).
     Format: frame_id, timestamp_s, bfi_l1..r8, bvi_l1..r8, mean_l1..r8,
             std_l1..r8, contrast_l1..r8, temp_l1..r8
 
@@ -628,7 +631,7 @@ def parse_args() -> argparse.Namespace:
     p.add_argument("--bfi-results",
                    help="Pre-computed _bfi_results.csv from VisualizeBloodflow (precomputed mode)")
     p.add_argument("--corrected",
-                   help="Pre-computed _corrected.csv from SDK pipeline (precomputed mode)")
+                   help="Pre-computed merged corrected CSV from SDK pipeline; legacy *_corrected.csv also accepted (precomputed mode)")
     p.add_argument("--save", action="store_true", help="Save PNGs instead of showing")
     return p.parse_args()
 
diff --git a/data-processing/plot_corrected_scan.py b/data-processing/plot_corrected_scan.py
index c6185b0..d1b39b5 100644
--- a/data-processing/plot_corrected_scan.py
+++ b/data-processing/plot_corrected_scan.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 """
-Plot BFI and BVI from a _corrected.csv file produced by the OpenMOTION SDK.
+Plot BFI and BVI from the merged dark-baseline-corrected CSV produced by the
+OpenMOTION SDK (``<timestamp>_<subject>.csv``; legacy builds wrote
+``<timestamp>_<subject>_corrected.csv`` and are still accepted).
 
 Both sensor sides are shown in one figure.  The subplot grid mirrors the
 physical camera layout described in docs/CameraArrangement.md:
@@ -23,7 +25,7 @@
 
 Usage
 -----
-    python plot_corrected_scan.py --csv path/to/_corrected.csv
+    python plot_corrected_scan.py --csv path/to/scan.csv
     python plot_corrected_scan.py --csv scan.csv --show-signal --save
 """
 
@@ -119,7 +121,7 @@ def _requested_sides(df: pd.DataFrame, requested: str) -> list[str]:
 
 def parse_args() -> argparse.Namespace:
     p = argparse.ArgumentParser(description="Plot OpenMOTION corrected scan CSV")
-    p.add_argument("--csv", required=True, help="Path to the _corrected.csv file")
+    p.add_argument("--csv", required=True, help="Path to the merged corrected CSV (legacy *_corrected.csv also accepted)")
     p.add_argument(
         "--sides", choices=["left", "right", "both"], default="both",
         help="Which sensor side(s) to plot (default: both)",
@@ -227,7 +229,7 @@ def main() -> None:
     print(f"  {len(df)} rows, {len(df.columns)} columns")
 
     if "timestamp_s" not in df.columns:
-        print("ERROR: 'timestamp_s' column not found — is this a _corrected.csv?",
+        print("ERROR: 'timestamp_s' column not found — is this the merged corrected CSV?",
               file=sys.stderr)
         sys.exit(1)
 
diff --git a/docs/PipelineComparison.md b/docs/PipelineComparison.md
index 0873119..d23c17e 100644
--- a/docs/PipelineComparison.md
+++ b/docs/PipelineComparison.md
@@ -76,9 +76,11 @@ a bug. The real-time display shows positive BFI because it uses uncorrected cont
 ## Scripts Written
 
 ### `data-processing/plot_corrected_scan.py`
-Plots data from a `_corrected.csv` file produced by the SDK pipeline.
+Plots data from the merged dark-baseline-corrected CSV produced by the SDK pipeline
+(`<timestamp>_<subject>.csv`; legacy `<timestamp>_<subject>_corrected.csv` files are
+also accepted).
 
-- **Inputs:** `--csv path/to/_corrected.csv`
+- **Inputs:** `--csv path/to/<timestamp>_<subject>.csv`
 - **Options:** `--save` (save PNGs next to CSV), `--show-signal` (add second figure with
   mean / std / contrast in addition to BFI/BVI)
 - **Layout:** Uses the physical camera grid from `docs/CameraArrangement.md`. Inactive
@@ -87,7 +89,7 @@ Plots data from a `_corrected.csv` file produced by the SDK pipeline.
   secondary y-axis.
 
 ```bash
-python data-processing/plot_corrected_scan.py --csv path/to/scan_corrected.csv --save
+python data-processing/plot_corrected_scan.py --csv path/to/scan.csv --save
 ```
 
 ### `data-processing/compare_pipelines.py`
@@ -110,8 +112,8 @@ python data-processing/compare_pipelines.py --left left.csv --right right.csv --
 ## Tests Written
 
 ### `tests/test_corrected_csv_output.py`
-Verifies the content and structure of the `_corrected.csv` file produced by the SDK
-pipeline, using the real perf-test fixture CSVs as input.
+Verifies the content and structure of the merged dark-baseline-corrected CSV produced
+by the SDK pipeline, using the real perf-test fixture CSVs as input.
 
 **Key checks (20 tests):**
 - Header contains all 98 expected columns (`frame_id`, `timestamp_s`, and 96 metric
diff --git a/docs/SciencePipeline.md b/docs/SciencePipeline.md
index 7be1d5c..0bca22d 100644
--- a/docs/SciencePipeline.md
+++ b/docs/SciencePipeline.md
@@ -634,7 +634,7 @@ After CSV writing, the batch samples are grouped by `(side, absolute_frame_id)`
 
 ### 16.5 What is NOT changed in reduced mode
 
-- **Raw histogram CSVs** — per-camera histogram data continues to be written to `*_left_mask*.csv` and `*_right_mask*.csv` files at full resolution.  These files are the ground-truth record and can be reprocessed offline if needed.
+- **Raw histogram CSVs** — per-camera histogram data continues to be written to `*_left_mask*_raw.csv` and `*_right_mask*_raw.csv` files at full resolution.  These files are the ground-truth record and can be reprocessed offline if needed.
 - **Science pipeline** — all per-camera computations (frame classification, dark subtraction, shot-noise correction, BFI/BVI calibration) run identically.
 - **Telemetry CSV** — console temperature, PDC, and safety data are unaffected.
 
diff --git a/omotion/ScanWorkflow.py b/omotion/ScanWorkflow.py
index bad3229..85d5f1d 100644
--- a/omotion/ScanWorkflow.py
+++ b/omotion/ScanWorkflow.py
@@ -242,7 +242,7 @@ def _worker():
             writer_queues: dict[str, queue.Queue] = {}
             science_pipeline = None
             corrected_path = os.path.join(
-                request.data_dir, f"{ts}_{request.subject_id}_corrected.csv"
+                request.data_dir, f"{ts}_{request.subject_id}.csv"
             )
             telemetry_path = os.path.join(
                 request.data_dir, f"{ts}_{request.subject_id}_telemetry.csv"
@@ -700,7 +700,7 @@ def _on_row(cam_id, frame_id, ts_val, hist, row_sum, temp):
 
                     # Resolve CSV file path for this side.
                     if request.write_raw_csv:
-                        filename = f"{ts}_{request.subject_id}_{side}_mask{mask:02X}.csv"
+                        filename = f"{ts}_{request.subject_id}_{side}_mask{mask:02X}_raw.csv"
                         filepath = os.path.join(request.data_dir, filename)
                     else:
                         filepath = ""
diff --git a/scripts/view_corrected_scan.py b/scripts/view_corrected_scan.py
index e1c5594..213f59c 100644
--- a/scripts/view_corrected_scan.py
+++ b/scripts/view_corrected_scan.py
@@ -45,11 +45,31 @@ def _is_valid_corrected_csv(path: Path) -> bool:
 
 
 def _latest_corrected_csv(scan_data_dir: Path) -> Path:
-    candidates = sorted(
-        scan_data_dir.glob("scan_*_corrected.csv"),
-        key=lambda p: p.stat().st_mtime,
-        reverse=True,
-    )
+    # The SDK now writes the merged dark-baseline-corrected CSV without a
+    # `_corrected` suffix (see openwaterhealth/openmotion-bloodflow-app#44).
+    # Match both the new bare-stem layout and the legacy `_corrected.csv`
+    # name so historical scans keep loading.  Per-side raw histogram CSVs
+    # use a `_raw.csv` suffix and are excluded.
+    seen: set[Path] = set()
+    candidates: list[Path] = []
+    for pattern in ("scan_*.csv", "scan_*_corrected.csv"):
+        for p in scan_data_dir.glob(pattern):
+            rp = p.resolve()
+            if rp in seen:
+                continue
+            name = p.name
+            if name.endswith("_raw.csv"):
+                continue
+            if name.endswith("_telemetry.csv"):
+                continue
+            # Skip per-side raw histogram CSVs from pre-rename SDK builds,
+            # whose names carry a mask suffix but no _raw (new builds emit
+            # ..._mask##_raw.csv, already skipped by the _raw.csv check above).
+            if "_mask" in name and not name.endswith("_corrected.csv"):
+                continue
+            seen.add(rp)
+            candidates.append(p)
+    candidates.sort(key=lambda p: p.stat().st_mtime, reverse=True)
     for c in candidates:
         if _is_valid_corrected_csv(c):
             return c
@@ -113,10 +133,22 @@ def _read_raw_metrics(raw_csv: Path, side_prefix: str):
 
 
 def _load_mean_contrast(corrected_csv: Path, frame_ids: List[int]):
-    stem = corrected_csv.name.replace("_corrected.csv", "")
+    # Strip the legacy `_corrected.csv` suffix if present; otherwise drop
+    # the bare `.csv` extension.  Either layout yields the shared scan stem
+    # used to discover the per-side raw histogram CSVs.
+    if corrected_csv.name.endswith("_corrected.csv"):
+        stem = corrected_csv.name[: -len("_corrected.csv")]
+    else:
+        stem = corrected_csv.stem
     scan_data_dir = corrected_csv.parent
-    left = list(scan_data_dir.glob(f"{stem}_left_mask*.csv"))
-    right = list(scan_data_dir.glob(f"{stem}_right_mask*.csv"))
+    # Match both the new `_raw.csv` suffix and the legacy bare mask suffix
+    # so historical scan_data folders keep loading.
+    left = list(scan_data_dir.glob(f"{stem}_left_mask*_raw.csv")) or list(
+        scan_data_dir.glob(f"{stem}_left_mask*.csv")
+    )
+    right = list(scan_data_dir.glob(f"{stem}_right_mask*_raw.csv")) or list(
+        scan_data_dir.glob(f"{stem}_right_mask*.csv")
+    )
 
     raw_metrics: Dict[str, Dict[int, tuple[float, float]]] = {}
     if left:
diff --git a/stream-db/importer.py b/stream-db/importer.py
index fa06780..02ab2e2 100644
--- a/stream-db/importer.py
+++ b/stream-db/importer.py
@@ -37,7 +37,10 @@
     r"(?P<label>[^_]+)_"
     r"(?P<date>\d{8})_"
     r"(?P<time>\d{6})"
-    r"(?:_(?P<side>left|right)_mask(?P<mask>[0-9A-Fa-f]+))?"
+    # Per-side raw histogram CSVs gained a `_raw` suffix in
+    # openwaterhealth/openmotion-bloodflow-app#44; accept it as optional so
+    # both pre- and post-rename scan_data folders import cleanly.
+    r"(?:_(?P<side>left|right)_mask(?P<mask>[0-9A-Fa-f]+)(?:_raw)?)?"
     r"\.(?P<ext>csv|txt)$"
 )