From 369f472e5a42075661d69f43a5f67dc80bfd8bed Mon Sep 17 00:00:00 2001 From: boringethan Date: Sat, 2 May 2026 21:59:02 -0700 Subject: [PATCH] feat: drop _corrected suffix on merged CSV; mark raw histogram CSVs with _raw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per OpenwaterHealth/openmotion-bloodflow-app#44, the SDK now writes the merged dark-baseline-corrected CSV as `{ts}_{subject_id}.csv` (was `{ts}_{subject_id}_corrected.csv`) and tags per-side raw histogram CSVs with a `_raw` suffix (`{ts}_{subject_id}_{side}_mask##_raw.csv`, was `{ts}_{subject_id}_{side}_mask##.csv`). Internal Python symbols (`corrected_path`, `corrected_columns`, `write_corrected_csv`, `ScanResult.corrected_path`) keep their names — only the on-disk filename changed; the file content is still dark-baseline-corrected. The bloodflow-app reader already tolerates both naming conventions (commit 44d08b2 on feature/44-output-file-naming). SDK-bundled consumers of SDK output (`scripts/view_corrected_scan.py`, `stream-db/importer.py`) gain the same back-compat tolerance so historical scan_data folders keep loading. `data-processing/{plot_corrected_scan,compare_pipelines}.py` accept user-supplied paths unchanged; only their docstrings and `--help` text were refreshed to describe the new convention while noting the legacy form is still accepted. Docs (`docs/PipelineComparison.md`, `docs/SciencePipeline.md`) updated to match. No test fixtures renamed — those are archived real captures whose filenames are part of the recorded artefact. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- data-processing/compare_pipelines.py | 9 +++-- data-processing/plot_corrected_scan.py | 10 +++--- docs/PipelineComparison.md | 12 ++++--- docs/SciencePipeline.md | 2 +- omotion/ScanWorkflow.py | 4 +-- scripts/view_corrected_scan.py | 48 +++++++++++++++++++++----- stream-db/importer.py | 5 ++- 7 files changed, 66 insertions(+), 24 deletions(-) diff --git a/data-processing/compare_pipelines.py b/data-processing/compare_pipelines.py index 41fed8c..b63434f 100644 --- a/data-processing/compare_pipelines.py +++ b/data-processing/compare_pipelines.py @@ -12,7 +12,8 @@ Precomputed mode (compares already-generated output files): python data-processing/compare_pipelines.py \\ --bfi-results path/to/_bfi_results.csv \\ - --corrected path/to/_corrected.csv [--save] + --corrected path/to/_.csv [--save] + (Legacy __corrected.csv files are also accepted.) Defaults (raw mode) use the perf-test fixture CSVs. """ @@ -380,7 +381,9 @@ def load_legacy_precomputed(bfi_results_csv: str) -> dict[tuple, dict]: def load_sdk_precomputed(corrected_csv: str) -> dict[tuple, dict]: """ - Load a _corrected.csv written by the SDK SciencePipeline streaming writer. + Load the merged dark-baseline-corrected CSV written by the SDK + SciencePipeline streaming writer (``_.csv``; + legacy ``..._corrected.csv`` files are also accepted). 
Format: frame_id, timestamp_s, bfi_l1..r8, bvi_l1..r8, mean_l1..r8, std_l1..r8, contrast_l1..r8, temp_l1..r8 @@ -628,7 +631,7 @@ def parse_args() -> argparse.Namespace: p.add_argument("--bfi-results", help="Pre-computed _bfi_results.csv from VisualizeBloodflow (precomputed mode)") p.add_argument("--corrected", - help="Pre-computed _corrected.csv from SDK pipeline (precomputed mode)") + help="Pre-computed merged corrected CSV from SDK pipeline; legacy *_corrected.csv also accepted (precomputed mode)") p.add_argument("--save", action="store_true", help="Save PNGs instead of showing") return p.parse_args() diff --git a/data-processing/plot_corrected_scan.py b/data-processing/plot_corrected_scan.py index c6185b0..d1b39b5 100644 --- a/data-processing/plot_corrected_scan.py +++ b/data-processing/plot_corrected_scan.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 """ -Plot BFI and BVI from a _corrected.csv file produced by the OpenMOTION SDK. +Plot BFI and BVI from the merged dark-baseline-corrected CSV produced by the +OpenMOTION SDK (``_.csv``; legacy builds wrote +``__corrected.csv`` and are still accepted). Both sensor sides are shown in one figure. 
The subplot grid mirrors the physical camera layout described in docs/CameraArrangement.md: @@ -23,7 +25,7 @@ Usage ----- - python plot_corrected_scan.py --csv path/to/_corrected.csv + python plot_corrected_scan.py --csv path/to/scan.csv python plot_corrected_scan.py --csv scan.csv --show-signal --save """ @@ -119,7 +121,7 @@ def _requested_sides(df: pd.DataFrame, requested: str) -> list[str]: def parse_args() -> argparse.Namespace: p = argparse.ArgumentParser(description="Plot OpenMOTION corrected scan CSV") - p.add_argument("--csv", required=True, help="Path to the _corrected.csv file") + p.add_argument("--csv", required=True, help="Path to the merged corrected CSV (legacy *_corrected.csv also accepted)") p.add_argument( "--sides", choices=["left", "right", "both"], default="both", help="Which sensor side(s) to plot (default: both)", @@ -227,7 +229,7 @@ def main() -> None: print(f" {len(df)} rows, {len(df.columns)} columns") if "timestamp_s" not in df.columns: - print("ERROR: 'timestamp_s' column not found — is this a _corrected.csv?", + print("ERROR: 'timestamp_s' column not found — is this the merged corrected CSV?", file=sys.stderr) sys.exit(1) diff --git a/docs/PipelineComparison.md b/docs/PipelineComparison.md index 0873119..d23c17e 100644 --- a/docs/PipelineComparison.md +++ b/docs/PipelineComparison.md @@ -76,9 +76,11 @@ a bug. The real-time display shows positive BFI because it uses uncorrected cont ## Scripts Written ### `data-processing/plot_corrected_scan.py` -Plots data from a `_corrected.csv` file produced by the SDK pipeline. +Plots data from the merged dark-baseline-corrected CSV produced by the SDK pipeline +(`_.csv`; legacy `__corrected.csv` files are +also accepted). 
-- **Inputs:** `--csv path/to/_corrected.csv` +- **Inputs:** `--csv path/to/_.csv` - **Options:** `--save` (save PNGs next to CSV), `--show-signal` (add second figure with mean / std / contrast in addition to BFI/BVI) - **Layout:** Uses the physical camera grid from `docs/CameraArrangement.md`. Inactive @@ -87,7 +89,7 @@ Plots data from a `_corrected.csv` file produced by the SDK pipeline. secondary y-axis. ```bash -python data-processing/plot_corrected_scan.py --csv path/to/scan_corrected.csv --save +python data-processing/plot_corrected_scan.py --csv path/to/scan.csv --save ``` ### `data-processing/compare_pipelines.py` @@ -110,8 +112,8 @@ python data-processing/compare_pipelines.py --left left.csv --right right.csv -- ## Tests Written ### `tests/test_corrected_csv_output.py` -Verifies the content and structure of the `_corrected.csv` file produced by the SDK -pipeline, using the real perf-test fixture CSVs as input. +Verifies the content and structure of the merged dark-baseline-corrected CSV produced +by the SDK pipeline, using the real perf-test fixture CSVs as input. **Key checks (20 tests):** - Header contains all 98 expected columns (`frame_id`, `timestamp_s`, and 96 metric diff --git a/docs/SciencePipeline.md b/docs/SciencePipeline.md index 7be1d5c..0bca22d 100644 --- a/docs/SciencePipeline.md +++ b/docs/SciencePipeline.md @@ -634,7 +634,7 @@ After CSV writing, the batch samples are grouped by `(side, absolute_frame_id)` ### 16.5 What is NOT changed in reduced mode -- **Raw histogram CSVs** — per-camera histogram data continues to be written to `*_left_mask*.csv` and `*_right_mask*.csv` files at full resolution. These files are the ground-truth record and can be reprocessed offline if needed. +- **Raw histogram CSVs** — per-camera histogram data continues to be written to `*_left_mask*_raw.csv` and `*_right_mask*_raw.csv` files at full resolution. These files are the ground-truth record and can be reprocessed offline if needed. 
- **Science pipeline** — all per-camera computations (frame classification, dark subtraction, shot-noise correction, BFI/BVI calibration) run identically. - **Telemetry CSV** — console temperature, PDC, and safety data are unaffected. diff --git a/omotion/ScanWorkflow.py b/omotion/ScanWorkflow.py index bad3229..85d5f1d 100644 --- a/omotion/ScanWorkflow.py +++ b/omotion/ScanWorkflow.py @@ -242,7 +242,7 @@ def _worker(): writer_queues: dict[str, queue.Queue] = {} science_pipeline = None corrected_path = os.path.join( - request.data_dir, f"{ts}_{request.subject_id}_corrected.csv" + request.data_dir, f"{ts}_{request.subject_id}.csv" ) telemetry_path = os.path.join( request.data_dir, f"{ts}_{request.subject_id}_telemetry.csv" @@ -700,7 +700,7 @@ def _on_row(cam_id, frame_id, ts_val, hist, row_sum, temp): # Resolve CSV file path for this side. if request.write_raw_csv: - filename = f"{ts}_{request.subject_id}_{side}_mask{mask:02X}.csv" + filename = f"{ts}_{request.subject_id}_{side}_mask{mask:02X}_raw.csv" filepath = os.path.join(request.data_dir, filename) else: filepath = "" diff --git a/scripts/view_corrected_scan.py b/scripts/view_corrected_scan.py index e1c5594..213f59c 100644 --- a/scripts/view_corrected_scan.py +++ b/scripts/view_corrected_scan.py @@ -45,11 +45,31 @@ def _is_valid_corrected_csv(path: Path) -> bool: def _latest_corrected_csv(scan_data_dir: Path) -> Path: - candidates = sorted( - scan_data_dir.glob("scan_*_corrected.csv"), - key=lambda p: p.stat().st_mtime, - reverse=True, - ) + # The SDK now writes the merged dark-baseline-corrected CSV without a + # `_corrected` suffix (see openwaterhealth/openmotion-bloodflow-app#44). + # Match both the new bare-stem layout and the legacy `_corrected.csv` + # name so historical scans keep loading. Per-side raw histogram CSVs + # use a `_raw.csv` suffix and are excluded. 
+ seen: set[Path] = set() + candidates: list[Path] = [] + for pattern in ("scan_*.csv", "scan_*_corrected.csv"): + for p in scan_data_dir.glob(pattern): + rp = p.resolve() + if rp in seen: + continue + name = p.name + if name.endswith("_raw.csv"): + continue + if name.endswith("_telemetry.csv"): + continue + # Skip per-side raw histogram CSVs (mask suffix without _raw is + # only produced by pre-rename SDK builds; new builds emit + # ..._mask##_raw.csv. We handle both by gating on cam_id below.) + if "_mask" in name and not name.endswith("_corrected.csv"): + continue + seen.add(rp) + candidates.append(p) + candidates.sort(key=lambda p: p.stat().st_mtime, reverse=True) for c in candidates: if _is_valid_corrected_csv(c): return c @@ -113,10 +133,22 @@ def _read_raw_metrics(raw_csv: Path, side_prefix: str): def _load_mean_contrast(corrected_csv: Path, frame_ids: List[int]): - stem = corrected_csv.name.replace("_corrected.csv", "") + # Strip the legacy `_corrected.csv` suffix if present; otherwise drop + # the bare `.csv` extension. Either layout yields the shared scan stem + # used to discover the per-side raw histogram CSVs. + if corrected_csv.name.endswith("_corrected.csv"): + stem = corrected_csv.name[: -len("_corrected.csv")] + else: + stem = corrected_csv.stem scan_data_dir = corrected_csv.parent - left = list(scan_data_dir.glob(f"{stem}_left_mask*.csv")) - right = list(scan_data_dir.glob(f"{stem}_right_mask*.csv")) + # Match both the new `_raw.csv` suffix and the legacy bare mask suffix + # so historical scan_data folders keep loading. 
+ left = list(scan_data_dir.glob(f"{stem}_left_mask*_raw.csv")) or list( + scan_data_dir.glob(f"{stem}_left_mask*.csv") + ) + right = list(scan_data_dir.glob(f"{stem}_right_mask*_raw.csv")) or list( + scan_data_dir.glob(f"{stem}_right_mask*.csv") + ) raw_metrics: Dict[str, Dict[int, tuple[float, float]]] = {} if left: diff --git a/stream-db/importer.py b/stream-db/importer.py index fa06780..02ab2e2 100644 --- a/stream-db/importer.py +++ b/stream-db/importer.py @@ -37,7 +37,10 @@ r"(?P