From b550c6fd05f5a591d5cfca0f38876d11fc09973a Mon Sep 17 00:00:00 2001
From: MarcelMB <marcelbrosch.mb@gmail.com>
Date: Mon, 16 Mar 2026 15:24:52 -0700
Subject: [PATCH 01/21] Add min_rows to BlackAreaDetector and timestamp-based
 matching to stitch

BlackAreaDetector now supports a min_rows parameter (default=1, preserving
backward compatibility) that requires multiple flagged rows before marking
a frame as invalid. This eliminates false positives from naturally dark
regions in calcium imaging data.

Stitch now supports --match-by=timestamp which matches frames across
recordings by nearest buffer_recv_unix_time (within configurable threshold),
instead of by device frame_num. This handles DAQs with offset frame
numbering, different start/stop times, and mid-recording gaps automatically.

Also adds denoise_calcium_imaging.yml example config with recommended
parameters for calcium imaging (consecutive_threshold=30, value_threshold=0,
min_rows=10).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 mio/cli/process.py                            |  44 +++++-
 .../process/denoise_calcium_imaging.yml       |  28 ++++
 mio/models/process.py                         |   6 +
 mio/process/frame_helper.py                   |   7 +-
 mio/process/stitch.py                         | 135 ++++++++++++++++--
 tests/test_process/test_frame_helper.py       |  31 +++-
 tests/test_process/test_stitch.py             |  38 +++++
 7 files changed, 269 insertions(+), 20 deletions(-)
 create mode 100644 mio/data/config/process/denoise_calcium_imaging.yml

diff --git a/mio/cli/process.py b/mio/cli/process.py
index 0c34af86..ccd5dd8d 100644
--- a/mio/cli/process.py
+++ b/mio/cli/process.py
@@ -212,12 +212,27 @@ def crop(
     default=20,
     help="Frames per second for output video.",
 )
+@click.option(
+    "--match-by",
+    type=click.Choice(["frame_num", "timestamp"]),
+    default="frame_num",
+    help="Frame matching strategy. 'frame_num' matches by device frame number. "
+    "'timestamp' matches by nearest buffer_recv_unix_time.",
+)
+@click.option(
+    "--timestamp-threshold",
+    type=float,
+    default=25.0,
+    help="Max time difference in ms for timestamp matching (default: 25).",
+)
 def stitch(
     inputs: tuple,
     output: Optional[str],
     debug_video: Optional[str],
     debug_csv: Optional[str],
     fps: int,
+    match_by: str,
+    timestamp_threshold: float,
 ) -> None:
     """
     Stitch multiple video recordings into one by selecting the best frame
@@ -250,8 +265,11 @@ def stitch(
         debug_csv_path=debug_csv_path,
     )
 
-    click.echo(f"Stitching {len(recordings)} recordings...")
-    recording_bundle.stitch_recordings()
+    click.echo(f"Stitching {len(recordings)} recordings (match-by={match_by})...")
+    recording_bundle.stitch_recordings(
+        matching_method=match_by,
+        timestamp_threshold_ms=timestamp_threshold,
+    )
 
     try:
         validate_video_metadata_match(stitched_video_path)
@@ -304,6 +322,19 @@ def stitch(
     default=20,
     help="Frames per second for stitched video.",
 )
+@click.option(
+    "--match-by",
+    type=click.Choice(["frame_num", "timestamp"]),
+    default="frame_num",
+    help="Frame matching strategy for stitching. 'frame_num' matches by device frame number. "
+    "'timestamp' matches by nearest buffer_recv_unix_time.",
+)
+@click.option(
+    "--timestamp-threshold",
+    type=float,
+    default=25.0,
+    help="Max time difference in ms for timestamp matching (default: 25).",
+)
 def workflow(
     inputs: tuple,
     output: Optional[str],
@@ -311,6 +342,8 @@ def workflow(
     trim_start: int,
     trim_end: int,
     fps: int,
+    match_by: str,
+    timestamp_threshold: float,
 ) -> None:
     """
     Complete workflow: stitch → trim → denoise with validation at each step.
@@ -421,8 +454,11 @@ def workflow(
             debug_csv_path=debug_csv_path,
         )
 
-        click.echo(f"Stitching {len(recordings)} recordings...")
-        recording_bundle.stitch_recordings()
+        click.echo(f"Stitching {len(recordings)} recordings (match-by={match_by})...")
+        recording_bundle.stitch_recordings(
+            matching_method=match_by,
+            timestamp_threshold_ms=timestamp_threshold,
+        )
 
         try:
             validate_video_metadata_match(stitched_video_path)
diff --git a/mio/data/config/process/denoise_calcium_imaging.yml b/mio/data/config/process/denoise_calcium_imaging.yml
new file mode 100644
index 00000000..059b16d5
--- /dev/null
+++ b/mio/data/config/process/denoise_calcium_imaging.yml
@@ -0,0 +1,28 @@
+id: denoise_calcium_imaging
+mio_model: mio.models.process.DenoiseConfig
+mio_version: 0.6.1
+noise_patch:
+  enable: true
+  method: [gradient, black_area]
+  gradient_config:
+    threshold: 20
+  black_area_config:
+    consecutive_threshold: 30
+    value_threshold: 0
+    min_rows: 10
+  output_result: true
+  output_noise_patch: true
+  output_noisy_frames: true
+frequency_masking:
+  enable: true
+  cast_float32: true
+  spatial_LPF_cutoff_radius: 15
+  vertical_BEF_cutoff: 2
+  horizontal_BEF_cutoff: 0
+  output_result: true
+minimum_projection:
+  enable: true
+  normalize: true
+  output_result: true
+end_frame: -1
+output_result: true
diff --git a/mio/models/process.py b/mio/models/process.py
index 03fddafb..ac2b799e 100644
--- a/mio/models/process.py
+++ b/mio/models/process.py
@@ -101,6 +101,12 @@ class BlackAreaDetectorConfig(BaseModel):
         default=0,
         description="Pixel intensity value below which a pixel is considered 'black'.",
     )
+    min_rows: int = Field(
+        default=1,
+        description="Minimum number of flagged rows required to mark the frame as invalid. "
+        "Default of 1 preserves original behavior. For calcium imaging, values around 10 "
+        "reduce false positives from naturally dark regions.",
+    )
 
 
 class NoisePatchConfig(BaseModel):
diff --git a/mio/process/frame_helper.py b/mio/process/frame_helper.py
index 9b614a6e..202116f4 100644
--- a/mio/process/frame_helper.py
+++ b/mio/process/frame_helper.py
@@ -234,7 +234,7 @@ def _detect_black_pixels(
         logger.debug(f"Using black pixel threshold: <= {black_pixel_value_threshold}")
         logger.debug(f"Consecutive black pixel threshold: {consecutive_threshold}")
 
-        frame_is_noisy = False  # Track if frame should be discarded
+        noisy_row_count = 0
 
         for y in range(height):
             row = current_frame[y, :]  # Extract row
@@ -246,16 +246,17 @@ def _detect_black_pixels(
                 else:
                     consecutive_count = 0  # Reset if a non-black pixel is found
 
-                # If we exceed the allowed threshold of consecutive black pixels, discard the frame
+                # If we exceed the allowed threshold of consecutive black pixels, flag the row
                 if consecutive_count >= consecutive_threshold:
                     logger.debug(
                         f"Frame noisy due to {consecutive_count} consecutive black pixels "
                         f"in row {y}."
                     )
                     noisy_mask[y, :] = 1  # Mark row as noisy
-                    frame_is_noisy = True
+                    noisy_row_count += 1
                     break  # No need to check further in this row
 
+        frame_is_noisy = noisy_row_count >= self.config.min_rows
         return frame_is_noisy, noisy_mask
 
 
diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index 06297b65..91b4fe59 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -251,20 +251,133 @@ def _finalize(self) -> None:
                 self.combined_csv_path, index=False
             )
 
-    def stitch_recordings(self) -> None:
-        """Stitch recordings by iterating unique frame_nums and selecting the best frame."""
+    def _build_timestamp_matches(
+        self, threshold_ms: float = 25.0
+    ) -> list[dict[int, int]]:
+        """
+        Match frames across recordings by nearest unix timestamp.
+
+        For each recording, compute per-frame timestamp as the max
+        buffer_recv_unix_time for each reconstructed_frame_index.
+
+        Uses recording[0] as the reference. For each frame in ref,
+        find nearest frame in each other recording within threshold.
+
+        Returns list of dicts: [{rec_idx: reconstructed_frame_index, ...}, ...]
+        One entry per matched frame set, ordered by ref recording's frame order.
+        """
+        threshold_s = threshold_ms / 1000.0
+
+        # Build per-frame timestamp arrays for each recording
+        per_rec_timestamps: list[tuple[np.ndarray, np.ndarray]] = []
+        for rec in self.recordings:
+            df = rec.metadata
+            grouped = df.groupby("reconstructed_frame_index")["buffer_recv_unix_time"].max()
+            frame_indices = grouped.index.values
+            timestamps = grouped.values
+            sort_order = np.argsort(timestamps)
+            per_rec_timestamps.append((frame_indices[sort_order], timestamps[sort_order]))
+
+        ref_indices, ref_timestamps = per_rec_timestamps[0]
+        matches: list[dict[int, int]] = []
+
+        for i, (ref_idx, ref_ts) in enumerate(zip(ref_indices, ref_timestamps)):
+            match: dict[int, int] = {0: int(ref_idx)}
+            for rec_num in range(1, len(self.recordings)):
+                other_indices, other_timestamps = per_rec_timestamps[rec_num]
+                pos = np.searchsorted(other_timestamps, ref_ts)
+
+                best_dist = float("inf")
+                best_idx = -1
+                for candidate_pos in [pos - 1, pos]:
+                    if 0 <= candidate_pos < len(other_timestamps):
+                        dist = abs(other_timestamps[candidate_pos] - ref_ts)
+                        if dist < best_dist:
+                            best_dist = dist
+                            best_idx = int(other_indices[candidate_pos])
+
+                if best_dist <= threshold_s:
+                    match[rec_num] = best_idx
+
+            if len(match) > 1:
+                matches.append(match)
+
+        return matches
+
+    def _collect_candidates_by_index(
+        self, frame_indices: dict[int, int]
+    ) -> list[CandidateFrame]:
+        """Collect candidates using reconstructed_frame_index directly."""
+        candidates: list[CandidateFrame] = []
+        for rec_num, rfi in frame_indices.items():
+            recording = self.recordings[rec_num]
+            rows = recording.metadata[recording.metadata["reconstructed_frame_index"] == rfi]
+            if rows.empty:
+                continue
+            frame = recording.video_reader.read_frame(rfi)
+            if frame is None:
+                continue
+            num_buffers = int(len(rows))
+            sum_black = int(rows["black_padding_px"].fillna(0).sum())
+            candidates.append(
+                CandidateFrame(
+                    recording=recording,
+                    frame=frame,
+                    num_buffers=num_buffers,
+                    sum_black_padding=sum_black,
+                    metadata_rows=rows,
+                    edge_score=score_edges(frame),
+                )
+            )
+        return candidates
+
+    def stitch_recordings(
+        self,
+        matching_method: str = "frame_num",
+        timestamp_threshold_ms: float = 25.0,
+    ) -> None:
+        """Stitch recordings by selecting the best frame per matched position.
+
+        Parameters
+        ----------
+        matching_method : str
+            ``"frame_num"`` (default) matches by device frame_num.
+            ``"timestamp"`` matches by nearest ``buffer_recv_unix_time``.
+        timestamp_threshold_ms : float
+            Max time difference in ms for timestamp matching (default 25).
+        """
         stitched_writes = 0
         debug_writes = 0
-        frame_iter = tqdm(self.combined_frame_num, desc="Stitching frames")
 
-        for frame_num in frame_iter:
-            valid_pairs = self._collect_candidates(frame_num)
-            if not valid_pairs:
-                continue
-            selected_idx, is_tie = select_best_candidate(valid_pairs)
-            debug_writes += self._write_debug(frame_num, valid_pairs, selected_idx, is_tie)
-            self._write_stitched(valid_pairs, selected_idx)
-            stitched_writes += 1
+        if matching_method == "timestamp":
+            matches = self._build_timestamp_matches(
+                threshold_ms=timestamp_threshold_ms
+            )
+            frame_iter = tqdm(matches, desc="Stitching frames (timestamp)")
+            for match in frame_iter:
+                candidates = self._collect_candidates_by_index(match)
+                if not candidates:
+                    continue
+                selected_idx, is_tie = select_best_candidate(candidates)
+                # Use first recording's frame index as label for debug
+                frame_label = match.get(0, 0)
+                debug_writes += self._write_debug(
+                    frame_label, candidates, selected_idx, is_tie
+                )
+                self._write_stitched(candidates, selected_idx)
+                stitched_writes += 1
+        else:
+            frame_iter = tqdm(self.combined_frame_num, desc="Stitching frames")
+            for frame_num in frame_iter:
+                valid_pairs = self._collect_candidates(frame_num)
+                if not valid_pairs:
+                    continue
+                selected_idx, is_tie = select_best_candidate(valid_pairs)
+                debug_writes += self._write_debug(
+                    frame_num, valid_pairs, selected_idx, is_tie
+                )
+                self._write_stitched(valid_pairs, selected_idx)
+                stitched_writes += 1
 
         self._finalize()
         logger.info(
diff --git a/tests/test_process/test_frame_helper.py b/tests/test_process/test_frame_helper.py
index 41b7ad55..2c7980fe 100644
--- a/tests/test_process/test_frame_helper.py
+++ b/tests/test_process/test_frame_helper.py
@@ -7,8 +7,8 @@
 from pprint import pformat
 from pydantic import BaseModel
 
-from mio.models.process import DenoiseConfig, NoisePatchConfig
-from mio.process.frame_helper import InvalidFrameDetector
+from mio.models.process import BlackAreaDetectorConfig, DenoiseConfig, NoisePatchConfig
+from mio.process.frame_helper import BlackAreaDetector, InvalidFrameDetector
 
 from ..conftest import DATA_DIR
 
@@ -122,3 +122,30 @@ def test_noisy_frame_detection(video, ground_truth, noise_detection_method, nois
     )
     extra_frames = set(detected_frame_indices) - all_expected
     assert extra_frames == set(), f"Detected extra, non-noise frames as noisy: {extra_frames}"
+
+
+@pytest.mark.parametrize(
+    "min_rows,expected_noisy",
+    [
+        (1, True),   # default: any flagged row triggers detection
+        (5, True),   # exactly 5 noisy rows meets the threshold
+        (10, False),  # only 5 noisy rows, below threshold of 10
+    ],
+)
+def test_black_area_min_rows(min_rows, expected_noisy):
+    """min_rows controls how many flagged rows are needed to mark a frame as invalid."""
+    # Create a 50x50 frame with 5 rows of consecutive zeros (noisy) and the rest bright
+    frame = np.ones((50, 50), dtype=np.uint8) * 128
+    for row in range(5):
+        frame[row, :30] = 0  # 30 consecutive black pixels in rows 0-4
+
+    config = BlackAreaDetectorConfig(
+        consecutive_threshold=10,
+        value_threshold=0,
+        min_rows=min_rows,
+    )
+    detector = BlackAreaDetector(config)
+    is_noisy, mask = detector.find_invalid_area(frame)
+    assert is_noisy == expected_noisy, (
+        f"min_rows={min_rows}: expected noisy={expected_noisy}, got {is_noisy}"
+    )
diff --git a/tests/test_process/test_stitch.py b/tests/test_process/test_stitch.py
index a1c641b1..611e4b8c 100644
--- a/tests/test_process/test_stitch.py
+++ b/tests/test_process/test_stitch.py
@@ -357,6 +357,44 @@ def test_edge_scoring_selects_less_sharp():
     assert score_edges(uniform) > score_edges(edgy)
 
 
+def test_stitch_timestamp_matching(tmp_path):
+    """Timestamp matching produces a valid stitched output on real fixtures."""
+    recordings = [
+        RecordingData(
+            video_path=STITCH_DATA_DIR / "video1.avi",
+            csv_path=STITCH_DATA_DIR / "video1.csv",
+        ),
+        RecordingData(
+            video_path=STITCH_DATA_DIR / "video2.avi",
+            csv_path=STITCH_DATA_DIR / "video2.csv",
+        ),
+    ]
+
+    stitched_video = tmp_path / "stitched.avi"
+    stitched_csv = tmp_path / "stitched.csv"
+
+    bundle = RecordingDataBundle(
+        recordings=recordings,
+        stitched_video_writer=VideoWriter(path=stitched_video, fps=20),
+        combined_csv_path=stitched_csv,
+    )
+    bundle.stitch_recordings(matching_method="timestamp", timestamp_threshold_ms=25.0)
+
+    # Should produce a non-empty stitched video
+    cap = cv2.VideoCapture(str(stitched_video))
+    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    cap.release()
+    assert frame_count > 0, "Timestamp matching produced no output frames"
+
+    # Stitched CSV should exist and have contiguous reconstructed_frame_index
+    df = pd.read_csv(stitched_csv)
+    indices = sorted(df["reconstructed_frame_index"].unique())
+    assert indices == list(range(len(indices)))
+
+    # Frame count should be similar to frame_num matching (within 20%)
+    assert abs(frame_count - EXPECTED_STITCHED_FRAME_COUNT) / EXPECTED_STITCHED_FRAME_COUNT < 0.2
+
+
 def test_frame_info_majority_vote_rfi():
     """When a frame_num maps to multiple rfi values, majority wins."""
     base = {

From 49a5d89ecda1a6ec9500c00439e990da86a56f67 Mon Sep 17 00:00:00 2001
From: MarcelMB <marcelbrosch.mb@gmail.com>
Date: Mon, 16 Mar 2026 15:58:00 -0700
Subject: [PATCH 02/21] Add concat command to merge sequential recording
 segments

New `mio process concat -d /path/to/daq1/` command discovers all .avi
files (with companion .csv) in a directory, sorts by filename, and
concatenates them into a single video + CSV with contiguous
reconstructed_frame_index. This is needed when a DAQ produces multiple
segment files that must be combined before cross-DAQ stitching.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 mio/cli/process.py                | 86 ++++++++++++++++++++++++++++++-
 mio/process/stitch.py             | 62 +++++++++++++++++++++-
 tests/test_process/test_stitch.py | 50 ++++++++++++++++++
 3 files changed, 196 insertions(+), 2 deletions(-)

diff --git a/mio/cli/process.py b/mio/cli/process.py
index ccd5dd8d..cf3d2c3f 100644
--- a/mio/cli/process.py
+++ b/mio/cli/process.py
@@ -12,7 +12,7 @@
 from mio.io import VideoWriter
 from mio.logging import init_logger
 from mio.models.process import DenoiseConfig
-from mio.process.stitch import RecordingData, RecordingDataBundle
+from mio.process.stitch import RecordingData, RecordingDataBundle, concat_recordings
 from mio.process.video import crop_run, denoise_run
 from mio.utils import (
     DEFAULT_PROCESS_DIR,
@@ -177,6 +177,90 @@ def crop(
     click.echo(f"✅ Frame count alignment verified: {cropped_output}")
 
 
+@process.command()
+@click.option(
+    "-d",
+    "--directory",
+    required=True,
+    type=click.Path(exists=True, file_okay=False),
+    help="Directory containing .avi segment files. All .avi files with companion "
+    ".csv files will be discovered and sorted by name.",
+)
+@click.option(
+    "-o",
+    "--output",
+    type=click.Path(),
+    default=None,
+    help="Path to the output concatenated video file or directory. "
+    f"If not specified, saves to {DEFAULT_PROCESS_DIR}/ with '_combined' suffix.",
+)
+@click.option(
+    "--fps",
+    type=int,
+    default=20,
+    help="Frames per second for output video.",
+)
+def concat(
+    directory: str,
+    output: Optional[str],
+    fps: int,
+) -> None:
+    """
+    Concatenate sequential recording segments from one DAQ into a single video.
+
+    Discovers all .avi files in the given directory (that have companion .csv files),
+    sorts them by filename, and concatenates them into a single video + CSV with
+    contiguous reconstructed_frame_index.
+
+    Use this to combine multiple segment files (e.g. long-2.avi, long-3.avi, ...)
+    from the same DAQ before stitching across DAQs.
+    """
+    dir_path = Path(directory)
+    avi_files = sorted(dir_path.glob("*.avi"))
+
+    # Filter to only AVIs that have a companion CSV
+    valid_avis = []
+    for avi in avi_files:
+        csv_path = avi.with_suffix(".csv")
+        if csv_path.exists():
+            valid_avis.append(avi)
+        else:
+            click.echo(f"  Skipping {avi.name} (no companion .csv found)")
+
+    if len(valid_avis) < 2:
+        raise click.ClickException(
+            f"Need at least 2 .avi files with companion .csv files in {directory}, "
+            f"found {len(valid_avis)}."
+        )
+
+    click.echo(f"Found {len(valid_avis)} segments in {directory}:")
+    for avi in valid_avis:
+        click.echo(f"  {avi.name}")
+
+    recordings = RecordingData.from_video_paths(valid_avis)
+
+    first_input_path = valid_avis[0]
+    output_arg = output if output is not None else DEFAULT_PROCESS_DIR
+    combined_video_path = resolve_output_path(first_input_path, "_combined", output_arg)
+    combined_csv_path = combined_video_path.with_suffix(".csv")
+
+    click.echo(f"Concatenating {len(recordings)} segments...")
+    concat_recordings(
+        recordings=recordings,
+        output_video_path=combined_video_path,
+        output_csv_path=combined_csv_path,
+        fps=fps,
+    )
+
+    try:
+        validate_video_metadata_match(combined_video_path)
+    except VideoMetadataError as e:
+        raise click.ClickException(
+            f"Frame count alignment failed after concatenation: {e}"
+        ) from None
+    click.echo(f"✅ Frame count alignment verified: {combined_video_path}")
+
+
 @process.command()
 @click.option(
     "-i",
diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index 91b4fe59..d291ff28 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -1,8 +1,9 @@
 """
-Buffer-wise stitching of multiple data streams based on device timestamps.
+Buffer-wise stitching and concatenation of multiple data streams.
 
 This module combines multiple recordings (AVI video + metadata CSV) by selecting
 the best buffers from each stream using gradient noise detection.
+It also provides concatenation of sequential recording segments from the same DAQ.
 This is still hardcoded around the StreamDevConfig metadata fields.
 """
 
@@ -383,3 +384,62 @@ def stitch_recordings(
         logger.info(
             f"Stitch completed: stitched_writes={stitched_writes}, debug_writes={debug_writes}"
         )
+
+
+def concat_recordings(
+    recordings: list[RecordingData],
+    output_video_path: Path,
+    output_csv_path: Path,
+    fps: int = 20,
+) -> None:
+    """Concatenate sequential recording segments into a single video + CSV.
+
+    Each recording's frames are appended in order. The CSV metadata is merged
+    with ``reconstructed_frame_index`` renumbered to be contiguous across all
+    segments.
+
+    Parameters
+    ----------
+    recordings : list[RecordingData]
+        Ordered list of recording segments to concatenate.
+    output_video_path : Path
+        Path for the combined output AVI.
+    output_csv_path : Path
+        Path for the combined output CSV.
+    fps : int
+        Frames per second for the output video.
+    """
+    video_writer = VideoWriter(path=output_video_path, fps=fps)
+    metadata_parts: list[pd.DataFrame] = []
+    rfi_offset = 0
+    total_frames = 0
+
+    for i, rec in enumerate(tqdm(recordings, desc="Concatenating segments")):
+        # Copy all video frames
+        seg_frames = 0
+        for _, frame in rec.video_reader.read_frames():
+            video_writer.write_frame(frame)
+            seg_frames += 1
+
+        # Offset reconstructed_frame_index in metadata
+        df = rec.metadata.copy()
+        max_rfi = int(df["reconstructed_frame_index"].max())
+        df["reconstructed_frame_index"] = df["reconstructed_frame_index"] + rfi_offset
+        metadata_parts.append(df)
+
+        logger.info(
+            f"Segment {i}: {rec.video_path.name} — "
+            f"{seg_frames} frames, rfi_offset={rfi_offset}"
+        )
+        rfi_offset += max_rfi + 1
+        total_frames += seg_frames
+
+    video_writer.close()
+
+    combined_df = pd.concat(metadata_parts, ignore_index=True)
+    combined_df.to_csv(output_csv_path, index=False)
+
+    logger.info(
+        f"Concat completed: {total_frames} frames from "
+        f"{len(recordings)} segments -> {output_video_path}"
+    )
diff --git a/tests/test_process/test_stitch.py b/tests/test_process/test_stitch.py
index 611e4b8c..033dd668 100644
--- a/tests/test_process/test_stitch.py
+++ b/tests/test_process/test_stitch.py
@@ -18,6 +18,7 @@
     CandidateFrame,
     RecordingData,
     RecordingDataBundle,
+    concat_recordings,
     select_best_candidate,
     score_edges,
 )
@@ -357,6 +358,55 @@ def test_edge_scoring_selects_less_sharp():
     assert score_edges(uniform) > score_edges(edgy)
 
 
+def test_concat_recordings(tmp_path):
+    """Concatenating two recordings produces contiguous frame indices and correct frame count."""
+    recordings = [
+        RecordingData(
+            video_path=STITCH_DATA_DIR / "video1.avi",
+            csv_path=STITCH_DATA_DIR / "video1.csv",
+        ),
+        RecordingData(
+            video_path=STITCH_DATA_DIR / "video2.avi",
+            csv_path=STITCH_DATA_DIR / "video2.csv",
+        ),
+    ]
+
+    combined_video = tmp_path / "combined.avi"
+    combined_csv = tmp_path / "combined.csv"
+
+    concat_recordings(
+        recordings=recordings,
+        output_video_path=combined_video,
+        output_csv_path=combined_csv,
+        fps=20,
+    )
+
+    # Video frame count should be sum of both inputs
+    cap1 = cv2.VideoCapture(str(STITCH_DATA_DIR / "video1.avi"))
+    cap2 = cv2.VideoCapture(str(STITCH_DATA_DIR / "video2.avi"))
+    expected_frames = int(cap1.get(cv2.CAP_PROP_FRAME_COUNT)) + int(
+        cap2.get(cv2.CAP_PROP_FRAME_COUNT)
+    )
+    cap1.release()
+    cap2.release()
+
+    cap = cv2.VideoCapture(str(combined_video))
+    actual_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    cap.release()
+    assert actual_frames == expected_frames
+
+    # CSV should have contiguous reconstructed_frame_index
+    df = pd.read_csv(combined_csv)
+    indices = sorted(df["reconstructed_frame_index"].unique())
+    assert indices == list(range(len(indices)))
+
+    # Second segment's rfi should start after first segment's max rfi
+    df1 = pd.read_csv(STITCH_DATA_DIR / "video1.csv")
+    max_rfi_1 = df1["reconstructed_frame_index"].max()
+    # Combined CSV should have indices beyond max_rfi_1
+    assert df["reconstructed_frame_index"].max() > max_rfi_1
+
+
 def test_stitch_timestamp_matching(tmp_path):
     """Timestamp matching produces a valid stitched output on real fixtures."""
     recordings = [

From 5fbfea6e3ce366b036c9af310f4c40bea91ce2c2 Mon Sep 17 00:00:00 2001
From: MarcelMB <marcelbrosch.mb@gmail.com>
Date: Mon, 16 Mar 2026 16:00:06 -0700
Subject: [PATCH 03/21] Use natural numeric sort for concat segment discovery

Alphabetical sort puts long-10 before long-2. Natural sort correctly
orders segments by their numeric suffix (2, 3, ..., 9, 10, 12, 13).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 mio/cli/process.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/mio/cli/process.py b/mio/cli/process.py
index cf3d2c3f..63f41188 100644
--- a/mio/cli/process.py
+++ b/mio/cli/process.py
@@ -2,6 +2,7 @@
 Command line interface for offline video pre-processing.
 """
 
+import re
 import shutil
 from pathlib import Path
 from typing import Optional
@@ -184,7 +185,7 @@ def crop(
     required=True,
     type=click.Path(exists=True, file_okay=False),
     help="Directory containing .avi segment files. All .avi files with companion "
-    ".csv files will be discovered and sorted by name.",
+    ".csv files will be discovered and sorted in natural numeric order.",
 )
 @click.option(
     "-o",
@@ -216,7 +217,15 @@ def concat(
     from the same DAQ before stitching across DAQs.
     """
     dir_path = Path(directory)
-    avi_files = sorted(dir_path.glob("*.avi"))
+
+    def _natural_sort_key(path: Path) -> list:
+        """Sort by splitting name into text and numeric parts for natural ordering."""
+        return [
+            int(part) if part.isdigit() else part.lower()
+            for part in re.split(r"(\d+)", path.name)
+        ]
+
+    avi_files = sorted(dir_path.glob("*.avi"), key=_natural_sort_key)
 
     # Filter to only AVIs that have a companion CSV
     valid_avis = []

From c8ccd6319d3de1d4985d0f17d3469431c8a1ce30 Mon Sep 17 00:00:00 2001
From: MarcelMB <marcelbrosch.mb@gmail.com>
Date: Mon, 16 Mar 2026 16:16:13 -0700
Subject: [PATCH 04/21] Fix concat CSV discovery for mismatched AVI/CSV names

AVI files like long-8-002.avi have CSVs named long-8.csv (without the
extra numeric suffix). The concat command now strips trailing -NNN
suffixes when looking for companion CSVs, and shows the mapping in
output. Also quiets per-frame debug messages to logger only.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 mio/cli/process.py    | 41 ++++++++++++++++++++++++++++++-----------
 mio/process/stitch.py |  1 -
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/mio/cli/process.py b/mio/cli/process.py
index 63f41188..577cc158 100644
--- a/mio/cli/process.py
+++ b/mio/cli/process.py
@@ -227,28 +227,47 @@ def _natural_sort_key(path: Path) -> list:
 
     avi_files = sorted(dir_path.glob("*.avi"), key=_natural_sort_key)
 
+    def _find_companion_csv(avi_path: Path) -> Optional[Path]:
+        """Find companion CSV, handling name mismatches like long-8-002.avi -> long-8.csv."""
+        # Try exact stem match first
+        csv_path = avi_path.with_suffix(".csv")
+        if csv_path.exists():
+            return csv_path
+        # Try stripping trailing numeric suffixes: long-8-002.avi -> long-8.csv
+        stem = avi_path.stem
+        stripped = re.sub(r"-\d+$", "", stem)
+        while stripped != stem:
+            csv_path = avi_path.parent / f"{stripped}.csv"
+            if csv_path.exists():
+                return csv_path
+            stem = stripped
+            stripped = re.sub(r"-\d+$", "", stem)
+        return None
+
     # Filter to only AVIs that have a companion CSV
-    valid_avis = []
+    valid_pairs: list[tuple[Path, Path]] = []
     for avi in avi_files:
-        csv_path = avi.with_suffix(".csv")
-        if csv_path.exists():
-            valid_avis.append(avi)
+        csv_path = _find_companion_csv(avi)
+        if csv_path is not None:
+            valid_pairs.append((avi, csv_path))
         else:
             click.echo(f"  Skipping {avi.name} (no companion .csv found)")
 
-    if len(valid_avis) < 2:
+    if len(valid_pairs) < 2:
         raise click.ClickException(
             f"Need at least 2 .avi files with companion .csv files in {directory}, "
-            f"found {len(valid_avis)}."
+            f"found {len(valid_pairs)}."
         )
 
-    click.echo(f"Found {len(valid_avis)} segments in {directory}:")
-    for avi in valid_avis:
-        click.echo(f"  {avi.name}")
+    click.echo(f"Found {len(valid_pairs)} segments in {directory}:")
+    for avi, csv in valid_pairs:
+        click.echo(f"  {avi.name} -> {csv.name}")
 
-    recordings = RecordingData.from_video_paths(valid_avis)
+    recordings = [
+        RecordingData(video_path=avi, csv_path=csv) for avi, csv in valid_pairs
+    ]
 
-    first_input_path = valid_avis[0]
+    first_input_path = valid_pairs[0][0]
     output_arg = output if output is not None else DEFAULT_PROCESS_DIR
     combined_video_path = resolve_output_path(first_input_path, "_combined", output_arg)
     combined_csv_path = combined_video_path.with_suffix(".csv")
diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index d291ff28..8003dfba 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -197,7 +197,6 @@ def _write_debug(
                 f"Frames are not the same for frame {frame_num} "
                 f"(Rec {selected_idx} vs Rec {idx}): {diff_pixels} px differ"
             )
-            tqdm.write(msg)
             logger.debug(msg)
 
             if self.debug_video_writer is not None:

From b1814206818d76830e7514ec283f4639e04f5743 Mon Sep 17 00:00:00 2001
From: MarcelMB <marcelbrosch.mb@gmail.com>
Date: Mon, 16 Mar 2026 17:33:08 -0700
Subject: [PATCH 05/21] Fix denoise_calcium_imaging config: add required fields
 to frequency_masking

The FrequencyMaskingConfig requires id, mio_model, and mio_version fields
from its MiniscopeConfig base class. Without these, the config fails
Pydantic validation at runtime.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 mio/data/config/process/denoise_calcium_imaging.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mio/data/config/process/denoise_calcium_imaging.yml b/mio/data/config/process/denoise_calcium_imaging.yml
index 059b16d5..73b45b8b 100644
--- a/mio/data/config/process/denoise_calcium_imaging.yml
+++ b/mio/data/config/process/denoise_calcium_imaging.yml
@@ -14,6 +14,9 @@ noise_patch:
   output_noise_patch: true
   output_noisy_frames: true
 frequency_masking:
+  id: frequency_masking_calcium_imaging
+  mio_model: mio.models.process.FrequencyMaskingConfig
+  mio_version: 0.6.1
   enable: true
   cast_float32: true
   spatial_LPF_cutoff_radius: 15

From 551cd628b8c8adb13c10fd144ec5c5a53d989043 Mon Sep 17 00:00:00 2001
From: MarcelMB <marcelbrosch.mb@gmail.com>
Date: Mon, 16 Mar 2026 18:31:14 -0700
Subject: [PATCH 06/21] Add noise-aware frame selection, vectorize
 BlackAreaDetector, add stitch diagnostics

- Add --selection-mode noise_aware to stitch/workflow: uses InvalidFrameDetector
  during stitching to pick clean frames and skip both-broken pairs
- Vectorize BlackAreaDetector with numpy cumsum sliding window (~100x faster)
- Skip Sobel edge scoring in noise_aware mode for additional speedup
- Add --max-frames flag for quick test runs
- Add terminal noise summary (per-DAQ noisy counts, both-broken percentage)
- Add noise_report.png (timeline, run length distribution, drop density)
- Add both_broken.avi debug output for manual inspection of skipped frames
- Fix denoise_calcium_imaging.yml: add interactive_display section

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 mio/cli/process.py                            |  73 +++++-
 .../process/denoise_calcium_imaging.yml       |   5 +
 mio/models/stitch.py                          |   3 +
 mio/process/frame_helper.py                   |  57 ++--
 mio/process/stitch.py                         | 247 +++++++++++++++++-
 5 files changed, 340 insertions(+), 45 deletions(-)

diff --git a/mio/cli/process.py b/mio/cli/process.py
index 577cc158..7a386378 100644
--- a/mio/cli/process.py
+++ b/mio/cli/process.py
@@ -12,7 +12,7 @@
 from mio.exceptions import VideoMetadataError
 from mio.io import VideoWriter
 from mio.logging import init_logger
-from mio.models.process import DenoiseConfig
+from mio.models.process import DenoiseConfig, NoisePatchConfig
 from mio.process.stitch import RecordingData, RecordingDataBundle, concat_recordings
 from mio.process.video import crop_run, denoise_run
 from mio.utils import (
@@ -337,6 +337,27 @@ def _find_companion_csv(avi_path: Path) -> Optional[Path]:
     default=25.0,
     help="Max time difference in ms for timestamp matching (default: 25).",
 )
+@click.option(
+    "--max-frames",
+    type=int,
+    default=-1,
+    help="Maximum number of frames to process. -1 means all frames. "
+    "Useful for quick test runs.",
+)
+@click.option(
+    "--selection-mode",
+    type=click.Choice(["metadata", "noise_aware"]),
+    default="metadata",
+    help="Frame selection strategy. 'metadata' uses buffer count + edge scoring (default). "
+    "'noise_aware' uses noise detection to pick clean frames and skip both-bad pairs.",
+)
+@click.option(
+    "--noise-config",
+    type=str,
+    default=None,
+    help="Denoise config ID or YAML path for noise detection (used with --selection-mode noise_aware). "
+    "Uses the noise_patch section from the config.",
+)
 def stitch(
     inputs: tuple,
     output: Optional[str],
@@ -345,6 +366,9 @@ def stitch(
     fps: int,
     match_by: str,
     timestamp_threshold: float,
+    max_frames: int,
+    selection_mode: str,
+    noise_config: Optional[str],
 ) -> None:
     """
     Stitch multiple video recordings into one by selecting the best frame
@@ -355,6 +379,16 @@ def stitch(
     if len(inputs) < 2:
         raise click.ClickException("At least 2 input videos are required for stitching.")
 
+    noise_patch_config = None
+    if selection_mode == "noise_aware":
+        if noise_config is None:
+            raise click.ClickException(
+                "--noise-config is required when using --selection-mode noise_aware"
+            )
+        denoise_cfg = DenoiseConfig.from_any(noise_config)
+        noise_patch_config = denoise_cfg.noise_patch
+        click.echo(f"Noise-aware selection enabled (methods: {noise_patch_config.method})")
+
     recordings = RecordingData.from_video_paths([Path(p) for p in inputs])
 
     first_input_path = Path(inputs[0])
@@ -369,18 +403,28 @@ def stitch(
     stitched_video_writer = VideoWriter(path=stitched_video_path, fps=fps)
     debug_video_writer = VideoWriter(path=debug_video_path, fps=fps) if debug_video_path else None
 
+    # Set up debug dir for noise report when using noise_aware
+    stitch_debug_dir = None
+    if noise_patch_config is not None:
+        stitch_debug_dir = stitched_video_path.parent / "debug"
+        stitch_debug_dir.mkdir(parents=True, exist_ok=True)
+
     recording_bundle = RecordingDataBundle(
         recordings=recordings,
         stitched_video_writer=stitched_video_writer,
         debug_video_writer=debug_video_writer,
         combined_csv_path=output_csv_path,
         debug_csv_path=debug_csv_path,
+        noise_config=noise_patch_config,
+        debug_dir=stitch_debug_dir,
+        fps=fps,
     )
 
     click.echo(f"Stitching {len(recordings)} recordings (match-by={match_by})...")
     recording_bundle.stitch_recordings(
         matching_method=match_by,
         timestamp_threshold_ms=timestamp_threshold,
+        max_frames=max_frames,
     )
 
     try:
@@ -447,6 +491,20 @@ def stitch(
     default=25.0,
     help="Max time difference in ms for timestamp matching (default: 25).",
 )
+@click.option(
+    "--max-frames",
+    type=int,
+    default=-1,
+    help="Maximum number of frames to process during stitching. -1 means all frames. "
+    "Useful for quick test runs.",
+)
+@click.option(
+    "--selection-mode",
+    type=click.Choice(["metadata", "noise_aware"]),
+    default="metadata",
+    help="Frame selection strategy for stitching. 'metadata' uses buffer count + edge scoring (default). "
+    "'noise_aware' uses noise detection to pick clean frames and skip both-bad pairs.",
+)
 def workflow(
     inputs: tuple,
     output: Optional[str],
@@ -456,6 +514,8 @@ def workflow(
     fps: int,
     match_by: str,
     timestamp_threshold: float,
+    max_frames: int,
+    selection_mode: str,
 ) -> None:
     """
     Complete workflow: stitch → trim → denoise with validation at each step.
@@ -558,18 +618,29 @@ def workflow(
         stitched_video_writer = VideoWriter(path=stitched_video_path, fps=fps)
         debug_video_writer = VideoWriter(path=debug_video_path, fps=fps)
 
+        # For noise_aware selection, extract noise_patch config from the denoise config
+        noise_patch_config = None
+        if selection_mode == "noise_aware":
+            denoise_config_parsed_early = DenoiseConfig.from_any(denoise_config)
+            noise_patch_config = denoise_config_parsed_early.noise_patch
+            click.echo(f"Noise-aware selection enabled (methods: {noise_patch_config.method})")
+
         recording_bundle = RecordingDataBundle(
             recordings=recordings,
             stitched_video_writer=stitched_video_writer,
             debug_video_writer=debug_video_writer,
             combined_csv_path=output_csv_path,
             debug_csv_path=debug_csv_path,
+            noise_config=noise_patch_config,
+            debug_dir=debug_dir if noise_patch_config is not None else None,
+            fps=fps,
         )
 
         click.echo(f"Stitching {len(recordings)} recordings (match-by={match_by})...")
         recording_bundle.stitch_recordings(
             matching_method=match_by,
             timestamp_threshold_ms=timestamp_threshold,
+            max_frames=max_frames,
         )
 
         try:
diff --git a/mio/data/config/process/denoise_calcium_imaging.yml b/mio/data/config/process/denoise_calcium_imaging.yml
index 73b45b8b..4c8dec35 100644
--- a/mio/data/config/process/denoise_calcium_imaging.yml
+++ b/mio/data/config/process/denoise_calcium_imaging.yml
@@ -27,5 +27,10 @@ minimum_projection:
   enable: true
   normalize: true
   output_result: true
+interactive_display:
+  show_videos: false
+  start_frame: 0
+  end_frame: 100
+  display_freq_mask: false
 end_frame: -1
 output_result: true
diff --git a/mio/models/stitch.py b/mio/models/stitch.py
index 52ad52e2..1432bb1d 100644
--- a/mio/models/stitch.py
+++ b/mio/models/stitch.py
@@ -69,6 +69,9 @@ class DebugRecord(BaseModel):
     selected_edge_score: float
     compare_edge_score: float
     metadata_tie: bool
+    selection_mode: str = "metadata"
+    selected_is_noisy: bool | None = None
+    compare_is_noisy: bool | None = None
 
     @classmethod
     def header(cls) -> List[str]:
diff --git a/mio/process/frame_helper.py b/mio/process/frame_helper.py
index 202116f4..30bcdec5 100644
--- a/mio/process/frame_helper.py
+++ b/mio/process/frame_helper.py
@@ -215,47 +215,34 @@ def _detect_black_pixels(
         current_frame: np.ndarray,
     ) -> Tuple[bool, np.ndarray]:
         """
-        Detect black-out noise by checking for black pixels (value 0) over rows of pixels.
+        Detect black-out noise by checking for consecutive black pixels per row.
+
+        Uses vectorized numpy (cumulative sum sliding window) instead of
+        pixel-by-pixel Python loops for ~100x speedup.
 
         Returns:
             Tuple[bool, np.ndarray]: A boolean indicating if the frame is corrupted and noise mask.
         """
-        height, width = current_frame.shape
-        noisy_mask = np.zeros_like(current_frame, dtype=np.uint8)
+        consecutive_threshold = self.config.consecutive_threshold
+        black_pixel_value_threshold = self.config.value_threshold
+
+        # Boolean mask of "black" pixels, then cumsum-based sliding window
+        black_mask = (current_frame <= black_pixel_value_threshold).astype(np.float32)
+        cs = np.cumsum(black_mask, axis=1)
 
-        # Read values from YAML config
-        consecutive_threshold = (
-            self.config.consecutive_threshold
-        )  # How many consecutive pixels must be black
-        black_pixel_value_threshold = (
-            self.config.value_threshold
-        )  # Max pixel value considered "black"
-
-        logger.debug(f"Using black pixel threshold: <= {black_pixel_value_threshold}")
-        logger.debug(f"Consecutive black pixel threshold: {consecutive_threshold}")
-
-        noisy_row_count = 0
-
-        for y in range(height):
-            row = current_frame[y, :]  # Extract row
-            consecutive_count = 0  # Counter for consecutive black pixels
-
-            for x in range(width):
-                if row[x] <= black_pixel_value_threshold:  # Check if pixel is "black"
-                    consecutive_count += 1
-                else:
-                    consecutive_count = 0  # Reset if a non-black pixel is found
-
-                # If we exceed the allowed threshold of consecutive black pixels, flag the row
-                if consecutive_count >= consecutive_threshold:
-                    logger.debug(
-                        f"Frame noisy due to {consecutive_count} consecutive black pixels "
-                        f"in row {y}."
-                    )
-                    noisy_mask[y, :] = 1  # Mark row as noisy
-                    noisy_row_count += 1
-                    break  # No need to check further in this row
+        if current_frame.shape[1] >= consecutive_threshold:
+            # Sliding window: sum of `consecutive_threshold` consecutive pixels
+            run_sum = cs[:, consecutive_threshold:] - cs[:, :-consecutive_threshold]
+            # A row has a run if any window sums to exactly consecutive_threshold
+            # (meaning all pixels in that window were black)
+            row_has_run = np.any(run_sum >= consecutive_threshold, axis=1)
+        else:
+            row_has_run = np.zeros(current_frame.shape[0], dtype=bool)
+
+        noisy_mask = np.zeros_like(current_frame, dtype=np.uint8)
+        noisy_mask[row_has_run, :] = 1
 
+        noisy_row_count = int(np.sum(row_has_run))
         frame_is_noisy = noisy_row_count >= self.config.min_rows
         return frame_is_noisy, noisy_mask
 
diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index 8003dfba..25d38cbc 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -19,7 +19,9 @@
 
 from mio.io import BufferedCSVWriter, VideoReader, VideoWriter
 from mio.logging import init_logger
+from mio.models.process import NoisePatchConfig
 from mio.models.stitch import DebugRecord, FrameInfo
+from mio.process.frame_helper import InvalidFrameDetector
 
 logger = init_logger(name="stitch")
 
@@ -41,6 +43,7 @@ class CandidateFrame:
     sum_black_padding: int
     metadata_rows: pd.DataFrame
     edge_score: float
+    is_noisy: bool | None = None
 
     @property
     def metadata_score(self) -> tuple[int, int]:
@@ -68,6 +71,30 @@ def select_best_candidate(candidates: list[CandidateFrame]) -> tuple[int, bool]:
     return best_idx, is_tie
 
 
+def select_best_candidate_noise_aware(
+    candidates: list[CandidateFrame],
+) -> tuple[int, bool] | None:
+    """
+    Pick the best candidate using noise detection results.
+
+    Returns (best_index, was_tie) or None if all candidates are noisy (skip frame).
+
+    Logic:
+    - If all candidates are noisy, return None (skip this frame)
+    - If exactly one is clean, pick it
+    - If multiple are clean, fall back to metadata scoring among clean ones
+    """
+    clean = [i for i, c in enumerate(candidates) if not c.is_noisy]
+    if not clean:
+        return None
+    if len(clean) == 1:
+        return clean[0], False
+    # Multiple clean candidates — use metadata scoring among them
+    clean_candidates = [candidates[i] for i in clean]
+    best_among_clean, was_tie = select_best_candidate(clean_candidates)
+    return clean[best_among_clean], was_tie
+
+
 class RecordingData:
     """Class for a single stream's data (video + metadata)."""
 
@@ -117,6 +144,9 @@ def __init__(
         debug_video_writer: VideoWriter | None = None,
         combined_csv_path: Path | None = None,
         debug_csv_path: Path | None = None,
+        noise_config: NoisePatchConfig | None = None,
+        debug_dir: Path | None = None,
+        fps: int = 20,
     ) -> None:
         self.recordings: list[RecordingData] = recordings
         self.stitched_video_writer: VideoWriter = stitched_video_writer
@@ -127,6 +157,27 @@ def __init__(
         self._out_frame_index: int = 0
         self.debug_csv_writer: BufferedCSVWriter | None = None
         self._debug_frame_index: int = 0
+        self._noise_detector: InvalidFrameDetector | None = None
+        self._debug_dir: Path | None = debug_dir
+        self._fps: int = fps
+        # Noise tracking for summary
+        self._per_rec_noisy: list[int] = [0] * len(recordings)
+        self._both_noisy_indices: list[int] = []  # matched position indices
+        self._both_noisy_writer: VideoWriter | None = None
+        self._total_matched: int = 0
+        if noise_config is not None:
+            if "mean_error" in (noise_config.method or []):
+                raise ValueError(
+                    "mean_error detection is not supported during stitching "
+                    "(it requires sequential frames from a single recording). "
+                    "Use only 'gradient' and/or 'black_area' methods."
+                )
+            self._noise_detector = InvalidFrameDetector(noise_config)
+            if debug_dir is not None:
+                debug_dir.mkdir(parents=True, exist_ok=True)
+                self._both_noisy_writer = VideoWriter(
+                    path=debug_dir / "both_broken.avi", fps=fps
+                )
         if debug_csv_path is not None:
             self.debug_csv_writer = BufferedCSVWriter(
                 debug_csv_path, header=DebugRecord.header(), buffer_size=100
@@ -145,9 +196,17 @@ def combined_frame_num(self) -> list[int]:
             self._combined_frame_num = combined
         return self._combined_frame_num
 
+    def _detect_noise(self, frame: np.ndarray) -> bool | None:
+        """Run noise detection on a frame if a detector is configured."""
+        if self._noise_detector is None:
+            return None
+        is_noisy, _ = self._noise_detector.find_invalid_area(frame)
+        return is_noisy
+
     def _collect_candidates(self, frame_num: int) -> list[CandidateFrame]:
         """Read frames and metadata scores for all recordings that have *frame_num*."""
         candidates: list[CandidateFrame] = []
+        skip_edge_score = self._noise_detector is not None
         for recording in self.recordings:
             rows = recording.metadata[recording.metadata["frame_num"] == frame_num]
             if rows.empty:
@@ -165,7 +224,8 @@ def _collect_candidates(self, frame_num: int) -> list[CandidateFrame]:
                     num_buffers=num_buffers,
                     sum_black_padding=sum_black,
                     metadata_rows=rows,
-                    edge_score=score_edges(frame),
+                    edge_score=0.0 if skip_edge_score else score_edges(frame),
+                    is_noisy=self._detect_noise(frame),
                 )
             )
         return candidates
@@ -205,6 +265,7 @@ def _write_debug(
                 writes += 1
 
             if self.debug_csv_writer is not None:
+                selection_mode = "noise_aware" if self._noise_detector is not None else "metadata"
                 record = DebugRecord(
                     debug_frame_index=self._debug_frame_index,
                     stitched_frame_index=self._out_frame_index,
@@ -219,6 +280,9 @@ def _write_debug(
                     selected_edge_score=selected.edge_score,
                     compare_edge_score=cand.edge_score,
                     metadata_tie=bool(is_tie),
+                    selection_mode=selection_mode,
+                    selected_is_noisy=selected.is_noisy,
+                    compare_is_noisy=cand.is_noisy,
                 )
                 self.debug_csv_writer.append(record.model_dump())
                 self._debug_frame_index += 1
@@ -246,6 +310,8 @@ def _finalize(self) -> None:
             self.debug_video_writer.close()
         if self.debug_csv_writer is not None:
             self.debug_csv_writer.close()
+        if self._both_noisy_writer is not None:
+            self._both_noisy_writer.close()
         if self.combined_csv_path is not None and self._metadata_parts:
             pd.concat(self._metadata_parts, ignore_index=True).to_csv(
                 self.combined_csv_path, index=False
@@ -309,6 +375,7 @@ def _collect_candidates_by_index(
     ) -> list[CandidateFrame]:
         """Collect candidates using reconstructed_frame_index directly."""
         candidates: list[CandidateFrame] = []
+        skip_edge_score = self._noise_detector is not None
         for rec_num, rfi in frame_indices.items():
             recording = self.recordings[rec_num]
             rows = recording.metadata[recording.metadata["reconstructed_frame_index"] == rfi]
@@ -326,15 +393,121 @@ def _collect_candidates_by_index(
                     num_buffers=num_buffers,
                     sum_black_padding=sum_black,
                     metadata_rows=rows,
-                    edge_score=score_edges(frame),
+                    edge_score=0.0 if skip_edge_score else score_edges(frame),
+                    is_noisy=self._detect_noise(frame),
                 )
             )
         return candidates
 
+    def _print_noise_summary(
+        self, stitched_writes: int, skipped_both_noisy: int
+    ) -> None:
+        """Print a terminal summary of noise statistics."""
+        total = self._total_matched
+        if total == 0:
+            return
+        fps = self._fps
+        both_pct = 100.0 * skipped_both_noisy / total
+        print(f"\n{'=' * 60}")
+        print("STITCH NOISE SUMMARY")
+        print(f"{'=' * 60}")
+        print(f"  Total matched frames:    {total}")
+        print(f"  Stitched (output):       {stitched_writes} "
+              f"({stitched_writes / fps:.1f}s, {stitched_writes / fps / 3600:.2f}h)")
+        print(f"  Both broken (skipped):   {skipped_both_noisy} "
+              f"({both_pct:.2f}%, {skipped_both_noisy / fps:.1f}s)")
+        for i, rec in enumerate(self.recordings):
+            noisy = self._per_rec_noisy[i]
+            pct = 100.0 * noisy / total if total > 0 else 0
+            print(f"  Rec {i} noisy ({rec.video_path.name}): "
+                  f"{noisy} ({pct:.2f}%)")
+        print(f"{'=' * 60}\n")
+
+    def _generate_noise_report(
+        self, stitched_writes: int, skipped_both_noisy: int
+    ) -> None:
+        """Generate a drop analysis PNG in the debug directory."""
+        try:
+            import matplotlib
+            matplotlib.use("Agg")
+            import matplotlib.pyplot as plt
+        except ImportError:
+            logger.warning("matplotlib not available, skipping noise report plot")
+            return
+
+        if not self._both_noisy_indices and skipped_both_noisy == 0:
+            return
+
+        total = self._total_matched
+        fps = self._fps
+        dropped_indices = np.array(self._both_noisy_indices)
+
+        fig, axes = plt.subplots(3, 1, figsize=(14, 10))
+
+        # Plot 1: Timeline
+        ax1 = axes[0]
+        time_hours = dropped_indices / fps / 3600.0
+        total_hours = total / fps / 3600.0
+        ax1.vlines(time_hours, 0, 1, colors="red", alpha=0.5, linewidth=0.5)
+        ax1.set_xlim(0, total_hours)
+        ax1.set_ylim(0, 1)
+        ax1.set_yticks([])
+        ax1.set_xlabel("Time (hours)")
+        ax1.set_title(
+            f"Both-broken frames timeline "
+            f"({len(dropped_indices)} dropped across {total} total)"
+        )
+
+        # Plot 2: Run length distribution
+        ax2 = axes[1]
+        if len(dropped_indices) > 0:
+            diffs = np.diff(dropped_indices)
+            runs = []
+            current_run = 1
+            for d in diffs:
+                if d == 1:
+                    current_run += 1
+                else:
+                    runs.append(current_run)
+                    current_run = 1
+            runs.append(current_run)
+            max_run = max(runs) if runs else 1
+            bins = range(1, min(max_run + 2, 102))
+            ax2.hist(runs, bins=bins, color="steelblue", edgecolor="black",
+                     linewidth=0.5)
+            ax2.axvline(x=fps, color="red", linestyle="--", alpha=0.7,
+                        label=f"1 second ({fps} frames)")
+            ax2.legend()
+        ax2.set_xlabel("Consecutive dropped frames (run length)")
+        ax2.set_ylabel("Number of events")
+        ax2.set_title(
+            f"Distribution of drop run lengths "
+            f"({len(dropped_indices)} events)"
+        )
+
+        # Plot 3: Drop density (1-minute bins)
+        ax3 = axes[2]
+        time_minutes = dropped_indices / fps / 60.0
+        total_minutes = total / fps / 60.0
+        if total_minutes > 0:
+            bins_minutes = np.arange(0, total_minutes + 1, 1)
+            ax3.hist(time_minutes, bins=bins_minutes, color="orangered",
+                     edgecolor="none", rwidth=0.8)
+        ax3.set_xlabel("Time (minutes)")
+        ax3.set_ylabel("Dropped frames per minute")
+        ax3.set_title("Drop density over time (1-minute bins)")
+
+        plt.tight_layout()
+        out_path = self._debug_dir / "noise_report.png"
+        fig.savefig(str(out_path), dpi=150)
+        plt.close(fig)
+        logger.info(f"Noise report saved to {out_path}")
+
     def stitch_recordings(
         self,
         matching_method: str = "frame_num",
         timestamp_threshold_ms: float = 25.0,
+        max_frames: int = -1,
     ) -> None:
         """Stitch recordings by selecting the best frame per matched position.
 
@@ -345,44 +518,100 @@ def stitch_recordings(
             ``"timestamp"`` matches by nearest ``buffer_recv_unix_time``.
         timestamp_threshold_ms : float
             Max time difference in ms for timestamp matching (default 25).
+        max_frames : int
+            Maximum number of frames to write. -1 means all frames.
         """
         stitched_writes = 0
         debug_writes = 0
+        skipped_both_noisy = 0
+        use_noise_aware = self._noise_detector is not None
+        match_position = 0
+
+        def _select(candidates: list[CandidateFrame]) -> tuple[int, bool] | None:
+            if use_noise_aware:
+                return select_best_candidate_noise_aware(candidates)
+            return select_best_candidate(candidates)
+
+        def _track_noise(candidates: list[CandidateFrame], position: int) -> None:
+            """Track per-recording noisy counts."""
+            if not use_noise_aware:
+                return
+            for i, c in enumerate(candidates):
+                if c.is_noisy and i < len(self._per_rec_noisy):
+                    self._per_rec_noisy[i] += 1
+
+        def _handle_both_noisy(candidates: list[CandidateFrame], position: int) -> None:
+            """Write all candidate frames to both-broken AVI for manual review."""
+            self._both_noisy_indices.append(position)
+            if self._both_noisy_writer is not None:
+                for c in candidates:
+                    self._both_noisy_writer.write_frame(c.frame)
 
         if matching_method == "timestamp":
             matches = self._build_timestamp_matches(
                 threshold_ms=timestamp_threshold_ms
             )
+            if max_frames > 0:
+                matches = matches[:max_frames]
             frame_iter = tqdm(matches, desc="Stitching frames (timestamp)")
             for match in frame_iter:
                 candidates = self._collect_candidates_by_index(match)
                 if not candidates:
+                    match_position += 1
+                    continue
+                self._total_matched += 1
+                _track_noise(candidates, match_position)
+                result = _select(candidates)
+                if result is None:
+                    skipped_both_noisy += 1
+                    _handle_both_noisy(candidates, match_position)
+                    match_position += 1
                     continue
-                selected_idx, is_tie = select_best_candidate(candidates)
-                # Use first recording's frame index as label for debug
+                selected_idx, is_tie = result
                 frame_label = match.get(0, 0)
                 debug_writes += self._write_debug(
                     frame_label, candidates, selected_idx, is_tie
                 )
                 self._write_stitched(candidates, selected_idx)
                 stitched_writes += 1
+                match_position += 1
         else:
-            frame_iter = tqdm(self.combined_frame_num, desc="Stitching frames")
+            frame_nums = self.combined_frame_num
+            if max_frames > 0:
+                frame_nums = frame_nums[:max_frames]
+            frame_iter = tqdm(frame_nums, desc="Stitching frames")
             for frame_num in frame_iter:
                 valid_pairs = self._collect_candidates(frame_num)
                 if not valid_pairs:
+                    match_position += 1
+                    continue
+                self._total_matched += 1
+                _track_noise(valid_pairs, match_position)
+                result = _select(valid_pairs)
+                if result is None:
+                    skipped_both_noisy += 1
+                    _handle_both_noisy(valid_pairs, match_position)
+                    match_position += 1
                     continue
-                selected_idx, is_tie = select_best_candidate(valid_pairs)
+                selected_idx, is_tie = result
                 debug_writes += self._write_debug(
                     frame_num, valid_pairs, selected_idx, is_tie
                 )
                 self._write_stitched(valid_pairs, selected_idx)
                 stitched_writes += 1
+                match_position += 1
 
         self._finalize()
-        logger.info(
-            f"Stitch completed: stitched_writes={stitched_writes}, debug_writes={debug_writes}"
-        )
+        msg = f"Stitch completed: stitched_writes={stitched_writes}, debug_writes={debug_writes}"
+        if skipped_both_noisy > 0:
+            msg += f", skipped_both_noisy={skipped_both_noisy}"
+        logger.info(msg)
+
+        # Print noise summary and generate plots if noise-aware
+        if use_noise_aware:
+            self._print_noise_summary(stitched_writes, skipped_both_noisy)
+            if self._debug_dir is not None:
+                self._generate_noise_report(stitched_writes, skipped_both_noisy)
 
 
 def concat_recordings(

From fc7d6c2de550dd3e613b224cfdeba5f9aa9eb967 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Wed, 22 Apr 2026 19:41:49 -0700
Subject: [PATCH 07/21] remove mean error noise detection method

---
 mio/data/config/process/denoise_example.yml   |   7 -
 .../process/denoise_example_mean_error.yml    |  45 ------
 mio/data/config/process/denoise_patchonly.yml |   7 -
 mio/models/process.py                         |  54 +------
 mio/process/frame_helper.py                   | 140 ++++--------------
 mio/process/video.py                          |  29 ----
 .../config/denoise_noise_detection_test.yml   |   6 -
 7 files changed, 32 insertions(+), 256 deletions(-)
 delete mode 100644 mio/data/config/process/denoise_example_mean_error.yml

diff --git a/mio/data/config/process/denoise_example.yml b/mio/data/config/process/denoise_example.yml
index 24bf075e..0c7aabef 100644
--- a/mio/data/config/process/denoise_example.yml
+++ b/mio/data/config/process/denoise_example.yml
@@ -4,12 +4,6 @@ mio_version: 0.6.1
 noise_patch:
   enable: true
   method: [gradient, black_area]
-  mean_error_config:
-    threshold: 40
-    device_config_id: wireless-200px
-    buffer_split: 8
-    comparison_unit: 1000
-    diff_multiply: 1
   gradient_config:
     threshold: 20
   black_area_config:
@@ -17,7 +11,6 @@ noise_patch:
     value_threshold: 0
   output_result: true
   output_noise_patch: true
-  output_diff: true
   output_noisy_frames: true
 frequency_masking:
   id: frequency_masking_example
diff --git a/mio/data/config/process/denoise_example_mean_error.yml b/mio/data/config/process/denoise_example_mean_error.yml
deleted file mode 100644
index 728df692..00000000
--- a/mio/data/config/process/denoise_example_mean_error.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-id: denoise_example_mean_error
-mio_model: mio.models.process.DenoiseConfig
-mio_version: 0.6.1
-noise_patch:
-  enable: true
-  method: [mean_error]
-  mean_error_config:
-    threshold: 40
-    device_config_id: wireless-200px
-    buffer_split: 8
-    comparison_unit: 1000
-    diff_multiply: 1
-  gradient_config:
-    threshold: 20
-  black_area_config:
-    consecutive_threshold: 5
-    value_threshold: 16
-  output_result: true
-  output_noise_patch: true
-  output_diff: true
-  output_noisy_frames: true
-frequency_masking:
-  id: frequency_masking_example_mean_error
-  mio_model: mio.models.process.FrequencyMaskingConfig
-  mio_version: 0.6.1
-  enable: true
-  spatial_LPF_cutoff_radius: 15
-  vertical_BEF_cutoff: 2
-  horizontal_BEF_cutoff: 0
-  output_mask: true
-  output_result: true
-  output_freq_domain: true
-minimum_projection:
-  enable: true
-  normalize: true
-  output_result: true
-  output_min_proj: true
-interactive_display:
-  show_videos: true
-  start_frame: 40
-  end_frame: 140
-  display_freq_mask: true
-end_frame: -1 #-1 means all frames
-output_result: true
-output_dir: user_data/output
\ No newline at end of file
diff --git a/mio/data/config/process/denoise_patchonly.yml b/mio/data/config/process/denoise_patchonly.yml
index 44db9145..d88e34d4 100644
--- a/mio/data/config/process/denoise_patchonly.yml
+++ b/mio/data/config/process/denoise_patchonly.yml
@@ -6,12 +6,6 @@ noise_patch:
   method:
   - gradient
   - black_area
-  mean_error_config:
-    threshold: 40
-    device_config_id: wireless-200px
-    buffer_split: 8
-    comparison_unit: 1000
-    diff_multiply: 1
   gradient_config:
     threshold: 20
   black_area_config:
@@ -19,7 +13,6 @@ noise_patch:
     value_threshold: 0
   output_result: true
   output_noise_patch: true
-  output_diff: false
   output_noisy_frames: true
 frequency_masking:
   id: frequency_masking_example
diff --git a/mio/models/process.py b/mio/models/process.py
index 33f447eb..ffe87bed 100644
--- a/mio/models/process.py
+++ b/mio/models/process.py
@@ -8,7 +8,6 @@
 
 from mio.models import MiniscopeConfig
 from mio.models.mixins import ConfigYAMLMixin
-from mio.models.stream import StreamDevConfig
 
 
 class MinimumProjectionConfig(BaseModel):
@@ -38,45 +37,6 @@ class MinimumProjectionConfig(BaseModel):
     )
 
 
-class MSEDetectorConfig(BaseModel):
-    """
-    Configraiton for detecting invalid frames based on mean squared error.
-    """
-
-    threshold: float = Field(
-        ...,
-        description="Threshold for detecting invalid frames based on mean squared error.",
-    )
-    device_config_id: str | None = Field(
-        default=None,
-        description="ID of the stream device configuration used for aquiring the video."
-        "This is used in the mean_error method to compare frames"
-        " in the units of data transfer buffers.",
-    )
-    buffer_split: int = Field(
-        default=1,
-        description="Number of splits to make in the buffer when detecting noisy areas."
-        "This further splits the buffer into smaller patches to detect small noisy areas."
-        "This is used in the mean_error method.",
-    )
-    diff_multiply: int = Field(
-        default=1,
-        description="Multiplier for visualizing the diff between the current and previous frame.",
-    )
-
-    _device_config: StreamDevConfig | None = None
-
-    @property
-    def device_config(self) -> StreamDevConfig:
-        """
-        Get the device configuration based on the device_config_id.
-        This is used in the mean_error method to compare frames in the units of data buffers.
-        """
-        if self._device_config is None:
-            self._device_config = StreamDevConfig.from_any(self.device_config_id)
-        return self._device_config
-
-
 class GradientDetectorConfig(BaseModel):
     """
     Configraiton for detecting invalid frames based on gradient.
@@ -119,18 +79,12 @@ class NoisePatchConfig(BaseModel):
         default=True,
         description="Enable patch based noise handling.",
     )
-    method: list[Literal["mean_error", "gradient", "black_area"]] = Field(
+    method: list[Literal["gradient", "black_area"]] = Field(
         default="gradient",
         description="Method for detecting noise."
         "gradient: Detection based on the gradient of the frame row."
-        "mean_error: Detection based on the mean error with the same row of the previous frame."
         "black_area: Detection based on the number of consecutive black pixels in a row.",
     )
-    mean_error_config: MSEDetectorConfig | None = Field(
-        default=None,
-        description="Configuration for detecting invalid frames based on mean squared error."
-        " Any positive value or zero is valid.",
-    )
     gradient_config: GradientDetectorConfig | None = Field(
         default=None,
         description="Configuration for detecting invalid frames based on gradient.",
@@ -148,12 +102,6 @@ class NoisePatchConfig(BaseModel):
         description="Output the noise patch video"
         "This highlights the noisy areas found in the video stream.",
     )
-    output_diff: bool = Field(
-        default=False,
-        description="Output the diff video stream."
-        "The diff video stream shows the difference between the current and previous frame."
-        "This is used in the mean_error method.",
-    )
     output_noisy_frames: bool = Field(
         default=True,
         description="Output the stack of noisy frames as an independent video stream.",
diff --git a/mio/process/frame_helper.py b/mio/process/frame_helper.py
index 7012ebfb..1dc095a5 100644
--- a/mio/process/frame_helper.py
+++ b/mio/process/frame_helper.py
@@ -2,6 +2,9 @@
 This module contains a helper class for frame operations.
 """
 
+from __future__ import annotations
+
+import sys
 from abc import abstractmethod
 
 import cv2
@@ -12,13 +15,37 @@
     BlackAreaDetectorConfig,
     FrequencyMaskingConfig,
     GradientDetectorConfig,
-    MSEDetectorConfig,
     NoisePatchConfig,
 )
 
+if sys.version_info < (3, 11):
+    from typing_extensions import TypedDict
+else:
+    from typing import TypedDict
+
 logger = init_logger("frame_helper")
 
 
+class Detectors(TypedDict, total=False):
+    """Map between shortnames and detector class instances"""
+
+    black_area: BlackAreaDetector
+    gradient: GradientNoiseDetector
+
+
+def make_detectors(config: NoisePatchConfig) -> Detectors:
+    """Make detector classes from a config"""
+    detectors = {}
+    for method in config.method:
+        if method == "gradient":
+            detectors[method] = GradientNoiseDetector(config.gradient_config)
+        elif method == "black_area":
+            detectors[method] = BlackAreaDetector(config.black_area_config)
+        else:
+            raise ValueError(f"Unknown method {method}")
+    return detectors
+
+
 class BaseSingleFrameHelper:
     """
     Base class for single frame operations.
@@ -78,19 +105,7 @@ def __init__(self, noise_patch_config: NoisePatchConfig):
         if noise_patch_config.method is None:
             raise ValueError("No noise detection methods provided")
         self.methods = noise_patch_config.method
-
-        if "mean_error" in self.methods:
-            if noise_patch_config.mean_error_config is None:
-                raise ValueError("Mean error config must be provided for mean error detection")
-            self.mse_detector = MSENoiseDetector(noise_patch_config.mean_error_config)
-        if "gradient" in self.methods:
-            if noise_patch_config.gradient_config is None:
-                raise ValueError("Gradient config must be provided for gradient detection")
-            self.gradient_detector = GradientNoiseDetector(noise_patch_config.gradient_config)
-        if "black_area" in self.methods:
-            if noise_patch_config.black_area_config is None:
-                raise ValueError("Black area config must be provided for black area detection")
-            self.black_detector = BlackAreaDetector(noise_patch_config.black_area_config)
+        self.detectors = make_detectors(noise_patch_config)
 
     def find_invalid_area(self, frame: np.ndarray) -> tuple[bool, np.ndarray]:
         """
@@ -104,19 +119,8 @@ def find_invalid_area(self, frame: np.ndarray) -> tuple[bool, np.ndarray]:
         """
         noisy_flag = False
         combined_noisy_area = np.zeros_like(frame, dtype=np.uint8)
-
-        if "mean_error" in self.methods:
-            noisy, noisy_area = self.mse_detector.find_invalid_area(frame)
-            combined_noisy_area = np.maximum(combined_noisy_area, noisy_area)
-            noisy_flag = noisy_flag or noisy
-
-        if "gradient" in self.methods:
-            noisy, noisy_area = self.gradient_detector.find_invalid_area(frame)
-            combined_noisy_area = np.maximum(combined_noisy_area, noisy_area)
-            noisy_flag = noisy_flag or noisy
-
-        if "black_area" in self.methods:
-            noisy, noisy_area = self.black_detector.find_invalid_area(frame)
+        for detector in self.detectors.values():
+            noisy, noisy_area = detector.find_invalid_area(frame)
             combined_noisy_area = np.maximum(combined_noisy_area, noisy_area)
             noisy_flag = noisy_flag or noisy
 
@@ -246,88 +250,6 @@ def _detect_black_pixels(
         return frame_is_noisy, noisy_mask
 
 
-class MSENoiseDetector(BaseSingleFrameHelper):
-    """
-    Helper class for mean squared error noise detection.
-    """
-
-    def __init__(self, config: MSEDetectorConfig):
-        """
-        Initialize the MeanErrorNoiseDetectionHelper object.
-
-        Parameters:
-            threshold (float): The threshold for noise detection.
-
-        Returns:
-            MeanErrorNoiseDetectionHelper: A MeanErrorNoiseDetectionHelper object.
-        """
-        self.config = config
-        self.previous_frame = None
-
-    def register_previous_frame(self, previous_frame: np.ndarray) -> None:
-        """
-        Register the previous frame for mean error calculation.
-
-        Parameters:
-            previous_frame (np.ndarray): The previous frame to compare against.
-        """
-        self.previous_frame = previous_frame
-
-    def find_invalid_area(self, frame: np.ndarray) -> tuple[bool, np.ndarray]:
-        """
-        Process a single frame and verify if it is valid.
-
-        Parameters:
-            frame (np.ndarray): The frame to process.
-
-        Returns:
-            Tuple[bool, np.ndarray]: A boolean indicating if the frame is valid
-            and the processed frame.
-        """
-        if self.previous_frame is None:
-            self.previous_frame = frame
-            return False, np.zeros_like(frame, dtype=np.uint8)
-        noisy, mask = self._detect_with_mean_error(frame)
-        return noisy, mask
-
-    def _detect_with_mean_error(self, current_frame: np.ndarray) -> tuple[bool, np.ndarray]:
-        """
-        Detect noise using mean error between current and previous frames.
-
-        Returns:
-            Tuple[bool, np.ndarray]: A boolean indicating if the frame is noisy and the noise mask.
-        """
-        if self.previous_frame is None:
-            return False, np.zeros_like(current_frame, dtype=np.uint8)
-
-        current_flat = current_frame.astype(np.int16).flatten()
-        previous_flat = self.previous_frame.astype(np.int16).flatten()
-
-        buffer_indices = FrameSplitter.get_buffer_shape(
-            current_frame.shape[1], current_frame.shape[0], self.config.device_config.px_per_buffer
-        ) + [
-            current_frame.size
-        ]  # Ensure final boundary is included
-
-        noisy_mask = np.ones_like(current_flat, dtype=np.uint8)
-        has_noise = False
-
-        for start_idx, end_idx in zip(buffer_indices[:-1], buffer_indices[1:]):
-            for sub_start in range(
-                end_idx - self.config.buffer_split, start_idx, -self.config.buffer_split
-            ):
-                mean_error = np.mean(
-                    np.abs(current_flat[sub_start:end_idx] - previous_flat[sub_start:end_idx])
-                )
-
-                if mean_error > self.config.threshold:
-                    noisy_mask[sub_start:end_idx] = 0
-                    has_noise = True
-                    break
-
-        return has_noise, noisy_mask.reshape(current_frame.shape)
-
-
 class FrequencyMaskHelper(BaseSingleFrameHelper):
     """
     Helper class for frequency masking operations.
diff --git a/mio/process/video.py b/mio/process/video.py
index 4ff8d2c7..a166b5c4 100644
--- a/mio/process/video.py
+++ b/mio/process/video.py
@@ -131,16 +131,10 @@ def __init__(
         self.noise_detect_helper = InvalidFrameDetector(noise_patch_config=noise_patch_config)
         self.noise_patchs: list[np.ndarray] = []
         self.noisy_frames: list[np.ndarray] = []
-        self.diff_frames: list[np.ndarray] = []
         self.dropped_frame_indices: list[int] = []
 
         self.output_enable: bool = noise_patch_config.output_result
 
-        if "mean_error" in noise_patch_config.method:
-            logger.warning(
-                "The mean_error method is unstable and not fully tested yet." " Use with caution."
-            )
-
     def process_frame(self, input_frame: np.ndarray, index: int) -> np.ndarray | None:
         """
         Process a single frame.
@@ -185,15 +179,6 @@ def noise_patch_named_video(self) -> NamedVideo:
         """
         return NamedVideo(name="patched_area", video=self.noise_patchs)
 
-    @property
-    def diff_frames_named_video(self) -> NamedVideo:
-        """
-        Get the NamedFrame object for the difference frames.
-        """
-        if not hasattr(self.noise_patch_config, "diff_multiply"):
-            diff_multiply = 1
-        return NamedVideo(name=f"diff_{diff_multiply}x", video=self.diff_frames)
-
     @property
     def noisy_frames_named_video(self) -> NamedVideo:
         """
@@ -217,18 +202,6 @@ def export_noise_patch(self) -> None:
         else:
             logger.info(f"{self.name} noise patch output disabled.")
 
-    def export_diff_frames(self) -> None:
-        """
-        Export the difference frames to a file.
-        """
-        if self.noise_patch_config.output_diff:
-            logger.info(f"Exporting {self.name} difference frames to {self.output_dir}")
-            self.diff_frames_named_video.export(
-                output_path=self.output_dir / f"{self.name}", fps=20, suffix=True, force=self.force
-            )
-        else:
-            logger.info(f"{self.name} difference frames output disabled.")
-
     def export_noisy_video(self) -> None:
         """
         Export the noisy frames to a file.
@@ -250,7 +223,6 @@ def batch_export_videos(self) -> None:
         """
         self.export_output_video()
         self.export_noise_patch()
-        self.export_diff_frames()
         self.export_noisy_video()
 
 
@@ -583,7 +555,6 @@ def denoise(
 
     noise_patch_processor.output_dir = debug_dir
     noise_patch_processor.export_noise_patch()
-    noise_patch_processor.export_diff_frames()
     noise_patch_processor.export_noisy_video()
 
     freq_mask_processor.batch_export_videos()
diff --git a/tests/data/config/denoise_noise_detection_test.yml b/tests/data/config/denoise_noise_detection_test.yml
index 886bf644..bc10f898 100644
--- a/tests/data/config/denoise_noise_detection_test.yml
+++ b/tests/data/config/denoise_noise_detection_test.yml
@@ -4,12 +4,6 @@ mio_version: 0.6.1
 noise_patch:
   enable: true
   method: [gradient, black_area]
-  mean_error_config:
-    threshold: 40
-    device_config_id: wireless-200px
-    buffer_split: 8
-    comparison_unit: 1000
-    diff_multiply: 1
   gradient_config:
     threshold: 20
   black_area_config:

From f22d51764e155885ab27bae88986ea01e41cf089 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Wed, 22 Apr 2026 20:25:20 -0700
Subject: [PATCH 08/21] add score_noise method

---
 mio/cli/process.py    | 15 ++++++---------
 mio/models/dataset.py | 27 +++++++++++++++++++++++++++
 mio/process/stitch.py |  7 ++-----
 mio/process/video.py  | 42 ++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 75 insertions(+), 16 deletions(-)

diff --git a/mio/cli/process.py b/mio/cli/process.py
index c3db61d5..e5e1c066 100644
--- a/mio/cli/process.py
+++ b/mio/cli/process.py
@@ -10,8 +10,8 @@
 from mio.logging import init_logger
 from mio.models.dataset import Recording
 from mio.models.process import DenoiseConfig
-from mio.process.stitch import stitch as run_stitch
 from mio.process.stitch import concat_recordings
+from mio.process.stitch import stitch as run_stitch
 from mio.process.video import denoise as run_denoise
 from mio.process.video import remove_frames as run_remove_frames
 from mio.process.video import trim as run_trim
@@ -212,10 +212,10 @@ def remove_frames(input: str, output: str | None, frames: str, force: bool = Fal
 @click.option(
     "-o",
     "--output",
-    type=click.Path(),
+    type=click.Path(dir_okay=False),
     default=None,
     help="Path to the output concatenated video file or directory. "
-    f"If not specified, saves to {DEFAULT_PROCESS_DIR}/ with '_combined' suffix.",
+    "If not specified, saves next to video with '_combined' suffix.",
 )
 @click.option(
     "--fps",
@@ -225,7 +225,7 @@ def remove_frames(input: str, output: str | None, frames: str, force: bool = Fal
 )
 def concat(
     directory: str,
-    output: Optional[str],
+    output: str | None,
     fps: int,
 ) -> None:
     """
@@ -243,8 +243,7 @@ def concat(
     def _natural_sort_key(path: Path) -> list:
         """Sort by splitting name into text and numeric parts for natural ordering."""
         return [
-            int(part) if part.isdigit() else part.lower()
-            for part in re.split(r"(\d+)", path.name)
+            int(part) if part.isdigit() else part.lower() for part in re.split(r"(\d+)", path.name)
         ]
 
     avi_files = sorted(dir_path.glob("*.avi"), key=_natural_sort_key)
@@ -285,9 +284,7 @@ def _find_companion_csv(avi_path: Path) -> Optional[Path]:
     for avi, csv in valid_pairs:
         click.echo(f"  {avi.name} -> {csv.name}")
 
-    recordings = [
-        RecordingData(video_path=avi, csv_path=csv) for avi, csv in valid_pairs
-    ]
+    recordings = [RecordingData(video_path=avi, csv_path=csv) for avi, csv in valid_pairs]
 
     first_input_path = valid_pairs[0][0]
     output_arg = output if output is not None else DEFAULT_PROCESS_DIR
diff --git a/mio/models/dataset.py b/mio/models/dataset.py
index 24ba6c2a..f05025f6 100644
--- a/mio/models/dataset.py
+++ b/mio/models/dataset.py
@@ -63,8 +63,10 @@
 from typing import Any, Literal, TypeAlias
 
 import pandas as pd
+from models.process import NoisePatchConfig
 from numpydantic import NDArraySchema
 from numpydantic.interface.video import VideoProxy
+from process.video import score_noise
 from pydantic import (
     ConfigDict,
     Discriminator,
@@ -107,6 +109,8 @@ class RecordingPaths(TypedDict):
     """{stem}.csv"""
     timestamps: Path
     """{stem}_timestamps.csv"""
+    noise: Path
+    """{stem}_noise.csv"""
     binary: Path
     """{stem}.bin"""
 
@@ -117,6 +121,7 @@ def paths_from_video(video: Path) -> RecordingPaths:
         video=video,
         metadata=video.with_suffix(".csv"),
         timestamps=video.with_name(video.stem + "_timestamps.csv"),
+        noise=video.with_name(video.stem + "_noise.csv"),
         binary=video.with_suffix(".bin"),
     )
 
@@ -138,6 +143,10 @@ class Recording(MiniscopeIOModel):
     When instantiating a recording, if a metadata file exists but timestamps do not,
     they are automatically generated. 
     """
+    noise: pd.DataFrame | None = None
+    """
+    Framewise noise measurements (created with :meth:`score_noise` ).
+    """
     binary: Path | None = None
     """Path to any raw binary version of the data in the video"""
     derived_from: RecordingDerivation | None = None
@@ -159,6 +168,24 @@ def from_video(cls, path: Path) -> "RecordingUnion":
         else:
             return RawVideoRecording(name=path.stem, video=path)
 
+    def score_noise(
+        self, config: NoisePatchConfig, progress: bool = False, force: bool = False
+    ) -> pd.DataFrame:
+        """
+        Score the noise level in each frame with :func:`.score_noise`,
+        saving as a csv with `{name}_noise.csv`
+        """
+        if not force:
+            if self.noise is not None:
+                return self.noise
+            elif self.paths["noise"].exists():
+                self.noise = pd.read_csv(self.paths["noise"])
+                return self.noise
+
+        self.noise = score_noise(self, config, progress=progress)
+        self.noise.to_csv(self.paths["noise"], index=False)
+        return self.noise
+
     @model_validator(mode="before")
     @classmethod
     def _load_csvs(cls, v: dict) -> dict:
diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index 54fb5458..6e1c9484 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -321,8 +321,7 @@ def concat_recordings(
         metadata_parts.append(df)
 
         logger.info(
-            f"Segment {i}: {rec.video.path.name} — "
-            f"{seg_frames} frames, rfi_offset={rfi_offset}"
+            f"Segment {i}: {rec.video.path.name} — " f"{seg_frames} frames, rfi_offset={rfi_offset}"
         )
         rfi_offset += max_rfi + 1
         total_frames += seg_frames
@@ -338,9 +337,7 @@ def concat_recordings(
     )
 
 
-def _build_timestamp_matches(
-    recordings, threshold_ms: float = 25.0
-) -> list[dict[int, int]]:
+def _build_timestamp_matches(recordings, threshold_ms: float = 25.0) -> list[dict[int, int]]:
     """
     Match frames across recordings by nearest unix timestamp.
 
diff --git a/mio/process/video.py b/mio/process/video.py
index a166b5c4..1bf426a8 100644
--- a/mio/process/video.py
+++ b/mio/process/video.py
@@ -8,7 +8,7 @@
 import cv2
 import numpy as np
 import pandas as pd
-from tqdm import tqdm
+from tqdm import tqdm, trange
 
 from mio import init_logger
 from mio.io import VideoReader, VideoWriter
@@ -21,7 +21,11 @@
     NoisePatchConfig,
 )
 from mio.plots.video import VideoPlotter
-from mio.process.frame_helper import FrequencyMaskHelper, InvalidFrameDetector
+from mio.process.frame_helper import (
+    FrequencyMaskHelper,
+    InvalidFrameDetector,
+    make_detectors,
+)
 from mio.process.zstack_helper import ZStackHelper
 
 logger = init_logger("video")
@@ -606,6 +610,40 @@ def denoise(
     return output_video_path
 
 
+def score_noise(
+    recording: Recording, config: NoisePatchConfig, progress: bool = False
+) -> pd.DataFrame:
+    """
+    Score framewise noise from a recording,
+    yielding a dataframe with columns for each kind of noise
+
+    - reconstructed_frame_index: the index of the frame in the video
+    - gradient: the number of pixels that are part of noise patches,
+      as determined by the second row-wise derivative being above the configured threshold
+    - black_area: the number of pixels in contiguous black regions (and thus missing)
+    """
+
+    records = []
+    detectors = make_detectors(config)
+    n_frames = recording.video.n_frames
+    iterator = trange(n_frames) if progress else range(n_frames)
+
+    try:
+        for idx in iterator:
+            record = {"reconstructed_frame_index": idx}
+            frame = recording.video[idx]
+            for method, detector in detectors.items():
+                _, noise_mask = detector.find_invalid_area(frame)
+                record[method] = np.count_nonzero(noise_mask)
+            records.append(record)
+
+    finally:
+        if progress:
+            iterator.close()
+
+    return pd.DataFrame(records)
+
+
 def trim(
     video_path: Path,
     output_path: Path | None = None,

From d93565eb7b82205f463ded698e0781a7d1c48f38 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Wed, 22 Apr 2026 21:27:19 -0700
Subject: [PATCH 09/21] incorporate noise scoring in stitcher

---
 mio/cli/process.py    | 46 ++++++++++++++++++++++++++++++++-----------
 mio/models/dataset.py | 10 +++++++---
 mio/models/process.py | 14 ++++++-------
 mio/process/stitch.py | 39 +++++++++++++++++++++++++++++++++++-
 mio/process/video.py  |  4 ++++
 5 files changed, 90 insertions(+), 23 deletions(-)

diff --git a/mio/cli/process.py b/mio/cli/process.py
index e5e1c066..1e1a3f04 100644
--- a/mio/cli/process.py
+++ b/mio/cli/process.py
@@ -15,6 +15,7 @@
 from mio.process.video import denoise as run_denoise
 from mio.process.video import remove_frames as run_remove_frames
 from mio.process.video import trim as run_trim
+from mio.types import ConfigSource
 
 logger = init_logger("mio.cli.process")
 
@@ -299,14 +300,6 @@ def _find_companion_csv(avi_path: Path) -> Optional[Path]:
         fps=fps,
     )
 
-    try:
-        validate_video_metadata_match(combined_video_path)
-    except VideoMetadataError as e:
-        raise click.ClickException(
-            f"Frame count alignment failed after concatenation: {e}"
-        ) from None
-    click.echo(f"✅ Frame count alignment verified: {combined_video_path}")
-
 
 @process.command()
 @click.option(
@@ -324,6 +317,13 @@ def _find_companion_csv(avi_path: Path) -> Optional[Path]:
     default=None,
     help="Directory for output videos and metadata. If none provided, same as the inputs.",
 )
+@click.option(
+    "-c",
+    "--config",
+    default=None,
+    help="A config id or path for a DenoiseConfig used to score frames if no noise score exists."
+    "If not provided, default config is used.",
+)
 @click.option(
     "--debug-video",
     default=False,
@@ -338,7 +338,11 @@ def _find_companion_csv(avi_path: Path) -> Optional[Path]:
     help="Overwrite any existing files",
 )
 def stitch(
-    inputs: tuple, output: Path | None = None, debug_video: bool = False, force: bool = False
+    inputs: tuple,
+    output: Path | None = None,
+    config: ConfigSource | None = None,
+    debug_video: bool = False,
+    force: bool = False,
 ) -> None:
     """
     Stitch multiple video recordings into one by selecting the best frame
@@ -349,9 +353,20 @@ def stitch(
     if len(inputs) < 2:
         raise click.ClickException("At least 2 input videos are required for stitching.")
 
+    if config is not None:
+        denoise_config: DenoiseConfig = DenoiseConfig.from_any(config)
+        patch_config = denoise_config.noise_patch
+    else:
+        patch_config = None
+
     recordings = [Recording.from_video(Path(p)) for p in inputs]
     stitched = run_stitch(
-        recordings, debug_video=debug_video, output_dir=output, progress=True, force=force
+        recordings,
+        debug_video=debug_video,
+        noise_config=patch_config,
+        output_dir=output,
+        progress=True,
+        force=force,
     )
     click.echo(f"Stitched videos to {stitched.video.path}")
 
@@ -419,13 +434,21 @@ def workflow(
         output_dir = Path(output).expanduser()
         output_dir.mkdir(parents=True, exist_ok=True)
 
+    denoise_config_parsed = DenoiseConfig.from_any(denoise_config)
+
     if len(inputs) == 1:
         click.echo("Only one input video provided, skipping stitching")
         stitched_video = inputs[0]
     else:
         click.echo("Stitching videos...")
         recordings = [Recording.from_video(p) for p in inputs]
-        stitched = run_stitch(recordings, output_dir=output_dir, progress=True, force=force)
+        stitched = run_stitch(
+            recordings,
+            output_dir=output_dir,
+            noise_config=denoise_config_parsed,
+            progress=True,
+            force=force,
+        )
         stitched_video = stitched.video.path
 
     if trim_start == 0 and trim_end == 0:
@@ -441,7 +464,6 @@ def workflow(
     if trimmed.metadata is None:
         raise FileNotFoundError(f"No metadata csv found for video {trimmed_video}")
 
-    denoise_config_parsed = DenoiseConfig.from_any(denoise_config)
     final_video = run_denoise(
         trimmed_video,
         denoise_config_parsed,
diff --git a/mio/models/dataset.py b/mio/models/dataset.py
index f05025f6..b72b71dc 100644
--- a/mio/models/dataset.py
+++ b/mio/models/dataset.py
@@ -63,10 +63,8 @@
 from typing import Any, Literal, TypeAlias
 
 import pandas as pd
-from models.process import NoisePatchConfig
 from numpydantic import NDArraySchema
 from numpydantic.interface.video import VideoProxy
-from process.video import score_noise
 from pydantic import (
     ConfigDict,
     Discriminator,
@@ -76,6 +74,7 @@
 )
 
 from mio.models import MiniscopeIOModel
+from mio.models.process import NoisePatchConfig
 from mio.utils import _format_ranges
 
 if sys.version_info < (3, 11):
@@ -169,12 +168,17 @@ def from_video(cls, path: Path) -> "RecordingUnion":
             return RawVideoRecording(name=path.stem, video=path)
 
     def score_noise(
-        self, config: NoisePatchConfig, progress: bool = False, force: bool = False
+        self, config: NoisePatchConfig | None = None, progress: bool = False, force: bool = False
     ) -> pd.DataFrame:
         """
         Score the noise level in each frame with :func:`.score_noise`,
         saving as a csv with `{name}_noise.csv`
         """
+
+        from mio.process.video import score_noise
+
+        if config is None:
+            config = NoisePatchConfig()
         if not force:
             if self.noise is not None:
                 return self.noise
diff --git a/mio/models/process.py b/mio/models/process.py
index ffe87bed..63fb02ea 100644
--- a/mio/models/process.py
+++ b/mio/models/process.py
@@ -39,11 +39,11 @@ class MinimumProjectionConfig(BaseModel):
 
 class GradientDetectorConfig(BaseModel):
     """
-    Configraiton for detecting invalid frames based on gradient.
+    Configuration for detecting invalid frames based on gradient.
     """
 
     threshold: float = Field(
-        ...,
+        default=20,
         description="Threshold for detecting invalid frames based on gradient.",
     )
 
@@ -80,17 +80,17 @@ class NoisePatchConfig(BaseModel):
         description="Enable patch based noise handling.",
     )
     method: list[Literal["gradient", "black_area"]] = Field(
-        default="gradient",
+        default_factory=lambda: ["gradient", "black_area"],
         description="Method for detecting noise."
         "gradient: Detection based on the gradient of the frame row."
         "black_area: Detection based on the number of consecutive black pixels in a row.",
     )
-    gradient_config: GradientDetectorConfig | None = Field(
-        default=None,
+    gradient_config: GradientDetectorConfig = Field(
+        default_factory=GradientDetectorConfig,
         description="Configuration for detecting invalid frames based on gradient.",
     )
-    black_area_config: BlackAreaDetectorConfig | None = Field(
-        default=None,
+    black_area_config: BlackAreaDetectorConfig = Field(
+        default_factory=BlackAreaDetectorConfig,
         description="Configuration for detecting invalid frames based on black area.",
     )
     output_result: bool = Field(
diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index 6e1c9484..880b8ff8 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -21,6 +21,7 @@
 from mio.io import BufferedCSVWriter, VideoWriter
 from mio.logging import init_logger
 from mio.models.dataset import Dataset, Recording, StitchedRecording
+from mio.models.process import NoisePatchConfig
 
 logger = init_logger(name="stitch")
 
@@ -74,6 +75,7 @@ def align(recordings: list[Recording]) -> pd.DataFrame:
 
 def stitch(
     recordings: list[Recording],
+    noise_config: NoisePatchConfig | None = None,
     dataset: Dataset | None = None,
     debug_video: bool = False,
     output_dir: Path | None = None,
@@ -90,10 +92,25 @@ def stitch(
     It does not handle stitching or aligning videos that were recorded with *different* devices,
     for that use the :attr:`.Dataset.alignment_map`
     which aligns simultaneous frames in different recordings.
+
+    Args:
+        recordings (list[Recording]): List of recordings to stitch.
+        noise_config (NoisePatchConfig | None): Configuration used for scoring
+            noise per frame (with :func:`.score_noise` ). If None, use defaults
+        dataset (Dataset | None): existing dataset, e.g. with existing alignment mapping
+        output_dir: (Path | None): where to write stitched video and metadata,
+            if None, same as recording directory
+        progress (bool): Show a progress bar. Default ``False``
+        force (bool): Overwrite existing stitched video and metadata CSV files
     """
     if len(recordings) != 2:
         raise NotImplementedError("Only stitching two videos simultaneously is supported!")
 
+    # ensure that the recordings have noise scores
+    # (does not recompute if they already exist)
+    for rec in recordings:
+        rec.score_noise(config=noise_config, progress=progress, force=force)
+
     if dataset is None:
         dataset = Dataset.from_recordings(recordings)
     output_dir = dataset.path if output_dir is None else Path(output_dir)
@@ -137,6 +154,10 @@ def stitch(
                         frames.append(np.zeros(rec.video.shape[1:], dtype=np.uint8))
                     continue
                 buffer_rows = rec.metadata[rec.metadata["reconstructed_frame_index"] == row[name]]
+                noise_row = rec.noise[rec.noise["reconstructed_frame_index"] == row[name]].iloc[0]
+                black_pixels = int(noise_row["black_area"]) if "black_area" in noise_row else 0
+                noisy_pixels = int(noise_row["noisy_area"]) if "noisy_area" in noise_row else 0
+
                 frames.append(rec.video[int(row[name])])
                 candidates.append(
                     CandidateFrame(
@@ -144,6 +165,8 @@ def stitch(
                         frame=frames[-1],
                         num_buffers=len(buffer_rows),
                         sum_black_padding=int(buffer_rows["black_padding_px"].fillna(0).sum()),
+                        black_pixels=black_pixels,
+                        noisy_pixels=noisy_pixels,
                         metadata_rows=buffer_rows,
                     )
                 )
@@ -179,6 +202,8 @@ class CandidateFrame:
     frame: np.ndarray
     num_buffers: int
     sum_black_padding: int
+    black_pixels: int
+    noisy_pixels: int
     metadata_rows: pd.DataFrame
     _edge_score: float | None = field(default=None, repr=False)
 
@@ -194,7 +219,9 @@ def metadata_score(self) -> tuple[int, int]:
         """Higher is better: more buffers, less black padding.
         A bit overkill but left this for future extension.
         """
-        return (self.num_buffers, -self.sum_black_padding)
+        # To discuss - we are probably double counting padding and missing buffers,
+        # but keeping similar to existing method until we can decide what we want here -jls
+        return (self.num_buffers, -self.sum_black_padding - self.black_pixels - self.noisy_pixels)
 
 
 def _score_edges(frame: np.ndarray) -> float:
@@ -217,8 +244,12 @@ class StitchRecord(BaseModel):
     compare_video: str | None = None
     selected_num_buffers: int
     selected_black_padding: int
+    selected_black_pixels: int
+    selected_noisy_pixels: int
     compare_num_buffers: int | None = None
     compare_black_padding: int | None = None
+    compare_black_pixels: int | None = None
+    compare_noisy_pixels: int | None = None
     selected_edge_score: float | None = None
     compare_edge_score: float | None = None
 
@@ -247,6 +278,8 @@ def _select_best_candidate(
             selected_video=candidates[0].recording.name,
             selected_num_buffers=candidates[0].num_buffers,
             selected_black_padding=candidates[0].sum_black_padding,
+            selected_black_pixels=candidates[0].black_pixels,
+            selected_noisy_pixels=candidates[0].noisy_pixels,
             **kwargs,
         )
 
@@ -270,9 +303,13 @@ def _select_best_candidate(
         selected_video=selected.recording.name,
         selected_num_buffers=selected.num_buffers,
         selected_black_padding=selected.sum_black_padding,
+        selected_black_pixels=selected.black_pixels,
+        selected_noisy_pixels=selected.noisy_pixels,
         compare_video=other.recording.name,
         compare_num_buffers=other.num_buffers,
         compare_black_padding=other.sum_black_padding,
+        compare_black_pixels=other.black_pixels,
+        compare_noisy_pixels=other.noisy_pixels,
         **kwargs,
     )
 
diff --git a/mio/process/video.py b/mio/process/video.py
index 1bf426a8..7a843719 100644
--- a/mio/process/video.py
+++ b/mio/process/video.py
@@ -632,6 +632,10 @@ def score_noise(
         for idx in iterator:
             record = {"reconstructed_frame_index": idx}
             frame = recording.video[idx]
+            # FIXME: video proxy should return grayscale as grayscale
+            # https://github.com/miniscope/mio/issues/175
+            if len(frame.shape) == 3 and frame.shape[-1] == 3:
+                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
             for method, detector in detectors.items():
                 _, noise_mask = detector.find_invalid_area(frame)
                 record[method] = np.count_nonzero(noise_mask)

From 5eea4e076f932d295d9e56d2122d95e97ecc78b4 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Wed, 22 Apr 2026 21:44:02 -0700
Subject: [PATCH 10/21] clean up concat

---
 mio/cli/process.py    | 91 ++++++----------------------------------
 mio/process/stitch.py | 96 +++++++++++++++++++++++++------------------
 2 files changed, 69 insertions(+), 118 deletions(-)

diff --git a/mio/cli/process.py b/mio/cli/process.py
index 1e1a3f04..5d201c37 100644
--- a/mio/cli/process.py
+++ b/mio/cli/process.py
@@ -2,7 +2,6 @@
 Command line interface for offline video pre-processing.
 """
 
-import re
 from pathlib import Path
 
 import click
@@ -203,102 +202,38 @@ def remove_frames(input: str, output: str | None, frames: str, force: bool = Fal
 
 @process.command()
 @click.option(
-    "-d",
-    "--directory",
+    "-i",
+    "--inputs",
     required=True,
-    type=click.Path(exists=True, file_okay=False),
-    help="Directory containing .avi segment files. All .avi files with companion "
-    ".csv files will be discovered and sorted in natural numeric order.",
+    multiple=True,
+    type=click.Path(exists=True, dir_okay=False),
+    help="Paths to video files. Each requires a .csv with the same stem name.",
 )
 @click.option(
     "-o",
     "--output",
     type=click.Path(dir_okay=False),
-    default=None,
+    required=True,
     help="Path to the output concatenated video file or directory. "
     "If not specified, saves next to video with '_combined' suffix.",
 )
-@click.option(
-    "--fps",
-    type=int,
-    default=20,
-    help="Frames per second for output video.",
-)
 def concat(
-    directory: str,
-    output: str | None,
-    fps: int,
+    inputs: list[Path],
+    output: Path,
 ) -> None:
     """
     Concatenate sequential recording segments from one DAQ into a single video.
 
-    Discovers all .avi files in the given directory (that have companion .csv files),
-    sorts them by filename, and concatenates them into a single video + CSV with
-    contiguous reconstructed_frame_index.
-
     Use this to combine multiple segment files (e.g. long-2.avi, long-3.avi, ...)
     from the same DAQ before stitching across DAQs.
     """
-    dir_path = Path(directory)
-
-    def _natural_sort_key(path: Path) -> list:
-        """Sort by splitting name into text and numeric parts for natural ordering."""
-        return [
-            int(part) if part.isdigit() else part.lower() for part in re.split(r"(\d+)", path.name)
-        ]
-
-    avi_files = sorted(dir_path.glob("*.avi"), key=_natural_sort_key)
-
-    def _find_companion_csv(avi_path: Path) -> Optional[Path]:
-        """Find companion CSV, handling name mismatches like long-8-002.avi -> long-8.csv."""
-        # Try exact stem match first
-        csv_path = avi_path.with_suffix(".csv")
-        if csv_path.exists():
-            return csv_path
-        # Try stripping trailing numeric suffixes: long-8-002.avi -> long-8.csv
-        stem = avi_path.stem
-        stripped = re.sub(r"-\d+$", "", stem)
-        while stripped != stem:
-            csv_path = avi_path.parent / f"{stripped}.csv"
-            if csv_path.exists():
-                return csv_path
-            stem = stripped
-            stripped = re.sub(r"-\d+$", "", stem)
-        return None
-
-    # Filter to only AVIs that have a companion CSV
-    valid_pairs: list[tuple[Path, Path]] = []
-    for avi in avi_files:
-        csv_path = _find_companion_csv(avi)
-        if csv_path is not None:
-            valid_pairs.append((avi, csv_path))
-        else:
-            click.echo(f"  Skipping {avi.name} (no companion .csv found)")
-
-    if len(valid_pairs) < 2:
-        raise click.ClickException(
-            f"Need at least 2 .avi files with companion .csv files in {directory}, "
-            f"found {len(valid_pairs)}."
-        )
-
-    click.echo(f"Found {len(valid_pairs)} segments in {directory}:")
-    for avi, csv in valid_pairs:
-        click.echo(f"  {avi.name} -> {csv.name}")
-
-    recordings = [RecordingData(video_path=avi, csv_path=csv) for avi, csv in valid_pairs]
-
-    first_input_path = valid_pairs[0][0]
-    output_arg = output if output is not None else DEFAULT_PROCESS_DIR
-    combined_video_path = resolve_output_path(first_input_path, "_combined", output_arg)
-    combined_csv_path = combined_video_path.with_suffix(".csv")
+    if len(inputs) < 2:
+        raise click.ClickException("Need at least 2 .avi files to concat")
+    recordings = [Recording.from_video(Path(p)) for p in inputs]
+    output = Path(output)
 
     click.echo(f"Concatenating {len(recordings)} segments...")
-    concat_recordings(
-        recordings=recordings,
-        output_video_path=combined_video_path,
-        output_csv_path=combined_csv_path,
-        fps=fps,
-    )
+    concat_recordings(recordings=recordings, output_video_path=output, progress=True)
 
 
 @process.command()
diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index 880b8ff8..639427f0 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -10,17 +10,18 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
+from functools import partial
 from pathlib import Path
 
 import cv2
 import numpy as np
 import pandas as pd
 from pydantic import BaseModel
-from tqdm import tqdm
+from tqdm import tqdm, trange
 
 from mio.io import BufferedCSVWriter, VideoWriter
 from mio.logging import init_logger
-from mio.models.dataset import Dataset, Recording, StitchedRecording
+from mio.models.dataset import Dataset, Recording, StitchedRecording, paths_from_video
 from mio.models.process import NoisePatchConfig
 
 logger = init_logger(name="stitch")
@@ -315,11 +316,8 @@ def _select_best_candidate(
 
 
 def concat_recordings(
-    recordings: list[Recording],
-    output_video_path: Path,
-    output_csv_path: Path,
-    fps: int = 20,
-) -> None:
+    recordings: list[Recording], output_video_path: Path, progress: bool = False
+) -> Recording:
     """Concatenate sequential recording segments into a single video + CSV.
 
     Each recording's frames are appended in order. The CSV metadata is merged
@@ -332,49 +330,67 @@ def concat_recordings(
         Ordered list of recording segments to concatenate.
     output_video_path : Path
         Path for the combined output AVI.
-    output_csv_path : Path
-        Path for the combined output CSV.
-    fps : int
-        Frames per second for the output video.
+    progress : bool
+        Show a progress bar
     """
+    fps = int(recordings[0].video.video.get(cv2.CAP_PROP_FPS))
     video_writer = VideoWriter(path=output_video_path, fps=fps)
     metadata_parts: list[pd.DataFrame] = []
     rfi_offset = 0
     total_frames = 0
 
-    for i, rec in enumerate(tqdm(recordings, desc="Concatenating segments")):
-        # Copy all video frames
-        seg_frames = 0
-        total_frames = rec.video.shape[0]
-        for n in range(total_frames):
-            frame = rec.video[0]
-            video_writer.write_frame(frame)
-            seg_frames += 1
-
-        # Offset reconstructed_frame_index in metadata
-        df = rec.metadata.copy()
-        max_rfi = int(df["reconstructed_frame_index"].max())
-        df["reconstructed_frame_index"] = df["reconstructed_frame_index"] + rfi_offset
-        metadata_parts.append(df)
-
-        logger.info(
-            f"Segment {i}: {rec.video.path.name} — " f"{seg_frames} frames, rfi_offset={rfi_offset}"
-        )
-        rfi_offset += max_rfi + 1
-        total_frames += seg_frames
-
-    video_writer.close()
+    recs = (
+        tqdm(enumerate(recordings), desc="Concatenating recordings", position=0)
+        if progress
+        else enumerate(recordings)
+    )
+    frame_iter_cls = partial(trange, position=1) if progress else range
+    try:
+        for i, rec in recs:
+            # Copy all video frames
+            seg_frames = 0
+            total_frames = rec.video.shape[0]
+
+            for n in frame_iter_cls(total_frames):
+                frame = rec.video[n]
+                video_writer.write_frame(frame)
+                seg_frames += 1
+
+            # Offset reconstructed_frame_index in metadata
+            df = rec.metadata.copy()
+            max_rfi = int(df["reconstructed_frame_index"].max())
+            df["reconstructed_frame_index"] = df["reconstructed_frame_index"] + rfi_offset
+            metadata_parts.append(df)
+
+            logger.debug(
+                "Segment %s: %s — %s frames, rfi_offset=%s",
+                i,
+                rec.video.path.name,
+                seg_frames,
+                rfi_offset,
+            )
+            rfi_offset += max_rfi + 1
+            total_frames += seg_frames
+    finally:
+        video_writer.close()
+        if progress:
+            recs.close()
 
     combined_df = pd.concat(metadata_parts, ignore_index=True)
-    combined_df.to_csv(output_csv_path, index=False)
+    combined_df.to_csv(paths_from_video(output_video_path)["metadata"], index=False)
 
-    logger.info(
-        f"Concat completed: {total_frames} frames from "
-        f"{len(recordings)} segments -> {output_video_path}"
+    logger.debug(
+        "Concat completed: %s frames from %s segments -> %s",
+        total_frames,
+        len(recordings),
+        output_video_path,
     )
+    return Recording.from_video(output_video_path)
 
 
-def _build_timestamp_matches(recordings, threshold_ms: float = 25.0) -> list[dict[int, int]]:
+def _build_timestamp_matches(
+    recordings: list[Recording], threshold_ms: float = 25.0
+) -> list[dict[int, int]]:
     """
     Match frames across recordings by nearest unix timestamp.
 
@@ -402,7 +418,7 @@ def _build_timestamp_matches(recordings, threshold_ms: float = 25.0) -> list[dic
     ref_indices, ref_timestamps = per_rec_timestamps[0]
     matches: list[dict[int, int]] = []
 
-    for i, (ref_idx, ref_ts) in enumerate(zip(ref_indices, ref_timestamps)):
+    for ref_idx, ref_ts in zip(ref_indices, ref_timestamps):
         match: dict[int, int] = {0: int(ref_idx)}
         for rec_num in range(1, len(recordings)):
             other_indices, other_timestamps = per_rec_timestamps[rec_num]
@@ -422,4 +438,4 @@ def _build_timestamp_matches(recordings, threshold_ms: float = 25.0) -> list[dic
 
         if len(match) > 1:
             matches.append(match)
-        return matches
+    return matches

From 8341b8b8988f14c2be113b4e2868abb6a37eebff Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Wed, 22 Apr 2026 22:09:50 -0700
Subject: [PATCH 11/21] update concat tests

---
 .gitignore   | 4 ++++
 mio/utils.py | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/.gitignore b/.gitignore
index 7ab1a9cb..ffae3ef1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,4 +165,8 @@ user_data/*
 # The default output directory for the process commands. Not necessary but doesn't hurt to have.
 mio_process/*
 !user_data/.gitkeep
+tests/data/stitch/*_timestamps.csv
+tests/data/stitch/*_noise.csv
+tests/data/stitch/*_scores.csv
+tests/data/stitch/*_stitched*
 
diff --git a/mio/utils.py b/mio/utils.py
index bcb25950..457ec51e 100644
--- a/mio/utils.py
+++ b/mio/utils.py
@@ -49,6 +49,8 @@ def hash_video(
     Returns:
         str
     """
+    if not Path(path).exists():
+        raise FileNotFoundError("No such video exists!")
     h = hashlib.new(method)
 
     vid = cv2.VideoCapture(str(path))

From e40e88c8a46d1bb9b9ea99ac8ebf9c029509baad Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Wed, 22 Apr 2026 22:10:02 -0700
Subject: [PATCH 12/21] actually commit the updated tests

---
 tests/test_process/test_stitch.py | 84 ++++++-------------------------
 tests/test_process/test_video.py  |  1 -
 2 files changed, 15 insertions(+), 70 deletions(-)

diff --git a/tests/test_process/test_stitch.py b/tests/test_process/test_stitch.py
index 320fe51a..64f0c653 100644
--- a/tests/test_process/test_stitch.py
+++ b/tests/test_process/test_stitch.py
@@ -290,89 +290,35 @@ def test_remove_frames_invalid(tmp_path):
         )
 
 
-def test_concat_recordings(tmp_path):
+def test_concat_recordings(tmp_path, recordings):
     """Concatenating two recordings produces contiguous frame indices and correct frame count."""
-    recordings = [
-        RecordingData(
-            video_path=STITCH_DATA_DIR / "video1.avi",
-            csv_path=STITCH_DATA_DIR / "video1.csv",
-        ),
-        RecordingData(
-            video_path=STITCH_DATA_DIR / "video2.avi",
-            csv_path=STITCH_DATA_DIR / "video2.csv",
-        ),
-    ]
 
     combined_video = tmp_path / "combined.avi"
-    combined_csv = tmp_path / "combined.csv"
 
-    concat_recordings(
-        recordings=recordings,
+    combined = concat_recordings(
+        recordings=list(recordings.values()),
         output_video_path=combined_video,
-        output_csv_path=combined_csv,
-        fps=20,
     )
 
     # Video frame count should be sum of both inputs
-    cap1 = cv2.VideoCapture(str(STITCH_DATA_DIR / "video1.avi"))
-    cap2 = cv2.VideoCapture(str(STITCH_DATA_DIR / "video2.avi"))
-    expected_frames = int(cap1.get(cv2.CAP_PROP_FRAME_COUNT)) + int(
-        cap2.get(cv2.CAP_PROP_FRAME_COUNT)
-    )
-    cap1.release()
-    cap2.release()
+    expected_frames = sum(r.video.n_frames for r in recordings.values())
+    # the Recording class also validates that the metadata has a matching length if present
+    # so its presence means that the metadata is also matching
+    assert combined.metadata is not None
 
-    cap = cv2.VideoCapture(str(combined_video))
-    actual_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    cap.release()
+    actual_frames = combined.video.n_frames
     assert actual_frames == expected_frames
 
     # CSV should have contiguous reconstructed_frame_index
-    df = pd.read_csv(combined_csv)
-    indices = sorted(df["reconstructed_frame_index"].unique())
-    assert indices == list(range(len(indices)))
-
-    # Second segment's rfi should start after first segment's max rfi
-    df1 = pd.read_csv(STITCH_DATA_DIR / "video1.csv")
-    max_rfi_1 = df1["reconstructed_frame_index"].max()
-    # Combined CSV should have indices beyond max_rfi_1
-    assert df["reconstructed_frame_index"].max() > max_rfi_1
+    df = combined.metadata
+    diffs = df["reconstructed_frame_index"].diff().iloc[1:].to_numpy()
+    assert (diffs <= 1).all() and (diffs >= 0).all()
 
 
 def test_stitch_timestamp_matching(tmp_path):
     """Timestamp matching produces a valid stitched output on real fixtures."""
-    recordings = [
-        RecordingData(
-            video_path=STITCH_DATA_DIR / "video1.avi",
-            csv_path=STITCH_DATA_DIR / "video1.csv",
-        ),
-        RecordingData(
-            video_path=STITCH_DATA_DIR / "video2.avi",
-            csv_path=STITCH_DATA_DIR / "video2.csv",
-        ),
-    ]
-
-    stitched_video = tmp_path / "stitched.avi"
-    stitched_csv = tmp_path / "stitched.csv"
-
-    bundle = RecordingDataBundle(
-        recordings=recordings,
-        stitched_video_writer=VideoWriter(path=stitched_video, fps=20),
-        combined_csv_path=stitched_csv,
+    raise NotImplementedError(
+        "We need an actual test of this "
+        "where we just pass two synthesized metadata dataframes "
+        "and ensure that they align as we expect."
     )
-    bundle.stitch_recordings(matching_method="timestamp", timestamp_threshold_ms=25.0)
-
-    # Should produce a non-empty stitched video
-    cap = cv2.VideoCapture(str(stitched_video))
-    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    cap.release()
-    assert frame_count > 0, "Timestamp matching produced no output frames"
-
-    # Stitched CSV should exist and have contiguous reconstructed_frame_index
-    df = pd.read_csv(stitched_csv)
-    indices = sorted(df["reconstructed_frame_index"].unique())
-    assert indices == list(range(len(indices)))
-
-    # Frame count should be similar to frame_num matching (within 20%)
-    assert abs(frame_count - EXPECTED_STITCHED_FRAME_COUNT) / EXPECTED_STITCHED_FRAME_COUNT < 0.2
-
diff --git a/tests/test_process/test_video.py b/tests/test_process/test_video.py
index 6bb70f58..640cd3d8 100644
--- a/tests/test_process/test_video.py
+++ b/tests/test_process/test_video.py
@@ -49,7 +49,6 @@ def test_noise_patch_processor(video_frame, tmp_path):
 def test_noise_patch_processor_no_config(random_8bit_video_frame, tmp_path):
     denoise_config = DenoiseConfig.from_id("denoise_example")
     denoise_config.noise_patch.enable = True
-    denoise_config.noise_patch.mean_error_config = None
     denoise_config.noise_patch.gradient_config = None
     denoise_config.noise_patch.black_area_config = None
 

From 24d09db8f427330c5bfeb167491d622b6fe2ff97 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Thu, 23 Apr 2026 17:22:35 -0700
Subject: [PATCH 13/21] update tests

---
 tests/test_cli_process.py               |  2 +-
 tests/test_process/test_frame_helper.py | 19 +++++--------------
 tests/test_process/test_stitch.py       |  8 +++++---
 3 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/tests/test_cli_process.py b/tests/test_cli_process.py
index 36a1626d..c6d10f90 100644
--- a/tests/test_cli_process.py
+++ b/tests/test_cli_process.py
@@ -15,7 +15,7 @@
 
 STITCH_DATA_DIR = Path(__file__).parent / "data" / "stitch"
 
-EXPECTED_STITCHED_VIDEO_HASH = "c8cdf3149f812ae25e6f3f1a876249e4ce118e9a53aa1805e48b995b01f07a91"
+EXPECTED_STITCHED_VIDEO_HASH = "df937c8651cf142b4d8e2a75140729dcacdc1151ebc3767b48d0ca71578007ff"
 EXPECTED_CROP_VIDEO_HASH = "432642b1528fcd9ad553cfb3cc3862bef931301bd11d44dc3c2372fc379fa629"
 EXPECTED_STITCHED_TRIMMED_VIDEO_HASH = (
     "2c62b65ddd537e94e7d3f29e7c46523357d70aefed02d46baa9726ee57798af9"
diff --git a/tests/test_process/test_frame_helper.py b/tests/test_process/test_frame_helper.py
index 2c7980fe..131bfc0e 100644
--- a/tests/test_process/test_frame_helper.py
+++ b/tests/test_process/test_frame_helper.py
@@ -50,7 +50,6 @@ class NoiseGroundTruth(BaseModel):
     [
         (["gradient"], GroundTruthCategory.check_pattern),
         (["black_area"], GroundTruthCategory.blacked_out),
-        (["mean_error"], GroundTruthCategory.check_pattern),
     ],
 )
 def test_noisy_frame_detection(video, ground_truth, noise_detection_method, noise_category):
@@ -60,14 +59,6 @@ def test_noisy_frame_detection(video, ground_truth, noise_detection_method, nois
     """
     if "gradient" in noise_detection_method:
         global_config: DenoiseConfig = DenoiseConfig.from_id("denoise_noise_detection_test")
-    elif "mean_error" in noise_detection_method:
-        if "extended" in video:
-            # FIXME: resolve this before merging `feat-preprocess` to `main`
-            pytest.xfail(
-                "Bug in comparison to previous frames when first frame is noisy, "
-                "see https://github.com/Aharoni-Lab/mio/pull/97"
-            )
-        global_config: DenoiseConfig = DenoiseConfig.from_id("denoise_example_mean_error")
     elif "black_area" in noise_detection_method:
         global_config: DenoiseConfig = DenoiseConfig.from_id("denoise_noise_detection_test")
     else:
@@ -127,8 +118,8 @@ def test_noisy_frame_detection(video, ground_truth, noise_detection_method, nois
 @pytest.mark.parametrize(
     "min_rows,expected_noisy",
     [
-        (1, True),   # default: any flagged row triggers detection
-        (5, True),   # exactly 5 noisy rows meets the threshold
+        (1, True),  # default: any flagged row triggers detection
+        (5, True),  # exactly 5 noisy rows meets the threshold
         (10, False),  # only 5 noisy rows, below threshold of 10
     ],
 )
@@ -146,6 +137,6 @@ def test_black_area_min_rows(min_rows, expected_noisy):
     )
     detector = BlackAreaDetector(config)
     is_noisy, mask = detector.find_invalid_area(frame)
-    assert is_noisy == expected_noisy, (
-        f"min_rows={min_rows}: expected noisy={expected_noisy}, got {is_noisy}"
-    )
+    assert (
+        is_noisy == expected_noisy
+    ), f"min_rows={min_rows}: expected noisy={expected_noisy}, got {is_noisy}"
diff --git a/tests/test_process/test_stitch.py b/tests/test_process/test_stitch.py
index 64f0c653..d3d06122 100644
--- a/tests/test_process/test_stitch.py
+++ b/tests/test_process/test_stitch.py
@@ -26,7 +26,7 @@
 
 STITCH_DATA_DIR = Path(__file__).parent.parent / "data" / "stitch"
 
-EXPECTED_STITCHED_VIDEO_HASH = "c8cdf3149f812ae25e6f3f1a876249e4ce118e9a53aa1805e48b995b01f07a91"
+EXPECTED_STITCHED_VIDEO_HASH = "df937c8651cf142b4d8e2a75140729dcacdc1151ebc3767b48d0ca71578007ff"
 EXPECTED_DEBUG_VIDEO_HASH = (
     "856e6e5c538532bd0fcfb942616686a5cd262aadb51dd8796adf5de69215c94b",
     "a69b6cadf4ab1dd8a1097d2c1be298397206db235fd4c5f68febd1700f15a4b6",
@@ -120,8 +120,10 @@ def test_score_csv_edge_scoring_tiebreaker(stitch_result: StitchedRecording):
     df = stitch_result.scores
     # filter frames where only one video or the other had them
     df = df[~df["compare_video"].isna()]
-    # there should be four frames that could be decided on metadata alone
-    assert len(df[df["selected_edge_score"].isna()]) == 4
+    # there should be 7 frames that could be decided on metadata alone
+    # - 4x on buffer count
+    # - 3x on black pixels
+    assert len(df[df["selected_edge_score"].isna()]) == 7
     # for all those that had to use edge scores, the selected should be greater or equal
     edges_scored = df[~df["selected_edge_score"].isna()]
     for _, row in edges_scored.iterrows():

From f0fe82ddf820f92d1e4f7d46327034b080b88477 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Thu, 23 Apr 2026 21:25:36 -0700
Subject: [PATCH 14/21] align by timestamp

---
 mio/process/stitch.py             | 167 +++++++++++++++++++-----------
 tests/test_process/test_stitch.py |  93 +++++++++++++++--
 2 files changed, 192 insertions(+), 68 deletions(-)

diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index 639427f0..9921bd2c 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -31,24 +31,43 @@ def align(recordings: list[Recording]) -> pd.DataFrame:
     """
     Create an alignment map by frame index.
 
-    Note that this **does not** align by timestamp!
-    it assumes that there is some ``frame_num`` in the metadata col for each of the recordings
-    that comes from some common device.
+    Note that this **is not** a general alignment method yet -
+    this is specialized to the case of stitching two recordings of the same underlying data source,
+    as is done when we record multiple FPGA sensors in the miniscope zero.
+    Please raise an issue if you need a general frame alignment mechanisms.
+
+    We have two kinds of alignment, depending on the structure of the metadata:
+
+    * If all the recordings have continuously incrementing `frame_num`s,
+      we align by the ``frame_num``.
+      The `frame_num` is given by the device, and is the same across recordings,
+      even if they start at different times (and capture different ranges of frame nums).
+      This is an **outer join**, keeping all frames
+    * If the recordings have *discontinuous* ``frame_num`` s,
+      e.g. if the device was restarted during acquisition, we align by the acquisition timestamp.
+      This assumes that the system times are closely matching
+      (specifically, more closely than the interval between successive frames in the recording).
+      This is an **inner join**, where we only keep frames where we can align timestamps.
     """
-    metadatas: dict[str, pd.DataFrame] = {r.name: r.metadata for r in recordings}
-    if not all(isinstance(m, pd.DataFrame) for m in metadatas.values()):
+    if not all(isinstance(r.metadata, pd.DataFrame) for r in recordings):
         raise ValueError("All recordings must have metadata csvs to align them")
     if not all(
-        "frame_num" in m.columns and "reconstructed_frame_index" in m.columns
-        for m in metadatas.values()
+        "frame_num" in r.metadata.columns and "reconstructed_frame_index" in r.metadata.columns
+        for r in recordings
     ):
         raise ValueError("All recordings must have frame_num and reconstructed_frame_index columns")
 
-    # find the full set of frames in all the recordings
-    frame_set = set()
-    for m in metadatas.values():
-        frame_set |= set(m["frame_num"])
+    if not any(_has_discontinuous_runs(r.metadata["frame_num"]) for r in recordings):
+        logger.debug("Using frame-num based alignment")
+        return _align_by_frame(recordings)
+    else:
+        logger.debug("Using time-based alignment")
+        return _align_by_time(recordings)
+
 
+def _align_by_frame(recordings: list[Recording]) -> pd.DataFrame:
+    """Align metadata by the frame_num column"""
+    metadatas: dict[str, pd.DataFrame] = {r.name: r.metadata for r in recordings}
     # aggregate mappings from frame nums to the reconstructed frame index
     frame_maps = {
         name: df[["frame_num", "reconstructed_frame_index"]]
@@ -74,6 +93,59 @@ def align(recordings: list[Recording]) -> pd.DataFrame:
     return aligned
 
 
+def _align_by_time(recordings: list[Recording]) -> pd.DataFrame:
+    """
+    Align by the nearest unix timestamp.
+
+    Use the mean of the timestamps from the buffers to get frames that have the most overlap.
+
+    This could be made an outer join by just keeping the leading and trailing rows,
+    and filtering rows with NaNs in the interior regions of buffer_recv_unix_time_x and y
+    but leaving as inner for now to match existing timestamp match fn.
+
+    the inner join functions like "when both frames mutually pick each other as their closest frame"
+    which filters blippy frames that are very short.
+
+    I **think** but have not tested that doing this triple merge method is faster
+    than nested iteration, esp for longer recordings, since these are all vector ops.
+    """
+    metadatas: dict[str, pd.DataFrame] = {r.name: r.metadata for r in recordings}
+    time_maps = {
+        name: df.groupby("reconstructed_frame_index")["buffer_recv_unix_time"].mean().reset_index()
+        for name, df in metadatas.items()
+    }
+
+    # inner join on closest mean timestamp value
+    names = sorted(time_maps.keys())
+    last_name = names.pop(0)
+    aligned = time_maps[last_name].copy().rename(columns={"reconstructed_frame_index": last_name})
+    for name in names:
+        # merge left and right, then take the inner match
+        left = pd.merge_asof(
+            aligned, time_maps[name], on="buffer_recv_unix_time", direction="nearest"
+        )
+        right = pd.merge_asof(
+            time_maps[name], aligned, on="buffer_recv_unix_time", direction="nearest"
+        )
+        left.rename(columns={"reconstructed_frame_index": name}, inplace=True)
+        right.rename(columns={"reconstructed_frame_index": name}, inplace=True)
+
+        # merge on the frame indexes from the left and right -
+        # align when both sides agree they are the closest,
+        # dropping extras from glitches/sampling rate differences
+        aligned = pd.merge(left, right, "inner", on=[last_name, name])
+
+        # keep the left's times, keeping them anchored rather than wandering in each recording
+        aligned = aligned[[c for c in aligned.columns if c != "buffer_recv_unix_time_y"]]
+        aligned.rename(columns={"buffer_recv_unix_time_x": "buffer_recv_unix_time"}, inplace=True)
+        last_name = name
+
+    aligned = aligned.astype({k: "Int64" for k in metadatas})
+    # popping the index gives us the 'index' column
+    aligned = aligned.reset_index()
+    return aligned
+
+
 def stitch(
     recordings: list[Recording],
     noise_config: NoisePatchConfig | None = None,
@@ -171,7 +243,7 @@ def stitch(
                         metadata_rows=buffer_rows,
                     )
                 )
-            result = _select_best_candidate(candidates, row["index"], row["frame_num"])
+            result = _select_best_candidate(candidates, row["index"], row.get("frame_num"))
             selected = [c for c in candidates if c.recording.name == result.selected_video][0]
             if debug_video_writer is not None:
                 debug_frame = np.zeros_like(frames[0], dtype=np.uint8)
@@ -240,7 +312,7 @@ class StitchRecord(BaseModel):
     """
 
     index: int
-    frame_num: int
+    frame_num: int | None = None
     selected_video: str
     compare_video: str | None = None
     selected_num_buffers: int
@@ -261,7 +333,7 @@ def header(cls) -> list[str]:
 
 
 def _select_best_candidate(
-    candidates: list[CandidateFrame], index: int, frame_num: int
+    candidates: list[CandidateFrame], index: int, frame_num: int | None = None
 ) -> StitchRecord:
     """
     Pick the best candidate using metadata scoring with edge-score tiebreak.
@@ -388,54 +460,25 @@ def concat_recordings(
     return Recording.from_video(output_video_path)
 
 
-def _build_timestamp_matches(
-    recordings: list[Recording], threshold_ms: float = 25.0
-) -> list[dict[int, int]]:
+def _has_discontinuous_runs(series: pd.Series) -> bool:
     """
-    Match frames across recordings by nearest unix timestamp.
-
-    For each recording, compute per-frame timestamp as the max
-    buffer_recv_unix_time for each reconstructed_frame_index.
+    Check if a metadata series has multiple discontinuous series of values:
+    e.g. when acquiring frames and the counter is reset.
 
-    Uses recording[0] as the reference. For each frame in ref,
-    find nearest frame in each other recording within threshold.
-
-    Returns list of dicts: [{rec_idx: reconstructed_frame_index, ...}, ...]
-    One entry per matched frame set, ordered by ref recording's frame order.
+    Ignores single-row discontinuities like e.g. from a single buffer having an incorrect frame_num
     """
-    threshold_s = threshold_ms / 1000.0
-
-    # Build per-frame timestamp arrays for each recording
-    per_rec_timestamps: list[tuple[np.ndarray, np.ndarray]] = []
-    for rec in recordings:
-        df = rec.metadata
-        grouped = df.groupby("reconstructed_frame_index")["buffer_recv_unix_time"].max()
-        frame_indices = grouped.index.values
-        timestamps = grouped.values
-        sort_order = np.argsort(timestamps)
-        per_rec_timestamps.append((frame_indices[sort_order], timestamps[sort_order]))
-
-    ref_indices, ref_timestamps = per_rec_timestamps[0]
-    matches: list[dict[int, int]] = []
-
-    for ref_idx, ref_ts in zip(ref_indices, ref_timestamps):
-        match: dict[int, int] = {0: int(ref_idx)}
-        for rec_num in range(1, len(recordings)):
-            other_indices, other_timestamps = per_rec_timestamps[rec_num]
-            pos = np.searchsorted(other_timestamps, ref_ts)
-
-            best_dist = float("inf")
-            best_idx = -1
-            for candidate_pos in [pos - 1, pos]:
-                if 0 <= candidate_pos < len(other_timestamps):
-                    dist = abs(other_timestamps[candidate_pos] - ref_ts)
-                    if dist < best_dist:
-                        best_dist = dist
-                        best_idx = int(other_indices[candidate_pos])
-
-            if best_dist <= threshold_s:
-                match[rec_num] = best_idx
-
-        if len(match) > 1:
-            matches.append(match)
-    return matches
+    # we need the initial NaN for alignment below, so don't drop it yet -
+    # filtering NaNs is presumably cheaper than diffing
+    diff = series.diff()
+    # fast "no" if the whole series is continuous
+    if (diff.dropna() <= 1).all() and (diff.dropna() >= 0).all():
+        return False
+
+    # filter to ignore singleton blips
+    # e.g. frame_num breaks in one buffer,
+    # find numbers that don't return to the prior number or number + 1 in the subsequent rows
+    blips = np.logical_or(diff == diff.shift(-1) * -1, diff == (diff.shift(-1) - 1) * -1)
+
+    # now check if there are any longer lasting discontinuities
+    diff = series[~blips].diff().dropna()
+    return bool((~diff.between(0, 1)).any())
diff --git a/tests/test_process/test_stitch.py b/tests/test_process/test_stitch.py
index d3d06122..30e366a6 100644
--- a/tests/test_process/test_stitch.py
+++ b/tests/test_process/test_stitch.py
@@ -19,6 +19,8 @@
     StitchRecord,
     concat_recordings,
     stitch,
+    _align_by_time,
+    _has_discontinuous_runs,
     _score_edges,
 )
 from mio.process.video import trim, remove_frames
@@ -317,10 +319,89 @@ def test_concat_recordings(tmp_path, recordings):
     assert (diffs <= 1).all() and (diffs >= 0).all()
 
 
-def test_stitch_timestamp_matching(tmp_path):
-    """Timestamp matching produces a valid stitched output on real fixtures."""
-    raise NotImplementedError(
-        "We need an actual test of this "
-        "where we just pass two synthesized metadata dataframes "
-        "and ensure that they align as we expect."
+@pytest.mark.parametrize("flip", [True, False])
+def test_align_by_time(tmp_path, flip):
+    """Aligning by timestamp finds the inner join of the closest matching timestamps"""
+    # one normal video with some linspaced times
+    left_idxes = np.ravel(np.repeat(np.arange(50), 5))
+    left_times = np.linspace(0, 1, len(left_idxes))
+    left = pd.DataFrame(
+        {"reconstructed_frame_index": left_idxes, "buffer_recv_unix_time": left_times}
     )
+
+    # one offset video with a blippy frame from a bit flip in the frame_num
+    right_idxes = np.ravel(np.repeat(np.arange(25), 5))
+    right_idxes = np.concat([right_idxes, [25]], axis=0)
+    right_idxes = np.concat([right_idxes, np.ravel(np.repeat(np.arange(26, 52), 5))], axis=0)
+    # make same size so sampling rate is the same
+    right_idxes = right_idxes[: len(left_idxes)]
+    right_times = np.linspace(0, 1, len(right_idxes)) + 0.1
+    right = pd.DataFrame(
+        {"reconstructed_frame_index": right_idxes, "buffer_recv_unix_time": right_times}
+    )
+
+    good, bad = "video1", "video2"
+    if flip:
+        good, bad = "video2", "video1"
+    recordings = [
+        Recording.model_construct(name=good, metadata=left),
+        Recording.model_construct(name=bad, metadata=right),
+    ]
+
+    aligned = _align_by_time(recordings)
+    # we should have received 45 frames: 50 frames in the original - 5 frames in 0.1 seconds of lag
+    assert len(aligned) == 45
+    assert np.array_equal(aligned[good], np.arange(5, 50))
+
+    # we should have dropped frame 25 in the right one
+    assert 25 not in np.array(aligned[bad])
+    assert np.array_equal(aligned[bad], np.concat([np.arange(25), np.arange(26, 46)]))
+
+
+def test_stitch_with_timestamps(stitch_result, tmp_path):
+    """
+    When we scramble the `frame_num`, we can stitch by timestamps.
+    We should get the same result as if we were able to use frame_num in this case.
+    """
+    # use a temporary version of the recordings because we are going to wreck the metadata
+    recordings = {
+        "video1": Recording.from_video(STITCH_DATA_DIR / "video1.avi"),
+        "video2": Recording.from_video(STITCH_DATA_DIR / "video2.avi"),
+    }
+    recordings["video1"].metadata["frame_num"] = np.random.default_rng().integers(
+        0, 1000, size=len(recordings["video1"].metadata)
+    )
+    recordings["video2"].metadata["frame_num"] = np.random.default_rng().integers(
+        0, 1000, size=len(recordings["video2"].metadata)
+    )
+
+    result = stitch(list(recordings.values()), debug_video=True, output_dir=tmp_path)
+    assert (result.scores["selected_video"] == stitch_result.scores["selected_video"]).all()
+
+
+@pytest.mark.parametrize(
+    "series,expected",
+    [
+        pytest.param([1, 1, 1, 2, 2, 2, 3, 3, 3], False, id="contiguous-buffers"),
+        pytest.param([1, 2, 3, 4, 5], False, id="contiguous-frames"),
+        pytest.param([1, 1, 1, 2, 500, 2, 3, 3, 3], False, id="single-bitflip-same-frame"),
+        pytest.param([1, 1, 1, 2, 2, 500, 3, 3, 3], False, id="single-bitflip-next-frame"),
+        pytest.param(
+            [10, 10, 10, 11, 11, 11, 2, 2, 2, 3, 3, 3], True, id="discontiguous-buffers-lower"
+        ),
+        pytest.param(
+            [10, 10, 10, 11, 11, 11, 20, 20, 20, 21, 21, 21],
+            True,
+            id="discontiguous-buffers-higher",
+        ),
+        pytest.param([1, 2, 3, 4, 5, 1, 2, 3, 4, 5], True, id="discontiguous-frames-lower"),
+        pytest.param([1, 2, 3, 4, 5, 10, 11, 12, 13], True, id="discontiguous-frames-higher"),
+    ],
+)
+def test_has_discontinuous_runs(series, expected):
+    """
+    We can determine when some timeseries has discontinuous runs,
+    ignoring when a single value blips incorrectly (like a bit flip in a metadata header).
+    """
+    series = pd.Series(series)
+    assert _has_discontinuous_runs(series) == expected

From e35fb74efc9a11198be414253bd446c56a4a2ae7 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Thu, 23 Apr 2026 22:33:06 -0700
Subject: [PATCH 15/21] dont count contiguous frames as being blips lol

---
 mio/process/stitch.py             |  9 ++++++---
 tests/test_cli_process.py         |  2 +-
 tests/test_process/test_stitch.py |  9 ++++++++-
 tests/test_process/test_video.py  | 10 ----------
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/mio/process/stitch.py b/mio/process/stitch.py
index 9921bd2c..60a590e7 100644
--- a/mio/process/stitch.py
+++ b/mio/process/stitch.py
@@ -469,15 +469,18 @@ def _has_discontinuous_runs(series: pd.Series) -> bool:
     """
     # we need the initial NaN for alignment below, so don't drop it yet -
     # filtering NaNs is presumably cheaper than diffing
-    diff = series.diff()
+    diff = series.diff().fillna(0)
     # fast "no" if the whole series is continuous
-    if (diff.dropna() <= 1).all() and (diff.dropna() >= 0).all():
+    if (diff <= 1).all() and (diff >= 0).all():
         return False
 
     # filter to ignore singleton blips
     # e.g. frame_num breaks in one buffer,
     # find numbers that don't return to the prior number or number + 1 in the subsequent rows
-    blips = np.logical_or(diff == diff.shift(-1) * -1, diff == (diff.shift(-1) - 1) * -1)
+    blips = np.logical_and(
+        ~diff.between(0, 1),
+        np.logical_or(diff == diff.shift(-1) * -1, diff == (diff.shift(-1) - 1) * -1),
+    )
 
     # now check if there are any longer lasting discontinuities
     diff = series[~blips].diff().dropna()
diff --git a/tests/test_cli_process.py b/tests/test_cli_process.py
index c6d10f90..eaacef19 100644
--- a/tests/test_cli_process.py
+++ b/tests/test_cli_process.py
@@ -18,7 +18,7 @@
 EXPECTED_STITCHED_VIDEO_HASH = "df937c8651cf142b4d8e2a75140729dcacdc1151ebc3767b48d0ca71578007ff"
 EXPECTED_CROP_VIDEO_HASH = "432642b1528fcd9ad553cfb3cc3862bef931301bd11d44dc3c2372fc379fa629"
 EXPECTED_STITCHED_TRIMMED_VIDEO_HASH = (
-    "2c62b65ddd537e94e7d3f29e7c46523357d70aefed02d46baa9726ee57798af9"
+    "d7b6858c85e13da69921593570a8eddff3716d0be348219d197f216afbe3867a"
 )
 EXPECTED_REMOVE_FRAMES_HASH = "b76b80f45316bad0a808802b8f5c0d65b99f6f59bc6422b84c1c2a7026ca4b15"
 
diff --git a/tests/test_process/test_stitch.py b/tests/test_process/test_stitch.py
index 30e366a6..6f5b9690 100644
--- a/tests/test_process/test_stitch.py
+++ b/tests/test_process/test_stitch.py
@@ -376,7 +376,9 @@ def test_stitch_with_timestamps(stitch_result, tmp_path):
     )
 
     result = stitch(list(recordings.values()), debug_video=True, output_dir=tmp_path)
-    assert (result.scores["selected_video"] == stitch_result.scores["selected_video"]).all()
+    # we should have an inner join on the frames - so only those without a comparison frame
+    expected = stitch_result.scores[~stitch_result.scores["compare_video"].isna()]
+    assert np.array_equal(result.scores["selected_video"], expected["selected_video"])
 
 
 @pytest.mark.parametrize(
@@ -405,3 +407,8 @@ def test_has_discontinuous_runs(series, expected):
     """
     series = pd.Series(series)
     assert _has_discontinuous_runs(series) == expected
+
+
+def test_test_data_is_considered_continuous(recordings):
+    """Just testing the assumptions of the tests ios all"""
+    assert not any(_has_discontinuous_runs(r.metadata["frame_num"]) for r in recordings.values())
diff --git a/tests/test_process/test_video.py b/tests/test_process/test_video.py
index 640cd3d8..14932170 100644
--- a/tests/test_process/test_video.py
+++ b/tests/test_process/test_video.py
@@ -57,16 +57,6 @@ def test_noise_patch_processor_no_config(random_8bit_video_frame, tmp_path):
         NoisePatchProcessor("denoise_example", denoise_config.noise_patch, tmp_path)
 
 
-def test_noise_patch_processor_no_methods(random_8bit_video_frame, tmp_path):
-    denoise_config = DenoiseConfig.from_id("denoise_example")
-    denoise_config.noise_patch.enable = True
-    denoise_config.noise_patch.method = []
-
-    processor = NoisePatchProcessor("denoise_example", denoise_config.noise_patch, tmp_path)
-    processed_frame = processor.process_frame(random_8bit_video_frame, 0)
-    assert processed_frame is random_8bit_video_frame
-
-
 def test_freqency_mask_processor(video_frame, tmp_path):
     denoise_config = DenoiseConfig.from_id("denoise_example")
     denoise_config.frequency_masking.enable = True

From 04a8aabc5bf974d076b21f8f3b9d16ada5ed75b3 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Thu, 23 Apr 2026 22:37:28 -0700
Subject: [PATCH 16/21] whoops removed the wrong test

---
 tests/test_process/test_video.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tests/test_process/test_video.py b/tests/test_process/test_video.py
index 14932170..e4a59486 100644
--- a/tests/test_process/test_video.py
+++ b/tests/test_process/test_video.py
@@ -45,16 +45,14 @@ def test_noise_patch_processor(video_frame, tmp_path):
     assert processor.name == "denoise_example"
     assert processor.output_enable
 
-
-def test_noise_patch_processor_no_config(random_8bit_video_frame, tmp_path):
+def test_noise_patch_processor_no_methods(random_8bit_video_frame, tmp_path):
     denoise_config = DenoiseConfig.from_id("denoise_example")
     denoise_config.noise_patch.enable = True
-    denoise_config.noise_patch.gradient_config = None
-    denoise_config.noise_patch.black_area_config = None
+    denoise_config.noise_patch.method = []
 
-    # This should raise a ValueError because the necessary configs are not provided
-    with pytest.raises(ValueError):
-        NoisePatchProcessor("denoise_example", denoise_config.noise_patch, tmp_path)
+    processor = NoisePatchProcessor("denoise_example", denoise_config.noise_patch, tmp_path)
+    processed_frame = processor.process_frame(random_8bit_video_frame, 0)
+    assert processed_frame is random_8bit_video_frame
 
 
 def test_freqency_mask_processor(video_frame, tmp_path):

From 1ae8a31c14b336b751b9b1ed1d34cc8a0d7371d5 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Fri, 24 Apr 2026 15:37:42 -0700
Subject: [PATCH 17/21] update changelog

---
 docs/api/models/dataset.md       |  7 +++++++
 docs/api/models/index.md         |  1 +
 docs/meta/changelog.md           | 25 +++++++++++++++++++++++--
 tests/test_process/test_video.py |  1 +
 4 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 docs/api/models/dataset.md

diff --git a/docs/api/models/dataset.md b/docs/api/models/dataset.md
new file mode 100644
index 00000000..f507cb6b
--- /dev/null
+++ b/docs/api/models/dataset.md
@@ -0,0 +1,7 @@
+# dataset
+
+```{eval-rst}
+.. automodule:: mio.models.dataset
+    :members:
+    :undoc-members:
+```
\ No newline at end of file
diff --git a/docs/api/models/index.md b/docs/api/models/index.md
index ae6c6cf5..107271ab 100644
--- a/docs/api/models/index.md
+++ b/docs/api/models/index.md
@@ -19,6 +19,7 @@ keep what is common common, and what is unique unique.
 buffer
 config
 data
+dataset
 mixins
 models
 sdcard
diff --git a/docs/meta/changelog.md b/docs/meta/changelog.md
index 91d4bd5a..45553049 100644
--- a/docs/meta/changelog.md
+++ b/docs/meta/changelog.md
@@ -1,8 +1,8 @@
 # Changelog
 
-## Upcoming
+## 0.10
 
-### *.*
+### 0.10.0
 
 #### CLI
 
@@ -12,11 +12,32 @@
   - `mio config open` to open the config in default text editor
 - [`#154`](https://github.com/miniscope/mio/pull/154) - add cli command for removing frames from video:
   - `mio process remove_frames` to remove frames by explicitly specified index from videos and metadata
+- [`#155`](https://github.com/miniscope/mio/pull/155) - `mio process concat` - concatenate videos and metadata
 
 #### CI/CD
 
 - [`#157`](https://github.com/miniscope/mio/pull/157) - Add continuous deployment to PyPI 
 
+#### New features
+
+- [`#133`](https://github.com/miniscope/mio/pull/133) - {class}`~mio.models.dataset.Dataset` 
+  organization - group recordings with their metadata, and group multiple recordings collected at the same time.
+- [`#133`](https://github.com/miniscope/mio/pull/133), [`#155`](https://github.com/miniscope/mio/pull/155)
+  Noise-aware stitching: Given two recordings of the same data stream,
+  create a stitched version that picks the best frames from each of them
+- [`#133`](https://github.com/miniscope/mio/pull/133), [`#155`](https://github.com/miniscope/mio/pull/155)
+  Alignment Maps - within a dataset, create an alignment map to align frames between recordings,
+  either by `frame_num` or by timestamps.
+- preserve noise scoring metadata in `_scores.csv` and use to pick frames during stitching
+  
+#### Perf
+
+- [`#155`](https://github.com/miniscope/mio/pull/155) - Vectorized black area detection
+
+#### Removed
+
+- [`#155`](https://github.com/miniscope/mio/pull/155) - Inter-frame mean squared error noise detection, unused.
+
 ## 0.9
 
 ### 0.9.0 - 2026-01-27 - Batch device update, NTP sync, driver import fix
diff --git a/tests/test_process/test_video.py b/tests/test_process/test_video.py
index e4a59486..d1d797db 100644
--- a/tests/test_process/test_video.py
+++ b/tests/test_process/test_video.py
@@ -45,6 +45,7 @@ def test_noise_patch_processor(video_frame, tmp_path):
     assert processor.name == "denoise_example"
     assert processor.output_enable
 
+
 def test_noise_patch_processor_no_methods(random_8bit_video_frame, tmp_path):
     denoise_config = DenoiseConfig.from_id("denoise_example")
     denoise_config.noise_patch.enable = True

From 24ce4ed15179c78e4374e7bcf8dbc8109ad2777f Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Fri, 24 Apr 2026 15:54:34 -0700
Subject: [PATCH 18/21] no json schema generation since we got pandas
 dataframes in models

---
 docs/conf.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/conf.py b/docs/conf.py
index e6796f4a..5c1f1305 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -81,6 +81,8 @@
 # Mock imports for packages we don't have yet - this one is
 # for opal kelley stuff we need to figure out the licensing for
 autodoc_mock_imports = ["routine"]
+autodoc_pydantic_model_show_json = False
+autodoc_pydantic_model_show_json_error_strategy = "coerce"
 
 # todo
 todo_include_todos = True

From 489d091478db365e90eb694961c2545e32244513 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Fri, 24 Apr 2026 15:58:37 -0700
Subject: [PATCH 19/21] use lockfile when testing docs

---
 .github/workflows/docs-test.yml | 9 ++++-----
 pyproject.toml                  | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/docs-test.yml b/.github/workflows/docs-test.yml
index 9be727ad..2abd59b5 100644
--- a/.github/workflows/docs-test.yml
+++ b/.github/workflows/docs-test.yml
@@ -22,10 +22,9 @@ jobs:
           cache: "pip"
 
       - name: Install dependencies
-        run: pip install -e .[docs] pytest-md
+        run: |
+          pip install pdm
+          pdm install
 
       - name: Build docs
-        working-directory: docs
-        env:
-          SPHINXOPTS: "-W --keep-going"
-        run: make html
+        run: pdm run docs-prod
diff --git a/pyproject.toml b/pyproject.toml
index c10fc52e..df1faee7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -100,7 +100,7 @@ format.composite = [
     "black mio",
     "ruff check --fix",
 ]
-docs-prod = "sphinx-build -M html ./docs ./docs/_build -W -P -E -a"
+docs-prod = "sphinx-build -M html ./docs ./docs/_build -W -E -a --keep-going"
 
 [tool.pdm.build]
 includes = ["mio"]

From 4d3221d93dfe821b55ebe2b1cd5b9bf52918fee8 Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Fri, 24 Apr 2026 16:00:34 -0700
Subject: [PATCH 20/21] what is going on

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index df1faee7..1f58dfb2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -100,7 +100,7 @@ format.composite = [
     "black mio",
     "ruff check --fix",
 ]
-docs-prod = "sphinx-build -M html ./docs ./docs/_build -W -E -a --keep-going"
+docs-prod = "python -m sphinx -M html ./docs ./docs/_build -W -E -a --keep-going"
 
 [tool.pdm.build]
 includes = ["mio"]

From 3b13dc76f9a3857b238b889c9f9b701b88ef155f Mon Sep 17 00:00:00 2001
From: sneakers-the-rat <sneakers-the-rat@protonmail.com>
Date: Fri, 24 Apr 2026 16:02:27 -0700
Subject: [PATCH 21/21] need to install docs specifically i guess

---
 .github/workflows/docs-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs-test.yml b/.github/workflows/docs-test.yml
index 2abd59b5..e46248f2 100644
--- a/.github/workflows/docs-test.yml
+++ b/.github/workflows/docs-test.yml
@@ -24,7 +24,7 @@ jobs:
       - name: Install dependencies
         run: |
           pip install pdm
-          pdm install
+          pdm install --with docs
 
       - name: Build docs
         run: pdm run docs-prod