From 6b57325f4a224b810fd56faf172d31e90852f92d Mon Sep 17 00:00:00 2001
From: Mikhail Petrov <azalio@azalio.net>
Date: Thu, 11 Jun 2026 17:51:07 +0300
Subject: [PATCH 1/2] fix(map-step-runner): allow co-authored test files in
 validate_mutation_boundary (#163)

validate_mutation_boundary flagged co-authored test files (test_*.py,
*_test.*, *.spec.*, conftest.py, anything under tests/) as "outside
affected_files", emitting a scope warning that hard-stopped the first
validate_step 2.4 on essentially every subtask under a test-alongside
policy. The decomposer lists only production files in affected_files, so
the accompanying test was always "unexpected".

Fix (validator side, deterministic): partition out-of-scope paths by test
convention. Test-convention paths are surfaced as `allowed_test_files` and
excluded from `unexpected` (they stay in `actual`, keeping the
false-progress check honest); real production scope leaks are still
reported. This makes the check independent of decomposer description
wording. Reuses the existing _is_test_path helper, extended to also
recognize pytest conftest.py at any depth.

Edited the templates_src .jinja single source and re-rendered all generated
trees (make render-templates; check-render byte-clean).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .map/scripts/map_step_runner.py               | 23 ++++-
 .../templates/map/scripts/map_step_runner.py  | 23 ++++-
 .../map/scripts/map_step_runner.py.jinja      | 23 ++++-
 tests/test_map_step_runner.py                 | 96 ++++++++++++++++++-
 4 files changed, 155 insertions(+), 10 deletions(-)

diff --git a/.map/scripts/map_step_runner.py b/.map/scripts/map_step_runner.py
index 832a7b41..79c408e8 100755
--- a/.map/scripts/map_step_runner.py
+++ b/.map/scripts/map_step_runner.py
@@ -7039,7 +7039,9 @@ def validate_mutation_boundary(
           "base_ref": str,
           "expected": [str],   # declared affected_files
           "actual": [str],     # files actually changed
-          "unexpected": [str], # actual but not expected (scope leak)
+          "unexpected": [str], # actual but not expected (real scope leak)
+          "allowed_test_files": [str],  # out-of-scope but test-convention;
+                                        # implied by test-alongside policy, NOT leaks
           "strict": bool,
         }
 
@@ -7217,7 +7219,18 @@ def validate_mutation_boundary(
 
     actual = sorted(actual_set)
     expected_set = set(expected)
-    unexpected = sorted(p for p in actual if p not in expected_set)
+    # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* /
+    # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are
+    # IMPLIED by any subtask whose contract requires tests, so they are NOT
+    # scope leaks even when the decomposer listed only production modules in
+    # affected_files. Exclude them from `unexpected` (they stay in `actual`,
+    # which reflects reality and keeps the false-progress check honest); surface
+    # them separately as `allowed_test_files` for auditability. A test file the
+    # blueprint DID declare stays in expected_set and is never an "allowed"
+    # extra. This makes the check independent of decomposer description wording.
+    out_of_scope = [p for p in actual if p not in expected_set]
+    allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p))
+    unexpected = sorted(p for p in out_of_scope if not _is_test_path(p))
     strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1"
 
     if not unexpected:
@@ -7257,6 +7270,7 @@ def validate_mutation_boundary(
         "expected": expected,
         "actual": actual,
         "unexpected": unexpected,
+        "allowed_test_files": allowed_test_files,
         "strict": strict,
     }
     if diagnostic_hint:
@@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool:
     another subtask's production code (a shared *test* edit is far less
     dangerous than a shared *source* edit). Conventions covered: a ``tests/``
     / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base
-    names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest).
+    names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and
+    pytest's ``conftest.py`` shared-fixture files.
     """
     norm = path.replace("\\", "/")
     parts = [p for p in norm.split("/") if p]
     if not parts:
         return False
     base = parts[-1]
+    if base == "conftest.py":  # pytest shared fixtures — test infra, not source
+        return True
     if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]):
         return True
     if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base):
diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py
index 832a7b41..79c408e8 100755
--- a/src/mapify_cli/templates/map/scripts/map_step_runner.py
+++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py
@@ -7039,7 +7039,9 @@ def validate_mutation_boundary(
           "base_ref": str,
           "expected": [str],   # declared affected_files
           "actual": [str],     # files actually changed
-          "unexpected": [str], # actual but not expected (scope leak)
+          "unexpected": [str], # actual but not expected (real scope leak)
+          "allowed_test_files": [str],  # out-of-scope but test-convention;
+                                        # implied by test-alongside policy, NOT leaks
           "strict": bool,
         }
 
@@ -7217,7 +7219,18 @@ def validate_mutation_boundary(
 
     actual = sorted(actual_set)
     expected_set = set(expected)
-    unexpected = sorted(p for p in actual if p not in expected_set)
+    # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* /
+    # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are
+    # IMPLIED by any subtask whose contract requires tests, so they are NOT
+    # scope leaks even when the decomposer listed only production modules in
+    # affected_files. Exclude them from `unexpected` (they stay in `actual`,
+    # which reflects reality and keeps the false-progress check honest); surface
+    # them separately as `allowed_test_files` for auditability. A test file the
+    # blueprint DID declare stays in expected_set and is never an "allowed"
+    # extra. This makes the check independent of decomposer description wording.
+    out_of_scope = [p for p in actual if p not in expected_set]
+    allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p))
+    unexpected = sorted(p for p in out_of_scope if not _is_test_path(p))
     strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1"
 
     if not unexpected:
@@ -7257,6 +7270,7 @@ def validate_mutation_boundary(
         "expected": expected,
         "actual": actual,
         "unexpected": unexpected,
+        "allowed_test_files": allowed_test_files,
         "strict": strict,
     }
     if diagnostic_hint:
@@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool:
     another subtask's production code (a shared *test* edit is far less
     dangerous than a shared *source* edit). Conventions covered: a ``tests/``
     / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base
-    names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest).
+    names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and
+    pytest's ``conftest.py`` shared-fixture files.
     """
     norm = path.replace("\\", "/")
     parts = [p for p in norm.split("/") if p]
     if not parts:
         return False
     base = parts[-1]
+    if base == "conftest.py":  # pytest shared fixtures — test infra, not source
+        return True
     if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]):
         return True
     if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base):
diff --git a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja
index 832a7b41..79c408e8 100755
--- a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja
+++ b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja
@@ -7039,7 +7039,9 @@ def validate_mutation_boundary(
           "base_ref": str,
           "expected": [str],   # declared affected_files
           "actual": [str],     # files actually changed
-          "unexpected": [str], # actual but not expected (scope leak)
+          "unexpected": [str], # actual but not expected (real scope leak)
+          "allowed_test_files": [str],  # out-of-scope but test-convention;
+                                        # implied by test-alongside policy, NOT leaks
           "strict": bool,
         }
 
@@ -7217,7 +7219,18 @@ def validate_mutation_boundary(
 
     actual = sorted(actual_set)
     expected_set = set(expected)
-    unexpected = sorted(p for p in actual if p not in expected_set)
+    # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* /
+    # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are
+    # IMPLIED by any subtask whose contract requires tests, so they are NOT
+    # scope leaks even when the decomposer listed only production modules in
+    # affected_files. Exclude them from `unexpected` (they stay in `actual`,
+    # which reflects reality and keeps the false-progress check honest); surface
+    # them separately as `allowed_test_files` for auditability. A test file the
+    # blueprint DID declare stays in expected_set and is never an "allowed"
+    # extra. This makes the check independent of decomposer description wording.
+    out_of_scope = [p for p in actual if p not in expected_set]
+    allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p))
+    unexpected = sorted(p for p in out_of_scope if not _is_test_path(p))
     strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1"
 
     if not unexpected:
@@ -7257,6 +7270,7 @@ def validate_mutation_boundary(
         "expected": expected,
         "actual": actual,
         "unexpected": unexpected,
+        "allowed_test_files": allowed_test_files,
         "strict": strict,
     }
     if diagnostic_hint:
@@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool:
     another subtask's production code (a shared *test* edit is far less
     dangerous than a shared *source* edit). Conventions covered: a ``tests/``
     / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base
-    names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest).
+    names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and
+    pytest's ``conftest.py`` shared-fixture files.
     """
     norm = path.replace("\\", "/")
     parts = [p for p in norm.split("/") if p]
     if not parts:
         return False
     base = parts[-1]
+    if base == "conftest.py":  # pytest shared fixtures — test infra, not source
+        return True
     if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]):
         return True
     if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base):
diff --git a/tests/test_map_step_runner.py b/tests/test_map_step_runner.py
index 61e9f286..e30691ab 100644
--- a/tests/test_map_step_runner.py
+++ b/tests/test_map_step_runner.py
@@ -4530,6 +4530,97 @@ def test_cli_exits_non_zero_on_error_status(self, branch_workspace, tmp_path):
         report = json.loads(result.stdout)
         assert report["status"] == "error"
 
+    def test_co_authored_test_file_not_flagged_as_scope_leak(
+        self, branch_workspace, monkeypatch
+    ):
+        """#163: a co-authored test file beside the production module (same dir)
+        is implied by the test-alongside policy — it must NOT be reported as a
+        scope leak even though the decomposer only listed the production file in
+        affected_files. It stays in `actual` and surfaces in `allowed_test_files`.
+        """
+        repo = branch_workspace.parents[1]
+        self._init_git(repo)
+        self._write_blueprint(branch_workspace, "ST-001", ["pipeline/foo.py"])
+        (repo / "pipeline").mkdir()
+        (repo / "pipeline" / "foo.py").write_text("x = 1\n")  # in affected_files
+        (repo / "pipeline" / "test_foo.py").write_text("def test_x(): pass\n")  # co-authored test
+        subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
+        monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo))
+        monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False)
+        report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001")
+        assert report["status"] == "clean", report
+        assert report["unexpected"] == [], report
+        assert "pipeline/test_foo.py" in report["allowed_test_files"], report
+        assert "pipeline/test_foo.py" in report["actual"], report  # reality preserved
+
+    def test_co_authored_test_in_separate_tree_not_flagged(
+        self, branch_workspace, monkeypatch
+    ):
+        """#163: the common src/ + tests/ split — affected_files lists
+        ``src/foo.py`` while the test lives under a separate ``tests/`` tree.
+        The test file must still be auto-allowed (a same-dir-only rule would
+        wrongly flag this very repo's own layout).
+        """
+        repo = branch_workspace.parents[1]
+        self._init_git(repo)
+        self._write_blueprint(branch_workspace, "ST-001", ["src/foo.py"])
+        (repo / "src").mkdir()
+        (repo / "src" / "foo.py").write_text("x = 1\n")
+        (repo / "tests").mkdir()
+        (repo / "tests" / "test_foo.py").write_text("def test_x(): pass\n")
+        (repo / "tests" / "conftest.py").write_text("import pytest\n")
+        subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
+        monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo))
+        monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False)
+        report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001")
+        assert report["status"] == "clean", report
+        assert report["unexpected"] == [], report
+        assert set(report["allowed_test_files"]) == {
+            "tests/test_foo.py",
+            "tests/conftest.py",
+        }, report
+
+    def test_real_source_leak_still_flagged_when_mixed_with_test(
+        self, branch_workspace, monkeypatch
+    ):
+        """#163 must NOT mask a genuine production scope leak: an out-of-scope
+        *source* file is still `unexpected`, while a co-authored test file is
+        partitioned into `allowed_test_files` — the partition is by convention,
+        not a blanket suppression.
+        """
+        repo = branch_workspace.parents[1]
+        self._init_git(repo)
+        self._write_blueprint(branch_workspace, "ST-001", ["a.py"])
+        (repo / "a.py").write_text("x = 1\n")  # in scope
+        (repo / "b.py").write_text("y = 2\n")  # out-of-scope SOURCE — real leak
+        (repo / "test_b.py").write_text("def test_y(): pass\n")  # out-of-scope TEST — allowed
+        subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
+        monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo))
+        monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False)
+        report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001")
+        assert report["status"] == "warning", report
+        assert report["unexpected"] == ["b.py"], report
+        assert report["allowed_test_files"] == ["test_b.py"], report
+
+    def test_co_authored_test_not_a_violation_even_in_strict_mode(
+        self, branch_workspace, monkeypatch
+    ):
+        """The test-alongside allowance holds under MAP_STRICT_SCOPE=1 too: a
+        co-authored test file alone must not escalate to status='violation'.
+        """
+        repo = branch_workspace.parents[1]
+        self._init_git(repo)
+        self._write_blueprint(branch_workspace, "ST-001", ["a.py"])
+        (repo / "a.py").write_text("x = 1\n")
+        (repo / "a_test.py").write_text("def test_x(): pass\n")  # co-authored test
+        subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
+        monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo))
+        monkeypatch.setenv("MAP_STRICT_SCOPE", "1")
+        report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001")
+        assert report["status"] == "clean", report
+        assert report["unexpected"] == [], report
+        assert report["allowed_test_files"] == ["a_test.py"], report
+
 
 class TestDetectCrossSubtaskRegressionRisk:
     """detect_cross_subtask_regression_risk flags when the in-flight subtask
@@ -4737,8 +4828,11 @@ def test_is_test_path_heuristic(self):
         assert map_step_runner._is_test_path("pkg/foo_test.go")
         assert map_step_runner._is_test_path("web/button.spec.ts")
         assert map_step_runner._is_test_path("a/__tests__/b.js")
+        # pytest conftest.py is test infrastructure at any depth (#163)
+        assert map_step_runner._is_test_path("conftest.py")
+        assert map_step_runner._is_test_path("python/pipeline/conftest.py")
         assert not map_step_runner._is_test_path("src/pipeline.py")
-        assert not map_step_runner._is_test_path("contest.py")
+        assert not map_step_runner._is_test_path("contest.py")  # not "conftest"
 
 
 class TestGetSubtaskCli:

From 5d085c90de06ddadf89f31ae0d422a624e522ab9 Mon Sep 17 00:00:00 2001
From: Mikhail Petrov <azalio@azalio.net>
Date: Thu, 11 Jun 2026 17:51:16 +0300
Subject: [PATCH 2/2] test(hooks): bound finalize claude -p in exec-ability
 smoke probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

test_every_configured_hook_execs_via_shebang runs every configured hook
past its MAP_INVOKED_BY guard to assert exec-ability (+x, shebang, no
126/127). map-memory-finalize.py, exec'd that way, calls `claude -p` to
finalize a dirty scratch the loop's earlier memory hooks accumulate in the
shared project. On a machine where the `claude` CLI is installed and
authenticated, that subprocess runs up to its default 50s budget — past the
probe's 20s cap — so the test timed out locally (it passes on CI, which has
no `claude` CLI, so the subprocess fails fast there). Production finalize is
unchanged: it correctly gets 50s under the 60s SessionStart budget.

Bound MAP_MEMORY_FINALIZE_TIMEOUT=5 in the probe's env so the exec check
never blocks on a real finalization. Test-only; no production behavior
change.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/hooks/test_hook_inventory_smoke.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/hooks/test_hook_inventory_smoke.py b/tests/hooks/test_hook_inventory_smoke.py
index b7ab1c3a..1230a57a 100644
--- a/tests/hooks/test_hook_inventory_smoke.py
+++ b/tests/hooks/test_hook_inventory_smoke.py
@@ -422,6 +422,15 @@ def test_every_configured_hook_execs_via_shebang(hook_project: Path) -> None:
     env["CLAUDE_PROJECT_DIR"] = str(hook_project)
     env["PYTHONPATH"] = str(REPO_ROOT / "src")
     env.pop("MAP_INVOKED_BY", None)  # don't let the guard mask the exec path
+    # map-memory-finalize.py, exec'd past its MAP_INVOKED_BY guard, calls
+    # `claude -p` to finalize any dirty scratch the earlier memory hooks in this
+    # loop accumulated in the shared project. Where the `claude` CLI is
+    # installed+authenticated that subprocess can run up to its default 50s
+    # budget — far past this probe's 20s cap, which only checks exec-ability,
+    # not finalize semantics. Bound it so the exec check never blocks on a real
+    # finalization. (CI has no `claude` CLI, so the subprocess fails fast there
+    # regardless; this keeps the local gate green too.)
+    env["MAP_MEMORY_FINALIZE_TIMEOUT"] = "5"
 
     for name in sorted(_configured_hook_names()):
         hook_path = HOOKS_DIR / name