From 6b57325f4a224b810fd56faf172d31e90852f92d Mon Sep 17 00:00:00 2001 From: Mikhail Petrov Date: Thu, 11 Jun 2026 17:51:07 +0300 Subject: [PATCH 1/2] fix(map-step-runner): allow co-authored test files in validate_mutation_boundary (#163) validate_mutation_boundary flagged co-authored test files (test_*.py, *_test.*, *.spec.*, conftest.py, anything under tests/) as "outside affected_files", emitting a scope warning that hard-stopped the first validate_step 2.4 on essentially every subtask under a test-alongside policy. The decomposer lists only production files in affected_files, so the accompanying test was always "unexpected". Fix (validator side, deterministic): partition out-of-scope paths by test convention. Test-convention paths are surfaced as `allowed_test_files` and excluded from `unexpected` (they stay in `actual`, keeping the false-progress check honest); real production scope leaks are still reported. This makes the check independent of decomposer description wording. Reuses the existing _is_test_path helper, extended to also recognize pytest conftest.py at any depth. Edited the templates_src .jinja single source and re-rendered all generated trees (make render-templates; check-render byte-clean). Co-Authored-By: Claude Opus 4.8 (1M context) --- .map/scripts/map_step_runner.py | 23 ++++- .../templates/map/scripts/map_step_runner.py | 23 ++++- .../map/scripts/map_step_runner.py.jinja | 23 ++++- tests/test_map_step_runner.py | 96 ++++++++++++++++++- 4 files changed, 155 insertions(+), 10 deletions(-) diff --git a/.map/scripts/map_step_runner.py b/.map/scripts/map_step_runner.py index 832a7b41..79c408e8 100755 --- a/.map/scripts/map_step_runner.py +++ b/.map/scripts/map_step_runner.py @@ -7039,7 +7039,9 @@ def validate_mutation_boundary( "base_ref": str, "expected": [str], # declared affected_files "actual": [str], # files actually changed - "unexpected": [str], # actual but not expected (scope leak) + "unexpected": [str], # actual but not expected (real scope leak) + "allowed_test_files": [str], # out-of-scope but test-convention; + # implied by test-alongside policy, NOT leaks "strict": bool, } @@ -7217,7 +7219,18 @@ def validate_mutation_boundary( actual = sorted(actual_set) expected_set = set(expected) - unexpected = sorted(p for p in actual if p not in expected_set) + # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* / + # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are + # IMPLIED by any subtask whose contract requires tests, so they are NOT + # scope leaks even when the decomposer listed only production modules in + # affected_files. Exclude them from `unexpected` (they stay in `actual`, + # which reflects reality and keeps the false-progress check honest); surface + # them separately as `allowed_test_files` for auditability. A test file the + # blueprint DID declare stays in expected_set and is never an "allowed" + # extra. This makes the check independent of decomposer description wording. + out_of_scope = [p for p in actual if p not in expected_set] + allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p)) + unexpected = sorted(p for p in out_of_scope if not _is_test_path(p)) strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1" if not unexpected: @@ -7257,6 +7270,7 @@ def validate_mutation_boundary( "expected": expected, "actual": actual, "unexpected": unexpected, + "allowed_test_files": allowed_test_files, "strict": strict, } if diagnostic_hint: @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool: another subtask's production code (a shared *test* edit is far less dangerous than a shared *source* edit). Conventions covered: a ``tests/`` / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base - names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest). + names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and + pytest's ``conftest.py`` shared-fixture files. """ norm = path.replace("\\", "/") parts = [p for p in norm.split("/") if p] if not parts: return False base = parts[-1] + if base == "conftest.py": # pytest shared fixtures — test infra, not source + return True if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]): return True if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base): diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py index 832a7b41..79c408e8 100755 --- a/src/mapify_cli/templates/map/scripts/map_step_runner.py +++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py @@ -7039,7 +7039,9 @@ def validate_mutation_boundary( "base_ref": str, "expected": [str], # declared affected_files "actual": [str], # files actually changed - "unexpected": [str], # actual but not expected (scope leak) + "unexpected": [str], # actual but not expected (real scope leak) + "allowed_test_files": [str], # out-of-scope but test-convention; + # implied by test-alongside policy, NOT leaks "strict": bool, } @@ -7217,7 +7219,18 @@ def validate_mutation_boundary( actual = sorted(actual_set) expected_set = set(expected) - unexpected = sorted(p for p in actual if p not in expected_set) + # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* / + # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are + # IMPLIED by any subtask whose contract requires tests, so they are NOT + # scope leaks even when the decomposer listed only production modules in + # affected_files. Exclude them from `unexpected` (they stay in `actual`, + # which reflects reality and keeps the false-progress check honest); surface + # them separately as `allowed_test_files` for auditability. A test file the + # blueprint DID declare stays in expected_set and is never an "allowed" + # extra. This makes the check independent of decomposer description wording. + out_of_scope = [p for p in actual if p not in expected_set] + allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p)) + unexpected = sorted(p for p in out_of_scope if not _is_test_path(p)) strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1" if not unexpected: @@ -7257,6 +7270,7 @@ def validate_mutation_boundary( "expected": expected, "actual": actual, "unexpected": unexpected, + "allowed_test_files": allowed_test_files, "strict": strict, } if diagnostic_hint: @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool: another subtask's production code (a shared *test* edit is far less dangerous than a shared *source* edit). Conventions covered: a ``tests/`` / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base - names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest). + names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and + pytest's ``conftest.py`` shared-fixture files. """ norm = path.replace("\\", "/") parts = [p for p in norm.split("/") if p] if not parts: return False base = parts[-1] + if base == "conftest.py": # pytest shared fixtures — test infra, not source + return True if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]): return True if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base): diff --git a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja index 832a7b41..79c408e8 100755 --- a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja +++ b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja @@ -7039,7 +7039,9 @@ def validate_mutation_boundary( "base_ref": str, "expected": [str], # declared affected_files "actual": [str], # files actually changed - "unexpected": [str], # actual but not expected (scope leak) + "unexpected": [str], # actual but not expected (real scope leak) + "allowed_test_files": [str], # out-of-scope but test-convention; + # implied by test-alongside policy, NOT leaks "strict": bool, } @@ -7217,7 +7219,18 @@ def validate_mutation_boundary( actual = sorted(actual_set) expected_set = set(expected) - unexpected = sorted(p for p in actual if p not in expected_set) + # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* / + # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are + # IMPLIED by any subtask whose contract requires tests, so they are NOT + # scope leaks even when the decomposer listed only production modules in + # affected_files. Exclude them from `unexpected` (they stay in `actual`, + # which reflects reality and keeps the false-progress check honest); surface + # them separately as `allowed_test_files` for auditability. A test file the + # blueprint DID declare stays in expected_set and is never an "allowed" + # extra. This makes the check independent of decomposer description wording. + out_of_scope = [p for p in actual if p not in expected_set] + allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p)) + unexpected = sorted(p for p in out_of_scope if not _is_test_path(p)) strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1" if not unexpected: @@ -7257,6 +7270,7 @@ def validate_mutation_boundary( "expected": expected, "actual": actual, "unexpected": unexpected, + "allowed_test_files": allowed_test_files, "strict": strict, } if diagnostic_hint: @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool: another subtask's production code (a shared *test* edit is far less dangerous than a shared *source* edit). Conventions covered: a ``tests/`` / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base - names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest). + names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and + pytest's ``conftest.py`` shared-fixture files. """ norm = path.replace("\\", "/") parts = [p for p in norm.split("/") if p] if not parts: return False base = parts[-1] + if base == "conftest.py": # pytest shared fixtures — test infra, not source + return True if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]): return True if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base): diff --git a/tests/test_map_step_runner.py b/tests/test_map_step_runner.py index 61e9f286..e30691ab 100644 --- a/tests/test_map_step_runner.py +++ b/tests/test_map_step_runner.py @@ -4530,6 +4530,97 @@ def test_cli_exits_non_zero_on_error_status(self, branch_workspace, tmp_path): report = json.loads(result.stdout) assert report["status"] == "error" + def test_co_authored_test_file_not_flagged_as_scope_leak( + self, branch_workspace, monkeypatch + ): + """#163: a co-authored test file beside the production module (same dir) + is implied by the test-alongside policy — it must NOT be reported as a + scope leak even though the decomposer only listed the production file in + affected_files. It stays in `actual` and surfaces in `allowed_test_files`. + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["pipeline/foo.py"]) + (repo / "pipeline").mkdir() + (repo / "pipeline" / "foo.py").write_text("x = 1\n") # in affected_files + (repo / "pipeline" / "test_foo.py").write_text("def test_x(): pass\n") # co-authored test + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "clean", report + assert report["unexpected"] == [], report + assert "pipeline/test_foo.py" in report["allowed_test_files"], report + assert "pipeline/test_foo.py" in report["actual"], report # reality preserved + + def test_co_authored_test_in_separate_tree_not_flagged( + self, branch_workspace, monkeypatch + ): + """#163: the common src/ + tests/ split — affected_files lists + ``src/foo.py`` while the test lives under a separate ``tests/`` tree. + The test file must still be auto-allowed (a same-dir-only rule would + wrongly flag this very repo's own layout). + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["src/foo.py"]) + (repo / "src").mkdir() + (repo / "src" / "foo.py").write_text("x = 1\n") + (repo / "tests").mkdir() + (repo / "tests" / "test_foo.py").write_text("def test_x(): pass\n") + (repo / "tests" / "conftest.py").write_text("import pytest\n") + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "clean", report + assert report["unexpected"] == [], report + assert set(report["allowed_test_files"]) == { + "tests/test_foo.py", + "tests/conftest.py", + }, report + + def test_real_source_leak_still_flagged_when_mixed_with_test( + self, branch_workspace, monkeypatch + ): + """#163 must NOT mask a genuine production scope leak: an out-of-scope + *source* file is still `unexpected`, while a co-authored test file is + partitioned into `allowed_test_files` — the partition is by convention, + not a blanket suppression. + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["a.py"]) + (repo / "a.py").write_text("x = 1\n") # in scope + (repo / "b.py").write_text("y = 2\n") # out-of-scope SOURCE — real leak + (repo / "test_b.py").write_text("def test_y(): pass\n") # out-of-scope TEST — allowed + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "warning", report + assert report["unexpected"] == ["b.py"], report + assert report["allowed_test_files"] == ["test_b.py"], report + + def test_co_authored_test_not_a_violation_even_in_strict_mode( + self, branch_workspace, monkeypatch + ): + """The test-alongside allowance holds under MAP_STRICT_SCOPE=1 too: a + co-authored test file alone must not escalate to status='violation'. + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["a.py"]) + (repo / "a.py").write_text("x = 1\n") + (repo / "a_test.py").write_text("def test_x(): pass\n") # co-authored test + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.setenv("MAP_STRICT_SCOPE", "1") + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "clean", report + assert report["unexpected"] == [], report + assert report["allowed_test_files"] == ["a_test.py"], report + class TestDetectCrossSubtaskRegressionRisk: """detect_cross_subtask_regression_risk flags when the in-flight subtask @@ -4737,8 +4828,11 @@ def test_is_test_path_heuristic(self): assert map_step_runner._is_test_path("pkg/foo_test.go") assert map_step_runner._is_test_path("web/button.spec.ts") assert map_step_runner._is_test_path("a/__tests__/b.js") + # pytest conftest.py is test infrastructure at any depth (#163) + assert map_step_runner._is_test_path("conftest.py") + assert map_step_runner._is_test_path("python/pipeline/conftest.py") assert not map_step_runner._is_test_path("src/pipeline.py") - assert not map_step_runner._is_test_path("contest.py") + assert not map_step_runner._is_test_path("contest.py") # not "conftest" class TestGetSubtaskCli: From 5d085c90de06ddadf89f31ae0d422a624e522ab9 Mon Sep 17 00:00:00 2001 From: Mikhail Petrov Date: Thu, 11 Jun 2026 17:51:16 +0300 Subject: [PATCH 2/2] test(hooks): bound finalize claude -p in exec-ability smoke probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_every_configured_hook_execs_via_shebang runs every configured hook past its MAP_INVOKED_BY guard to assert exec-ability (+x, shebang, no 126/127). map-memory-finalize.py, exec'd that way, calls `claude -p` to finalize a dirty scratch the loop's earlier memory hooks accumulate in the shared project. On a machine where the `claude` CLI is installed and authenticated, that subprocess runs up to its default 50s budget — past the probe's 20s cap — so the test timed out locally (it passes on CI, which has no `claude` CLI, so the subprocess fails fast there). Production finalize is unchanged: it correctly gets 50s under the 60s SessionStart budget. Bound MAP_MEMORY_FINALIZE_TIMEOUT=5 in the probe's env so the exec check never blocks on a real finalization. Test-only; no production behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/hooks/test_hook_inventory_smoke.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/hooks/test_hook_inventory_smoke.py b/tests/hooks/test_hook_inventory_smoke.py index b7ab1c3a..1230a57a 100644 --- a/tests/hooks/test_hook_inventory_smoke.py +++ b/tests/hooks/test_hook_inventory_smoke.py @@ -422,6 +422,15 @@ def test_every_configured_hook_execs_via_shebang(hook_project: Path) -> None: env["CLAUDE_PROJECT_DIR"] = str(hook_project) env["PYTHONPATH"] = str(REPO_ROOT / "src") env.pop("MAP_INVOKED_BY", None) # don't let the guard mask the exec path + # map-memory-finalize.py, exec'd past its MAP_INVOKED_BY guard, calls + # `claude -p` to finalize any dirty scratch the earlier memory hooks in this + # loop accumulated in the shared project. Where the `claude` CLI is + # installed+authenticated that subprocess can run up to its default 50s + # budget — far past this probe's 20s cap, which only checks exec-ability, + # not finalize semantics. Bound it so the exec check never blocks on a real + # finalization. (CI has no `claude` CLI, so the subprocess fails fast there + # regardless; this keeps the local gate green too.) + env["MAP_MEMORY_FINALIZE_TIMEOUT"] = "5" for name in sorted(_configured_hook_names()): hook_path = HOOKS_DIR / name