diff --git a/.map/scripts/map_step_runner.py b/.map/scripts/map_step_runner.py index 832a7b41..79c408e8 100755 --- a/.map/scripts/map_step_runner.py +++ b/.map/scripts/map_step_runner.py @@ -7039,7 +7039,9 @@ def validate_mutation_boundary( "base_ref": str, "expected": [str], # declared affected_files "actual": [str], # files actually changed - "unexpected": [str], # actual but not expected (scope leak) + "unexpected": [str], # actual but not expected (real scope leak) + "allowed_test_files": [str], # out-of-scope but test-convention; + # implied by test-alongside policy, NOT leaks "strict": bool, } @@ -7217,7 +7219,18 @@ def validate_mutation_boundary( actual = sorted(actual_set) expected_set = set(expected) - unexpected = sorted(p for p in actual if p not in expected_set) + # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* / + # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are + # IMPLIED by any subtask whose contract requires tests, so they are NOT + # scope leaks even when the decomposer listed only production modules in + # affected_files. Exclude them from `unexpected` (they stay in `actual`, + # which reflects reality and keeps the false-progress check honest); surface + # them separately as `allowed_test_files` for auditability. A test file the + # blueprint DID declare stays in expected_set and is never an "allowed" + # extra. This makes the check independent of decomposer description wording. + out_of_scope = [p for p in actual if p not in expected_set] + allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p)) + unexpected = sorted(p for p in out_of_scope if not _is_test_path(p)) strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1" if not unexpected: @@ -7257,6 +7270,7 @@ def validate_mutation_boundary( "expected": expected, "actual": actual, "unexpected": unexpected, + "allowed_test_files": allowed_test_files, "strict": strict, } if diagnostic_hint: @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool: another subtask's production code (a shared *test* edit is far less dangerous than a shared *source* edit). Conventions covered: a ``tests/`` / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base - names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest). + names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and + pytest's ``conftest.py`` shared-fixture files. """ norm = path.replace("\\", "/") parts = [p for p in norm.split("/") if p] if not parts: return False base = parts[-1] + if base == "conftest.py": # pytest shared fixtures — test infra, not source + return True if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]): return True if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base): diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py index 832a7b41..79c408e8 100755 --- a/src/mapify_cli/templates/map/scripts/map_step_runner.py +++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py @@ -7039,7 +7039,9 @@ def validate_mutation_boundary( "base_ref": str, "expected": [str], # declared affected_files "actual": [str], # files actually changed - "unexpected": [str], # actual but not expected (scope leak) + "unexpected": [str], # actual but not expected (real scope leak) + "allowed_test_files": [str], # out-of-scope but test-convention; + # implied by test-alongside policy, NOT leaks "strict": bool, } @@ -7217,7 +7219,18 @@ def validate_mutation_boundary( actual = sorted(actual_set) expected_set = set(expected) - unexpected = sorted(p for p in actual if p not in expected_set) + # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* / + # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are + # IMPLIED by any subtask whose contract requires tests, so they are NOT + # scope leaks even when the decomposer listed only production modules in + # affected_files. Exclude them from `unexpected` (they stay in `actual`, + # which reflects reality and keeps the false-progress check honest); surface + # them separately as `allowed_test_files` for auditability. A test file the + # blueprint DID declare stays in expected_set and is never an "allowed" + # extra. This makes the check independent of decomposer description wording. + out_of_scope = [p for p in actual if p not in expected_set] + allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p)) + unexpected = sorted(p for p in out_of_scope if not _is_test_path(p)) strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1" if not unexpected: @@ -7257,6 +7270,7 @@ def validate_mutation_boundary( "expected": expected, "actual": actual, "unexpected": unexpected, + "allowed_test_files": allowed_test_files, "strict": strict, } if diagnostic_hint: @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool: another subtask's production code (a shared *test* edit is far less dangerous than a shared *source* edit). Conventions covered: a ``tests/`` / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base - names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest). + names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and + pytest's ``conftest.py`` shared-fixture files. """ norm = path.replace("\\", "/") parts = [p for p in norm.split("/") if p] if not parts: return False base = parts[-1] + if base == "conftest.py": # pytest shared fixtures — test infra, not source + return True if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]): return True if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base): diff --git a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja index 832a7b41..79c408e8 100755 --- a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja +++ b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja @@ -7039,7 +7039,9 @@ def validate_mutation_boundary( "base_ref": str, "expected": [str], # declared affected_files "actual": [str], # files actually changed - "unexpected": [str], # actual but not expected (scope leak) + "unexpected": [str], # actual but not expected (real scope leak) + "allowed_test_files": [str], # out-of-scope but test-convention; + # implied by test-alongside policy, NOT leaks "strict": bool, } @@ -7217,7 +7219,18 @@ def validate_mutation_boundary( actual = sorted(actual_set) expected_set = set(expected) - unexpected = sorted(p for p in actual if p not in expected_set) + # Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* / + # *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are + # IMPLIED by any subtask whose contract requires tests, so they are NOT + # scope leaks even when the decomposer listed only production modules in + # affected_files. Exclude them from `unexpected` (they stay in `actual`, + # which reflects reality and keeps the false-progress check honest); surface + # them separately as `allowed_test_files` for auditability. A test file the + # blueprint DID declare stays in expected_set and is never an "allowed" + # extra. This makes the check independent of decomposer description wording. + out_of_scope = [p for p in actual if p not in expected_set] + allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p)) + unexpected = sorted(p for p in out_of_scope if not _is_test_path(p)) strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1" if not unexpected: @@ -7257,6 +7270,7 @@ def validate_mutation_boundary( "expected": expected, "actual": actual, "unexpected": unexpected, + "allowed_test_files": allowed_test_files, "strict": strict, } if diagnostic_hint: @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool: another subtask's production code (a shared *test* edit is far less dangerous than a shared *source* edit). Conventions covered: a ``tests/`` / ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base - names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest). + names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and + pytest's ``conftest.py`` shared-fixture files. """ norm = path.replace("\\", "/") parts = [p for p in norm.split("/") if p] if not parts: return False base = parts[-1] + if base == "conftest.py": # pytest shared fixtures — test infra, not source + return True if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]): return True if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base): diff --git a/tests/hooks/test_hook_inventory_smoke.py b/tests/hooks/test_hook_inventory_smoke.py index b7ab1c3a..1230a57a 100644 --- a/tests/hooks/test_hook_inventory_smoke.py +++ b/tests/hooks/test_hook_inventory_smoke.py @@ -422,6 +422,15 @@ def test_every_configured_hook_execs_via_shebang(hook_project: Path) -> None: env["CLAUDE_PROJECT_DIR"] = str(hook_project) env["PYTHONPATH"] = str(REPO_ROOT / "src") env.pop("MAP_INVOKED_BY", None) # don't let the guard mask the exec path + # map-memory-finalize.py, exec'd past its MAP_INVOKED_BY guard, calls + # `claude -p` to finalize any dirty scratch the earlier memory hooks in this + # loop accumulated in the shared project. Where the `claude` CLI is + # installed+authenticated that subprocess can run up to its default 50s + # budget — far past this probe's 20s cap, which only checks exec-ability, + # not finalize semantics. Bound it so the exec check never blocks on a real + # finalization. (CI has no `claude` CLI, so the subprocess fails fast there + # regardless; this keeps the local gate green too.) + env["MAP_MEMORY_FINALIZE_TIMEOUT"] = "5" for name in sorted(_configured_hook_names()): hook_path = HOOKS_DIR / name diff --git a/tests/test_map_step_runner.py b/tests/test_map_step_runner.py index 61e9f286..e30691ab 100644 --- a/tests/test_map_step_runner.py +++ b/tests/test_map_step_runner.py @@ -4530,6 +4530,97 @@ def test_cli_exits_non_zero_on_error_status(self, branch_workspace, tmp_path): report = json.loads(result.stdout) assert report["status"] == "error" + def test_co_authored_test_file_not_flagged_as_scope_leak( + self, branch_workspace, monkeypatch + ): + """#163: a co-authored test file beside the production module (same dir) + is implied by the test-alongside policy — it must NOT be reported as a + scope leak even though the decomposer only listed the production file in + affected_files. It stays in `actual` and surfaces in `allowed_test_files`. + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["pipeline/foo.py"]) + (repo / "pipeline").mkdir() + (repo / "pipeline" / "foo.py").write_text("x = 1\n") # in affected_files + (repo / "pipeline" / "test_foo.py").write_text("def test_x(): pass\n") # co-authored test + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "clean", report + assert report["unexpected"] == [], report + assert "pipeline/test_foo.py" in report["allowed_test_files"], report + assert "pipeline/test_foo.py" in report["actual"], report # reality preserved + + def test_co_authored_test_in_separate_tree_not_flagged( + self, branch_workspace, monkeypatch + ): + """#163: the common src/ + tests/ split — affected_files lists + ``src/foo.py`` while the test lives under a separate ``tests/`` tree. + The test file must still be auto-allowed (a same-dir-only rule would + wrongly flag this very repo's own layout). + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["src/foo.py"]) + (repo / "src").mkdir() + (repo / "src" / "foo.py").write_text("x = 1\n") + (repo / "tests").mkdir() + (repo / "tests" / "test_foo.py").write_text("def test_x(): pass\n") + (repo / "tests" / "conftest.py").write_text("import pytest\n") + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "clean", report + assert report["unexpected"] == [], report + assert set(report["allowed_test_files"]) == { + "tests/test_foo.py", + "tests/conftest.py", + }, report + + def test_real_source_leak_still_flagged_when_mixed_with_test( + self, branch_workspace, monkeypatch + ): + """#163 must NOT mask a genuine production scope leak: an out-of-scope + *source* file is still `unexpected`, while a co-authored test file is + partitioned into `allowed_test_files` — the partition is by convention, + not a blanket suppression. + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["a.py"]) + (repo / "a.py").write_text("x = 1\n") # in scope + (repo / "b.py").write_text("y = 2\n") # out-of-scope SOURCE — real leak + (repo / "test_b.py").write_text("def test_y(): pass\n") # out-of-scope TEST — allowed + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "warning", report + assert report["unexpected"] == ["b.py"], report + assert report["allowed_test_files"] == ["test_b.py"], report + + def test_co_authored_test_not_a_violation_even_in_strict_mode( + self, branch_workspace, monkeypatch + ): + """The test-alongside allowance holds under MAP_STRICT_SCOPE=1 too: a + co-authored test file alone must not escalate to status='violation'. + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["a.py"]) + (repo / "a.py").write_text("x = 1\n") + (repo / "a_test.py").write_text("def test_x(): pass\n") # co-authored test + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.setenv("MAP_STRICT_SCOPE", "1") + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "clean", report + assert report["unexpected"] == [], report + assert report["allowed_test_files"] == ["a_test.py"], report + class TestDetectCrossSubtaskRegressionRisk: """detect_cross_subtask_regression_risk flags when the in-flight subtask @@ -4737,8 +4828,11 @@ def test_is_test_path_heuristic(self): assert map_step_runner._is_test_path("pkg/foo_test.go") assert map_step_runner._is_test_path("web/button.spec.ts") assert map_step_runner._is_test_path("a/__tests__/b.js") + # pytest conftest.py is test infrastructure at any depth (#163) + assert map_step_runner._is_test_path("conftest.py") + assert map_step_runner._is_test_path("python/pipeline/conftest.py") assert not map_step_runner._is_test_path("src/pipeline.py") - assert not map_step_runner._is_test_path("contest.py") + assert not map_step_runner._is_test_path("contest.py") # not "conftest" class TestGetSubtaskCli: