Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions .map/scripts/map_step_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7039,7 +7039,9 @@ def validate_mutation_boundary(
"base_ref": str,
"expected": [str], # declared affected_files
"actual": [str], # files actually changed
"unexpected": [str], # actual but not expected (scope leak)
"unexpected": [str], # actual but not expected (real scope leak)
"allowed_test_files": [str], # out-of-scope but test-convention;
# implied by test-alongside policy, NOT leaks
"strict": bool,
}

Expand Down Expand Up @@ -7217,7 +7219,18 @@ def validate_mutation_boundary(

actual = sorted(actual_set)
expected_set = set(expected)
unexpected = sorted(p for p in actual if p not in expected_set)
# Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* /
# *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are
# IMPLIED by any subtask whose contract requires tests, so they are NOT
# scope leaks even when the decomposer listed only production modules in
# affected_files. Exclude them from `unexpected` (they stay in `actual`,
# which reflects reality and keeps the false-progress check honest); surface
# them separately as `allowed_test_files` for auditability. A test file the
# blueprint DID declare stays in expected_set and is never an "allowed"
# extra. This makes the check independent of decomposer description wording.
out_of_scope = [p for p in actual if p not in expected_set]
allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p))
unexpected = sorted(p for p in out_of_scope if not _is_test_path(p))
strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1"

if not unexpected:
Expand Down Expand Up @@ -7257,6 +7270,7 @@ def validate_mutation_boundary(
"expected": expected,
"actual": actual,
"unexpected": unexpected,
"allowed_test_files": allowed_test_files,
"strict": strict,
}
if diagnostic_hint:
Expand Down Expand Up @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool:
another subtask's production code (a shared *test* edit is far less
dangerous than a shared *source* edit). Conventions covered: a ``tests/``
/ ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base
names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest).
names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and
pytest's ``conftest.py`` shared-fixture files.
"""
norm = path.replace("\\", "/")
parts = [p for p in norm.split("/") if p]
if not parts:
return False
base = parts[-1]
if base == "conftest.py": # pytest shared fixtures — test infra, not source
return True
if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]):
return True
if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base):
Expand Down
23 changes: 20 additions & 3 deletions src/mapify_cli/templates/map/scripts/map_step_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7039,7 +7039,9 @@ def validate_mutation_boundary(
"base_ref": str,
"expected": [str], # declared affected_files
"actual": [str], # files actually changed
"unexpected": [str], # actual but not expected (scope leak)
"unexpected": [str], # actual but not expected (real scope leak)
"allowed_test_files": [str], # out-of-scope but test-convention;
# implied by test-alongside policy, NOT leaks
"strict": bool,
}

Expand Down Expand Up @@ -7217,7 +7219,18 @@ def validate_mutation_boundary(

actual = sorted(actual_set)
expected_set = set(expected)
unexpected = sorted(p for p in actual if p not in expected_set)
# Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* /
# *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are
# IMPLIED by any subtask whose contract requires tests, so they are NOT
# scope leaks even when the decomposer listed only production modules in
# affected_files. Exclude them from `unexpected` (they stay in `actual`,
# which reflects reality and keeps the false-progress check honest); surface
# them separately as `allowed_test_files` for auditability. A test file the
# blueprint DID declare stays in expected_set and is never an "allowed"
# extra. This makes the check independent of decomposer description wording.
out_of_scope = [p for p in actual if p not in expected_set]
allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p))
unexpected = sorted(p for p in out_of_scope if not _is_test_path(p))
strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1"

if not unexpected:
Expand Down Expand Up @@ -7257,6 +7270,7 @@ def validate_mutation_boundary(
"expected": expected,
"actual": actual,
"unexpected": unexpected,
"allowed_test_files": allowed_test_files,
"strict": strict,
}
if diagnostic_hint:
Expand Down Expand Up @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool:
another subtask's production code (a shared *test* edit is far less
dangerous than a shared *source* edit). Conventions covered: a ``tests/``
/ ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base
names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest).
names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and
pytest's ``conftest.py`` shared-fixture files.
"""
norm = path.replace("\\", "/")
parts = [p for p in norm.split("/") if p]
if not parts:
return False
base = parts[-1]
if base == "conftest.py": # pytest shared fixtures — test infra, not source
return True
if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]):
return True
if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base):
Expand Down
23 changes: 20 additions & 3 deletions src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -7039,7 +7039,9 @@ def validate_mutation_boundary(
"base_ref": str,
"expected": [str], # declared affected_files
"actual": [str], # files actually changed
"unexpected": [str], # actual but not expected (scope leak)
"unexpected": [str], # actual but not expected (real scope leak)
"allowed_test_files": [str], # out-of-scope but test-convention;
# implied by test-alongside policy, NOT leaks
"strict": bool,
}

Expand Down Expand Up @@ -7217,7 +7219,18 @@ def validate_mutation_boundary(

actual = sorted(actual_set)
expected_set = set(expected)
unexpected = sorted(p for p in actual if p not in expected_set)
# Test-alongside policy (#163): co-authored test files (test_*.* / *_test.* /
# *.spec.* / *.test.* / conftest.py / anything under a tests/ dir) are
# IMPLIED by any subtask whose contract requires tests, so they are NOT
# scope leaks even when the decomposer listed only production modules in
# affected_files. Exclude them from `unexpected` (they stay in `actual`,
# which reflects reality and keeps the false-progress check honest); surface
# them separately as `allowed_test_files` for auditability. A test file the
# blueprint DID declare stays in expected_set and is never an "allowed"
# extra. This makes the check independent of decomposer description wording.
out_of_scope = [p for p in actual if p not in expected_set]
allowed_test_files = sorted(p for p in out_of_scope if _is_test_path(p))
unexpected = sorted(p for p in out_of_scope if not _is_test_path(p))
strict = os.environ.get("MAP_STRICT_SCOPE", "0") == "1"

if not unexpected:
Expand Down Expand Up @@ -7257,6 +7270,7 @@ def validate_mutation_boundary(
"expected": expected,
"actual": actual,
"unexpected": unexpected,
"allowed_test_files": allowed_test_files,
"strict": strict,
}
if diagnostic_hint:
Expand Down Expand Up @@ -7289,13 +7303,16 @@ def _is_test_path(path: str) -> bool:
another subtask's production code (a shared *test* edit is far less
dangerous than a shared *source* edit). Conventions covered: a ``tests/``
/ ``test/`` / ``__tests__/`` path segment, ``test_*`` / ``*_test`` base
names, and ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest).
names, ``*.test.*`` / ``*.spec.*`` suffixes (pytest, go test, jest), and
pytest's ``conftest.py`` shared-fixture files.
"""
norm = path.replace("\\", "/")
parts = [p for p in norm.split("/") if p]
if not parts:
return False
base = parts[-1]
if base == "conftest.py": # pytest shared fixtures — test infra, not source
return True
if any(seg in _TEST_DIR_SEGMENTS for seg in parts[:-1]):
return True
if re.match(r"(?:test_.+|.+_test)\.[A-Za-z0-9]+$", base):
Expand Down
9 changes: 9 additions & 0 deletions tests/hooks/test_hook_inventory_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,15 @@ def test_every_configured_hook_execs_via_shebang(hook_project: Path) -> None:
env["CLAUDE_PROJECT_DIR"] = str(hook_project)
env["PYTHONPATH"] = str(REPO_ROOT / "src")
env.pop("MAP_INVOKED_BY", None) # don't let the guard mask the exec path
# map-memory-finalize.py, exec'd past its MAP_INVOKED_BY guard, calls
# `claude -p` to finalize any dirty scratch the earlier memory hooks in this
# loop accumulated in the shared project. Where the `claude` CLI is
# installed+authenticated that subprocess can run up to its default 50s
# budget — far past this probe's 20s cap, which only checks exec-ability,
# not finalize semantics. Bound it so the exec check never blocks on a real
# finalization. (CI has no `claude` CLI, so the subprocess fails fast there
# regardless; this keeps the local gate green too.)
env["MAP_MEMORY_FINALIZE_TIMEOUT"] = "5"

for name in sorted(_configured_hook_names()):
hook_path = HOOKS_DIR / name
Expand Down
96 changes: 95 additions & 1 deletion tests/test_map_step_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4530,6 +4530,97 @@ def test_cli_exits_non_zero_on_error_status(self, branch_workspace, tmp_path):
report = json.loads(result.stdout)
assert report["status"] == "error"

def test_co_authored_test_file_not_flagged_as_scope_leak(
self, branch_workspace, monkeypatch
):
"""#163: a co-authored test file beside the production module (same dir)
is implied by the test-alongside policy — it must NOT be reported as a
scope leak even though the decomposer only listed the production file in
affected_files. It stays in `actual` and surfaces in `allowed_test_files`.
"""
repo = branch_workspace.parents[1]
self._init_git(repo)
self._write_blueprint(branch_workspace, "ST-001", ["pipeline/foo.py"])
(repo / "pipeline").mkdir()
(repo / "pipeline" / "foo.py").write_text("x = 1\n") # in affected_files
(repo / "pipeline" / "test_foo.py").write_text("def test_x(): pass\n") # co-authored test
subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo))
monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False)
report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001")
assert report["status"] == "clean", report
assert report["unexpected"] == [], report
assert "pipeline/test_foo.py" in report["allowed_test_files"], report
assert "pipeline/test_foo.py" in report["actual"], report # reality preserved

def test_co_authored_test_in_separate_tree_not_flagged(
self, branch_workspace, monkeypatch
):
"""#163: the common src/ + tests/ split — affected_files lists
``src/foo.py`` while the test lives under a separate ``tests/`` tree.
The test file must still be auto-allowed (a same-dir-only rule would
wrongly flag this very repo's own layout).
"""
repo = branch_workspace.parents[1]
self._init_git(repo)
self._write_blueprint(branch_workspace, "ST-001", ["src/foo.py"])
(repo / "src").mkdir()
(repo / "src" / "foo.py").write_text("x = 1\n")
(repo / "tests").mkdir()
(repo / "tests" / "test_foo.py").write_text("def test_x(): pass\n")
(repo / "tests" / "conftest.py").write_text("import pytest\n")
subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo))
monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False)
report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001")
assert report["status"] == "clean", report
assert report["unexpected"] == [], report
assert set(report["allowed_test_files"]) == {
"tests/test_foo.py",
"tests/conftest.py",
}, report

def test_real_source_leak_still_flagged_when_mixed_with_test(
self, branch_workspace, monkeypatch
):
"""#163 must NOT mask a genuine production scope leak: an out-of-scope
*source* file is still `unexpected`, while a co-authored test file is
partitioned into `allowed_test_files` — the partition is by convention,
not a blanket suppression.
"""
repo = branch_workspace.parents[1]
self._init_git(repo)
self._write_blueprint(branch_workspace, "ST-001", ["a.py"])
(repo / "a.py").write_text("x = 1\n") # in scope
(repo / "b.py").write_text("y = 2\n") # out-of-scope SOURCE — real leak
(repo / "test_b.py").write_text("def test_y(): pass\n") # out-of-scope TEST — allowed
subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo))
monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False)
report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001")
assert report["status"] == "warning", report
assert report["unexpected"] == ["b.py"], report
assert report["allowed_test_files"] == ["test_b.py"], report

def test_co_authored_test_not_a_violation_even_in_strict_mode(
self, branch_workspace, monkeypatch
):
"""The test-alongside allowance holds under MAP_STRICT_SCOPE=1 too: a
co-authored test file alone must not escalate to status='violation'.
"""
repo = branch_workspace.parents[1]
self._init_git(repo)
self._write_blueprint(branch_workspace, "ST-001", ["a.py"])
(repo / "a.py").write_text("x = 1\n")
(repo / "a_test.py").write_text("def test_x(): pass\n") # co-authored test
subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo))
monkeypatch.setenv("MAP_STRICT_SCOPE", "1")
report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001")
assert report["status"] == "clean", report
assert report["unexpected"] == [], report
assert report["allowed_test_files"] == ["a_test.py"], report


class TestDetectCrossSubtaskRegressionRisk:
"""detect_cross_subtask_regression_risk flags when the in-flight subtask
Expand Down Expand Up @@ -4737,8 +4828,11 @@ def test_is_test_path_heuristic(self):
assert map_step_runner._is_test_path("pkg/foo_test.go")
assert map_step_runner._is_test_path("web/button.spec.ts")
assert map_step_runner._is_test_path("a/__tests__/b.js")
# pytest conftest.py is test infrastructure at any depth (#163)
assert map_step_runner._is_test_path("conftest.py")
assert map_step_runner._is_test_path("python/pipeline/conftest.py")
assert not map_step_runner._is_test_path("src/pipeline.py")
assert not map_step_runner._is_test_path("contest.py")
assert not map_step_runner._is_test_path("contest.py") # not "conftest"


class TestGetSubtaskCli:
Expand Down
Loading