From 6d10c48cfe12a8974088a27c544df451bb994a42 Mon Sep 17 00:00:00 2001 From: Mikhail Petrov Date: Thu, 11 Jun 2026 18:16:46 +0300 Subject: [PATCH] fix(mutation-boundary): diff committed subtask against its parent (#162) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `validate_step 2.4` auto-runs `validate_mutation_boundary`, which compared the working tree against the subtask's declared `affected_files`. In the documented per-subtask close order — commit → `record_subtask_result --commit-sha` → `validate_step 2.4` — the working tree is clean and `last_subtask_commit_sha` already points at the subtask's OWN commit. The diff against that commit is therefore empty, so `actual=[]` while `expected` is non-empty, tripping the false-progress guard with "MONITOR is closing ST-XXX but NO files changed" on every committed subtask and forcing a redundant second `validate_step 2.4` call. Fix: extract `_resolve_subtask_diff_base`, which re-bases onto the subtask commit's parent when the auto-resolved base equals the commit recorded for THIS subtask, so the committed work shows up as `actual`. The parent is probed first so a root commit safely keeps the commit itself. The same base-ref resolution (and the same latent bug) is shared by `_current_subtask_changed_files`, now also routed through the helper. The check is now commit-order-agnostic: it yields the same `actual` whether run before or after the per-subtask commit. Tests: - unit: committed subtask -> `actual==['a.py']`, base_ref re-based to `^` - unit (negative guard): no recorded commit + clean tree -> `actual==[]` so genuine false-progress still fires - orchestrator: documented commit→record→validate flow passes 2.4 on the FIRST call (no redundant second call), `progress_feedback_subtasks` untouched Negative-proofed: disabling the re-base reproduces the exact #162 message. Co-Authored-By: Claude Opus 4.8 (1M context) --- .map/scripts/map_step_runner.py | 128 +++++++++++------- CHANGELOG.md | 15 ++ .../templates/map/scripts/map_step_runner.py | 128 +++++++++++------- .../map/scripts/map_step_runner.py.jinja | 128 +++++++++++------- tests/test_map_orchestrator.py | 50 +++++++ tests/test_map_step_runner.py | 68 ++++++++++ 6 files changed, 379 insertions(+), 138 deletions(-) diff --git a/.map/scripts/map_step_runner.py b/.map/scripts/map_step_runner.py index 79c408e8..e102445c 100755 --- a/.map/scripts/map_step_runner.py +++ b/.map/scripts/map_step_runner.py @@ -7017,6 +7017,77 @@ def record_scope_baseline(branch: str) -> dict: return {"status": "success", "path": str(path), "count": len(payload["files"]), "files": payload["files"]} +def _resolve_subtask_diff_base( + branch_name: str, subtask_id: str, project_dir: Path +) -> Optional[str]: + """Auto-resolve the git base_ref for diffing a subtask's mutation surface. + + Resolution order: ``last_subtask_commit_sha`` from step_state → ``HEAD`` → + ``None`` (a fresh repo with no commits, where the caller falls through to + porcelain-only). The returned ref is meant to be diffed against the WORKING + TREE (``git diff --name-only ``). + + Crucial special case (#162): the documented per-subtask close order is + ``commit → record_subtask_result --commit-sha → validate_step 2.4``. + ``record_subtask_result`` advances ``last_subtask_commit_sha`` to the + subtask's OWN commit, so by the time the boundary check runs the working + tree is clean and ``git diff `` is empty — which previously + mis-reported "no files changed" and tripped the false-progress guard on + every committed subtask. When the auto-resolved base equals the commit + recorded for THIS subtask, re-base onto that commit's parent so the + committed work shows up in the diff. The parent is probed first so a root + commit (no parent) safely keeps the commit itself. + """ + base_ref: Optional[str] = None + recorded: Optional[str] = None + state_file = project_dir / ".map" / branch_name / "step_state.json" + if state_file.exists(): + try: + state_data = json.loads(state_file.read_text(encoding="utf-8")) + last_sha = state_data.get("last_subtask_commit_sha") + if isinstance(last_sha, str) and last_sha: + base_ref = last_sha + results = state_data.get("subtask_results", {}) + if isinstance(results, dict): + entry = results.get(subtask_id) + if isinstance(entry, dict): + rc = entry.get("commit_sha") + if isinstance(rc, str) and rc: + recorded = rc + except (json.JSONDecodeError, OSError): + pass + if base_ref and recorded and base_ref == recorded: + parent_probe = subprocess.run( + ["git", "rev-parse", "--verify", f"{recorded}^"], + cwd=project_dir, + capture_output=True, + text=True, + timeout=5, + ) + if parent_probe.returncode == 0: + return f"{recorded}^" + # Root commit (no parent): no usable parent to re-base onto. Keep the + # commit itself; a subtask whose own commit is the repo's first commit + # is not a real MAP scenario (the framework is always installed atop + # prior history). + return base_ref + if base_ref: + return base_ref + # No recorded subtask commit — probe HEAD before using it; `git rev-parse + # HEAD` fails in a fresh repo with no commits, and we want porcelain-only + # rather than a confusing "ambiguous HEAD". + head_probe = subprocess.run( + ["git", "rev-parse", "--verify", "HEAD"], + cwd=project_dir, + capture_output=True, + text=True, + timeout=5, + ) + if head_probe.returncode == 0: + return "HEAD" + return None + + def validate_mutation_boundary( branch: str, subtask_id: str, base_ref: Optional[str] = None ) -> dict: @@ -7076,35 +7147,15 @@ def validate_mutation_boundary( expected_raw = subtask.get("affected_files", []) or [] expected = sorted({str(p) for p in expected_raw if isinstance(p, str)}) - # Pick a base_ref. Caller's explicit arg wins; otherwise fall back to - # last_subtask_commit_sha (so the diff covers only THIS subtask's work). - # If neither resolves to a real commit, skip the commit-range diff entirely - # and rely on porcelain (uncommitted + untracked) — this is the only sane + # Pick a base_ref. Caller's explicit arg wins; otherwise auto-resolve from + # last_subtask_commit_sha (so the diff covers only THIS subtask's work), + # re-basing onto the commit's parent when the subtask is already committed + # (#162). If neither resolves to a real commit, skip the commit-range diff + # entirely and rely on porcelain (uncommitted + untracked) — the only sane # behaviour in a brand-new repo before its first commit. base_ref_explicit = bool(base_ref) if not base_ref: - state_file = project_dir / ".map" / branch_name / "step_state.json" - if state_file.exists(): - try: - state_data = json.loads(state_file.read_text(encoding="utf-8")) - last_sha = state_data.get("last_subtask_commit_sha") - if isinstance(last_sha, str) and last_sha: - base_ref = last_sha - except (json.JSONDecodeError, OSError): - pass - if not base_ref: - # Probe HEAD before using it — `git rev-parse HEAD` fails in a - # fresh repo with no commits, and we want to fall through to - # porcelain-only rather than emit a confusing "ambiguous HEAD". - head_probe = subprocess.run( - ["git", "rev-parse", "--verify", "HEAD"], - cwd=project_dir, - capture_output=True, - text=True, - timeout=5, - ) - if head_probe.returncode == 0: - base_ref = "HEAD" + base_ref = _resolve_subtask_diff_base(branch_name, subtask_id, project_dir) try: if base_ref: @@ -7334,27 +7385,12 @@ def _current_subtask_changed_files( Returns ``None`` on any git failure so callers can fail safe to a full gate instead of silently assuming "no changes". + + Shares ``validate_mutation_boundary``'s base-ref resolution (incl. the #162 + re-base onto the subtask's commit parent when it is already committed) via + ``_resolve_subtask_diff_base``. """ - base_ref: Optional[str] = None - state_file = project_dir / ".map" / branch_name / "step_state.json" - if state_file.exists(): - try: - state_data = json.loads(state_file.read_text(encoding="utf-8")) - last_sha = state_data.get("last_subtask_commit_sha") - if isinstance(last_sha, str) and last_sha: - base_ref = last_sha - except (json.JSONDecodeError, OSError): - pass - if not base_ref: - head_probe = subprocess.run( - ["git", "rev-parse", "--verify", "HEAD"], - cwd=project_dir, - capture_output=True, - text=True, - timeout=5, - ) - if head_probe.returncode == 0: - base_ref = "HEAD" + base_ref = _resolve_subtask_diff_base(branch_name, subtask_id, project_dir) try: if base_ref: diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d05fa0a..ffb218b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 (stdlib only) so it works in generated projects without `mapify_cli` importable; the meter is advisory and never blocks a turn. +### Fixed +- **False-progress on every committed subtask (#162)**: `validate_step 2.4` + (which auto-runs `validate_mutation_boundary`) compared the *working tree* + against the contract's `affected_files`. In the documented per-subtask close + order — commit → `record_subtask_result --commit-sha` → `validate_step 2.4` — + the working tree is clean and `last_subtask_commit_sha` already points at the + subtask's OWN commit, so the diff was empty and the gate wrongly rejected + every committed subtask with *"MONITOR is closing ST-XXX but NO files + changed"*, forcing a redundant second call. The base-ref resolution now + re-bases onto the subtask commit's parent when the resolved base is the + subtask's own recorded commit, so the committed work counts as the mutation + surface. Resolution is shared by `validate_mutation_boundary` and + `_current_subtask_changed_files` via a new `_resolve_subtask_diff_base` + helper (root-commit safe). + ## [3.10.0] - 2026-05-19 ### Added diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py index 79c408e8..e102445c 100755 --- a/src/mapify_cli/templates/map/scripts/map_step_runner.py +++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py @@ -7017,6 +7017,77 @@ def record_scope_baseline(branch: str) -> dict: return {"status": "success", "path": str(path), "count": len(payload["files"]), "files": payload["files"]} +def _resolve_subtask_diff_base( + branch_name: str, subtask_id: str, project_dir: Path +) -> Optional[str]: + """Auto-resolve the git base_ref for diffing a subtask's mutation surface. + + Resolution order: ``last_subtask_commit_sha`` from step_state → ``HEAD`` → + ``None`` (a fresh repo with no commits, where the caller falls through to + porcelain-only). The returned ref is meant to be diffed against the WORKING + TREE (``git diff --name-only ``). + + Crucial special case (#162): the documented per-subtask close order is + ``commit → record_subtask_result --commit-sha → validate_step 2.4``. + ``record_subtask_result`` advances ``last_subtask_commit_sha`` to the + subtask's OWN commit, so by the time the boundary check runs the working + tree is clean and ``git diff `` is empty — which previously + mis-reported "no files changed" and tripped the false-progress guard on + every committed subtask. When the auto-resolved base equals the commit + recorded for THIS subtask, re-base onto that commit's parent so the + committed work shows up in the diff. The parent is probed first so a root + commit (no parent) safely keeps the commit itself. + """ + base_ref: Optional[str] = None + recorded: Optional[str] = None + state_file = project_dir / ".map" / branch_name / "step_state.json" + if state_file.exists(): + try: + state_data = json.loads(state_file.read_text(encoding="utf-8")) + last_sha = state_data.get("last_subtask_commit_sha") + if isinstance(last_sha, str) and last_sha: + base_ref = last_sha + results = state_data.get("subtask_results", {}) + if isinstance(results, dict): + entry = results.get(subtask_id) + if isinstance(entry, dict): + rc = entry.get("commit_sha") + if isinstance(rc, str) and rc: + recorded = rc + except (json.JSONDecodeError, OSError): + pass + if base_ref and recorded and base_ref == recorded: + parent_probe = subprocess.run( + ["git", "rev-parse", "--verify", f"{recorded}^"], + cwd=project_dir, + capture_output=True, + text=True, + timeout=5, + ) + if parent_probe.returncode == 0: + return f"{recorded}^" + # Root commit (no parent): no usable parent to re-base onto. Keep the + # commit itself; a subtask whose own commit is the repo's first commit + # is not a real MAP scenario (the framework is always installed atop + # prior history). + return base_ref + if base_ref: + return base_ref + # No recorded subtask commit — probe HEAD before using it; `git rev-parse + # HEAD` fails in a fresh repo with no commits, and we want porcelain-only + # rather than a confusing "ambiguous HEAD". + head_probe = subprocess.run( + ["git", "rev-parse", "--verify", "HEAD"], + cwd=project_dir, + capture_output=True, + text=True, + timeout=5, + ) + if head_probe.returncode == 0: + return "HEAD" + return None + + def validate_mutation_boundary( branch: str, subtask_id: str, base_ref: Optional[str] = None ) -> dict: @@ -7076,35 +7147,15 @@ def validate_mutation_boundary( expected_raw = subtask.get("affected_files", []) or [] expected = sorted({str(p) for p in expected_raw if isinstance(p, str)}) - # Pick a base_ref. Caller's explicit arg wins; otherwise fall back to - # last_subtask_commit_sha (so the diff covers only THIS subtask's work). - # If neither resolves to a real commit, skip the commit-range diff entirely - # and rely on porcelain (uncommitted + untracked) — this is the only sane + # Pick a base_ref. Caller's explicit arg wins; otherwise auto-resolve from + # last_subtask_commit_sha (so the diff covers only THIS subtask's work), + # re-basing onto the commit's parent when the subtask is already committed + # (#162). If neither resolves to a real commit, skip the commit-range diff + # entirely and rely on porcelain (uncommitted + untracked) — the only sane # behaviour in a brand-new repo before its first commit. base_ref_explicit = bool(base_ref) if not base_ref: - state_file = project_dir / ".map" / branch_name / "step_state.json" - if state_file.exists(): - try: - state_data = json.loads(state_file.read_text(encoding="utf-8")) - last_sha = state_data.get("last_subtask_commit_sha") - if isinstance(last_sha, str) and last_sha: - base_ref = last_sha - except (json.JSONDecodeError, OSError): - pass - if not base_ref: - # Probe HEAD before using it — `git rev-parse HEAD` fails in a - # fresh repo with no commits, and we want to fall through to - # porcelain-only rather than emit a confusing "ambiguous HEAD". - head_probe = subprocess.run( - ["git", "rev-parse", "--verify", "HEAD"], - cwd=project_dir, - capture_output=True, - text=True, - timeout=5, - ) - if head_probe.returncode == 0: - base_ref = "HEAD" + base_ref = _resolve_subtask_diff_base(branch_name, subtask_id, project_dir) try: if base_ref: @@ -7334,27 +7385,12 @@ def _current_subtask_changed_files( Returns ``None`` on any git failure so callers can fail safe to a full gate instead of silently assuming "no changes". + + Shares ``validate_mutation_boundary``'s base-ref resolution (incl. the #162 + re-base onto the subtask's commit parent when it is already committed) via + ``_resolve_subtask_diff_base``. """ - base_ref: Optional[str] = None - state_file = project_dir / ".map" / branch_name / "step_state.json" - if state_file.exists(): - try: - state_data = json.loads(state_file.read_text(encoding="utf-8")) - last_sha = state_data.get("last_subtask_commit_sha") - if isinstance(last_sha, str) and last_sha: - base_ref = last_sha - except (json.JSONDecodeError, OSError): - pass - if not base_ref: - head_probe = subprocess.run( - ["git", "rev-parse", "--verify", "HEAD"], - cwd=project_dir, - capture_output=True, - text=True, - timeout=5, - ) - if head_probe.returncode == 0: - base_ref = "HEAD" + base_ref = _resolve_subtask_diff_base(branch_name, subtask_id, project_dir) try: if base_ref: diff --git a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja index 79c408e8..e102445c 100755 --- a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja +++ b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja @@ -7017,6 +7017,77 @@ def record_scope_baseline(branch: str) -> dict: return {"status": "success", "path": str(path), "count": len(payload["files"]), "files": payload["files"]} +def _resolve_subtask_diff_base( + branch_name: str, subtask_id: str, project_dir: Path +) -> Optional[str]: + """Auto-resolve the git base_ref for diffing a subtask's mutation surface. + + Resolution order: ``last_subtask_commit_sha`` from step_state → ``HEAD`` → + ``None`` (a fresh repo with no commits, where the caller falls through to + porcelain-only). The returned ref is meant to be diffed against the WORKING + TREE (``git diff --name-only ``). + + Crucial special case (#162): the documented per-subtask close order is + ``commit → record_subtask_result --commit-sha → validate_step 2.4``. + ``record_subtask_result`` advances ``last_subtask_commit_sha`` to the + subtask's OWN commit, so by the time the boundary check runs the working + tree is clean and ``git diff `` is empty — which previously + mis-reported "no files changed" and tripped the false-progress guard on + every committed subtask. When the auto-resolved base equals the commit + recorded for THIS subtask, re-base onto that commit's parent so the + committed work shows up in the diff. The parent is probed first so a root + commit (no parent) safely keeps the commit itself. + """ + base_ref: Optional[str] = None + recorded: Optional[str] = None + state_file = project_dir / ".map" / branch_name / "step_state.json" + if state_file.exists(): + try: + state_data = json.loads(state_file.read_text(encoding="utf-8")) + last_sha = state_data.get("last_subtask_commit_sha") + if isinstance(last_sha, str) and last_sha: + base_ref = last_sha + results = state_data.get("subtask_results", {}) + if isinstance(results, dict): + entry = results.get(subtask_id) + if isinstance(entry, dict): + rc = entry.get("commit_sha") + if isinstance(rc, str) and rc: + recorded = rc + except (json.JSONDecodeError, OSError): + pass + if base_ref and recorded and base_ref == recorded: + parent_probe = subprocess.run( + ["git", "rev-parse", "--verify", f"{recorded}^"], + cwd=project_dir, + capture_output=True, + text=True, + timeout=5, + ) + if parent_probe.returncode == 0: + return f"{recorded}^" + # Root commit (no parent): no usable parent to re-base onto. Keep the + # commit itself; a subtask whose own commit is the repo's first commit + # is not a real MAP scenario (the framework is always installed atop + # prior history). + return base_ref + if base_ref: + return base_ref + # No recorded subtask commit — probe HEAD before using it; `git rev-parse + # HEAD` fails in a fresh repo with no commits, and we want porcelain-only + # rather than a confusing "ambiguous HEAD". + head_probe = subprocess.run( + ["git", "rev-parse", "--verify", "HEAD"], + cwd=project_dir, + capture_output=True, + text=True, + timeout=5, + ) + if head_probe.returncode == 0: + return "HEAD" + return None + + def validate_mutation_boundary( branch: str, subtask_id: str, base_ref: Optional[str] = None ) -> dict: @@ -7076,35 +7147,15 @@ def validate_mutation_boundary( expected_raw = subtask.get("affected_files", []) or [] expected = sorted({str(p) for p in expected_raw if isinstance(p, str)}) - # Pick a base_ref. Caller's explicit arg wins; otherwise fall back to - # last_subtask_commit_sha (so the diff covers only THIS subtask's work). - # If neither resolves to a real commit, skip the commit-range diff entirely - # and rely on porcelain (uncommitted + untracked) — this is the only sane + # Pick a base_ref. Caller's explicit arg wins; otherwise auto-resolve from + # last_subtask_commit_sha (so the diff covers only THIS subtask's work), + # re-basing onto the commit's parent when the subtask is already committed + # (#162). If neither resolves to a real commit, skip the commit-range diff + # entirely and rely on porcelain (uncommitted + untracked) — the only sane # behaviour in a brand-new repo before its first commit. base_ref_explicit = bool(base_ref) if not base_ref: - state_file = project_dir / ".map" / branch_name / "step_state.json" - if state_file.exists(): - try: - state_data = json.loads(state_file.read_text(encoding="utf-8")) - last_sha = state_data.get("last_subtask_commit_sha") - if isinstance(last_sha, str) and last_sha: - base_ref = last_sha - except (json.JSONDecodeError, OSError): - pass - if not base_ref: - # Probe HEAD before using it — `git rev-parse HEAD` fails in a - # fresh repo with no commits, and we want to fall through to - # porcelain-only rather than emit a confusing "ambiguous HEAD". - head_probe = subprocess.run( - ["git", "rev-parse", "--verify", "HEAD"], - cwd=project_dir, - capture_output=True, - text=True, - timeout=5, - ) - if head_probe.returncode == 0: - base_ref = "HEAD" + base_ref = _resolve_subtask_diff_base(branch_name, subtask_id, project_dir) try: if base_ref: @@ -7334,27 +7385,12 @@ def _current_subtask_changed_files( Returns ``None`` on any git failure so callers can fail safe to a full gate instead of silently assuming "no changes". + + Shares ``validate_mutation_boundary``'s base-ref resolution (incl. the #162 + re-base onto the subtask's commit parent when it is already committed) via + ``_resolve_subtask_diff_base``. """ - base_ref: Optional[str] = None - state_file = project_dir / ".map" / branch_name / "step_state.json" - if state_file.exists(): - try: - state_data = json.loads(state_file.read_text(encoding="utf-8")) - last_sha = state_data.get("last_subtask_commit_sha") - if isinstance(last_sha, str) and last_sha: - base_ref = last_sha - except (json.JSONDecodeError, OSError): - pass - if not base_ref: - head_probe = subprocess.run( - ["git", "rev-parse", "--verify", "HEAD"], - cwd=project_dir, - capture_output=True, - text=True, - timeout=5, - ) - if head_probe.returncode == 0: - base_ref = "HEAD" + base_ref = _resolve_subtask_diff_base(branch_name, subtask_id, project_dir) try: if base_ref: diff --git a/tests/test_map_orchestrator.py b/tests/test_map_orchestrator.py index 9c4d8bf7..4b2d6acb 100644 --- a/tests/test_map_orchestrator.py +++ b/tests/test_map_orchestrator.py @@ -2444,6 +2444,56 @@ def test_false_progress_routes_feedback_when_nothing_changed( r2 = map_orchestrator.validate_step("2.4", branch_dir, recommendation="proceed") assert r2["valid"] is True, r2 + def test_committed_subtask_passes_2_4_without_false_progress( + self, branch_dir, tmp_path, monkeypatch + ): + """#162: the documented per-subtask close order is + commit -> record_subtask_result --commit-sha -> validate_step 2.4. After + the commit the working tree is clean and last_subtask_commit_sha is THIS + subtask's own commit. validate_step 2.4 must NOT fire false-progress on + the FIRST call (no redundant second call): the committed work counts as + the subtask's mutation surface via the parent re-base.""" + state = map_orchestrator.StepState() + state.workflow_status = "IN_PROGRESS" + state.subtask_sequence = ["ST-001"] + state.current_subtask_id = "ST-001" + state.current_step_id = "2.4" + state.current_step_phase = "MONITOR" + state.pending_steps = ["2.4"] + state.completed_steps = ["2.2", "2.3"] + plan_dir = tmp_path / ".map" / branch_dir + plan_dir.mkdir(parents=True, exist_ok=True) + (plan_dir / "blueprint.json").write_text(json.dumps({ + "subtasks": [{"id": "ST-001", "title": "x", "affected_files": ["a.py"]}], + })) + import subprocess as _sp + _sp.run(["git", "init"], cwd=tmp_path, capture_output=True) + _sp.run(["git", "config", "user.email", "t@t.com"], cwd=tmp_path, capture_output=True) + _sp.run(["git", "config", "user.name", "t"], cwd=tmp_path, capture_output=True) + (tmp_path / "seed.txt").write_text("seed") + _sp.run(["git", "add", "seed.txt"], cwd=tmp_path, capture_output=True) + _sp.run(["git", "commit", "-m", "init"], cwd=tmp_path, capture_output=True) + # ST-001's work IS implemented and committed (the documented order). + (tmp_path / "a.py").write_text("x = 1\n") + _sp.run(["git", "add", "a.py"], cwd=tmp_path, capture_output=True) + _sp.run(["git", "commit", "-m", "ST-001"], cwd=tmp_path, capture_output=True) + sha = _sp.run( + ["git", "rev-parse", "HEAD"], cwd=tmp_path, capture_output=True, text=True + ).stdout.strip() + # Mimic record_subtask_result --commit-sha . + state.record_subtask_result("ST-001", ["a.py"], "valid", commit_sha=sha) + state_file = tmp_path / ".map" / branch_dir / "step_state.json" + state.save(state_file) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(tmp_path)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + + r1 = map_orchestrator.validate_step("2.4", branch_dir, recommendation="proceed") + assert r1["valid"] is True, r1 # NO false-progress on the first call + persisted = map_orchestrator.StepState.load(state_file) + assert "ST-001" not in persisted.progress_feedback_subtasks, ( + persisted.progress_feedback_subtasks + ) + class TestPeekCurrentStep: """peek_current_step is the read-only recovery escape hatch for the case diff --git a/tests/test_map_step_runner.py b/tests/test_map_step_runner.py index e30691ab..7f611c40 100644 --- a/tests/test_map_step_runner.py +++ b/tests/test_map_step_runner.py @@ -4621,6 +4621,74 @@ def test_co_authored_test_not_a_violation_even_in_strict_mode( assert report["unexpected"] == [], report assert report["allowed_test_files"] == ["a_test.py"], report + def test_already_committed_subtask_diffs_against_parent( + self, branch_workspace, monkeypatch + ): + """#162: the documented per-subtask close order is + commit -> record_subtask_result --commit-sha -> validate_step 2.4. + After the commit the working tree is CLEAN and last_subtask_commit_sha + points at THIS subtask's own commit, so a diff against it is empty and + previously mis-reported actual=[] (false-progress). The validator must + re-base onto the commit's parent so the committed work shows up as + actual. + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["a.py"]) + # Parent commit (prior history), then ST-001's actual work as its own + # commit — exactly the per-subtask-commit lifecycle. + (repo / "seed.txt").write_text("seed\n") + subprocess.run(["git", "add", "seed.txt"], cwd=repo, capture_output=True) + subprocess.run(["git", "commit", "-m", "init"], cwd=repo, capture_output=True) + (repo / "a.py").write_text("x = 1\n") + subprocess.run(["git", "add", "a.py"], cwd=repo, capture_output=True) + subprocess.run(["git", "commit", "-m", "ST-001"], cwd=repo, capture_output=True) + sha = subprocess.run( + ["git", "rev-parse", "HEAD"], cwd=repo, capture_output=True, text=True + ).stdout.strip() + # Mimic record_subtask_result --commit-sha : it advances + # last_subtask_commit_sha AND records the per-subtask commit_sha. + (branch_workspace / "step_state.json").write_text( + json.dumps( + { + "last_subtask_commit_sha": sha, + "subtask_results": {"ST-001": {"commit_sha": sha}}, + } + ) + ) + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "clean", report + assert report["actual"] == ["a.py"], report # committed work IS visible + assert report["unexpected"] == [], report + # base_ref re-based onto the parent (the committed SHA's ^), not the + # commit itself (which would diff to nothing). + assert report["base_ref"] == f"{sha}^", report + + def test_genuinely_empty_subtask_still_reports_no_actual( + self, branch_workspace, monkeypatch + ): + """Negative guard for #162: when the subtask has NO recorded commit and + nothing changed in the worktree, actual must stay empty so the + orchestrator's false-progress check still fires for a subtask that did + nothing. The #162 re-base only kicks in when the resolved base IS the + subtask's own recorded commit. + """ + repo = branch_workspace.parents[1] + self._init_git(repo) + self._write_blueprint(branch_workspace, "ST-001", ["a.py"]) + (repo / "seed.txt").write_text("seed\n") + subprocess.run(["git", "add", "seed.txt"], cwd=repo, capture_output=True) + subprocess.run(["git", "commit", "-m", "init"], cwd=repo, capture_output=True) + # No commit recorded for ST-001, no work done — base_ref falls back to + # HEAD (the prior commit), diff is empty. + monkeypatch.setenv("CLAUDE_PROJECT_DIR", str(repo)) + monkeypatch.delenv("MAP_STRICT_SCOPE", raising=False) + report = map_step_runner.validate_mutation_boundary("test-branch", "ST-001") + assert report["status"] == "clean", report + assert report["actual"] == [], report # nothing changed -> false-progress can fire + class TestDetectCrossSubtaskRegressionRisk: """detect_cross_subtask_regression_risk flags when the in-flight subtask