From 65c945d7d563c1ad6cdb9fc98df7740425232a31 Mon Sep 17 00:00:00 2001 From: Mikhail Petrov Date: Thu, 11 Jun 2026 18:45:57 +0300 Subject: [PATCH] fix(map-plan): add normalize_blueprint repair pass for decomposer drift (#168) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit task-decomposer routinely emits blueprints that fail the framework's own validate_blueprint_contract (Step 5.6), forcing manual JSON surgery between decompose and validate. Two mechanical self-consistency drifts recur: 1. Forward-dependency ordering — a subtask declared before the dependency it declares. The runtime walker consumes subtasks in declaration order, so the validator rejects this (deadlock guard); array position is the only problem. 2. coverage_map <-> bracket-tag mismatch — a coverage_map[req]=owner whose owner subtask's validation_criteria does not cite [req]. Add a deterministic normalize_blueprint repair pass (runner function + CLI + /map-plan Step 5.55) that: - stably topologically sorts subtasks[] so deps precede dependents (independent subtasks keep their relative order; a true cycle is left untouched for the validator to reject); - injects a [req]-tagged validation criterion for every coverage_map entry whose owner lacks the tag. It never invents coverage_map ownership or rewrites dependency edges, so genuine semantic gaps still fail Step 5.6. Idempotent (changed=false / writes nothing on already-normalized input). Wired into /map-plan as Step 5.55 (before 5.6). Regression tests reproduce the exact #168 scenario (both drifts at once) and prove validate_blueprint_contract flips false->true after normalize, plus idempotency, stable-order preservation, cycle pass-through, and CLI --check/in-place paths. Edited the .jinja single source and re-rendered (make render-templates); all generated trees + the live .map/scripts copy stay byte-identical (check-render). Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/skills/map-plan/SKILL.md | 14 ++ .map/scripts/map_step_runner.py | 212 +++++++++++++++++ CHANGELOG.md | 14 ++ .../templates/map/scripts/map_step_runner.py | 212 +++++++++++++++++ .../templates/skills/map-plan/SKILL.md | 14 ++ .../map/scripts/map_step_runner.py.jinja | 212 +++++++++++++++++ .../skills/map-plan/SKILL.md.jinja | 14 ++ tests/test_map_step_runner.py | 223 ++++++++++++++++++ 8 files changed, 915 insertions(+) diff --git a/.claude/skills/map-plan/SKILL.md b/.claude/skills/map-plan/SKILL.md index ec40bc40..62a6e395 100644 --- a/.claude/skills/map-plan/SKILL.md +++ b/.claude/skills/map-plan/SKILL.md @@ -221,6 +221,19 @@ Do NOT create subtasks for behavior listed under the spec's "Out of Scope > Alre Write decomposer output to `.map//blueprint.json` exactly once. Preserve evidence and metadata. +### Step 5.55: Normalize Blueprint (MANDATORY) + +```bash +python3 .map/scripts/map_step_runner.py normalize_blueprint +``` + +A deterministic repair pass that fixes the two self-consistency drifts the decomposer routinely emits — so you don't hand-edit `blueprint.json` between decompose and validate: + +- **Forward-dependency ordering:** stably topologically sorts `subtasks[]` so every dependency is declared before its dependents (independent subtasks keep their order; a true cycle is left for the validator to reject). +- **coverage_map bracket-tags:** for every `coverage_map[req] = owner` whose owner's `validation_criteria` doesn't already cite `[req]`, appends a `[req]`-tagged criterion. + +It never invents `coverage_map` ownership or rewrites dependency edges — genuine semantic gaps still fail Step 5.6. Idempotent (`changed: false` if nothing needed fixing). + ### Step 5.6: Post-Save Blueprint Validation (MANDATORY) ```bash @@ -287,6 +300,7 @@ Runner functions you'll commonly need from `/map-plan`: |---|---| | `record_plan_artifacts` | Persist spec/blueprint/task-plan into `artifact_manifest.json`. | | `record_workflow_fit [--diff-size SIZE] [--has-new-invariants 0\|1] [--needs-independent-review 0\|1] [--has-clear-acceptance-criteria 0\|1] [--test-first-required 0\|1] [--summary "..."]` | Persist the workflow-fit decision. Use the named flags — bool order is easy to confuse otherwise. | +| `normalize_blueprint [] [--check]` | Deterministically repair decomposer drift before validation: topo-sort `subtasks[]` so deps precede dependents + inject missing `coverage_map` bracket-tags. `--check` reports without writing. | | `validate_blueprint_contract ` | Run schema + semantic checks on `blueprint.json`. | | `list_plans` | List per-branch plan artifacts under `.map/` to pick scope from a multi-roadmap workspace. | | `save_research ` | Persist research-agent findings for a subtask (stdin-fed). | diff --git a/.map/scripts/map_step_runner.py b/.map/scripts/map_step_runner.py index e102445c..e8be77d7 100755 --- a/.map/scripts/map_step_runner.py +++ b/.map/scripts/map_step_runner.py @@ -2300,6 +2300,208 @@ def _constraint_body(c: dict) -> str: } +def _topo_sort_subtasks( + subtasks: list[object], +) -> tuple[Optional[list[dict[str, object]]], str]: + """Stable topological sort of a blueprint ``subtasks[]`` list. + + Returns ``(sorted_subtasks, note)``. ``sorted_subtasks`` is ``None`` when + the list cannot be reordered safely — a non-object entry, a missing or + duplicate id, or a true dependency cycle — in which case the caller keeps + the original order and lets ``validate_blueprint_contract`` report the + underlying problem. + + The sort is *stable*: among subtasks whose declared dependencies are all + already emitted, the one declared earliest in the original array is emitted + first. Independent subtasks therefore keep their relative order and the + rewrite is minimal — only forward-declared dependencies move earlier. + """ + ids: list[str] = [] + by_id: dict[str, dict[str, object]] = {} + for entry in subtasks: + if not isinstance(entry, dict): + return None, "subtasks contain a non-object entry; skipped reorder" + sid = entry.get("id") + if not isinstance(sid, str) or not sid: + return None, "a subtask is missing a string id; skipped reorder" + if sid in by_id: + return None, f"duplicate subtask id {sid!r}; skipped reorder" + ids.append(sid) + by_id[sid] = entry + + id_set = set(ids) + original_index = {sid: i for i, sid in enumerate(ids)} + + # Only intra-blueprint dependencies constrain ordering. Unknown deps and + # self-references are ignored here — validate_blueprint_contract reports + # those as hard errors; normalization never invents or rewrites them. + deps: dict[str, set[str]] = {} + for sid in ids: + raw = by_id[sid].get("dependencies") + dep_set: set[str] = set() + if isinstance(raw, list): + for dep in raw: + if isinstance(dep, str) and dep in id_set and dep != sid: + dep_set.add(dep) + deps[sid] = dep_set + + # Kahn's algorithm with a stable tie-break: among all nodes whose deps are + # already emitted, pick the one with the smallest original index. + emitted: list[str] = [] + emitted_set: set[str] = set() + remaining = set(ids) + while remaining: + ready = sorted( + (sid for sid in remaining if deps[sid] <= emitted_set), + key=lambda s: original_index[s], + ) + if not ready: + # Nothing emittable -> a dependency cycle remains; leave untouched. + return None, "dependency cycle detected; skipped reorder" + nxt = ready[0] + emitted.append(nxt) + emitted_set.add(nxt) + remaining.discard(nxt) + + return [by_id[sid] for sid in emitted], "" + + +def normalize_blueprint( + blueprint_path: str = "", + branch: Optional[str] = None, + write: bool = True, +) -> dict[str, object]: + """Deterministically repair the two self-consistency violations the + task-decomposer routinely emits, so planning stays self-serve + (``decompose -> normalize -> validate -> proceed``) without manual JSON + surgery (issue #168): + + 1. **Forward-dependency ordering** — stably topologically sort + ``subtasks[]`` so every dependency is declared BEFORE its dependents. + This satisfies the topological invariant that + ``validate_blueprint_contract`` enforces (the runtime walker consumes + subtasks in declaration order) without reordering by hand. A true + dependency cycle is left untouched so the validator still reports it. + 2. **coverage_map bracket-tags** — for every ``coverage_map[req] = owner`` + whose owner subtask's ``validation_criteria`` does not already cite + ``[req]``, append a traceability criterion that does. This is the + auto-fix the validator's ``[AC-N]`` / ``[SC-N]`` lineage check expects. + + Normalization is conservative: it never invents ``coverage_map`` ownership, + never rewrites dependency edges, and never touches a soft constraint that + relies on ``tradeoff_rationale`` instead of coverage. It only fixes the two + mechanical drifts above; genuine semantic gaps (a hard constraint missing + from ``coverage_map``, an unknown/cyclic dependency) remain for the + validator to flag. + + Idempotent: a second call on already-normalized input reports + ``changed: false`` and writes nothing. + """ + branch_name = branch or get_branch_name() + path = ( + Path(blueprint_path) + if blueprint_path + else get_branch_dir(branch_name) / "blueprint.json" + ) + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except FileNotFoundError: + return { + "status": "error", + "changed": False, + "errors": [f"blueprint not found: {path}"], + "path": str(path), + } + except (json.JSONDecodeError, OSError) as exc: + return { + "status": "error", + "changed": False, + "errors": [f"cannot read blueprint {path}: {exc}"], + "path": str(path), + } + + if not isinstance(payload, dict): + return { + "status": "error", + "changed": False, + "errors": ["blueprint root must be a JSON object"], + "path": str(path), + } + + # Bind the nested lookup so the isinstance narrowing applies to the same + # expression Pyright tracks (a re-invoked payload.get(...) would not narrow). + nested_blueprint = payload.get("blueprint") + blueprint_body = ( + nested_blueprint if isinstance(nested_blueprint, dict) else payload + ) + subtasks = blueprint_body.get("subtasks") + if not isinstance(subtasks, list) or not subtasks: + return { + "status": "error", + "changed": False, + "errors": ["blueprint must contain at least one subtask"], + "path": str(path), + } + + notes: list[str] = [] + + # --- 1. Stable topological sort of subtasks[] ------------------------ + reordered, sort_note = _topo_sort_subtasks(subtasks) + if sort_note: + notes.append(sort_note) + new_order = reordered if reordered is not None else subtasks + order_changed = reordered is not None and [ + s.get("id") for s in reordered + ] != [s.get("id") for s in subtasks if isinstance(s, dict)] + + # --- 2. Inject missing coverage_map bracket-tags --------------------- + coverage_map = payload.get("coverage_map") or blueprint_body.get("coverage_map") + subtasks_by_id = { + s.get("id"): s + for s in new_order + if isinstance(s, dict) and isinstance(s.get("id"), str) + } + injected_tags: list[str] = [] + if isinstance(coverage_map, dict): + for requirement_id, owner in coverage_map.items(): + if not isinstance(owner, str): + continue + owner_subtask = subtasks_by_id.get(owner) + if not isinstance(owner_subtask, dict): + continue + tag = f"[{requirement_id}]" + criteria = owner_subtask.get("validation_criteria") + if not isinstance(criteria, list): + criteria = [] + owner_subtask["validation_criteria"] = criteria + if any(isinstance(c, str) and tag in c for c in criteria): + continue + criteria.append( + f"VC{len(criteria) + 1} {tag}: satisfies coverage_map " + f"requirement {requirement_id}" + ) + injected_tags.append(f"{owner}:{tag}") + + changed = order_changed or bool(injected_tags) + + if order_changed: + blueprint_body["subtasks"] = new_order + + if changed and write: + _write_json_file(path, payload) + + return { + "status": "ok", + "changed": changed, + "reordered": order_changed, + "subtask_order": [s.get("id") for s in new_order if isinstance(s, dict)], + "injected_coverage_tags": injected_tags, + "notes": notes, + "path": str(path), + "written": bool(changed and write), + } + + def record_test_contract_handoff( subtask_id: str, failing_test_command: str = "", @@ -8684,6 +8886,16 @@ def _flag(name: str, default: str) -> str: if not result.get("valid"): sys.exit(1) + elif func_name == "normalize_blueprint": + extra = sys.argv[2:] + dry_run = any(arg in ("--check", "--dry-run") for arg in extra) + positional = [arg for arg in extra if not arg.startswith("--")] + blueprint_path = positional[0] if positional else "" + result = normalize_blueprint(blueprint_path, write=not dry_run) + print(json.dumps(result, indent=2, ensure_ascii=True)) + if result.get("status") != "ok": + sys.exit(1) + elif func_name == "record_test_contract_handoff" and len(sys.argv) >= 3: subtask_id = sys.argv[2] failing_test_command = sys.argv[3] if len(sys.argv) >= 4 else "" diff --git a/CHANGELOG.md b/CHANGELOG.md index ffb218b7..f5a68127 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- **`normalize_blueprint` deterministic repair pass (#168)**: a new runner + function (and `/map-plan` Step 5.55) that fixes the two self-consistency + drifts the `task-decomposer` routinely emits, so planning is self-serve + (`decompose → normalize → validate → proceed`) instead of requiring manual + JSON surgery between Step 5 and the Step 5.6 contract gate. It (1) stably + topologically sorts `subtasks[]` so every dependency is declared before its + dependents — satisfying `validate_blueprint_contract`'s forward-dependency + invariant without reordering by hand (independent subtasks keep their order; + a true cycle is left for the validator to reject), and (2) for every + `coverage_map[req] = owner` whose owner's `validation_criteria` doesn't cite + `[req]`, appends a `[req]`-tagged criterion. It never invents `coverage_map` + ownership or rewrites dependency edges — genuine semantic gaps still fail + Step 5.6. Idempotent. Run via + `python3 .map/scripts/map_step_runner.py normalize_blueprint [] [--check]`. - **Per-subtask token accounting**: a new `map-token-meter` hook (wired on `SubagentStop` and `Stop`) reads each transcript's per-turn `usage` and attributes input/output/cache-creation/cache-read tokens to the active diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py index e102445c..e8be77d7 100755 --- a/src/mapify_cli/templates/map/scripts/map_step_runner.py +++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py @@ -2300,6 +2300,208 @@ def _constraint_body(c: dict) -> str: } +def _topo_sort_subtasks( + subtasks: list[object], +) -> tuple[Optional[list[dict[str, object]]], str]: + """Stable topological sort of a blueprint ``subtasks[]`` list. + + Returns ``(sorted_subtasks, note)``. ``sorted_subtasks`` is ``None`` when + the list cannot be reordered safely — a non-object entry, a missing or + duplicate id, or a true dependency cycle — in which case the caller keeps + the original order and lets ``validate_blueprint_contract`` report the + underlying problem. + + The sort is *stable*: among subtasks whose declared dependencies are all + already emitted, the one declared earliest in the original array is emitted + first. Independent subtasks therefore keep their relative order and the + rewrite is minimal — only forward-declared dependencies move earlier. + """ + ids: list[str] = [] + by_id: dict[str, dict[str, object]] = {} + for entry in subtasks: + if not isinstance(entry, dict): + return None, "subtasks contain a non-object entry; skipped reorder" + sid = entry.get("id") + if not isinstance(sid, str) or not sid: + return None, "a subtask is missing a string id; skipped reorder" + if sid in by_id: + return None, f"duplicate subtask id {sid!r}; skipped reorder" + ids.append(sid) + by_id[sid] = entry + + id_set = set(ids) + original_index = {sid: i for i, sid in enumerate(ids)} + + # Only intra-blueprint dependencies constrain ordering. Unknown deps and + # self-references are ignored here — validate_blueprint_contract reports + # those as hard errors; normalization never invents or rewrites them. + deps: dict[str, set[str]] = {} + for sid in ids: + raw = by_id[sid].get("dependencies") + dep_set: set[str] = set() + if isinstance(raw, list): + for dep in raw: + if isinstance(dep, str) and dep in id_set and dep != sid: + dep_set.add(dep) + deps[sid] = dep_set + + # Kahn's algorithm with a stable tie-break: among all nodes whose deps are + # already emitted, pick the one with the smallest original index. + emitted: list[str] = [] + emitted_set: set[str] = set() + remaining = set(ids) + while remaining: + ready = sorted( + (sid for sid in remaining if deps[sid] <= emitted_set), + key=lambda s: original_index[s], + ) + if not ready: + # Nothing emittable -> a dependency cycle remains; leave untouched. + return None, "dependency cycle detected; skipped reorder" + nxt = ready[0] + emitted.append(nxt) + emitted_set.add(nxt) + remaining.discard(nxt) + + return [by_id[sid] for sid in emitted], "" + + +def normalize_blueprint( + blueprint_path: str = "", + branch: Optional[str] = None, + write: bool = True, +) -> dict[str, object]: + """Deterministically repair the two self-consistency violations the + task-decomposer routinely emits, so planning stays self-serve + (``decompose -> normalize -> validate -> proceed``) without manual JSON + surgery (issue #168): + + 1. **Forward-dependency ordering** — stably topologically sort + ``subtasks[]`` so every dependency is declared BEFORE its dependents. + This satisfies the topological invariant that + ``validate_blueprint_contract`` enforces (the runtime walker consumes + subtasks in declaration order) without reordering by hand. A true + dependency cycle is left untouched so the validator still reports it. + 2. **coverage_map bracket-tags** — for every ``coverage_map[req] = owner`` + whose owner subtask's ``validation_criteria`` does not already cite + ``[req]``, append a traceability criterion that does. This is the + auto-fix the validator's ``[AC-N]`` / ``[SC-N]`` lineage check expects. + + Normalization is conservative: it never invents ``coverage_map`` ownership, + never rewrites dependency edges, and never touches a soft constraint that + relies on ``tradeoff_rationale`` instead of coverage. It only fixes the two + mechanical drifts above; genuine semantic gaps (a hard constraint missing + from ``coverage_map``, an unknown/cyclic dependency) remain for the + validator to flag. + + Idempotent: a second call on already-normalized input reports + ``changed: false`` and writes nothing. + """ + branch_name = branch or get_branch_name() + path = ( + Path(blueprint_path) + if blueprint_path + else get_branch_dir(branch_name) / "blueprint.json" + ) + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except FileNotFoundError: + return { + "status": "error", + "changed": False, + "errors": [f"blueprint not found: {path}"], + "path": str(path), + } + except (json.JSONDecodeError, OSError) as exc: + return { + "status": "error", + "changed": False, + "errors": [f"cannot read blueprint {path}: {exc}"], + "path": str(path), + } + + if not isinstance(payload, dict): + return { + "status": "error", + "changed": False, + "errors": ["blueprint root must be a JSON object"], + "path": str(path), + } + + # Bind the nested lookup so the isinstance narrowing applies to the same + # expression Pyright tracks (a re-invoked payload.get(...) would not narrow). + nested_blueprint = payload.get("blueprint") + blueprint_body = ( + nested_blueprint if isinstance(nested_blueprint, dict) else payload + ) + subtasks = blueprint_body.get("subtasks") + if not isinstance(subtasks, list) or not subtasks: + return { + "status": "error", + "changed": False, + "errors": ["blueprint must contain at least one subtask"], + "path": str(path), + } + + notes: list[str] = [] + + # --- 1. Stable topological sort of subtasks[] ------------------------ + reordered, sort_note = _topo_sort_subtasks(subtasks) + if sort_note: + notes.append(sort_note) + new_order = reordered if reordered is not None else subtasks + order_changed = reordered is not None and [ + s.get("id") for s in reordered + ] != [s.get("id") for s in subtasks if isinstance(s, dict)] + + # --- 2. Inject missing coverage_map bracket-tags --------------------- + coverage_map = payload.get("coverage_map") or blueprint_body.get("coverage_map") + subtasks_by_id = { + s.get("id"): s + for s in new_order + if isinstance(s, dict) and isinstance(s.get("id"), str) + } + injected_tags: list[str] = [] + if isinstance(coverage_map, dict): + for requirement_id, owner in coverage_map.items(): + if not isinstance(owner, str): + continue + owner_subtask = subtasks_by_id.get(owner) + if not isinstance(owner_subtask, dict): + continue + tag = f"[{requirement_id}]" + criteria = owner_subtask.get("validation_criteria") + if not isinstance(criteria, list): + criteria = [] + owner_subtask["validation_criteria"] = criteria + if any(isinstance(c, str) and tag in c for c in criteria): + continue + criteria.append( + f"VC{len(criteria) + 1} {tag}: satisfies coverage_map " + f"requirement {requirement_id}" + ) + injected_tags.append(f"{owner}:{tag}") + + changed = order_changed or bool(injected_tags) + + if order_changed: + blueprint_body["subtasks"] = new_order + + if changed and write: + _write_json_file(path, payload) + + return { + "status": "ok", + "changed": changed, + "reordered": order_changed, + "subtask_order": [s.get("id") for s in new_order if isinstance(s, dict)], + "injected_coverage_tags": injected_tags, + "notes": notes, + "path": str(path), + "written": bool(changed and write), + } + + def record_test_contract_handoff( subtask_id: str, failing_test_command: str = "", @@ -8684,6 +8886,16 @@ def _flag(name: str, default: str) -> str: if not result.get("valid"): sys.exit(1) + elif func_name == "normalize_blueprint": + extra = sys.argv[2:] + dry_run = any(arg in ("--check", "--dry-run") for arg in extra) + positional = [arg for arg in extra if not arg.startswith("--")] + blueprint_path = positional[0] if positional else "" + result = normalize_blueprint(blueprint_path, write=not dry_run) + print(json.dumps(result, indent=2, ensure_ascii=True)) + if result.get("status") != "ok": + sys.exit(1) + elif func_name == "record_test_contract_handoff" and len(sys.argv) >= 3: subtask_id = sys.argv[2] failing_test_command = sys.argv[3] if len(sys.argv) >= 4 else "" diff --git a/src/mapify_cli/templates/skills/map-plan/SKILL.md b/src/mapify_cli/templates/skills/map-plan/SKILL.md index ec40bc40..62a6e395 100644 --- a/src/mapify_cli/templates/skills/map-plan/SKILL.md +++ b/src/mapify_cli/templates/skills/map-plan/SKILL.md @@ -221,6 +221,19 @@ Do NOT create subtasks for behavior listed under the spec's "Out of Scope > Alre Write decomposer output to `.map//blueprint.json` exactly once. Preserve evidence and metadata. +### Step 5.55: Normalize Blueprint (MANDATORY) + +```bash +python3 .map/scripts/map_step_runner.py normalize_blueprint +``` + +A deterministic repair pass that fixes the two self-consistency drifts the decomposer routinely emits — so you don't hand-edit `blueprint.json` between decompose and validate: + +- **Forward-dependency ordering:** stably topologically sorts `subtasks[]` so every dependency is declared before its dependents (independent subtasks keep their order; a true cycle is left for the validator to reject). +- **coverage_map bracket-tags:** for every `coverage_map[req] = owner` whose owner's `validation_criteria` doesn't already cite `[req]`, appends a `[req]`-tagged criterion. + +It never invents `coverage_map` ownership or rewrites dependency edges — genuine semantic gaps still fail Step 5.6. Idempotent (`changed: false` if nothing needed fixing). + ### Step 5.6: Post-Save Blueprint Validation (MANDATORY) ```bash @@ -287,6 +300,7 @@ Runner functions you'll commonly need from `/map-plan`: |---|---| | `record_plan_artifacts` | Persist spec/blueprint/task-plan into `artifact_manifest.json`. | | `record_workflow_fit [--diff-size SIZE] [--has-new-invariants 0\|1] [--needs-independent-review 0\|1] [--has-clear-acceptance-criteria 0\|1] [--test-first-required 0\|1] [--summary "..."]` | Persist the workflow-fit decision. Use the named flags — bool order is easy to confuse otherwise. | +| `normalize_blueprint [] [--check]` | Deterministically repair decomposer drift before validation: topo-sort `subtasks[]` so deps precede dependents + inject missing `coverage_map` bracket-tags. `--check` reports without writing. | | `validate_blueprint_contract ` | Run schema + semantic checks on `blueprint.json`. | | `list_plans` | List per-branch plan artifacts under `.map/` to pick scope from a multi-roadmap workspace. | | `save_research ` | Persist research-agent findings for a subtask (stdin-fed). | diff --git a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja index e102445c..e8be77d7 100755 --- a/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja +++ b/src/mapify_cli/templates_src/map/scripts/map_step_runner.py.jinja @@ -2300,6 +2300,208 @@ def validate_blueprint_contract( } +def _topo_sort_subtasks( + subtasks: list[object], +) -> tuple[Optional[list[dict[str, object]]], str]: + """Stable topological sort of a blueprint ``subtasks[]`` list. + + Returns ``(sorted_subtasks, note)``. ``sorted_subtasks`` is ``None`` when + the list cannot be reordered safely — a non-object entry, a missing or + duplicate id, or a true dependency cycle — in which case the caller keeps + the original order and lets ``validate_blueprint_contract`` report the + underlying problem. + + The sort is *stable*: among subtasks whose declared dependencies are all + already emitted, the one declared earliest in the original array is emitted + first. Independent subtasks therefore keep their relative order and the + rewrite is minimal — only forward-declared dependencies move earlier. + """ + ids: list[str] = [] + by_id: dict[str, dict[str, object]] = {} + for entry in subtasks: + if not isinstance(entry, dict): + return None, "subtasks contain a non-object entry; skipped reorder" + sid = entry.get("id") + if not isinstance(sid, str) or not sid: + return None, "a subtask is missing a string id; skipped reorder" + if sid in by_id: + return None, f"duplicate subtask id {sid!r}; skipped reorder" + ids.append(sid) + by_id[sid] = entry + + id_set = set(ids) + original_index = {sid: i for i, sid in enumerate(ids)} + + # Only intra-blueprint dependencies constrain ordering. Unknown deps and + # self-references are ignored here — validate_blueprint_contract reports + # those as hard errors; normalization never invents or rewrites them. + deps: dict[str, set[str]] = {} + for sid in ids: + raw = by_id[sid].get("dependencies") + dep_set: set[str] = set() + if isinstance(raw, list): + for dep in raw: + if isinstance(dep, str) and dep in id_set and dep != sid: + dep_set.add(dep) + deps[sid] = dep_set + + # Kahn's algorithm with a stable tie-break: among all nodes whose deps are + # already emitted, pick the one with the smallest original index. + emitted: list[str] = [] + emitted_set: set[str] = set() + remaining = set(ids) + while remaining: + ready = sorted( + (sid for sid in remaining if deps[sid] <= emitted_set), + key=lambda s: original_index[s], + ) + if not ready: + # Nothing emittable -> a dependency cycle remains; leave untouched. + return None, "dependency cycle detected; skipped reorder" + nxt = ready[0] + emitted.append(nxt) + emitted_set.add(nxt) + remaining.discard(nxt) + + return [by_id[sid] for sid in emitted], "" + + +def normalize_blueprint( + blueprint_path: str = "", + branch: Optional[str] = None, + write: bool = True, +) -> dict[str, object]: + """Deterministically repair the two self-consistency violations the + task-decomposer routinely emits, so planning stays self-serve + (``decompose -> normalize -> validate -> proceed``) without manual JSON + surgery (issue #168): + + 1. **Forward-dependency ordering** — stably topologically sort + ``subtasks[]`` so every dependency is declared BEFORE its dependents. + This satisfies the topological invariant that + ``validate_blueprint_contract`` enforces (the runtime walker consumes + subtasks in declaration order) without reordering by hand. A true + dependency cycle is left untouched so the validator still reports it. + 2. **coverage_map bracket-tags** — for every ``coverage_map[req] = owner`` + whose owner subtask's ``validation_criteria`` does not already cite + ``[req]``, append a traceability criterion that does. This is the + auto-fix the validator's ``[AC-N]`` / ``[SC-N]`` lineage check expects. + + Normalization is conservative: it never invents ``coverage_map`` ownership, + never rewrites dependency edges, and never touches a soft constraint that + relies on ``tradeoff_rationale`` instead of coverage. It only fixes the two + mechanical drifts above; genuine semantic gaps (a hard constraint missing + from ``coverage_map``, an unknown/cyclic dependency) remain for the + validator to flag. + + Idempotent: a second call on already-normalized input reports + ``changed: false`` and writes nothing. + """ + branch_name = branch or get_branch_name() + path = ( + Path(blueprint_path) + if blueprint_path + else get_branch_dir(branch_name) / "blueprint.json" + ) + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except FileNotFoundError: + return { + "status": "error", + "changed": False, + "errors": [f"blueprint not found: {path}"], + "path": str(path), + } + except (json.JSONDecodeError, OSError) as exc: + return { + "status": "error", + "changed": False, + "errors": [f"cannot read blueprint {path}: {exc}"], + "path": str(path), + } + + if not isinstance(payload, dict): + return { + "status": "error", + "changed": False, + "errors": ["blueprint root must be a JSON object"], + "path": str(path), + } + + # Bind the nested lookup so the isinstance narrowing applies to the same + # expression Pyright tracks (a re-invoked payload.get(...) would not narrow). + nested_blueprint = payload.get("blueprint") + blueprint_body = ( + nested_blueprint if isinstance(nested_blueprint, dict) else payload + ) + subtasks = blueprint_body.get("subtasks") + if not isinstance(subtasks, list) or not subtasks: + return { + "status": "error", + "changed": False, + "errors": ["blueprint must contain at least one subtask"], + "path": str(path), + } + + notes: list[str] = [] + + # --- 1. Stable topological sort of subtasks[] ------------------------ + reordered, sort_note = _topo_sort_subtasks(subtasks) + if sort_note: + notes.append(sort_note) + new_order = reordered if reordered is not None else subtasks + order_changed = reordered is not None and [ + s.get("id") for s in reordered + ] != [s.get("id") for s in subtasks if isinstance(s, dict)] + + # --- 2. Inject missing coverage_map bracket-tags --------------------- + coverage_map = payload.get("coverage_map") or blueprint_body.get("coverage_map") + subtasks_by_id = { + s.get("id"): s + for s in new_order + if isinstance(s, dict) and isinstance(s.get("id"), str) + } + injected_tags: list[str] = [] + if isinstance(coverage_map, dict): + for requirement_id, owner in coverage_map.items(): + if not isinstance(owner, str): + continue + owner_subtask = subtasks_by_id.get(owner) + if not isinstance(owner_subtask, dict): + continue + tag = f"[{requirement_id}]" + criteria = owner_subtask.get("validation_criteria") + if not isinstance(criteria, list): + criteria = [] + owner_subtask["validation_criteria"] = criteria + if any(isinstance(c, str) and tag in c for c in criteria): + continue + criteria.append( + f"VC{len(criteria) + 1} {tag}: satisfies coverage_map " + f"requirement {requirement_id}" + ) + injected_tags.append(f"{owner}:{tag}") + + changed = order_changed or bool(injected_tags) + + if order_changed: + blueprint_body["subtasks"] = new_order + + if changed and write: + _write_json_file(path, payload) + + return { + "status": "ok", + "changed": changed, + "reordered": order_changed, + "subtask_order": [s.get("id") for s in new_order if isinstance(s, dict)], + "injected_coverage_tags": injected_tags, + "notes": notes, + "path": str(path), + "written": bool(changed and write), + } + + def record_test_contract_handoff( subtask_id: str, failing_test_command: str = "", @@ -8684,6 +8886,16 @@ if __name__ == "__main__": if not result.get("valid"): sys.exit(1) + elif func_name == "normalize_blueprint": + extra = sys.argv[2:] + dry_run = any(arg in ("--check", "--dry-run") for arg in extra) + positional = [arg for arg in extra if not arg.startswith("--")] + blueprint_path = positional[0] if positional else "" + result = normalize_blueprint(blueprint_path, write=not dry_run) + print(json.dumps(result, indent=2, ensure_ascii=True)) + if result.get("status") != "ok": + sys.exit(1) + elif func_name == "record_test_contract_handoff" and len(sys.argv) >= 3: subtask_id = sys.argv[2] failing_test_command = sys.argv[3] if len(sys.argv) >= 4 else "" diff --git a/src/mapify_cli/templates_src/skills/map-plan/SKILL.md.jinja b/src/mapify_cli/templates_src/skills/map-plan/SKILL.md.jinja index ec40bc40..62a6e395 100644 --- a/src/mapify_cli/templates_src/skills/map-plan/SKILL.md.jinja +++ b/src/mapify_cli/templates_src/skills/map-plan/SKILL.md.jinja @@ -221,6 +221,19 @@ Do NOT create subtasks for behavior listed under the spec's "Out of Scope > Alre Write decomposer output to `.map//blueprint.json` exactly once. Preserve evidence and metadata. +### Step 5.55: Normalize Blueprint (MANDATORY) + +```bash +python3 .map/scripts/map_step_runner.py normalize_blueprint +``` + +A deterministic repair pass that fixes the two self-consistency drifts the decomposer routinely emits — so you don't hand-edit `blueprint.json` between decompose and validate: + +- **Forward-dependency ordering:** stably topologically sorts `subtasks[]` so every dependency is declared before its dependents (independent subtasks keep their order; a true cycle is left for the validator to reject). +- **coverage_map bracket-tags:** for every `coverage_map[req] = owner` whose owner's `validation_criteria` doesn't already cite `[req]`, appends a `[req]`-tagged criterion. + +It never invents `coverage_map` ownership or rewrites dependency edges — genuine semantic gaps still fail Step 5.6. Idempotent (`changed: false` if nothing needed fixing). + ### Step 5.6: Post-Save Blueprint Validation (MANDATORY) ```bash @@ -287,6 +300,7 @@ Runner functions you'll commonly need from `/map-plan`: |---|---| | `record_plan_artifacts` | Persist spec/blueprint/task-plan into `artifact_manifest.json`. | | `record_workflow_fit [--diff-size SIZE] [--has-new-invariants 0\|1] [--needs-independent-review 0\|1] [--has-clear-acceptance-criteria 0\|1] [--test-first-required 0\|1] [--summary "..."]` | Persist the workflow-fit decision. Use the named flags — bool order is easy to confuse otherwise. | +| `normalize_blueprint [] [--check]` | Deterministically repair decomposer drift before validation: topo-sort `subtasks[]` so deps precede dependents + inject missing `coverage_map` bracket-tags. `--check` reports without writing. | | `validate_blueprint_contract ` | Run schema + semantic checks on `blueprint.json`. | | `list_plans` | List per-branch plan artifacts under `.map/` to pick scope from a multi-roadmap workspace. | | `save_research ` | Persist research-agent findings for a subtask (stdin-fed). | diff --git a/tests/test_map_step_runner.py b/tests/test_map_step_runner.py index 7f611c40..97d6c25f 100644 --- a/tests/test_map_step_runner.py +++ b/tests/test_map_step_runner.py @@ -667,6 +667,229 @@ def test_validate_blueprint_contract_accepts_contract_sized_plan(branch_workspac assert result["subtask_count"] == 1 +def _blueprint_with_forward_dep_and_missing_tag() -> dict[str, object]: + """Reproduces issue #168: ST-001 depends on ST-002 but is declared FIRST + (forward-dependency ordering violation), and SC-1 is owned by ST-002 whose + validation_criteria does not cite [SC-1] (coverage bracket-tag drift).""" + return { + "summary": "Deliver a user-visible fix with a dependency", + "hard_constraints": [ + {"id": "AC-1", "description": "Timeouts must show a retryable message"}, + ], + "soft_constraints": [ + {"id": "SC-1", "description": "Prefer a concise implementation"}, + ], + "subtasks": [ + { + "id": "ST-001", + "title": "Wire the timeout message", + "aag_contract": "CheckoutService -> handle_timeout() -> retryable error", + "dependencies": ["ST-002"], + "affected_files": ["src/checkout.py"], + "expected_diff_size": "small", + "concern_type": "runtime", + "one_logical_step": True, + "validation_criteria": ["VC1 [AC-1]: timeout shows retryable message"], + }, + { + "id": "ST-002", + "title": "Build the retry helper module", + "aag_contract": "RetryHelper -> backoff() -> bounded retries", + "dependencies": [], + "affected_files": ["src/retry.py"], + "expected_diff_size": "small", + "concern_type": "runtime", + "one_logical_step": True, + "validation_criteria": ["VC1: bounded exponential backoff"], + }, + ], + "coverage_map": {"AC-1": "ST-001", "SC-1": "ST-002"}, + } + + +def test_normalize_blueprint_fixes_issue_168_drift(branch_workspace): + blueprint_path = branch_workspace / "blueprint.json" + blueprint_path.write_text( + json.dumps(_blueprint_with_forward_dep_and_missing_tag()) + ) + + # Before normalize: validator hard-stops on BOTH drifts. + before = map_step_runner.validate_blueprint_contract() + assert before["valid"] is False + assert before["forward_dep_violations"] == ["ST-001->ST-002"] + assert any("SC-1" in str(err) for err in before["errors"]) + + # Normalize: stable topo-sort + bracket-tag injection. + norm = map_step_runner.normalize_blueprint() + assert norm["status"] == "ok" + assert norm["changed"] is True + assert norm["reordered"] is True + assert norm["subtask_order"] == ["ST-002", "ST-001"] + assert norm["injected_coverage_tags"] == ["ST-002:[SC-1]"] + assert norm["written"] is True + + # After normalize: validator passes and the file on disk reflects both fixes. + after = map_step_runner.validate_blueprint_contract() + assert after["valid"] is True + assert after["errors"] == [] + + written = json.loads(blueprint_path.read_text()) + assert [s["id"] for s in written["subtasks"]] == ["ST-002", "ST-001"] + st002 = next(s for s in written["subtasks"] if s["id"] == "ST-002") + assert any("[SC-1]" in c for c in st002["validation_criteria"]) + + +def test_normalize_blueprint_is_idempotent(branch_workspace): + blueprint_path = branch_workspace / "blueprint.json" + blueprint_path.write_text( + json.dumps(_blueprint_with_forward_dep_and_missing_tag()) + ) + + first = map_step_runner.normalize_blueprint() + assert first["changed"] is True + + second = map_step_runner.normalize_blueprint() + assert second["changed"] is False + assert second["reordered"] is False + assert second["injected_coverage_tags"] == [] + assert second["written"] is False + + +def test_normalize_blueprint_preserves_order_for_independent_subtasks(branch_workspace): + blueprint = { + "summary": "Two independent subtasks already in a valid order", + **_blueprint_constraint_fields(), + "subtasks": [ + { + "id": "ST-001", + "title": "First", + "aag_contract": "A -> a() -> x", + "dependencies": [], + "affected_files": ["src/a.py"], + "expected_diff_size": "small", + "concern_type": "runtime", + "one_logical_step": True, + "validation_criteria": ["VC1 [AC-1]: a", "VC2 [INV-1]: b"], + }, + { + "id": "ST-002", + "title": "Second", + "aag_contract": "B -> b() -> y", + "dependencies": [], + "affected_files": ["src/b.py"], + "expected_diff_size": "small", + "concern_type": "runtime", + "one_logical_step": True, + "validation_criteria": ["VC1: standalone"], + }, + ], + "coverage_map": {"AC-1": "ST-001", "INV-1": "ST-001"}, + } + (branch_workspace / "blueprint.json").write_text(json.dumps(blueprint)) + + result = map_step_runner.normalize_blueprint() + + # No dependency edges to reorder and all tags present -> stable no-op. + assert result["reordered"] is False + assert result["changed"] is False + assert result["subtask_order"] == ["ST-001", "ST-002"] + + +def test_normalize_blueprint_leaves_dependency_cycle_untouched(branch_workspace): + blueprint = { + "summary": "Cyclic dependencies cannot be topologically sorted", + **_blueprint_constraint_fields(), + "subtasks": [ + { + "id": "ST-001", + "title": "First", + "aag_contract": "A -> a() -> x", + "dependencies": ["ST-002"], + "affected_files": ["src/a.py"], + "expected_diff_size": "small", + "concern_type": "runtime", + "one_logical_step": True, + "validation_criteria": ["VC1 [AC-1]: a"], + }, + { + "id": "ST-002", + "title": "Second", + "aag_contract": "B -> b() -> y", + "dependencies": ["ST-001"], + "affected_files": ["src/b.py"], + "expected_diff_size": "small", + "concern_type": "runtime", + "one_logical_step": True, + "validation_criteria": ["VC1: standalone"], + }, + ], + "coverage_map": {"AC-1": "ST-001"}, + } + (branch_workspace / "blueprint.json").write_text(json.dumps(blueprint)) + + result = map_step_runner.normalize_blueprint() + + assert result["reordered"] is False + assert any("cycle" in str(note) for note in result["notes"]) + # A true cycle is left untouched for validate_blueprint_contract to reject. + written = json.loads((branch_workspace / "blueprint.json").read_text()) + assert [s["id"] for s in written["subtasks"]] == ["ST-001", "ST-002"] + + +def test_normalize_blueprint_cli_check_does_not_write(tmp_path): + blueprint_path = tmp_path / "blueprint.json" + blueprint_path.write_text( + json.dumps(_blueprint_with_forward_dep_and_missing_tag()) + ) + original = blueprint_path.read_text() + + result = subprocess.run( + [ + sys.executable, + str(SCRIPTS_PATH / "map_step_runner.py"), + "normalize_blueprint", + str(blueprint_path), + "--check", + ], + cwd=tmp_path, + check=True, + capture_output=True, + text=True, + ) + + payload = json.loads(result.stdout) + assert payload["status"] == "ok" + assert payload["changed"] is True + assert payload["written"] is False + # --check must not mutate the file on disk. + assert blueprint_path.read_text() == original + + +def test_normalize_blueprint_cli_writes_in_place(tmp_path): + blueprint_path = tmp_path / "blueprint.json" + blueprint_path.write_text( + json.dumps(_blueprint_with_forward_dep_and_missing_tag()) + ) + + result = subprocess.run( + [ + sys.executable, + str(SCRIPTS_PATH / "map_step_runner.py"), + "normalize_blueprint", + str(blueprint_path), + ], + cwd=tmp_path, + check=True, + capture_output=True, + text=True, + ) + + payload = json.loads(result.stdout) + assert payload["written"] is True + written = json.loads(blueprint_path.read_text()) + assert [s["id"] for s in written["subtasks"]] == ["ST-002", "ST-001"] + + def test_acceptance_coverage_report_tracks_downstream_evidence(branch_workspace): blueprint = { "summary": "Deliver a user-visible fix",