diff --git a/.claude-plugin/manifest.json b/.claude-plugin/manifest.json index 15b24bd3..ccc9bd06 100644 --- a/.claude-plugin/manifest.json +++ b/.claude-plugin/manifest.json @@ -1,7 +1,7 @@ { "name": "boundline", "displayName": "Boundline Assistant Support for Claude Code", - "version": "0.66.0", + "version": "0.67.0", "description": "CLI-authoritative assistant support for bounded engineering work", "author": { "name": "Apply The", diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index a8ec1964..e5615892 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "boundline", "displayName": "Boundline Assistant Support for Codex", - "version": "0.66.0", + "version": "0.67.0", "description": "CLI-authoritative assistant support for bounded engineering work", "author": { "name": "Apply The", diff --git a/.copilot-prompts/pack.json b/.copilot-prompts/pack.json index 0c9676c1..18d29ce5 100644 --- a/.copilot-prompts/pack.json +++ b/.copilot-prompts/pack.json @@ -1,7 +1,7 @@ { "name": "boundline", "displayName": "Boundline Copilot Prompt Pack", - "version": "0.66.0", + "version": "0.67.0", "description": "CLI-authoritative Copilot prompt pack for bounded engineering work", "author": { "name": "Apply The", diff --git a/.cursor-plugin/manifest.json b/.cursor-plugin/manifest.json index f069d0fe..ebcb769c 100644 --- a/.cursor-plugin/manifest.json +++ b/.cursor-plugin/manifest.json @@ -1,7 +1,7 @@ { "name": "boundline", "displayName": "Boundline Assistant Support for Cursor", - "version": "0.66.0", + "version": "0.67.0", "description": "CLI-authoritative assistant support for bounded engineering work", "author": { "name": "Apply The", diff --git a/.gitignore b/.gitignore index 98afbf11..f0e7dcd0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Ignore the homebrew-boundline directory, which may contain generated files or dependencies related to the project. 
+/homebrew-boundline + # Generated by Cargo # will have compiled files and executables debug diff --git a/.specify/feature.json b/.specify/feature.json index 83fc8b47..aa159e30 100644 --- a/.specify/feature.json +++ b/.specify/feature.json @@ -1,3 +1,3 @@ { - "feature_directory": "specs/066-agentic-framework-integration" + "feature_directory": "specs/067-plan-quality-contract" } diff --git a/AGENTS.md b/AGENTS.md index 94463ea3..3337947d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,12 +1,14 @@ # boundline Development Guidelines -Auto-generated from all feature plans. Last updated: 2026-05-31 +Auto-generated from all feature plans. Last updated: 2026-06-02 ## Active Technologies - Rust 1.96.0, edition 2024 + existing workspace dependencies (`clap`, `serde`, `serde_json`, `thiserror`, `tracing`, `uuid`, `toml`, `rusqlite` with bundled SQLite support), existing workspace crates (`boundline-core`, `boundline-adapters`, `boundline-cli`), and one optional trusted `sqlite-vec` extension-loading path for local vector tables (065-activate-sqlite-vec) - existing workspace-local `.boundline/session.json`, `.boundline/traces/`, `.boundline/config.toml`, and `.boundline/context-intelligence/retrieval-index.sqlite3`, extended with a companion `.boundline/context-intelligence/manifest.json`, managed `.gitignore` entries, and vector-backed semantic tables inside the same derived SQLite store (065-activate-sqlite-vec) - Rust 1.96.0, edition 2024 across the Boundline workspace, the sibling template repo, and the sibling Speckit adapter repo for the initial compatibility line + existing workspace crates and dependencies (`clap`, `dialoguer`, `serde`, `serde_json`, `thiserror`, `tracing`, `toml`, `uuid`, `boundline-core`, `boundline-adapters`, `boundline-cli`) plus a shared framework-adapter protocol surface owned by `boundline-adapters` and consumed by sibling repos through versioned git-tag dependencies rather than committed path-based copies (066-agentic-framework-integration) - 
workspace-local `.boundline/config.toml`, `.boundline/session.json`, `.boundline/traces/`, `.boundline/execution.json`, and `.boundline/workflows.toml`, extended with an optional adapter selection block and adapter audit fields, while the sibling template and Speckit repos persist only their own Cargo manifests, README docs, and protocol fixtures (066-agentic-framework-integration) +- Rust 1.96.0, edition 2024 + existing workspace crates and dependencies only; no new runtime dependency planned (067-plan-quality-contract) +- existing workspace-local session and trace files, extended with additive plan-quality fields and trace-visible projections (067-plan-quality-contract) - Rust 1.96.0, edition 2024 + `clap`, `serde`, `serde_json`, `thiserror`, `tracing`, `uuid`, `toml`, `rusqlite`, `dialoguer` - Workspace-local config and traces: `.boundline/session.json`, `.boundline/traces/`, `.boundline/config.toml`, `.boundline/execution.json`, `.boundline/workflows.toml` @@ -39,9 +41,9 @@ Crate versioning follows Semantic Versioning. Before 1.0.0, breaking changes MAY occur in minor versions. 
## Recent Changes +- 067-plan-quality-contract: Added Rust 1.96.0, edition 2024 + existing workspace crates and dependencies only; no new runtime dependency planned - 066-agentic-framework-integration: Added Rust 1.96.0, edition 2024 across the Boundline workspace, the sibling template repo, and the sibling Speckit adapter repo for the initial compatibility line + existing workspace crates and dependencies (`clap`, `dialoguer`, `serde`, `serde_json`, `thiserror`, `tracing`, `toml`, `uuid`, `boundline-core`, `boundline-adapters`, `boundline-cli`) plus a shared framework-adapter protocol surface owned by `boundline-adapters` and consumed by sibling repos through versioned git-tag dependencies rather than committed path-based copies - 065-activate-sqlite-vec: Added Rust 1.96.0, edition 2024 + existing workspace dependencies (`clap`, `serde`, `serde_json`, `thiserror`, `tracing`, `uuid`, `toml`, `rusqlite` with bundled SQLite support), existing workspace crates (`boundline-core`, `boundline-adapters`, `boundline-cli`), and one optional trusted `sqlite-vec` extension-loading path for local vector tables -- 063-assistant-delight-followthrough: Added Rust 1.96.0, edition 2024, plus repository-managed Markdown and JSON assistant assets + existing workspace crates and runtime dependencies (`clap`, `dialoguer`, `serde`, `serde_json`, `thiserror`, `tracing`, `uuid`, `toml`, `rusqlite`); no new runtime dependencies planned for the first slice diff --git a/CHANGELOG.md b/CHANGELOG.md index d10aa28b..eaf16c46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,10 +46,28 @@ Highlights: Release metadata note: -- The latest published Boundline release remains `0.66.0`. The corrected Spec - 066 semantics are recorded under `Unreleased` until a new release tag exists, - so published package metadata, distribution metadata, and sibling git-tag - dependency pins stay on the last public compatibility line. +- The latest published Boundline release remains `0.67.0`. 
The next release + work is recorded under `Unreleased`, so published package metadata, + distribution metadata, and sibling git-tag dependency pins stay on the last + public compatibility line. + +## [0.67.0] - 2026-06-02 + +Delivered specs: + +- `067` - Plan Quality Contract + +Highlights: + +- Added the first planning-readiness gate so plans without a credible + validation strategy stop before execution handoff. +- Persisted additive `plan_quality_state`, `plan_quality_findings`, and + `plan_quality_assumptions` through status, orchestration, inspect, and trace + projections. +- Preserved the one-question `phase_request` recovery path across the CLI and + assistant surfaces. +- Released aligned metadata, docs, roadmap entries, and bundled manifests for + the 0.67.0 line. ## [0.66.0] - 2026-05-31 diff --git a/Cargo.lock b/Cargo.lock index f063951b..896f9dc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -90,7 +90,7 @@ checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" [[package]] name = "boundline" -version = "0.66.0" +version = "0.67.0" dependencies = [ "boundline-adapters", "boundline-cli", @@ -109,7 +109,7 @@ dependencies = [ [[package]] name = "boundline-adapters" -version = "0.66.0" +version = "0.67.0" dependencies = [ "boundline-core", "clap", @@ -127,7 +127,7 @@ dependencies = [ [[package]] name = "boundline-cli" -version = "0.66.0" +version = "0.67.0" dependencies = [ "boundline-adapters", "boundline-core", @@ -144,7 +144,7 @@ dependencies = [ [[package]] name = "boundline-core" -version = "0.66.0" +version = "0.67.0" dependencies = [ "clap", "serde", diff --git a/Cargo.toml b/Cargo.toml index 639321e1..acccb1b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,9 +8,9 @@ members = [ resolver = "3" [workspace.package] -# Published release metadata remains on 0.66.0 until the next public tag is -# cut; corrected Spec 066 semantics are tracked under CHANGELOG Unreleased. 
-version = "0.66.0" +# Published release metadata tracks the current public tag; corrected Spec 067 +# semantics are tracked under CHANGELOG 0.67.0. +version = "0.67.0" edition = "2024" license = "MIT" diff --git a/README.md b/README.md index c48a039a..8d89b118 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,11 @@ Boundline forces an explicit, inspectable workflow: 3. `run` -> Execute the next approved step. 4. `inspect` -> Report the authoritative runtime state. +In the 0.67.0 release, `plan` also evaluates plan quality before execution +handoff. If the active plan is missing a credible validation strategy or +another blocking planning input, Boundline stops on one `phase_request` and +keeps the session non-terminal until you answer. + ## Quick Start ```bash @@ -139,4 +144,4 @@ when the operator deliberately asks for `--compatibility`. - Bug reports & feature requests: `.github/ISSUE_TEMPLATE/` - Vulnerability reporting: [SECURITY.md](SECURITY.md) - Participation expectations: `.github/CODE_OF_CONDUCT.md` -- Contributor workflow: [CONTRIBUTING.md](CONTRIBUTING.md) \ No newline at end of file +- Contributor workflow: [CONTRIBUTING.md](CONTRIBUTING.md) diff --git a/ROADMAP.md b/ROADMAP.md deleted file mode 100644 index ab86ec2e..00000000 --- a/ROADMAP.md +++ /dev/null @@ -1,139 +0,0 @@ -# Boundline Roadmap - -Canon is downstream from Boundline in this roadmap: Boundline thinks, decides, -orchestrates, and executes, while Canon governs meaningful flow stages and -persists structured artifacts that Boundline can reuse for reasoning. - -Delivered release history belongs in [CHANGELOG.md](CHANGELOG.md). This file is -for current direction, future feature sequencing, and product boundaries. 
- -## Current Status: v0.66.0 - -Boundline currently ships the session-native CLI/runtime plus the explicit -framework-adapter lifecycle: Canon-aware built-in behavior stays the default, -adapter activation remains operator-controlled, claimed-stage ownership and -hook delivery stay visible through the CLI and traces, and the sqlite-vec -derived-index lifecycle remains owned by persisted workspace state. There is -still no separate terminal UI product line on the forward roadmap. - -### Delivered in 0.66.0 - -- one explicit framework-adapter slot per workspace now supports - `boundline adapter add|show|remove`, the shipped `speckit` known profile, - and custom trusted local adapter registration with guided required-field - setup -- the V1 framework-adapter contract now formalizes one-shot JSON over - stdin/stdout, declared transport support, standard success or error - envelopes, and stderr that remains trace-only enrichment rather than a second - control plane -- status, inspect, config, and runtime trace surfaces now disclose adapter - selection, ownership claims, transport compatibility, hook delivery, and - guided recovery when config or execution blocks adapter use -- the sibling `boundline-framework-template` and - `boundline-adapter-speckit` repositories now ship the released compatibility - scaffold for external adapter development and validation - -### Delivered in 0.65.0 - -- sqlite-vec-backed local semantic retrieval over the single derived SQLite - store is now active, with explicit fallback and bounded authority order - preserved on normal runtime surfaces -- `boundline index status|refresh|rebuild|clean|doctor` now provides manifest- - backed lifecycle control, incremental refresh, and tracked-artifact or - corruption diagnosis -- derived-index hygiene now includes managed manifest plus WAL/SHM ignore rules, - optional stale-mark Git hooks, and probe or diagnostics visibility into index - health and hook state - -### Delivered in 0.64.0 - -- 
session-native orchestration remains the primary delivery surface -- assistant command packs stay aligned with the CLI and trace-backed status mode -- governed delivery, inspection, and distribution workflows ship on the current - workspace version - -## Objective - -Evolve Boundline into a system capable of taking a problem and transforming it -into working code with bounded execution, inspectable reasoning, and multi-role -quality control. - -## Current Baseline - -Boundline already has the primary delivery substrate in place: - -- session-native orchestration and trace-backed status surfaces -- bounded planning, execution, review, governance, recovery, and inspection -- Canon-aware governed delivery without making Canon the runtime owner -- local context intelligence with SQLite and FTS5 retrieval plus semantic - acceleration -- guidance catalog packs, guardian findings, authority-zoned councils, adaptive - governance, and reasoning-profile support -- assistant command surfaces across supported hosts -- release-aligned distribution metadata and install diagnostics - -Future roadmap items should extend this baseline rather than re-describe shipped -capabilities as new features. - -## Forward Roadmap - -The repo-local `roadmap/` folder carries the active forward-looking drafts: - -- [Next Boundline Roadmap](roadmap/Next%20-%20forward-roadmap.md) - absorbs the next forward-looking work into Boundline. It prioritizes large - codebase context hardening, external capability providers including open-model - adapters, evals and runtime observability, Boundline help-next, guidance - activation hardening, council and adaptive-governance hardening, sandboxed - execution, MCP adapters, AI gateway economics, browser validation providers, - and trace-linked memory hygiene. 
-- [TD-001: Complexity Hotspots And Structural Debt](roadmap/TD-001-complexity-hotspots.md) - remains the active structural-debt watchlist for oversized Rust files and long - functions that should be reduced during future feature slices. - -## Sequencing Rule - -1. Boundline must deliver visible runtime trust before more platform abstraction - work. -2. Operator surfaces must remain thin shells over the existing CLI/runtime, not - second products or parallel orchestration engines. -3. Large-codebase handling, provider permissions, and evals must precede - stronger autonomy. -4. Canon must prove value inside the real delivery loop, not beside it. -5. MCP, browser automation, and AI gateway work happen as adapter or scale - layers after Boundline-owned permissions and trace semantics are stable. - -## Product Boundary - -Canon-exclusive roadmap work is intentionally not listed as a Boundline feature. -Canon mode templates, Canon packet-quality validation, Canon `help-next`, Canon -MCP server implementation, and Canon project-memory promotion rules belong in -Canon. Boundline consumes those outputs only through stable metadata contracts -such as readiness state, evidence refs, lineage refs, approval state, and -project-memory promotion status. - -## Architecture - -```text -User / Copilot / Claude / Codex / Cursor / Gemini - | - v - Boundline - - Orchestrator - - Flows - - Agents - - Execution - - Review - - Adaptive governance - | - v - Canon - - governed stage docs - - artifact persistence - - reusable project knowledge -``` - -## In One Sentence - -Boundline takes a problem and transforms it into working code by orchestrating -bounded execution itself while using Canon to govern stage outputs and provide -reusable documentation. 
\ No newline at end of file diff --git a/assistant/catalog/model-catalog.toml b/assistant/catalog/model-catalog.toml index ca5f5da4..875a1696 100644 --- a/assistant/catalog/model-catalog.toml +++ b/assistant/catalog/model-catalog.toml @@ -1,7 +1,7 @@ [metadata] source_label = "bundled" catalog_version = "0.64.0" -updated_at = "2026-05-30" +updated_at = "2026-06-02" [[runtimes]] runtime = "copilot" @@ -39,10 +39,6 @@ display_name = "Claude Sonnet 4.6" model_id = "sonnet-4.5" display_name = "Claude Sonnet 4.5" -[[runtimes.models]] -model_id = "opus-4.8" -display_name = "Claude Opus 4.8" - [[runtimes.models]] model_id = "opus-4.7" display_name = "Claude Opus 4.7" diff --git a/assistant/global/manifest.json b/assistant/global/manifest.json index b36d26bf..1c5da868 100644 --- a/assistant/global/manifest.json +++ b/assistant/global/manifest.json @@ -1,7 +1,7 @@ { "name": "boundline-global-assistant-bootstrap", "display_name": "Boundline Global Assistant Bootstrap", - "version": "0.66.0", + "version": "0.67.0", "description": "User-scoped assistant bootstrap commands for Boundline before workspace initialization.", "commands": [ "/boundline:init", diff --git a/assistant/plugin-metadata.json b/assistant/plugin-metadata.json index 34828e69..a093c939 100644 --- a/assistant/plugin-metadata.json +++ b/assistant/plugin-metadata.json @@ -1,7 +1,7 @@ { "name": "boundline", "displayName": "Boundline Assistant Support", - "version": "0.66.0", + "version": "0.67.0", "description": "CLI-authoritative assistant support for bounded engineering work", "author": { "name": "Apply The", diff --git a/distribution/channel-metadata.toml b/distribution/channel-metadata.toml index 92737342..0837e6a1 100644 --- a/distribution/channel-metadata.toml +++ b/distribution/channel-metadata.toml @@ -1,6 +1,5 @@ -# The latest published Boundline line remains 0.66.0 until the corrected -# post-066 semantics ship under a new public release tag. 
-boundline_version = "0.66.0" +# The latest published Boundline line is 0.67.0. +boundline_version = "0.67.0" # canon_version is the explicit Canon compatibility target for this release. # Keep it pinned so Boundline can be prepared before the Canon tag is public. canon_version = "0.63.0" @@ -18,8 +17,8 @@ update_command = "brew upgrade boundline" [channels.winget] status = "ready_for_publication" package_identifier = "ApplyThe.Boundline" -manifest_root = "distribution/winget/manifests/a/ApplyThe/Boundline/0.66.0" +manifest_root = "distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0" install_command = "winget install ApplyThe.Boundline" update_command = "winget upgrade ApplyThe.Boundline" -bundle_name = "boundline-bundle-0.66.0-windows-x86_64.zip" +bundle_name = "boundline-bundle-0.67.0-windows-x86_64.zip" canon_asset = "https://github.com/apply-the/canon/releases/download/0.63.0/canon-0.63.0-windows-x86_64.zip" diff --git a/distribution/homebrew/Formula/boundline.rb b/distribution/homebrew/Formula/boundline.rb index a78cf06f..d3057e8f 100644 --- a/distribution/homebrew/Formula/boundline.rb +++ b/distribution/homebrew/Formula/boundline.rb @@ -3,8 +3,8 @@ class Boundline < Formula desc "Local delivery orchestrator for bounded engineering work" homepage "https://github.com/apply-the/boundline" - url "https://github.com/apply-the/boundline", using: :git, tag: "0.66.0" - version "0.66.0" + url "https://github.com/apply-the/boundline", using: :git, tag: "0.67.0" + version "0.67.0" license "MIT" head "https://github.com/apply-the/boundline", branch: "main", using: :git @@ -45,7 +45,7 @@ def install def caveats <<~EOS - Run boundline doctor --install after install or upgrade to verify the Boundline 0.66.0 + Canon 0.63.0 pairing. + Run boundline doctor --install after install or upgrade to verify the Boundline 0.67.0 + Canon 0.63.0 pairing. 
EOS end diff --git a/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.installer.yaml b/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.installer.yaml new file mode 100644 index 00000000..f358360f --- /dev/null +++ b/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.installer.yaml @@ -0,0 +1,18 @@ +PackageIdentifier: ApplyThe.Boundline +PackageVersion: 0.67.0 +InstallerType: zip +NestedInstallerType: portable +Commands: + - boundline + - canon +Installers: + - Architecture: x64 + InstallerUrl: https://github.com/apply-the/boundline/releases/download/0.67.0/boundline-bundle-0.67.0-windows-x86_64.zip + InstallerSha256: REPLACE_WITH_WINDOWS_X86_64_SHA256 + NestedInstallerFiles: + - RelativeFilePath: boundline.exe + PortableCommandAlias: boundline + - RelativeFilePath: canon.exe + PortableCommandAlias: canon +ManifestType: installer +ManifestVersion: 1.6.0 diff --git a/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.locale.en-US.yaml b/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.locale.en-US.yaml new file mode 100644 index 00000000..c8633d66 --- /dev/null +++ b/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.locale.en-US.yaml @@ -0,0 +1,22 @@ +PackageIdentifier: ApplyThe.Boundline +PackageVersion: 0.67.0 +PackageLocale: en-US +Publisher: Apply The +PublisherUrl: https://github.com/apply-the/boundline +PublisherSupportUrl: https://github.com/apply-the/boundline/issues +Author: Apply The +PackageName: Boundline +PackageUrl: https://github.com/apply-the/boundline +ShortDescription: Boundline is a local delivery orchestrator for bounded engineering work. +Description: | + Boundline is a local delivery orchestrator for bounded engineering work. + It owns orchestration, bounded planning, execution, + validation, and session continuity. 
The Windows release bundle installs both + boundline and a compatible Canon companion so boundline doctor --install can verify + the supported pairing after install or upgrade. +Moniker: boundline +License: MIT +LicenseUrl: https://github.com/apply-the/boundline/blob/main/LICENSE +ReleaseNotesUrl: https://github.com/apply-the/boundline/blob/main/CHANGELOG.md +ManifestType: defaultLocale +ManifestVersion: 1.6.0 diff --git a/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.yaml b/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.yaml new file mode 100644 index 00000000..519be9cf --- /dev/null +++ b/distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/ApplyThe.Boundline.yaml @@ -0,0 +1,5 @@ +PackageIdentifier: ApplyThe.Boundline +PackageVersion: 0.67.0 +DefaultLocale: en-US +ManifestType: version +ManifestVersion: 1.6.0 diff --git a/docs/architecture/runtime-model.md b/docs/architecture/runtime-model.md index f29c2e9f..5049f161 100644 --- a/docs/architecture/runtime-model.md +++ b/docs/architecture/runtime-model.md @@ -103,7 +103,7 @@ See [[Project Memory Structure|Project-Memory-Structure]]. - Canon remains an external governed-knowledge boundary, not Boundline's orchestrator. -The current Boundline `0.66.0` line documents Canon `0.63.0` support for +The current Boundline `0.67.0` line documents Canon `0.63.0` support for `canon governance start|refresh|capabilities --json` `v1`. The same runtime now owns the derived semantic index lifecycle under @@ -111,6 +111,12 @@ The same runtime now owns the derived semantic index lifecycle under `manifest.json`, explicit `boundline index ...` commands, and lightweight stale-mark hook behavior when the operator opts in. +The 0.67.0 release also ships the first plan-quality readiness gate. 
It runs +after goal quality, records `plan_quality_state`, `plan_quality_findings`, and +`plan_quality_assumptions`, and emits one `phase_request` when a plan needs a +credible validation strategy or another blocking planning input before +execution handoff. + ## Framework Adapter Boundary Framework adapters extend the runtime without replacing it. diff --git a/docs/governance/guardians.md b/docs/governance/guardians.md index 06c4ef43..953ffc58 100644 --- a/docs/governance/guardians.md +++ b/docs/governance/guardians.md @@ -112,4 +112,4 @@ Expect the plan and inspect output to show relevant context, guidance sources, v ## Source Reference -See [Extending the Guidance Catalog](https://github.com/apply-the/boundline/blob/main/tech-docs/guides/extending-guidance-catalog.md) for pack authoring details. +See [Extending the Guidance Catalog](https://github.com/apply-the/boundline/blob/0.67.0/tech-docs/guides/extending-guidance-catalog.md) for pack authoring details. diff --git a/docs/guide/common-workflows.md b/docs/guide/common-workflows.md index 6c1587c8..90b141e9 100644 --- a/docs/guide/common-workflows.md +++ b/docs/guide/common-workflows.md @@ -2,6 +2,10 @@ Use this page while operating the main session-native Boundline loop. +Boundline 0.67.0 ships the first plan-quality gate. When the plan is missing a +validation strategy or another blocking planning input, the runtime +stops on one `phase_request` and keeps the same session alive for recovery. + ## The Standard Loop ```text @@ -50,6 +54,11 @@ Planning can stop explicitly. If the runtime surfaces `goal_quality_state`, `plan_quality_state`, `backlog_quality_state`, or `planning_analysis_state`, follow that output literally. +If `plan_quality_state` is `clarification_required`, answer the one question in +the emitted `phase_request` before continuing. If the output is `ready`, but a +later gate blocks, use `status`, `next`, and `inspect` to follow the runtime's +recovery route instead of guessing. 
+ ## Run Execute the next bounded action: diff --git a/docs/guide/introduction.md b/docs/guide/introduction.md index e7fae1dd..781a4cd1 100644 --- a/docs/guide/introduction.md +++ b/docs/guide/introduction.md @@ -1,21 +1,21 @@ # Boundline > [!TIP] -> This wiki is aligned with **Boundline 0.66.0**. For older versions, refer to the repository tags. +> This wiki is aligned with **Boundline 0.67.0**. For older versions, refer to the repository tags. -![Boundline - Bounded Delivery Runtime](https://github.com/apply-the/boundline/blob/main/tech-docs/images/boundline-banner.jpg) +![Boundline - Bounded Delivery Runtime](https://github.com/apply-the/boundline/blob/0.67.0/tech-docs/images/boundline-banner.jpg?raw=true) **The local delivery orchestrator for bounded engineering work.** Turn goals into executed plans safely, without losing control to an opaque AI loop. -## Why Boundline? +## Why Boundline? -- **Goal-Driven Execution:** Translates high-level objectives into concrete, step-by-step technical plans. -- **Session-Based State:** Maintains explicit, resumable session state locally on disk. You are never hostage to ephemeral chat memory. -- **Safe Delivery:** Executes steps safely using your repository's existing constraints and Canon governance rules. -- **Explicit Traces:** Never lose context. Every execution step is recorded in local, auditable traces. -- **Agnostic Architecture:** Seamlessly plugs into external frameworks and capability providers. +- **Goal-Driven Execution:** Translates high-level objectives into concrete, step-by-step technical plans. +- **Session-Based State:** Maintains explicit, resumable session state locally on disk. You are never hostage to ephemeral chat memory. +- **Safe Delivery:** Executes steps safely using your repository's existing constraints and Canon governance rules. +- **Explicit Traces:** Never lose context. Every execution step is recorded in local, auditable traces. 
+- **Agnostic Architecture:** Seamlessly plugs into external frameworks and capability providers. -## How it Works +## How it Works Boundline forces an explicit, inspectable workflow: 1. `goal` → Record the objective for the active session. @@ -23,7 +23,12 @@ Boundline forces an explicit, inspectable workflow: 3. `run` → Execute the next approved step. 4. `inspect` → Report the authoritative runtime state. -## Quick Start +In the 0.67.0 release, `plan` also evaluates plan quality before execution +handoff. If the active plan is missing a credible validation strategy or +another blocking planning input, Boundline stops on one `phase_request` and +keeps the session non-terminal until you answer. + +## Quick Start ```bash boundline doctor --install @@ -52,7 +57,7 @@ Current public repositories for this adapter line: - [boundline-framework-template](https://github.com/apply-the/boundline-framework-template): starter scaffold for a compatible framework adapter that speaks the host-owned V1 subprocess contract. - [boundline-adapter-speckit](https://github.com/apply-the/boundline-adapter-speckit): concrete Speckit-backed adapter implementation that can claim `plan` and `run` when preflight succeeds. -## Key Commands +## Key Commands | Command | What it does | |---|---| @@ -65,11 +70,11 @@ Current public repositories for this adapter line: | `boundline index status` | Report derived-index lifecycle state for local semantic retrieval. | | `boundline index doctor` | Diagnose tracked, stale, corrupt, or degraded derived-index state. 
| -## Deep Dive Documentation +## Deep Dive Documentation Explore the wiki sidebar for details on architecture, configuration, and scaling: - [Getting Started](/guide/getting-started) - [Configuration Reference](/reference/configuration) - [Architecture](/architecture/runtime-model) - [Daily Operating Guide](/guide/common-workflows) -- [Core Concepts](/guide/core-concepts) \ No newline at end of file +- [Core Concepts](/guide/core-concepts) diff --git a/docs/roadmap/index.md b/docs/roadmap/index.md index 75c4c477..9d5af0a2 100644 --- a/docs/roadmap/index.md +++ b/docs/roadmap/index.md @@ -6,7 +6,15 @@ Welcome to the official **Boundline** Roadmap. Here we track the evolution of ou The goal of Boundline is to transform non-deterministic AI iterations into predictable, traceable, and governable software delivery processes. ::: -## Upcoming Features & Topics +## Recently Delivered + +- `067` - Plan Quality Contract: shipped the first planning-readiness gate in + Boundline `0.67.0`. The runtime now stops on one focused `phase_request` + when a plan lacks a credible validation strategy, and status, inspect, and + orchestration surfaces project the additive plan-quality state, findings, + assumptions, and recovery route. + +## Upcoming Features & Topics ### Context Handling & Execution - **Large Codebase Context Substrate**: Handling long-term context limits, lazy hash references for huge files, and anchored hunks for large-file edits. @@ -18,10 +26,10 @@ The goal of Boundline is to transform non-deterministic AI iterations into predi - **AI Gateway & Inference Economics**: Managing route health, local vs remote transmission decisions, route budgets, and latency telemetry. ### Governance & Planning -- **Plan Quality & Backlog Contracts**: Strict execution gates for planning UX and backlog execution. +- **Backlog Contracts**: Strict execution gates for backlog execution. 
- **Evals & Runtime Observability**: Local quality layers, JSONL trace exports, deterministic scoring, and provider evaluations. - **Recursive Stage Refinement Profiles**: Sequential planning profiles (planner -> critic -> finalizer) tightly governed by councils and stop semantics. --- -> Do you have suggestions? Open an issue on our GitHub repository and help us shape the future of Boundline! \ No newline at end of file +> Do you have suggestions? Open an issue on our GitHub repository and help us shape the future of Boundline! diff --git a/docs/runtime/inspect.md b/docs/runtime/inspect.md index 64389d94..fe95a7cb 100644 --- a/docs/runtime/inspect.md +++ b/docs/runtime/inspect.md @@ -1,3 +1,17 @@ # Inspect -This page is under construction. \ No newline at end of file +`inspect` explains why the Boundline 0.67.0 runtime chose the current plan or blocked +handoff. + +## What To Read + +Look for: + +- context summary and credibility +- plan-quality state, findings, and assumptions +- emitted `phase_request` +- withheld or recovered execution handoff +- trace-backed evidence for the next action + +Use `status` first, then `inspect`, then `next` if you need the recovery +route. diff --git a/docs/runtime/phase-requests.md b/docs/runtime/phase-requests.md index 5cded9eb..ca8ba3ed 100644 --- a/docs/runtime/phase-requests.md +++ b/docs/runtime/phase-requests.md @@ -1,3 +1,20 @@ # Phase Requests -This page is under construction. \ No newline at end of file +Boundline 0.67.0 uses `phase_request` as the single recovery handoff for goal +clarification, plan-quality clarification, and planning-stage artifact +requests. 
+ +## Rules + +- ask one question at a time +- preserve `phase_request.request_id` +- answer with the runtime's `expected_answer` shape +- resume with the emitted `resume_command` or assistant-safe route +- do not infer execution from chat history + +## Plan-Quality Requests + +When plan quality is missing a credible validation strategy, the runtime keeps +the session non-terminal, preserves `plan_quality_state`, findings, and +assumptions, and asks the operator for the one missing input that can clear the +gate. diff --git a/docs/runtime/plan.md b/docs/runtime/plan.md index e81c3c57..3d09233d 100644 --- a/docs/runtime/plan.md +++ b/docs/runtime/plan.md @@ -1,3 +1,18 @@ # Plan -This page is under construction. \ No newline at end of file +Boundline 0.67.0 makes plan quality a runtime gate, not a chat convention. + +## What `plan` Does + +- evaluates goal quality first +- evaluates plan quality next +- records `plan_quality_state`, `plan_quality_findings`, and + `plan_quality_assumptions` when present +- stops on one `phase_request` when the plan needs a missing validation + strategy or another blocking planning input +- keeps execution handoff withheld until the gate clears + +## What To Read When It Blocks + +Use `status`, `next`, and `inspect` to see the same runtime decision from +different surfaces. Do not invent execution from chat history. diff --git a/docs/runtime/status.md b/docs/runtime/status.md index fdf1bfcd..d33b0011 100644 --- a/docs/runtime/status.md +++ b/docs/runtime/status.md @@ -1,3 +1,17 @@ # Status -This page is under construction. \ No newline at end of file +`status` is the quickest read on the Boundline 0.67.0 planning gate. + +## What To Look For + +When a plan exists or is blocked, look for: + +- `plan_quality_state` +- `plan_quality_findings` +- `plan_quality_assumptions` +- `next_command` +- `assistant_next_command` +- blocked or recovery guidance + +Older snapshots remain readable. 
The additive plan-quality fields are runtime +output, not configuration keys. diff --git a/docs/runtime/trace.md b/docs/runtime/trace.md index 8f1dbc0c..b47884b0 100644 --- a/docs/runtime/trace.md +++ b/docs/runtime/trace.md @@ -1,6 +1,6 @@ # Traces And Inspectability -Boundline traces make delivery explainable. They preserve what the runtime decided, what it used as evidence, what it ran, what it skipped, what failed, and what should happen next. +Boundline 0.67.0 traces make delivery explainable. They preserve what the runtime decided, what it used as evidence, what it ran, what it skipped, what failed, and what should happen next. ## Where Traces Live @@ -29,6 +29,7 @@ Depending on the command and lifecycle phase, traces can include: - context primary inputs and provenance - plan state and planning rationale - verification strategy +- plan-quality state, findings, assumptions, and the emitted `phase_request` - route owner and route config projection - selected guidance and guardian sources - loaded and skipped packs diff --git a/package.json b/package.json index 63f555fa..97f4adaa 100644 --- a/package.json +++ b/package.json @@ -3,9 +3,9 @@ "version": "1.0.0", "type": "module", "scripts": { - "site:dev": "vitepress dev site", - "site:build": "vitepress build site", - "site:preview": "vitepress preview site" + "docs:dev": "vitepress dev docs", + "docs:build": "vitepress build docs", + "docs:preview": "vitepress preview docs" }, "devDependencies": { "@types/svg-pan-zoom": "^3.3.0", diff --git a/roadmap/Next - forward-roadmap.md b/roadmap/Next - forward-roadmap.md index c5162f8f..96e31330 100644 --- a/roadmap/Next - forward-roadmap.md +++ b/roadmap/Next - forward-roadmap.md @@ -28,11 +28,11 @@ feature under `specs/`. 
| Priority | Feature Seed | Boundline Position | |---|---|---| | **02** | [agentic-framework-integration.md](features/02-agentic-framework-integration.md) | Delivered in 0.66.0: external framework-adapter runtime, corrected split-stage Speckit bridge revalidated on 2026-06-01 | -| **03** | [plan-quality-contract.md](features/03-plan-quality-contract.md) | Planning UX - plan readiness gate | +| **03** | plan-quality-contract.md | Delivered in 0.67.0: first plan-readiness gate, one-question recovery, additive plan-quality projections | | **04** | [backlog-contract.md](features/04-backlog-contract.md) | Planning UX - backlog execution gate | | **05** | [plan-analysis-contract.md](features/05-plan-analysis-contract.md) | Planning UX - cross-artifact analysis | | **06** | [large-codebase-context-substrate.md](features/06-large-codebase-context-substrate.md) | Long-term context handling limits | -| **07** | [external-capability-provider-protocol.md](features/07-external-capability-provider-protocol.md) | Native provider contract (replaces MCP) | +| **07** | [external-capability-provider-protocol.md](features/07-external-capability-provider-protocol.md) | Native provider contract, setup, and activation surface (replaces MCP) | | **08** | [evals-and-runtime-observability.md](features/08-evals-and-runtime-observability.md) | Local quality and regression layer | | **09** | [contextual-help-and-documentation-architecture.md](features/09-contextual-help-and-documentation-architecture.md) | Operator UX feature | | **10** | [review-councils-and-role-gated-governance.md](features/10-review-councils-and-role-gated-governance.md) | Extend delivered S3/S056 work | @@ -43,6 +43,8 @@ feature under `specs/`. 
| **15** | [browser-and-visual-testing-provider.md](features/15-browser-and-visual-testing-provider.md) | Provider via protocol | | **16** | [session-memory-and-repository-knowledge-distillation.md](features/16-session-memory-and-repository-knowledge-distillation.md) | Memory hygiene feature | | **17** | [experimental-recursivemas-provider-adapter.md](features/17-experimental-recursivemas-provider-adapter.md) | Experimental: external latent-space provider research track | +| **18** | [completion-verification-runtime.md](features/18-completion-verification-runtime.md) | Next: fresh-proof gate before task or stage completion | +| **19** | [plan-execution-orchestration.md](features/19-plan-execution-orchestration.md) | Later: sequential execution control plane with checkpoint and resume | ## Canon Companion Dependencies @@ -57,6 +59,7 @@ Boundline should consume those surfaces only through stable, versioned metadata: - packet readiness state - required document list - evidence refs +- progress or handoff packet schemas when Canon provides them - lineage refs - approval state - project-memory promotion status @@ -92,6 +95,20 @@ V1 should define: - explicit permissions: read files, write files, run commands, network, read secrets, write artifacts, allowed paths, runtime limit, output limit +#### Operator Setup And Activation + +Boundline should own the provider onboarding runtime that Canon intentionally +does not own: + +- explicit operator registration and activation +- setup requirement projection before first use +- health or connectivity dry-runs before a provider is marked ready +- no auto-enable from local executable discovery +- secret-handle routing rather than prompt-visible secret capture + +Canon may later record local routing intent, but provider health, permissions, +and activation remain Boundline-owned. 
+ #### Open Model Provider Support Open-weight models should enter Boundline through the provider contract, not as @@ -164,6 +181,44 @@ Open-model promotion requires targeted evals before a route can be recommended: Canon packet-quality evals belong to Canon. Boundline should only carry consumer-side regression checks for the Canon metadata it relies on. +### Completion Verification Runtime + +Boundline should own claim-matched proof execution before work is allowed to +close. + +V1 should support: + +- concrete claim derivation from runtime closeout +- narrowest proof-command selection +- fresh execution in the current working state +- blocked completion when proof is missing, stale, or failing +- `claim -> proof -> evidence_ref` projection for Canon consumption + +Hard boundaries: + +- Boundline must not report task or stage success before the proof is ready +- Canon may govern the meaning of approval or readiness, but it must not own + proof execution + +### Plan Execution Orchestration + +Boundline should own the execution control plane for accepted task registries. 
+ +V1 should support: + +- one sequential execution profile +- active task locking +- task-local validation and completion-proof gating +- explicit checkpoint persistence and resume command projection +- progress and handoff projection that Canon can consume later + +Hard boundaries: + +- this feature is distinct from seed 12 recursive refinement +- no autonomous replanning in the first slice +- blocked, skipped, and deferred states must remain visible rather than being + collapsed into complete + ### Help-Next And Documentation Architecture Boundline should own: diff --git a/roadmap/features/02-agentic-framework-integration.md b/roadmap/features/02-agentic-framework-integration.md deleted file mode 100644 index 11a47cc1..00000000 --- a/roadmap/features/02-agentic-framework-integration.md +++ /dev/null @@ -1,152 +0,0 @@ -# Boundline as Generic Agentic Framework: Integration Report - -> Analysis of how Boundline can evolve from an orchestrator tightly coupled to Canon into an -> **agnostic orchestration engine (Agentic Framework Engine)**, capable of supporting -> proprietary frameworks (harnesses) through an adapter and override system. -> -> **Constraints**: -> - Boundline is **open source**. -> - Integration with specific or proprietary frameworks happens through **external binary adapters** (separate repositories). -> - A local reference template exists in the sibling `boundline-framework-template` repo. -> - **No MCP dependency as a core architectural layer**: capability abstraction uses our native Provider Protocol. - -## Delivery Status - -- Status: Delivered in Boundline `0.66.0` -- Primary implementation: `specs/066-agentic-framework-integration/` -- Outcome: Boundline now ships one explicit framework-adapter slot per - workspace, the `speckit` known profile, custom-adapter registration, - operator-visible routing and compatibility inspection, and the sibling - template plus Speckit scaffolds aligned to the released V1 stdio contract. 
-- Post-correction validation: the 2026-06-01 rerun confirmed the corrected - split-stage mapping (`speckit-planning` for `plan`, - `speckit-implementation` for `run`), the bridge-owned analyze readiness loop, - implementation-only run behavior, and retirement of the legacy combined - workflow surface. - ---- - -## 1. Architectural Vision: Canon as Default, Adapters as Overrides - -Boundline must not lose its out-of-the-box value. - -**The golden rule:** -Boundline always ships with **Canon as the default**. If no adapter is configured, the lifecycle phases (`goal`, `plan`, `run`, `review`) are handled by the native Canon-backed logic. - -**The partial-override abstraction:** -An external adapter (for example, a custom compiled Rust binary) does not need to replace the whole system. It can register to override a single step. -For example, a company adapter might declare: *"Use Canon for `goal` and `plan`, but intercept the `run` phase to apply my own destructive hooks and policies."* - ---- - -## 2. Injection and Registration System - -How does an external Rust binary get recognized by Boundline and receive its configuration? - -### A. Discovery and Registration -Boundline should adopt a model inspired by Git or Terraform plugins: -1. **Config-based**: In `.boundline/config.toml`, the operator declares the adapter: - ```toml - [framework.adapter] - command = "boundline-harness-gridspertise" # or an absolute path - ``` -2. **Naming Convention (Optional)**: Boundline can also automatically search `PATH` for binaries whose names start with `boundline-plugin-*`. - -### B. Handshake (Capabilities and Config Injection) -When Boundline boots a session, it invokes the adapter binary with a handshake command (for example, via JSON-RPC over stdin by sending `{"method": "capabilities"}`). 
- -The adapter responds with its manifest: -```json -{ - "name": "system-harness-template", - "overrides": ["plan", "run"], // Declares which stages it wants to intercept - "hooks": ["on_error", "on_step_pre"], // Declares which global hooks it listens to - "config_schema": { // Requests configuration that Boundline must supply - "harness_repo": "string", - "strict_mode": "boolean" - } -} -``` - -### C. Auto-configuration -Based on the returned `config_schema`, Boundline is responsible for: -- Checking whether `.boundline/config.toml` already contains those fields. -- If not, prompting the user for missing values during `boundline init` or at startup, or writing defaults. -- Passing the fully populated configuration block to the adapter on every subsequent invocation. - ---- - -## 3. JSON Protocol over Stdin/Stdout - -Communication should not happen through linked dynamic libraries (too fragile, ABI issues), but through a **Subprocess Protocol (JSON over stdin/stdout)**, following the same robust design approach used between LSPs (Language Servers) and IDEs. - -**Boundline request to the adapter (override of the `plan` phase):** -```json -{ - "method": "execute_stage", - "params": { - "stage": "plan", - "session_id": "abc-123", - "workspace_ref": "/path/to/workspace", - "adapter_config": { - "harness_repo": "https://github.com/org/repo", - "strict_mode": true - }, - "context": { ... } // State gathered so far from Canon or Boundline - } -} -``` - -**Adapter response:** -```json -{ - "result": { - "status": "success", - "artifacts_produced": ["/path/to/plan.md"], - "phase_request": null // If the adapter needs user input, it returns that here - } -} -``` - ---- - -## 4. Repository Architecture (The 3-Repo Model) - -This design confirms the usefulness of the local template you created: - -1. **`boundline` (Open Source)**: - Contains the orchestrator, the JSON-RPC engine, and the **default** Canon implementation. No proprietary third-party framework logic lives here. 
-2. **`boundline-framework-template` (Open Source Template)**: - The scaffolding repository you already created. It contains a ready-to-use JSON-RPC server, the correct Rust types, and empty methods (`fn execute_stage()`, `fn on_error()`). Anyone who wants to build a custom company-specific agentic framework can fork this repo. -3. **`my-company-harness-adapter` (Proprietary / Custom)**: - The final binary compiled by the customer from the template. It would contain custom rules, `.github/hooks/` handling, or integrations with closed internal pipelines. - ---- - -## 5. Mapping a Proprietary Harness to Boundline - -A framework adapter can cover the logic of a company-specific `system-harness-template` by mapping its needs onto Boundline capabilities: - -| External Framework Need | Boundline Adapter Solution | -|---|---| -| Custom lifecycle phases | The adapter declares `overrides: ["goal", "plan", "run"]` and injects its own logic. | -| Custom audit logs | The adapter registers for `on_step_post` and `on_session_end` hooks and writes its own logs. | -| Sensors / Quality / Linting | The adapter maps its own destructive scripts into `evaluate_gate` responses or `on_step_pre`. | -| Error handling (triage) | The adapter registers for `on_error`, reads telemetry, and decides whether to retry, block, or repair. | -| Platform integrations (Jira/CI) | No MCP. The adapter uses Boundline's native External Capability Provider Protocol or executes direct binaries/scripts. | - ---- - -## 6. Next Steps (Action Items To Extend This Delivered Spec) - -The baseline feature is now shipped. Follow-up work belongs in new feature -seeds or specs when one of these expansions becomes a bounded delivery slice: - -1. broaden the adapter stage or hook catalog beyond the initial bounded set -2. replace duplicated sibling-repo protocol scaffolds with a released shared - dependency line when the packaging policy is ready -3. add additional known profiles beyond `speckit` -4. 
introduce future transports or graceful-shutdown semantics beyond the - current one-shot stdio contract - -This design keeps Boundline fully reusable and optionally framework-agnostic, while preserving a safe and polished default UX through Canon when no adapter is configured. diff --git a/roadmap/features/03-plan-quality-contract.md b/roadmap/features/03-plan-quality-contract.md deleted file mode 100644 index 8dee5298..00000000 --- a/roadmap/features/03-plan-quality-contract.md +++ /dev/null @@ -1,89 +0,0 @@ -# Boundline Plan Quality Contract - -## Summary - -Add a Speckit-inspired quality contract to Boundline planning without turning Boundline into a file-first `specs/` workflow. The runtime remains authoritative: `/boundline-plan` plans the active session from an already captured goal, and planning must stop on a structured `phase_request` whenever the plan lacks enough quality to proceed safely. - -This follows the same shape as the goal quality contract: additive runtime fields, concise findings, accepted assumptions, assistant-safe routing, and one interactive gate at a time. - -## Speckit Seed Notes - -- Seed role: first planning-readiness gate in the Speckit analogue sequence. -- First slice: expose `plan_quality_state` and block execution for one missing - validation-strategy case while preserving `phase_request` routing. -- Depends on: existing goal-quality gate and assistant-safe handoff fields. -- De-duplication: shared gate rendering, `phase_request` handling, and - assistant routing should be reused by backlog and analysis gates rather than - restated in separate implementations. 
- -## Public And Runtime Interface Changes - -Add these optional fields to session status, orchestrate session snapshots, and rendered status output when a plan exists or planning is blocked: - -- `plan_quality_state`: `ready`, `clarification_required`, or `blocked` -- `plan_quality_findings`: concise machine-readable labels for missing or weak planning inputs -- `plan_quality_assumptions`: inferred defaults accepted by the runtime - -The fields are additive. Existing consumers that ignore unknown JSON fields must continue to work. Existing `phase_request`, `assistant_resume_command`, and `assistant_next_command` remain the interactive contract. - -## Runtime Behavior - -Plan quality validation runs after goal quality is satisfied and before Boundline offers execution. - -The runtime should check that the plan has: - -- technical context sufficient for implementation -- explicit constraints and implementation boundaries -- architecture or approach decisions, including rationale where relevant -- validation strategy tied to the goal success criteria -- governance or risk implications when materially relevant -- no unresolved `NEEDS CLARIFICATION` equivalent in runtime-owned planning state - -If quality is insufficient, planning remains non-terminal and emits `phase_request` with exactly one question. The backlog of possible questions is bounded and prioritized by impact: scope and safety first, then user-facing behavior, then technical detail. 
- -Speckit-style artifacts should be mapped to Boundline artifact roles, not required as files: - -- research decisions map to planning rationale or Canon discovery/requirements packets -- data model maps to system-shaping or architecture packets -- contracts map to architecture or backlog packets -- quickstart maps to validation strategy or run brief evidence - -## Assistant Asset Updates - -Update `/boundline-plan` assets for Copilot, Claude, Codex, and Antigravity with the standardized planning sections: - -- `User Input` -- `Pre-Execution Checks` -- `Execution Flow` -- `Plan Quality Validation` -- `Reasonable Defaults` -- `Gate Handling` -- `Output Interpretation` -- `Next-Step Routing` -- `Done When` - -The assets must state that planning cannot proceed from chat-only assumptions when either `goal_quality_state` or `plan_quality_state` is blocked. They must preserve `plan_quality_state`, `plan_quality_findings`, `plan_quality_assumptions`, and any emitted `phase_request`. - -## Tests - -Add unit and contract coverage for: - -- planning blocks when goal quality is unresolved -- planning blocks when technical context or validation strategy is missing -- low-impact omitted details are recorded as `plan_quality_assumptions` -- status and orchestrate JSON include plan quality projection when present -- assistant plan assets contain the standardized sections and blocked-quality routing rules -- existing planning flows continue to pass when quality is ready - -Run: - -- `cargo test --test unit` -- `cargo test --test contract` -- `cargo test --test integration human_input_capture_flow::` - -## Assumptions - -- No new CLI subcommand is required. -- No `specs/` feature directory or Speckit file generation is added. -- Speckit hooks remain out of scope; Boundline uses `phase_request` handoffs. -- Canon may provide planning packets, but Boundline owns the final planning readiness projection. 
diff --git a/roadmap/features/07-external-capability-provider-protocol.md b/roadmap/features/07-external-capability-provider-protocol.md index c792b573..9a564257 100644 --- a/roadmap/features/07-external-capability-provider-protocol.md +++ b/roadmap/features/07-external-capability-provider-protocol.md @@ -11,8 +11,9 @@ High-priority architecture feature ## Speckit Seed Notes - Seed role: native capability boundary for external systems. -- First slice: implement discovery, `health`, `prepare`, and one read-only - `execute` path for a provider that returns findings and evidence only. +- First slice: implement discovery, explicit operator registration, + `health`, setup-requirement projection, and one read-only `execute` path for + a provider that returns findings and evidence only. - Depends on: event/trace schema from evals and observability, or a deliberately minimal trace projection if this seed lands first. - De-duplication: permission envelope lives here; sandbox enforcement lives in @@ -22,7 +23,7 @@ High-priority architecture feature This feature makes Boundline framework-agnostic without turning it into an uncontrolled plugin runner. -External systems may provide bounded capabilities. Boundline keeps session state, permissions, trace, evidence validation, and admission control. +External systems may provide bounded capabilities. Boundline keeps session state, permissions, trace, evidence validation, admission control, and setup flow. ## Problem @@ -34,6 +35,8 @@ Without a generic provider protocol, Boundline will accumulate one-off adapters: - custom sandbox adapter - custom MCP adapter - custom research adapter +- unsafe one-off setup prompts for provider configuration +- accidental activation of locally discoverable executables That creates adapter sprawl and inconsistent trust boundaries. 
@@ -166,6 +169,25 @@ Later adapters: - sandbox provider - browser provider +## Operator Setup And Activation + +Provider onboarding is a Boundline runtime concern, not Canon setup logic. + +V1 should support: + +- explicit operator registration and activation of a provider +- setup requirement projection before first use +- non-secret configuration capture through interactive or config-driven flows +- secret-handle references rather than prompt-visible secret values +- connectivity or health dry-run before activation is marked ready +- atomic setup so an interrupted flow leaves the previous active config intact + +Hard boundaries: + +- a locally discoverable executable must not auto-enable itself as a provider +- setup must not persist raw secrets in traces or tracked files +- provider activation must remain visible in status and inspect + ## Provider Types - read-only context provider @@ -202,6 +224,12 @@ Suggested shape: - Boundline can discover a provider's capabilities. - Boundline can reject an unavailable provider before run. +- Boundline requires explicit operator registration before a provider can be + activated. +- Boundline can project required setup fields and block activation until they + are satisfied. +- Boundline can run a health or connectivity check before marking the provider + ready. - Provider execution is permission-scoped. - Provider output cannot directly mutate Boundline state without validation. - Evidence packets are trace-linked. @@ -211,6 +239,8 @@ Suggested shape: ## Risks - External providers become trusted implicitly. +- Local executables become active accidentally. +- Setup leaks secrets into prompts or traces. - Hidden provider state makes runs non-reproducible. - Permissions are too broad. - Protocol is too generic to validate. @@ -218,3 +248,4 @@ Suggested shape: ## Hard Rule Boundline owns admission control. Providers never approve themselves. +Discoverability is not activation. 
diff --git a/roadmap/features/13-sandboxed-execution-and-secret-inheritance.md b/roadmap/features/13-sandboxed-execution-and-secret-inheritance.md index abf133b5..18464cfa 100644 --- a/roadmap/features/13-sandboxed-execution-and-secret-inheritance.md +++ b/roadmap/features/13-sandboxed-execution-and-secret-inheritance.md @@ -79,6 +79,8 @@ Use: - scoped secret access - redacted trace output - provider permission checks +- setup and health-check flows that consume handles instead of prompt-visible + secret values - no secret persistence in sandbox artifacts unless approved ## Algorithms And Techniques @@ -124,6 +126,8 @@ Canon or Boundline governance can require sandboxing for: - Boundline can execute a command in local sandbox. - Sandbox mutation does not affect workspace until commit. - Secrets are never written to prompt or plain trace. +- Provider setup and connectivity flows can consume secret handles without + surfacing raw values in prompt-visible context. - Artifacts are captured and trace-linked. - Sandbox failures preserve evidence. - Red-zone work can require sandbox mode. diff --git a/roadmap/features/18-completion-verification-runtime.md b/roadmap/features/18-completion-verification-runtime.md new file mode 100644 index 00000000..c1b54c54 --- /dev/null +++ b/roadmap/features/18-completion-verification-runtime.md @@ -0,0 +1,116 @@ +# Boundline Completion Verification Runtime + +## Summary + +Add a runtime-owned completion-verification gate to Boundline so the system +cannot mark a task, step, or run as complete until a claim-matched proof has +been freshly executed in the current working state. Canon remains the governed +owner of completion packet semantics, readiness, and approval metadata. +Boundline owns proof selection, command execution, blocked-state projection, +and evidence capture. + +## Speckit Seed Notes + +- Seed role: runtime half of Canon completion-verification semantics. 
+- First slice: require one claim-matched proof command before a task can move + to complete in the default sequential run path. +- Depends on: current task/run status projection, command execution surfaces, + and evidence capture; should emit additive state rather than redesign status. +- De-duplication: Canon owns `claim -> proof -> evidence_ref` semantics and + approval/readiness language; this seed owns command choice, execution, and + runtime blocked states. + +## Public And Runtime Interface Changes + +Add optional completion-verification projection fields to session status, +orchestrate snapshots, and rendered output when work is about to close or is +blocked on proof: + +- `completion_verification_state`: `ready`, `proof_required`, `blocked`, or + `failed` +- `completion_verification_findings`: concise labels for stale, missing, or + mismatched proof +- `completion_blocked_claims`: claims that still lack valid proof +- `completion_evidence_refs`: evidence refs generated by the most recent fresh + proof runs + +The fields are additive and must not break existing status or inspect +consumers. 
+ +## Runtime Behavior + +Before Boundline marks a task, stage, or run complete, it should: + +- derive the concrete claim being made, such as `tests_pass`, `bug_fixed`, + `build_clean`, or `migration_valid` +- choose the narrowest available falsifying command for that claim rather than + accepting a broad green aggregate +- require a fresh proof run in the current working state rather than trusting + stale output +- record exit code, summary lines, proof command, and evidence refs +- block completion when proof is missing, stale, fails, or does not match the + claim +- project the resulting `claim -> proof -> evidence_ref` output so Canon can + consume it without re-owning execution + +The first slice should stay simple: + +- one active proof command per claimed task outcome +- one blocked state at a time +- sequential execution only +- no speculative parallel proof scheduling + +## Assistant Asset Updates + +Update Boundline run and status assets so they: + +- never report success when `completion_verification_state` is not `ready` +- surface blocked claims and the exact proving command as the next action +- preserve `completion_verification_findings`, `completion_blocked_claims`, and + `completion_evidence_refs` +- explain that Canon may later govern packet closeout, but Boundline owns proof + execution and task completion gating + +## Tests + +Add unit, contract, and integration coverage for: + +- a task cannot close when no proving command exists for the claimed outcome +- stale green output does not satisfy completion verification +- a failing proof command leaves the task blocked with a visible reason +- a passing fresh proof produces evidence refs and unblocks completion +- status and orchestrate output include completion-verification projection when + present +- assistant assets do not emit success language while proof is missing or stale + +Run: + +- `cargo test --test unit` +- `cargo test --test contract` +- `cargo test --test integration` + +## Canon 
Boundary + +No Canon files are changed as part of this Boundline planning document. + +Canon owns: + +- completion packet semantics +- readiness and approval language +- evidence consumption rules +- blocked-claim artifact meaning + +Boundline owns: + +- proof command selection +- fresh execution +- task-complete blocking +- runtime state projection +- evidence-ref capture + +## Assumptions + +- The first slice does not need a new CLI command. +- Existing execution surfaces can run the proving command. +- Canon may consume the emitted evidence refs later, but Boundline does not + wait for Canon packet generation before blocking unsafe completion. \ No newline at end of file diff --git a/roadmap/features/19-plan-execution-orchestration.md b/roadmap/features/19-plan-execution-orchestration.md new file mode 100644 index 00000000..156cde66 --- /dev/null +++ b/roadmap/features/19-plan-execution-orchestration.md @@ -0,0 +1,124 @@ +# Boundline Plan Execution Orchestration + +## Summary + +Add a runtime-owned execution control plane to Boundline so accepted plans and +validated backlogs can run as an inspectable sequence of bounded tasks with +checkpointing, pause and resume, and explicit blocked-state handling. Canon may +consume progress and handoff packets, but Boundline owns task ordering, +mutation-surface control, validation loops, and resume semantics. + +This seed is not the same as recursive stage refinement in seed 12. Seed 12 is +about repeated planning rounds within one stage. This seed is about executing an +already accepted multi-task plan over time. + +## Speckit Seed Notes + +- Seed role: runtime control plane for multi-task execution. +- First slice: one opt-in sequential execution profile that advances one task at + a time, checkpoints after each verified outcome, and can resume from the last + explicit checkpoint. +- Depends on: planning-readiness gates, backlog validation, completion + verification runtime, and existing status/inspect/session surfaces. 
+- De-duplication: seed 12 owns recursive refinement rounds; seed 07 owns + provider lifecycle; seed 13 owns sandbox enforcement; Canon owns progress and + handoff packet semantics when those artifacts are exported. + +## Public And Runtime Interface Changes + +Add optional execution-orchestration projection fields to session status, +orchestrate snapshots, and rendered output when a run is executing from a task +registry: + +- `execution_plan_state`: `ready`, `running`, `paused`, `blocked`, or + `completed` +- `execution_current_task_id`: the task currently locked for execution +- `execution_completed_task_count`: total completed tasks in the active run +- `execution_blocked_task_ids`: tasks blocked on findings or missing proof +- `execution_checkpoint_ref`: the last durable checkpoint or handoff ref +- `execution_resume_command`: exact resume route when a run is paused or + interrupted + +The fields are additive and must not break existing runtime consumers. + +## Runtime Behavior + +When execution orchestration is active, Boundline should: + +- load the accepted plan and task registry in dependency order +- select one runnable task at a time +- lock the active mutation surface so overlapping tasks cannot run concurrently +- dispatch the bounded execution path for that task using existing runtime, + provider, or sandbox surfaces as appropriate +- require task-local validation and completion-verification proof before moving + the task to complete +- checkpoint state after each completed, blocked, skipped, or deferred task +- expose the active task, stop reason, and next action in status and inspect +- resume from the last explicit checkpoint rather than recomputing state from + chat or inferred diffs + +The first slice should stay intentionally narrow: + +- one sequential runner only +- no overlapping parallel task execution +- no autonomous replanning +- no implicit task creation +- one checkpoint format and one resume path + +## Assistant Asset Updates + +Update 
Boundline run, status, and inspect assets so they: + +- preserve `execution_plan_state`, `execution_current_task_id`, + `execution_blocked_task_ids`, `execution_checkpoint_ref`, and + `execution_resume_command` +- do not report a run as completed when tasks remain blocked, skipped, or + deferred without explicit projection +- explain the difference between paused, blocked, and finished +- route interrupted runs to the exact resume command instead of suggesting a + new ad hoc start + +## Tests + +Add unit, contract, and integration coverage for: + +- sequential execution respects task dependency order +- overlapping mutation surfaces do not run concurrently +- a blocked task halts downstream execution until the operator resolves it +- a verified completed task advances the checkpoint +- paused runs resume from the last checkpoint instead of recomputing progress +- status and inspect surfaces project current task, state, and resume command + +Run: + +- `cargo test --test unit` +- `cargo test --test contract` +- `cargo test --test integration` + +## Canon Boundary + +No Canon files are changed as part of this Boundline planning document. + +Canon owns: + +- progress and handoff packet semantics +- task-state artifact meaning +- evidence-ref schema consumption + +Boundline owns: + +- runnable-task selection +- task locking +- execution dispatch +- checkpoint persistence +- resume behavior +- completion gating before task closeout + +## Assumptions + +- The first slice can operate on an already validated backlog or equivalent task + registry. +- Existing provider and sandbox surfaces can be reused rather than redefined. +- Exporting Canon progress or handoff packets is optional in the first slice; + Boundline may project equivalent state internally before Canon integration is + wired. 
\ No newline at end of file diff --git a/roadmap/features/README.md b/roadmap/features/README.md index b24f809d..3b954abc 100644 --- a/roadmap/features/README.md +++ b/roadmap/features/README.md @@ -31,12 +31,15 @@ Use the local Speckit templates as the source of truth when converting a seed: | Lane | Seeds | Purpose | First Speckit Slice | |---|---|---|---| -| Now | 03, 04, 05 | Planning readiness gates | Surface one runtime gate at a time and block invalid execution handoff. | +| Delivered | 03 | Planning readiness gate | Shipped in 0.67.0; preserves one-question `phase_request` recovery and additive plan-quality projections. | +| Now | 04, 05 | Planning readiness gates | Surface one runtime gate at a time and block invalid execution handoff. | | Now | 08 | Measurement substrate | Add local event schema plus a tiny golden eval corpus before expanding AI behavior. | | Now | 06 | Large-repo safety | Refuse unsafe huge reads, add paged reads, and show omitted context in inspect. | -| Next | 07 | Provider boundary | Implement one read-only provider lifecycle before mutation providers. | +| Next | 07 | Provider boundary and setup | Implement discovery, explicit operator registration, health, and one read-only provider lifecycle before mutation providers. | +| Next | 18 | Completion proof gate | Block task or stage closeout until a claim-matched proof command runs freshly. | | Next | 09 | Operator discoverability | Start with Boundline `help-next`; keep Canon help as Canon-owned. | | Next | 13 | Execution isolation | Add one local test sandbox mode with artifact capture before mutation commit. | +| Later | 19 | Execution control plane | Add one sequential task runner with checkpoint and resume after proof gating is stable. | | Later | 10, 11 | Governance hardening | Treat as deltas over shipped council/adaptive docs, not greenfield systems. 
| | Later | 12 | Recursive refinement | Add one bounded, inspectable sequential stage-refinement profile after council and adaptive-governance hardening. | | Later | 14 | Route economics | Add route telemetry and budgets after provider protocol and evals exist. | @@ -52,6 +55,8 @@ Seed 02 is intentionally not revised by this pass. 03 plan gate -> 04 backlog gate -> 05 planning analysis + -> 18 completion verification runtime + -> 19 plan execution orchestration -> 08 evals and observability -> 06 large-codebase context substrate -> 07 provider protocol @@ -68,11 +73,13 @@ Seed 02 is intentionally not revised by this pass. 09 help-next can start after the probe/readiness surfaces are available. +18 completion verification runtime + -> 19 plan execution orchestration + 07 provider protocol -08 evals and observability -14 AI gateway economics -12 recursive stage refinement -bounded long-lived local provider lifecycle +13 sandbox execution +19 plan execution orchestration +bounded execution backends and operator setup -> 17 experimental RecursiveMAS provider ``` @@ -94,9 +101,10 @@ new `spec.md`: | Cluster | Overlap | Ownership Decision | |---|---|---| -| Planning gates | 03, 04, and 05 all repeat gate state, `phase_request`, and assistant routing language. | Keep the shared gate and handoff mechanics in one planning-readiness interface; each seed should add only its own validation fields and findings. | +| Planning gates | 03, 04, and 05 all repeat gate state, `phase_request`, and assistant routing language. | Keep the shared gate and handoff mechanics in one planning-readiness interface; 03 is now shipped, and 04/05 should add only their own validation fields and findings. | | Canon companion work | 04, 08, 09, and 15 all mention Canon-owned packets, help, evals, or project memory. | Boundline consumes stable Canon metadata. Canon schema, mode docs, packet-quality evals, and memory promotion need Canon Speckit features. 
| -| Provider permissions | 07, 13, and 15 all describe path, network, secret, artifact, and evidence permissions. | 07 owns the request permission envelope; 13 enforces sandbox policy; 15 consumes the envelope as a browser provider. | +| Provider permissions | 07, 13, and 15 all describe path, network, secret, artifact, and evidence permissions. | 07 owns the request permission envelope and provider setup or activation surface; 13 enforces sandbox policy and secret-handle execution rules; 15 consumes the envelope as a browser provider. | +| Execution ownership | 18, 19, Canon completion or handoff seeds, and provider or sandbox execution surfaces all mention proof, task state, checkpointing, or blocked execution. | 18 owns proof execution and completion blocking; 19 owns task ordering, checkpoint, and resume; 07 and 13 provide provider and sandbox backends; Canon owns packet semantics and evidence consumption only. | | Telemetry | 08, 14, and 15 all list events, route metrics, artifacts, and latency/cost signals. | 08 owns event schema and eval fixtures; 14 owns route economics decisions; concrete providers emit events into 08. | | Councils and adaptive governance | 10 and 11 overlap with shipped docs under `docs/review-*`, `tech-docs/adaptive-governance.md`, `tech-docs/control-graduation-model.md`, and `tech-docs/runtime-confidence-and-calibration.md`. | Future specs must name the missing delta instead of reimplementing council profiles, voting, confidence, or degradation from scratch. | | Memory | 16 overlaps with `tech-docs/project-memory-and-evidence-structure.md` and Canon project memory. | 16 owns workspace-local, confirmation-first operational memory proposals. Durable governed knowledge remains docs/Canon-owned. 
| diff --git a/roadmap/joint-roadmap-graph.md b/roadmap/joint-roadmap-graph.md new file mode 100644 index 00000000..d87d69a4 --- /dev/null +++ b/roadmap/joint-roadmap-graph.md @@ -0,0 +1,91 @@ +# Canon & Boundline Joint Feature Rollout + +This document illustrates the operational sequence for the joint development of Canon and Boundline features. It encompasses all features from both roadmaps, grouping them by domain and showing critical execution dependencies. + +## Dependency Graph + +```mermaid +flowchart TD + %% Styling + classDef canon fill:#5b5b95,stroke:#333,stroke-width:2px,color:#fff + classDef boundline fill:#1f6b4e,stroke:#333,stroke-width:2px,color:#fff + + subgraph Core Foundations + B02["Boundline 02
(Framework Integration)"]:::boundline + B03["Boundline 03
(Plan Quality)"]:::boundline + B04["Boundline 04
(Backlog Contract)"]:::boundline + B05["Boundline 05
(Plan Analysis)"]:::boundline + B06["Boundline 06
(Context Substrate)"]:::boundline + end + + subgraph Verification Integrity + C02["Canon 02
(Verification Gates)"]:::canon + B18["Boundline 18
(Verification Runtime)"]:::boundline + end + + subgraph Execution & Orchestration + B19["Boundline 19
(Plan Orchestrator)"]:::boundline + C03["Canon 03
(Handoff Schemas)"]:::canon + B10["Boundline 10
(Review Councils)"]:::boundline + B11["Boundline 11
(Adaptive Governance)"]:::boundline + B12["Boundline 12
(Recursive Refinement)"]:::boundline + end + + subgraph Providers & Extensibility + B07["Boundline 07
(Provider Protocol)"]:::boundline + B13["Boundline 13
(Sandbox Exec)"]:::boundline + C07["Canon 07
(Integration Onboarding)"]:::canon + B14["Boundline 14
(AI Gateway)"]:::boundline + B15["Boundline 15
(Browser Testing)"]:::boundline + B17["Boundline 17
(RecursiveMAS Adapter)"]:::boundline + end + + subgraph Observability & Memory + C06["Canon 06
(Observability Design)"]:::canon + B08["Boundline 08
(Evals & Observability)"]:::boundline + B16["Boundline 16
(Session Memory)"]:::boundline + end + + subgraph Advanced Workflows & Policy + C01["Canon 01
(Systematic Debugging)"]:::canon + C04["Canon 04
(Brainstorming Ideation)"]:::canon + C05["Canon 05
(Policy Shaping)"]:::canon + B09["Boundline 09
(Contextual Help)"]:::boundline + end + + %% Key Dependencies + B06 -.-> C02 + C02 ---|Hard Pair| B18 + B18 -->|Hard Dependency| B19 + B19 -->|Triggers Export| C03 + B19 --> B07 + B07 -->|Activates| B13 + B13 -->|Enables| C07 + + B07 -.-> B14 + B07 -.-> B15 + B07 -.-> B17 + + C06 -.->|Design for| B08 + C02 -.->|Inherits rules| C01 + B18 -.->|Enhances| C01 + C05 -.->|Pairs well| C06 +``` + +## Execution Order and Dependencies + +1. **Core Foundations (Boundline 02-06)** + - The foundational components for repository structure, configuration, basic backlog/plan logic, and context ingestion. These are largely independent precursors to execution engines. +2. **Canon 02 + Boundline 18 (Verification Pair)** + - The first crucial execution juncture. Canon defines the `claim -> proof -> evidence_ref` contract, while Boundline implements the runtime that executes the proof and blocks task completion. +3. **Boundline 19 (Execution Orchestrator)** + - Depends directly on `Boundline 18` to ensure that task ordering, checkpointing, and resume logic rely on a solid verification gate. +4. **Canon 03 (Parallel to 19)** + - Defines purely the handoff/progress schema. It can be developed in parallel to the Boundline execution engine, or right before its integration to allow Boundline to export compatible packets. +5. **Boundline 07 -> Boundline 13 (Provider Layer)** + - The actual external provider setup (MCP, setup, activation, health). `Boundline 07` comes first, followed by the security layer `Boundline 13` (secret inheritance and sandbox). It establishes the plugin layer that powers B14, B15, and B17. +6. **Canon 07 (After provider setup)** + - Arrives at the end to close the loop on the CLI side (Canon init) by gathering local routing choices, delegating execution back to Boundline. +7. **Independent Features (Canon 01, 04, 05, 06 & Boundline 08-12, 16)** + - These features cover autonomous workflows, policy, observability, and advanced orchestrator additions. 
They do not block the core engine loop and can be parallelized based on priority. + - *(Note on Canon 01: It has a soft dependency on Canon 02. While it can start immediately without hard blockers, once Canon 02 lands, Canon 01 will automatically inherit its rigid verification gates).* diff --git a/scripts/site-dev.ps1 b/scripts/docs-dev.ps1 similarity index 100% rename from scripts/site-dev.ps1 rename to scripts/docs-dev.ps1 diff --git a/scripts/site-dev.sh b/scripts/docs-dev.sh similarity index 91% rename from scripts/site-dev.sh rename to scripts/docs-dev.sh index 31631cff..bdeb5f80 100755 --- a/scripts/site-dev.sh +++ b/scripts/docs-dev.sh @@ -6,4 +6,4 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR/.." echo "Starting Boundline VitePress dev server..." -npm run site:dev +npm run docs:dev diff --git a/scripts/update-docs-versions.ps1 b/scripts/update-docs-versions.ps1 new file mode 100644 index 00000000..59cfa883 --- /dev/null +++ b/scripts/update-docs-versions.ps1 @@ -0,0 +1,50 @@ +# Update documentation version references based on Cargo.toml version + +if (-not (Test-Path "Cargo.toml")) { + Write-Error "Cargo.toml not found in the current directory." + exit 1 +} + +if (-not (Test-Path "docs")) { + Write-Warning "docs/ directory not found. Skipping." + exit 0 +} + +$cargoToml = Get-Content -Raw -Path "Cargo.toml" +$version = $null + +# Try to find version in workspace.package first +if ($cargoToml -match '(?s)\[workspace\.package\].*?version\s*=\s*"([^"]+)"') { + $version = $Matches[1] +} elseif ($cargoToml -match '(?s)\[package\].*?version\s*=\s*"([^"]+)"') { + $version = $Matches[1] +} + +if (-not $version) { + Write-Error "Could not extract version from Cargo.toml." 
+ exit 1 +} + +Write-Host "Updating documentation references in docs/ to version: $version" + +$files = Get-ChildItem -Path "docs" -Filter "*.md" -Recurse +foreach ($file in $files) { + $content = Get-Content -Raw -Path $file.FullName + + # Check if we have work to do before writing + if ($content -match 'blob/\d+\.\d+\.\d+|tree/\d+\.\d+\.\d+|Canon \d+\.\d+\.\d+|Boundline \d+\.\d+\.\d+') { + $newContent = $content -replace 'blob/\d+\.\d+\.\d+', "blob/$version" + $newContent = $newContent -replace 'tree/\d+\.\d+\.\d+', "tree/$version" + $newContent = $newContent -replace 'Canon \d+\.\d+\.\d+', "Canon $version" + $newContent = $newContent -replace 'Boundline \d+\.\d+\.\d+', "Boundline $version" + + if ($content -ne $newContent) { + # Write back using UTF-8 (No BOM) + $utf8NoBom = New-Object System.Text.UTF8Encoding($false) + [System.IO.File]::WriteAllText($file.FullName, $newContent, $utf8NoBom) + Write-Host " Updated: $($file.FullName)" + } + } +} + +Write-Host "Done!" diff --git a/scripts/update-docs-versions.sh b/scripts/update-docs-versions.sh new file mode 100755 index 00000000..ca1a9769 --- /dev/null +++ b/scripts/update-docs-versions.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Ensure we are in a rust workspace root with Cargo.toml +if [ ! -f Cargo.toml ]; then + echo "Error: Cargo.toml not found in the current directory." >&2 + exit 1 +fi + +if [ ! -d docs ]; then + echo "Warning: docs/ directory not found. Skipping." + exit 0 +fi + +# Extract version from Cargo.toml +VERSION=$(grep -A 10 "\[workspace.package\]" Cargo.toml | grep -E '^version\s*=\s*' | cut -d '"' -f 2 || true) +if [ -z "$VERSION" ]; then + # Fallback to standard [package] + VERSION=$(grep -A 10 "\[package\]" Cargo.toml | grep -E '^version\s*=\s*' | cut -d '"' -f 2 || true) +fi + +if [ -z "$VERSION" ]; then + echo "Error: Could not extract version from Cargo.toml." 
>&2 + exit 1 +fi + +echo "Updating documentation references in docs/ to version: $VERSION" + +# Find and update markdown files recursively using perl for portability across macOS and Linux +find docs -type f -name "*.md" | while read -r file; do + # Check if the file contains any of our target patterns to avoid unnecessary writes + if grep -q -E "blob/[0-9]+\.[0-9]+\.[0-9]+|tree/[0-9]+\.[0-9]+\.[0-9]+|Canon [0-9]+\.[0-9]+\.[0-9]+|Boundline [0-9]+\.[0-9]+\.[0-9]+" "$file"; then + perl -pi -e "s|blob/\d+\.\d+\.\d+|blob/$VERSION|g" "$file" + perl -pi -e "s|tree/\d+\.\d+\.\d+|tree/$VERSION|g" "$file" + perl -pi -e "s|Canon \d+\.\d+\.\d+|Canon $VERSION|g" "$file" + perl -pi -e "s|Boundline \d+\.\d+\.\d+|Boundline $VERSION|g" "$file" + echo " Updated: $file" + fi +done + +echo "Done!" diff --git a/specs/061-reasoning-profile-contracts/contracts/canon-challenge-posture-consumer-contract.md b/specs/061-reasoning-profile-contracts/contracts/canon-challenge-posture-consumer-contract.md index 550bb5f0..99481b78 100644 --- a/specs/061-reasoning-profile-contracts/contracts/canon-challenge-posture-consumer-contract.md +++ b/specs/061-reasoning-profile-contracts/contracts/canon-challenge-posture-consumer-contract.md @@ -5,8 +5,8 @@ `docs/integration/governed-reasoning-posture-contract.md` - **Canonical Source Identifier**: `canon:docs/integration/governed-reasoning-posture-contract.md` - **Supported Contract Line**: `governed_reasoning_posture_v1` -- **Supported Compatibility Window**: Boundline `0.65.x` consuming Canon - `0.61.x` posture inputs only +- **Supported Compatibility Window**: Boundline `0.67.x` consuming Canon + `0.63.x` posture inputs only ## Boundline Consumes @@ -46,10 +46,10 @@ ```toml contract_line = "governed_reasoning_posture_v1" -boundline_min = "0.65.0" -boundline_max_exclusive = "0.66.0" -canon_min = "0.61.0" -canon_max_exclusive = "0.63.0" +boundline_min = "0.67.0" +boundline_max_exclusive = "0.68.0" +canon_min = "0.63.0" +canon_max_exclusive = "0.64.0" 
required_profile_family = "blind_review" admission_priority = "required_before_acceptance" confidence_handoff_required = true diff --git a/specs/061-reasoning-profile-contracts/contracts/canon-governed-reasoning-posture-contract.snapshot.md b/specs/061-reasoning-profile-contracts/contracts/canon-governed-reasoning-posture-contract.snapshot.md index 3e77b979..d390a8ce 100644 --- a/specs/061-reasoning-profile-contracts/contracts/canon-governed-reasoning-posture-contract.snapshot.md +++ b/specs/061-reasoning-profile-contracts/contracts/canon-governed-reasoning-posture-contract.snapshot.md @@ -8,15 +8,15 @@ This snapshot preserves the Canon provider-side release contract needed for Boun - `current_contract_line`: `governed_reasoning_posture_v1` - `schema_version`: `v1` - `primary_consumer`: `boundline` -- `supported_boundline_window`: `0.66.x` +- `supported_boundline_window`: `0.67.x` - `supported_canon_window`: `0.63.x` ## Producer Shape ```toml contract_line = "governed_reasoning_posture_v1" -boundline_min = "0.66.0" -boundline_max_exclusive = "0.67.0" +boundline_min = "0.67.0" +boundline_max_exclusive = "0.68.0" canon_min = "0.63.0" canon_max_exclusive = "0.64.0" required_profile_family = "blind_review" diff --git a/specs/061-reasoning-profile-contracts/contracts/reasoning-version-alignment-contract.md b/specs/061-reasoning-profile-contracts/contracts/reasoning-version-alignment-contract.md index 518154b2..58986145 100644 --- a/specs/061-reasoning-profile-contracts/contracts/reasoning-version-alignment-contract.md +++ b/specs/061-reasoning-profile-contracts/contracts/reasoning-version-alignment-contract.md @@ -2,12 +2,12 @@ ## Purpose -Define the first supported release pair for the bilateral reasoning-profile +Define the current supported release pair for the bilateral reasoning-profile feature and the checks that must fail closed on drift. 
## Supported Release Pair -- **Boundline**: `0.66.x` +- **Boundline**: `0.67.x` - **Canon**: `0.63.x` - **Shared Contract Line**: `governed_reasoning_posture_v1` @@ -18,7 +18,7 @@ feature and the checks that must fail closed on drift. - Boundline contract tests MUST assert that the provider's compatibility window admits the active Boundline version. - Canon contract tests or docs checks MUST assert that the published posture - contract names Boundline `0.66.x` as a supported consumer window. + contract names Boundline `0.67.x` as a supported consumer window. - Release-facing docs, changelogs, and compatibility guidance in both repos MUST agree on the supported pair. diff --git a/specs/067-plan-quality-contract/checklists/requirements.md b/specs/067-plan-quality-contract/checklists/requirements.md new file mode 100644 index 00000000..8b4e5e9e --- /dev/null +++ b/specs/067-plan-quality-contract/checklists/requirements.md @@ -0,0 +1,35 @@ +# Specification Quality Checklist: Plan Quality Contract + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-06-02 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No 
implementation details leak into specification + +## Notes + +- Validation passed during specification and was rechecked after plan-analysis remediation. +- Release closure requirements are stated as verifiable delivery constraints without prescribing implementation structure. diff --git a/specs/067-plan-quality-contract/contracts/plan-quality-runtime-contract.md b/specs/067-plan-quality-contract/contracts/plan-quality-runtime-contract.md new file mode 100644 index 00000000..e5117b4b --- /dev/null +++ b/specs/067-plan-quality-contract/contracts/plan-quality-runtime-contract.md @@ -0,0 +1,116 @@ +# Contract: Plan Quality Runtime Projection + +## Purpose + +Define the additive Boundline-owned planning-readiness contract used by CLI +status, orchestration snapshots, execution admission, traces, and supported +assistant planning assets. + +## Evaluation Order + +```text +goal quality + -> plan quality + -> backlog quality + -> planning analysis + -> execution handoff +``` + +Each gate must stop the sequence when it is not ready. The runtime asks one +question at a time and must not expose execution handoff while plan quality is +`clarification_required` or `blocked`. + +## Additive Session Projection + +When a plan exists or planning is blocked, session status and orchestration +snapshots may include: + +```json +{ + "plan_quality_state": "clarification_required", + "plan_quality_findings": [ + "verification_strategy" + ], + "plan_quality_assumptions": [ + "no explicit route override is required for this plan" + ] +} +``` + +Allowed `plan_quality_state` values: + +| Value | Meaning | Execution handoff | +|---|---|---| +| `ready` | No blocking plan-quality finding remains. | May continue to later gates. | +| `clarification_required` | Focused operator input can resolve the current plan defect. | Withheld. | +| `blocked` | Planning context is explicitly non-credible or otherwise blocked. | Withheld. | + +The fields are additive. 
Older snapshots may omit them, and existing consumers +may ignore them. + +## Clarification Handoff + +For a recoverable plan-quality finding, the runtime must: + +1. Keep planning non-terminal. +2. Persist the effective assessment in the active session. +3. Mark the session blocked pending operator input. +4. Emit one existing structured `phase_request`. +5. Preserve the existing raw and assistant-safe resume routes. +6. Re-evaluate the same session after the answer is applied. + +The initial release formalizes a missing `verification_strategy` as the newly +enforced blocking finding. Additional semantic-strength checks remain separate +future slices so this first gate stays bounded. + +## Trace Expectations + +Trace-visible planning decisions must preserve: + +- session identity +- effective plan-quality state +- ordered findings +- accepted assumptions +- emitted clarification request identity +- withheld execution handoff +- recovered readiness after explicit operator input + +Traces must not log secrets, tokens, or personally identifiable information. + +## Assistant Asset Contract + +Copilot, Claude, Codex, and Antigravity planning assets must preserve: + +- `goal_quality_state` +- `plan_quality_state` +- `plan_quality_findings` +- `plan_quality_assumptions` +- emitted `phase_request` +- raw `resume_command` +- `assistant_resume_command` +- `assistant_next_command` + +Each planning asset must contain these standardized sections: + +- `User Input` +- `Pre-Execution Checks` +- `Execution Flow` +- `Plan Quality Validation` +- `Reasonable Defaults` +- `Gate Handling` +- `Output Interpretation` +- `Next-Step Routing` +- `Done When` + +Assistant hosts must not synthesize an execution route from chat-only +assumptions when goal quality or plan quality is blocked. + +## Explicit Non-Goals + +- No new CLI command. +- No file-first Speckit runtime. +- No backlog-quality implementation. +- No cross-artifact planning-analysis implementation. 
+- No Canon-owned execution admission. +- No provider, sandbox, browser, gateway, memory, council, adaptive-governance, + recursive-refinement, concurrency, or background-worker behavior. diff --git a/specs/067-plan-quality-contract/data-model.md b/specs/067-plan-quality-contract/data-model.md new file mode 100644 index 00000000..a9c8101e --- /dev/null +++ b/specs/067-plan-quality-contract/data-model.md @@ -0,0 +1,87 @@ +# Data Model: Plan Quality Contract + +## Entity: Plan Quality Assessment + +Represents the effective readiness decision for one active goal-derived plan. + +| Field | Shape | Required | Rules | +|---|---|---|---| +| `state` | `ready`, `clarification_required`, or `blocked` | Yes | `ready` only when no finding blocks handoff; `clarification_required` for recoverable missing plan input; `blocked` for non-credible context or another explicit non-recoverable condition. | +| `findings` | Ordered list of stable labels | No | Labels remain concise and machine-readable. Ordering determines the single question asked first. | +| `assumptions` | Ordered list of accepted defaults | No | Assumptions remain visible but do not block handoff by themselves. | + +The assessment is persisted additively inside the existing goal-plan record and +is recomputed from current plan contents before admission or projection. + +## Entity: Plan Quality Finding + +Represents one missing or weak planning input. + +| Label | Initial-slice behavior | Recovery | +|---|---|---| +| `verification_strategy` | Recoverable clarification; blocks execution handoff | Ask for an explicit validation strategy, then re-evaluate. | +| `planning_rationale` | Existing recoverable clarification retained for compatibility | Ask for the rationale supporting the selected plan targets, then re-evaluate. | +| `context_pack_insufficient` | Explicit blocked state | Surface the context summary and require new credible input. 
| +| `context_pack_stale` | Explicit blocked state | Surface the staleness reason and require context refresh. | + +The first release slice newly formalizes the validation-strategy gate. Existing +rationale and context findings remain visible so the feature does not regress +already-landed scaffolding. + +## Entity: Plan Quality Assumption + +Represents a low-impact inferred default accepted by the runtime. + +| Initial assumption | Meaning | +|---|---| +| `no explicit route override is required for this plan` | The operator did not request a route override, so the normal configured routing policy remains valid. | + +Assumptions must remain inspectable in status, orchestration snapshots, and +trace-backed reasoning without becoming implicit control flow. + +## Entity: Planning Clarification Request + +Reuses the existing structured `phase_request` handoff. + +| Field | Purpose | +|---|---| +| `request_id` | Stable resume identity for the active question. | +| `kind` | Clarification classification understood by assistant hosts. | +| `reason` | Concise explanation of why planning cannot advance. | +| `question` | Exactly one operator question for the current highest-priority finding. | +| `expected_answer` | Existing answer contract used by host surfaces. | +| `resume_command` | Raw CLI continuation that resumes the same session. | +| `assistant_resume_command` | Host-safe assistant continuation when available. 
| + +## State Transitions + +```text +goal quality unresolved + -> preserve goal-quality gate + +goal quality ready + plan available + -> evaluate plan quality + +no blocking finding + -> ready + -> execution handoff may be offered + +recoverable missing plan input + -> clarification_required + -> emit one phase_request + -> wait for operator input + -> re-evaluate same session + +non-credible context + -> blocked + -> surface reason and stop pending explicit operator action +``` + +## Compatibility Rules + +- Older session snapshots without `plan_quality` must deserialize successfully. +- Consumers that ignore new session-status fields must continue to work. +- The effective assessment is recomputed before admission decisions so stale + persisted state cannot bypass the gate. +- Backlog quality and planning analysis remain separate later gates; they are + not folded into the plan-quality assessment. diff --git a/specs/067-plan-quality-contract/plan.md b/specs/067-plan-quality-contract/plan.md new file mode 100644 index 00000000..4398c9c5 --- /dev/null +++ b/specs/067-plan-quality-contract/plan.md @@ -0,0 +1,182 @@ +# Implementation Plan: Plan Quality Contract + +**Branch**: `067-plan-quality-contract` | **Date**: 2026-06-02 | **Spec**: [spec.md](spec.md) + +**Input**: Feature specification from `/specs/067-plan-quality-contract/spec.md` + +## Summary + +Ship the first formal planning-readiness slice as Boundline-owned runtime +behavior. Reuse the existing `GoalPlan`, `PlanQualityAssessment`, +`SessionStatusView`, orchestration `phase_request`, trace, and assistant-asset +surfaces; audit and complete the current scaffolding so a missing verification +strategy blocks execution handoff with one focused question, +while low-impact defaults remain visible assumptions. Close the slice as +release `0.67.0` with aligned metadata, operator docs, clippy compliance, and at +least 95% patch coverage for changed or created implementation files. 
+ +## Technical Context + +**Language/Version**: Rust 1.96.0, edition 2024 + +**Primary Dependencies**: Existing workspace crates and dependencies only; no new runtime dependency is planned + +**Storage**: Existing workspace-local session and trace files, extended with additive plan-quality fields and trace-visible projections + +**Testing**: `cargo test --test unit`, `cargo test --test contract`, +`cargo test --test integration human_input_capture_flow::`, focused +planning-gate tests, `cargo fmt --check`, +`cargo clippy --workspace --all-targets --all-features -- -D warnings`, +`cargo llvm-cov --workspace --all-features --lcov --output-path lcov.info`, and +`scripts/common/coverage/intersect_patch_coverage.py` + +**Target Platform**: Local CLI runtime on supported developer workstations; +assistant assets for Copilot, Claude, Codex, and Antigravity remain thin +projections over the same CLI/runtime contract + +**Project Type**: Rust workspace with CLI, runtime, assistant assets, +distribution metadata, and repository-managed documentation + +**Performance Goals**: Plan-quality evaluation remains bounded to one +in-memory assessment per planning or execution-admission decision and adds no +external I/O; blocked flows emit exactly one operator question per handoff + +**Constraints**: Reuse current session-native planning; preserve older session +snapshot compatibility; no new CLI subcommand; no file-first Speckit runtime; +no Canon-owned control flow; sequential question handling only; all changed +implementation files require at least 95% patch coverage + +**Scale/Scope**: One readiness gate over an active plan, three additive state +values, concise findings and assumptions, one-question recovery, four +supported assistant planning assets, user docs, tech docs, and release closure + +## Constitution Check + +*GATE: Passed before Phase 0 research and passed again after Phase 1 design.* + +| Principle or standard | Result | Design evidence | +|---|---|---| +| 
Delivery identity and delivery-first scope | PASS | The gate prevents unvalidated implementation handoff and directly improves working-code delivery reliability. | +| Bounded execution | PASS | Evaluation is single-pass; recovery emits exactly one `phase_request` and waits for explicit operator input. | +| Stateful execution | PASS | Assessment, findings, assumptions, recovery transitions, and traces remain session-visible. | +| Mutable planning and execution over perfect planning | PASS | Missing validation strategy requests focused input and resumes the same plan; the feature does not require exhaustive upfront analysis. | +| Sequential-first design | PASS | One gate and one question are active at a time; no parallel or background processing is added. | +| Tool-agent symmetry and required observability | PASS | CLI, status, orchestration snapshots, inspect, traces, and assistant assets project the same runtime decision. | +| No hidden intelligence | PASS | Stable findings, accepted assumptions, gate order, and recovery routing are surfaced explicitly. | +| Strict non-goals and minimal capability slice | PASS | Backlog quality, planning analysis, providers, sandboxing, memory, councils, and recursive refinement remain outside this slice. | +| Real acceptance criteria and failure-first behavior | PASS | The spec covers ready, blocked, compatibility, and recovered planning paths in isolated temporary workspaces. | +| Separation from external systems | PASS | Canon packets may inform planning, but Boundline remains independently testable and owns admission control. | +| Catalog currency | PASS | `research.md` records the 2026-06-02 public-doc refresh and the local duplicate cleanup required in the bundled catalog. | +| Rust language rules | PASS | The plan requires typed serde shapes, named constants, explicit errors, docs, structured tracing, formatting, and clippy closure. 
| + +## Project Structure + +### Documentation (this feature) + +```text +specs/067-plan-quality-contract/ +├── spec.md +├── plan.md +├── research.md +├── data-model.md +├── quickstart.md +├── contracts/ +│ └── plan-quality-runtime-contract.md +├── checklists/ +│ └── requirements.md +└── tasks.md +``` + +### Source Code (repository root) + +```text +src/ +├── domain/ +│ ├── goal_plan.rs +│ └── session.rs +├── orchestrator/ +│ ├── session_runtime.rs +│ ├── session_runtime_native_goal_plan.rs +│ └── session_runtime_planning_runtime.rs +└── cli/ + ├── session.rs + ├── output_orchestrate.rs + └── output_session_status.rs + +assistant/ +├── antigravity/commands/boundline-plan.md +├── claude/commands/boundline-plan.md +├── codex/commands/boundline-plan.md +├── copilot/prompts/boundline-plan.prompt.md +└── catalog/model-catalog.toml + +tests/ +├── unit/ +│ ├── goal_plan_model.rs +│ ├── session_cli_runtime.rs +│ ├── session_record.rs +│ └── cli_output.rs +├── contract/ +│ ├── assistant_command_definition_contract.rs +│ ├── host_command_output_contract.rs +│ └── planning_gate_pipeline_contract.rs +└── integration/ + └── human_input_capture_flow.rs + +docs/ +├── runtime/plan.md +├── runtime/phase-requests.md +├── guide/common-workflows.md +└── roadmap/index.md + +tech-docs/ +├── architecture.md +├── configuration.md +├── getting-started.md +└── host-orchestration-contract.md + +distribution/ +├── channel-metadata.toml +├── homebrew/Formula/boundline.rb +└── winget/manifests/a/ApplyThe/Boundline/0.67.0/ +``` + +**Structure Decision**: Keep the current single Rust workspace and reuse the +existing planning-quality scaffolding. The implementation phase starts with a +gap audit against the spec, then changes only files needed to close observed +behavior, documentation, catalog hygiene, release metadata, and test coverage. 
+ +## Phase 0 Research Conclusions + +- Reuse `GoalPlan::assess_plan_quality()` and its typed additive + `PlanQualityAssessment`; do not add a second planning validator. +- Keep plan-quality evaluation ahead of backlog quality and planning-analysis + admission checks so the first actionable planning defect is deterministic. +- Preserve `phase_request` as the one-question recovery contract; assistant + assets must not synthesize execution from chat-only assumptions. +- Treat `routing_policy_summary` omission as a visible accepted assumption, + while missing planning rationale or verification strategy remains + recoverable clarification and non-credible context remains blocked. +- Refresh public model documentation in the feature packet. No new model family + is required, but the duplicate `opus-4.8` entry in the bundled catalog must + be removed and catalog metadata refreshed during implementation. + +## Phase 1 Design Outputs + +- [research.md](research.md) records reuse decisions, public model-catalog + evidence, and rejected alternatives. +- [data-model.md](data-model.md) defines persisted assessment, finding, + assumption, and clarification-request behavior. +- [plan-quality-runtime-contract.md](contracts/plan-quality-runtime-contract.md) + defines additive session output, evaluation ordering, and assistant-safe + recovery semantics. +- [quickstart.md](quickstart.md) defines isolated validation scenarios and + release-quality commands. + +## Post-Design Constitution Recheck + +The design remains compliant after Phase 1. It adds no dependency, command, +provider abstraction, external runtime dependency, concurrency, hidden +fallback, or second planning engine. The implementation packet must preserve +the first-slice boundary and must not fold roadmap seeds 04 or 05 into release +`0.67.0`. 
diff --git a/specs/067-plan-quality-contract/quickstart.md b/specs/067-plan-quality-contract/quickstart.md new file mode 100644 index 00000000..3fda15e3 --- /dev/null +++ b/specs/067-plan-quality-contract/quickstart.md @@ -0,0 +1,120 @@ +# Quickstart: Plan Quality Contract + +## 1. Use An Isolated Temporary Workspace + +Do not run Boundline CLI commands against the Boundline repository root. Create +or use a disposable fixture workspace for every runtime validation scenario. + +Expected result: + +- the Boundline source tree remains free of workspace-local `.boundline/` + session state +- each scenario has its own isolated session and traces + +## 2. Verify Ready Planning + +Capture a bounded goal, provide planning input that includes a rationale and an +explicit validation strategy, then request planning and status. + +Expected result: + +- `plan_quality_state` is `ready` +- no blocking plan-quality finding remains +- accepted low-impact assumptions remain visible when applicable +- execution handoff may be offered only after later planning gates also pass + +## 3. Verify Missing Validation Strategy Blocks Handoff + +Use a focused fixture or test helper to create a plan with goal quality +satisfied and an empty validation strategy, then request the next planning or +execution-admission step. + +Expected result: + +- `plan_quality_state` is `clarification_required` +- `plan_quality_findings` includes the `verification_strategy` finding for the missing validation strategy +- exactly one `phase_request` is emitted +- the session remains non-terminal and execution handoff is withheld +- status and trace output preserve the finding and accepted assumptions + +## 4. Verify Recovery Uses The Same Session + +Answer the emitted question with an explicit validation strategy and resume +through the provided continuation. 
+ +Expected result: + +- the same session is re-evaluated +- the earlier blocked assessment remains trace-visible +- the effective assessment transitions to `ready` when no blocking finding + remains +- execution handoff is offered only after later planning gates also pass + +## 5. Verify Compatibility + +Load a fixture session snapshot that predates the additive `plan_quality` +projection and inspect status. + +Expected result: + +- the older snapshot deserializes successfully +- status rendering completes without failure +- consumers that ignore the additive projection remain compatible + +## 6. Verify Assistant Assets + +Run the focused assistant contract tests. + +```bash +cargo test --test contract assistant_command_definition_contract:: +``` + +Expected result: + +- Copilot, Claude, Codex, and Antigravity planning assets contain the + standardized sections +- each host preserves the plan-quality fields and structured recovery routes +- no host invents execution continuation while quality is blocked + +## 7. 
Validate Release Closure + +Run: + +```bash +cargo fmt --check +cargo clippy --workspace --all-targets --all-features -- -D warnings +cargo test --test unit +cargo test --test contract +cargo test --test integration human_input_capture_flow:: +cargo llvm-cov --workspace --all-features --lcov --output-path lcov.info +``` + +Then list the actual changed or created implementation files and intersect +their diff with uncovered LCOV lines: + +```bash +implementation_files=( + src/domain/goal_plan.rs + src/domain/session.rs + src/orchestrator/session_runtime.rs + src/orchestrator/session_runtime_native_goal_plan.rs + src/orchestrator/session_runtime_planning_runtime.rs + src/cli/session.rs + src/cli/output_session_status.rs + src/cli/output_orchestrate.rs + src/cli/inspect/projections.rs + src/cli/output_run_trace.rs +) +git diff --unified=0 origin/main...HEAD -- "${implementation_files[@]}" \ + | python3 scripts/common/coverage/intersect_patch_coverage.py \ + --lcov lcov.info "${implementation_files[@]}" +``` + +Expected result: + +- formatting passes +- clippy reports zero warnings +- focused and workspace-relevant tests pass +- changed or created implementation files meet at least 95% patch coverage +- release metadata, README, docs, tech docs, changelog, roadmap status, and + assistant metadata consistently describe release `0.67.0` diff --git a/specs/067-plan-quality-contract/research.md b/specs/067-plan-quality-contract/research.md new file mode 100644 index 00000000..7fec066b --- /dev/null +++ b/specs/067-plan-quality-contract/research.md @@ -0,0 +1,130 @@ +# Research: Plan Quality Contract + +## Provider Catalog Refresh + +Public provider documentation was rechecked on 2026-06-02 as required by the +constitution: + +- GitHub Copilot's supported-model reference lists the currently surfaced + OpenAI, Anthropic, and Google families relevant to the bundled Copilot + runtime, including GPT-5.5, GPT-5.4 variants, Claude Opus 4.8, Claude Sonnet + 4.6, Claude Haiku 4.5, 
Gemini 3.5 Flash, Gemini 3.1 Pro, Gemini 3 Flash, and + Gemini 2.5 Pro: + +- GitHub Copilot's model-comparison reference also describes Claude Opus 4.8, + Claude Sonnet 4.6, Gemini 3.5 Flash, Gemini 3.1 Pro, Gemini 3 Flash, and + Gemini 2.5 Pro: + +- OpenAI's public model reference remains the source for current OpenAI API + route availability: + +- Google's public Gemini model reference remains the source for current Gemini + API route availability: + + +The bundled catalog already carries the relevant families for the supported +runtime surfaces, so this feature does not add a new model family. A local +hygiene issue was found: `assistant/catalog/model-catalog.toml` repeats the +Copilot `opus-4.8` entry. Implementation must remove the duplicate and refresh +the catalog metadata date while preserving the supported family set. + +## Decision 1: Formalize the existing typed assessment instead of adding a second validator + +**Decision**: Reuse `GoalPlan::assess_plan_quality()` and the typed +`PlanQualityAssessment` projection already owned by `src/domain/goal_plan.rs`. +Audit and complete the current behavior against the spec rather than layering a +new quality service over it. + +**Rationale**: The current domain model already exposes the required additive +state, findings, assumptions, and serde defaults. A parallel validator would +create ordering drift between persisted session state, status output, and run +admission without delivering additional operator value. + +**Alternatives considered**: + +- Add a separate plan-quality module: rejected because the existing domain + owner is already narrow and typed. +- Evaluate quality only in CLI presentation: rejected because execution + admission and assistant projections must share the same runtime decision. +- Defer validation to Canon: rejected because Boundline owns execution + admission and must remain independently testable. 
+ +## Decision 2: Keep one deterministic gate order + +**Decision**: Evaluate goal quality first, plan quality second, backlog quality +third, and planning analysis fourth. Plan-quality recovery emits exactly one +`phase_request` for the highest-impact current finding. + +**Rationale**: Operators need one actionable next step. Reporting later +cross-artifact findings while the plan still lacks its own validation strategy +would increase noise and make recovery order ambiguous. + +**Alternatives considered**: + +- Emit all possible questions at once: rejected because it breaks the + sequential one-question contract. +- Evaluate backlog or analysis first: rejected because those checks depend on a + credible plan. +- Silently infer validation strategy: rejected because it would hide a + delivery-critical decision. + +## Decision 3: Preserve additive persisted state and compatibility defaults + +**Decision**: Keep `plan_quality` as an additive serde-backed field in the +persisted `GoalPlan`, default older snapshots to a ready empty assessment at +deserialization time, and recompute the effective assessment from the current +plan before presentation or admission decisions. + +**Rationale**: Existing workspaces must remain readable after release `0.67.0`. +Recomputation prevents stale persisted projections from overriding current +plan contents while the additive default preserves backward compatibility. + +**Alternatives considered**: + +- Make the new field mandatory during deserialization: rejected because it + would break existing `.boundline/session.json` files. +- Trust only the persisted projection: rejected because edited or migrated + plan content could leave stale readiness state behind. +- Avoid persistence entirely: rejected because status and trace history need an + inspectable state transition. 
+ +## Decision 4: Reuse the existing phase-request and assistant routing boundary + +**Decision**: Keep `phase_request`, `assistant_resume_command`, and +`assistant_next_command` as the only recovery and continuation contract. +Supported assistant planning assets must preserve the runtime fields and stop +on blocked or clarification-required quality. + +**Rationale**: Boundline already has one host-safe sequential handoff protocol. +Extending that protocol keeps the CLI authoritative across Copilot, Claude, +Codex, and Antigravity without adding host-specific control flow. + +**Alternatives considered**: + +- Add a plan-quality-specific assistant command: rejected because no new CLI or + assistant command is needed. +- Let hosts infer recovery from prose: rejected because it would reintroduce + chat-only behavior and inconsistent continuation. +- Add Speckit hooks to the Boundline assistant command: rejected because + Boundline uses runtime-owned handoffs. + +## Decision 5: Ship release closure as part of the slice + +**Decision**: Close the implementation as version `0.67.0`, align release +metadata and package manifests, update user and engineering docs, remove the +catalog duplicate, run formatting and clippy, and prove at least 95% patch +coverage for changed or created implementation files. + +**Rationale**: The user-visible gate changes planning admission behavior and +must ship as one documented, verifiable pre-1.0 minor release. Patch coverage +is the appropriate metric because this feature completes scaffolding already +present in the codebase and should measure newly changed behavior directly. + +**Alternatives considered**: + +- Leave release metadata for a later sweep: rejected because package and docs + drift would make the active behavior ambiguous. +- Require only full-workspace coverage: rejected because it obscures whether + newly changed lines are exercised. 
+- Skip catalog hygiene because no family changed: rejected because duplicate + choices degrade operator setup even when the family set is current. diff --git a/specs/067-plan-quality-contract/spec.md b/specs/067-plan-quality-contract/spec.md new file mode 100644 index 00000000..8d0f4e7a --- /dev/null +++ b/specs/067-plan-quality-contract/spec.md @@ -0,0 +1,136 @@ +# Feature Specification: Plan Quality Contract + +**Feature Branch**: `067-plan-quality-contract` + +**Created**: 2026-06-02 + +**Status**: Released in Boundline `0.67.0` + +**Input**: User description from `roadmap/features/03-plan-quality-contract.md`, promoted as the next Boundline planning-readiness feature with release, documentation, clippy, and changed-file patch-coverage closure requirements. + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Block Unsafe Execution Handoff (Priority: P1) + +As a repository operator, I can rely on Boundline to stop before execution when the active plan lacks an explicit validation strategy, so incomplete planning does not silently become implementation work. + +**Why this priority**: A plan without a validation strategy cannot demonstrate working-code delivery. Blocking that handoff is the smallest independently valuable planning-readiness improvement. + +**Independent Test**: Can be fully tested in an isolated temporary workspace by capturing a goal, requesting a plan that omits its validation strategy, and verifying that Boundline keeps planning active, emits one actionable clarification request, records the block in traces, and does not offer execution. + +**Acceptance Scenarios**: + +1. **Given** an active session with goal quality satisfied and a plan that includes an explicit validation strategy, **When** the operator requests the next delivery step, **Then** Boundline marks plan quality as ready and may offer execution handoff. +2. 
**Given** an active session with goal quality satisfied and a plan that omits its validation strategy, **When** the operator requests the next delivery step, **Then** Boundline keeps the plan non-terminal, marks plan quality as requiring clarification, emits exactly one actionable `phase_request`, and does not offer execution handoff. +3. **Given** a blocked planning session with a missing validation strategy, **When** the operator supplies an adequate validation strategy and resumes planning, **Then** Boundline re-evaluates plan quality, records the recovered assessment, and may offer execution handoff when no blocking finding remains. + +--- + +### User Story 2 - Inspect Plan Readiness (Priority: P2) + +As a repository operator, I can inspect whether a plan is ready, which quality findings still block it, and which low-impact assumptions were accepted, so I can understand the runtime decision without reconstructing it from chat history. + +**Why this priority**: A blocking gate is credible only when its decision and recovery path remain visible through the normal operator surfaces. + +**Independent Test**: Can be fully tested by evaluating one ready plan, one plan blocked by a missing validation strategy, and one ready plan with an accepted low-impact omission, then verifying that status, orchestration snapshots, and traces expose the expected readiness state, findings, assumptions, and transitions. + +**Acceptance Scenarios**: + +1. **Given** a plan-quality evaluation has completed, **When** the operator inspects session status, **Then** the operator sees the current plan-quality state and any relevant findings or accepted assumptions. +2. **Given** a plan-quality evaluation blocks execution handoff, **When** the operator inspects orchestration output or trace history, **Then** the operator sees the blocking finding, the emitted clarification request, and the fact that execution handoff was withheld. +3. 
**Given** a plan omits a low-impact detail for which Boundline applies an accepted default, **When** the operator inspects status or traces, **Then** the accepted assumption remains visible and does not block execution handoff. + +--- + +### User Story 3 - Resume Planning Through Assistant Surfaces (Priority: P3) + +As an assistant user, I receive a consistent planning response across supported hosts when goal quality or plan quality prevents progress, so I can answer one focused question and resume the same session safely. + +**Why this priority**: The runtime gate must be preserved by every supported assistant surface; otherwise users can be routed around the safety decision accidentally. + +**Independent Test**: Can be fully tested by validating the supported assistant planning assets and exercising a blocked planning response to confirm that each host preserves the quality projection, the single `phase_request`, and the resume command. + +**Acceptance Scenarios**: + +1. **Given** goal quality or plan quality blocks planning progress, **When** a supported assistant host renders the planning response, **Then** it preserves the blocked state, findings, accepted assumptions, emitted `phase_request`, and resume routing without inventing an execution step. +2. **Given** a blocked planning response includes one actionable clarification request, **When** the user answers and resumes through the assistant command, **Then** Boundline continues the existing session and re-evaluates readiness before offering execution. +3. **Given** the planning assets are distributed for a release, **When** package validation runs, **Then** every supported host asset contains the standardized planning sections and blocked-quality routing rules. + +### Edge Cases + +- Goal quality is unresolved when planning is requested; Boundline preserves the existing goal-quality block and does not evaluate plan quality prematurely. 
+- A plan has no validation strategy; Boundline emits the highest-impact missing-validation finding and exactly one clarification request. +- Multiple plan-quality findings are possible; Boundline records concise findings but asks exactly one question at a time, prioritizing scope and safety before user-facing behavior and technical detail. +- A low-impact planning detail is omitted; Boundline records an accepted assumption and allows the plan to remain ready when no blocking finding exists. +- An older session snapshot has no plan-quality fields; status and orchestration surfaces continue to read it without failure. +- A consumer ignores the additive plan-quality fields; existing session and orchestration behavior remains compatible. +- Planning recovers after clarification; the trace history retains the blocked assessment and the later ready assessment. +- A supported assistant host receives a blocked response; the host must not synthesize execution handoff from chat-only assumptions. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: The system MUST evaluate plan quality after goal quality is satisfied and before execution handoff is offered. +- **FR-002**: The first independently valuable release slice MUST treat a missing validation strategy as a blocking plan-quality finding. +- **FR-003**: The system MUST represent plan quality with the states `ready`, `clarification_required`, and `blocked`. +- **FR-004**: When plan quality is insufficient but recoverable through operator input, the system MUST keep planning non-terminal, use `clarification_required`, emit exactly one actionable `phase_request`, and preserve the existing assistant resume routing. +- **FR-005**: The system MUST prioritize the one-question clarification backlog by scope and safety first, user-facing behavior second, and technical detail third. +- **FR-006**: The system MUST record concise machine-readable plan-quality findings for missing or weak planning inputs. 
+- **FR-007**: The system MUST record inferred low-impact defaults as accepted plan-quality assumptions and MUST expose them without blocking execution when no blocking finding remains. +- **FR-008**: The system MUST expose plan-quality state, findings, and accepted assumptions in session status when a plan exists or planning is blocked. +- **FR-009**: The system MUST expose plan-quality state, findings, and accepted assumptions in orchestration session snapshots when a plan exists or planning is blocked. +- **FR-010**: The system MUST trace plan-quality evaluation, blocking decisions, emitted clarification requests, accepted assumptions, recovery after clarification, and final readiness state with reproducible session context and without secrets or personally identifiable information. +- **FR-011**: The system MUST preserve compatibility with older persisted session snapshots that do not contain plan-quality fields and with consumers that ignore additive fields. +- **FR-012**: The system MUST preserve the existing goal-quality gate and MUST NOT evaluate plan quality as a replacement for unresolved goal quality. +- **FR-013**: The system MUST update the supported assistant planning assets so each host uses the standardized sections `User Input`, `Pre-Execution Checks`, `Execution Flow`, `Plan Quality Validation`, `Reasonable Defaults`, `Gate Handling`, `Output Interpretation`, `Next-Step Routing`, and `Done When`. +- **FR-014**: Supported assistant planning assets MUST preserve goal-quality and plan-quality blocked states, findings, accepted assumptions, any emitted `phase_request`, and resume routing without deriving execution handoff from chat-only assumptions. +- **FR-015**: The release MUST update the workspace version and aligned release metadata, `README.md`, user-facing documentation under `docs/`, engineering documentation under `tech-docs/`, and `CHANGELOG.md`. +- **FR-016**: The release MUST pass formatting and clippy validation with warnings rejected. 
+- **FR-017**: The release MUST demonstrate at least 95% patch coverage for changed or created implementation files and MUST use the repository patch-coverage helper when reporting that result. +- **FR-018**: The feature packet MUST record a current public-provider catalog refresh result, including an explicit no-change rationale when no catalog update is needed. +- **FR-019**: The feature MUST remain a runtime-owned planning-readiness gate and MUST NOT require generated Speckit files, a new CLI subcommand, Canon control flow, provider abstractions, background workers, parallel execution, or hidden fallback behavior. + +### Task State, Recovery, and Terminal Conditions + +- Plan-quality evaluation starts only after goal quality is satisfied and a plan is available for evaluation. +- A ready assessment permits execution handoff only when no blocking finding remains. +- A recoverable missing-validation-strategy finding keeps planning active, records `clarification_required`, emits exactly one `phase_request`, and waits for explicit operator input. +- A blocked assessment is reserved for a non-recoverable or explicitly blocked planning condition surfaced by the runtime; it never silently degrades into execution. +- Recovery reuses the existing session, records the supplied planning input, re-evaluates readiness, and appends a trace-visible transition. +- A planning attempt is terminal only when readiness is visible as `ready` or when a visible blocked condition stops progress pending operator action. + +### Scope Boundaries + +- This feature adds the first plan-readiness gate only; backlog-readiness and cross-artifact analysis gates remain separate roadmap slices. +- This feature does not add a new planning command, a second planning runtime, or a file-first Speckit workflow. +- This feature does not change Canon ownership boundaries; Canon may supply planning packets, but Boundline owns readiness evaluation and execution admission. 
+- This feature does not add provider, sandbox, browser, gateway, memory, council, adaptive-governance, or recursive-refinement behavior. +- This feature does not introduce parallel planning work, autonomous background work, or hidden heuristics. + +### Key Entities + +- **Plan Quality Assessment**: The current readiness decision for an active plan, including state, concise findings, accepted assumptions, and the session context needed for status and trace projection. +- **Plan Quality Finding**: A machine-readable reason a plan is missing or weak in a quality dimension, including whether the finding blocks execution handoff and whether focused operator input can resolve it. +- **Plan Quality Assumption**: A low-impact inferred default accepted by the runtime, retained for inspection without blocking execution handoff. +- **Planning Clarification Request**: The single highest-priority operator question emitted through the existing `phase_request` contract while planning remains non-terminal. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: In all validation scenarios where goal quality is satisfied but the plan omits its validation strategy, execution handoff is withheld and exactly one actionable clarification request is returned. +- **SC-002**: In all validation scenarios where the plan contains an explicit validation strategy and no other blocking finding, plan quality is reported as ready and execution handoff remains available. +- **SC-003**: In all recovery scenarios where the operator supplies an adequate missing validation strategy, the same session transitions from requiring clarification to ready without losing the earlier blocked assessment from trace history. +- **SC-004**: In all compatibility scenarios using older persisted session snapshots without plan-quality fields, status and orchestration inspection complete successfully. 
+- **SC-005**: All supported assistant planning assets contain the nine standardized planning sections and preserve blocked-quality routing through one clarification request and the existing resume command. +- **SC-006**: Release validation completes with formatting checks passing, clippy producing zero warnings, and changed or created implementation files meeting at least 95% patch coverage. +- **SC-007**: The feature packet records a provider-catalog refresh result dated during the feature cycle, with either the applied catalog delta or an explicit evidence-backed no-change rationale. + +## Assumptions + +- The existing goal-quality gate, `phase_request`, assistant resume command, assistant next command, session status, orchestration snapshot, and trace surfaces remain the reusable runtime contracts. +- Missing validation strategy is the only newly enforced blocking quality dimension in the first delivery slice; additional findings may be represented for inspection only when they do not expand enforcement scope. +- Assistant host support remains aligned with the currently distributed Copilot, Claude, Codex, and Antigravity planning assets. +- Release closure uses the next minor pre-1.0 workspace version because the additive runtime projection and planning admission behavior form a new feature slice. +- Catalog refresh work is evidence-only unless current public provider documentation reveals a difference in the bundled assistant model catalog. diff --git a/specs/067-plan-quality-contract/tasks.md b/specs/067-plan-quality-contract/tasks.md new file mode 100644 index 00000000..ad78f91b --- /dev/null +++ b/specs/067-plan-quality-contract/tasks.md @@ -0,0 +1,166 @@ +# Tasks: Plan Quality Contract + +**Input**: Design documents from `/specs/067-plan-quality-contract/` + +**Prerequisites**: `plan.md`, `spec.md`, `research.md`, `data-model.md`, +`contracts/plan-quality-runtime-contract.md`, `quickstart.md` + +**Tests**: Test tasks are required. 
Add or refine focused tests first, confirm +that the relevant assertion fails before changing implementation, then close +the regression with the smallest coherent implementation change. + +**Organization**: Tasks are grouped by user story so the execution-admission +gate, observability projections, and assistant recovery surfaces can be +validated independently. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel because it targets different files and has no + dependency on incomplete work +- **[Story]**: Maps a task to a user story for traceability +- Every task includes repository-relative file paths + +## Phase 1: Setup + +**Purpose**: Confirm the current scaffolding and establish failing regression +coverage before implementation edits. + +- [X] T001 Record the current planning-quality scaffolding audit and the public model-catalog refresh result in `specs/067-plan-quality-contract/research.md` +- [X] T002 [P] Add or refine backward-compatible session deserialization regressions for additive plan-quality fields in `tests/unit/session_record.rs` +- [X] T003 [P] Add or refine assistant planning-asset section regressions in `tests/contract/assistant_command_definition_contract.rs` + +--- + +## Phase 2: Foundational + +**Purpose**: Lock the typed runtime contract before changing story-specific +surfaces. + +**Critical**: Complete this phase before user-story implementation. 
+ +- [X] T004 [P] Add focused failing domain regressions for missing verification strategy findings, visible accepted assumptions, and blocked context readiness in `tests/unit/goal_plan_model.rs` +- [X] T005 [P] Add focused failing runtime regressions for plan-quality gate ordering, one-question `phase_request` routing, and withheld execution handoff in `tests/unit/session_cli_runtime.rs` and `tests/contract/planning_gate_pipeline_contract.rs` +- [X] T006 [P] Add focused failing JSON projection regressions for additive plan-quality state, findings, and assumptions in `tests/unit/cli_output.rs` and `tests/contract/host_command_output_contract.rs` + +**Checkpoint**: The new contract regressions fail for the missing behavior, and +older session snapshots remain backward-compatible. + +--- + +## Phase 3: User Story 1 - Block Unsafe Execution Handoff (Priority: P1) + +**Goal**: Prevent execution admission until the active plan exposes an +actionable verification strategy, while retaining visible accepted defaults. + +**Independent Test**: In an isolated temporary workspace, submit a plan without +an adequate verification strategy and confirm that execution is withheld with +exactly one focused `phase_request`; answer the request and confirm that the +same session resumes. 
+ +### Implementation + +- [X] T007 [US1] Audit and complete typed `PlanQualityState`, finding, assumption, and assessment behavior in `src/domain/goal_plan.rs` +- [X] T008 [US1] Audit and complete plan-quality admission ahead of backlog quality, planning analysis, governance, and execution routing in `src/orchestrator/session_runtime_planning_runtime.rs` and `src/cli/session.rs` +- [X] T009 [US1] Persist blocked and recovered planning-quality transitions with structured trace context in `src/orchestrator/session_runtime.rs` and `src/orchestrator/session_runtime_native_goal_plan.rs` +- [X] T010 [US1] Run the focused US1 regression set in `tests/unit/goal_plan_model.rs`, `tests/unit/session_cli_runtime.rs`, and `tests/contract/planning_gate_pipeline_contract.rs` + +**Checkpoint**: Unsafe handoff is blocked and recoverable through one active +question. + +--- + +## Phase 4: User Story 2 - Inspect Plan Readiness (Priority: P2) + +**Goal**: Expose readiness state, concise findings, and accepted assumptions +through persisted session, status, orchestration, inspect, and trace surfaces. + +**Independent Test**: Inspect one ready and one blocked temporary session and +confirm that all operator-facing JSON projections expose the same additive +plan-quality contract. 
+ +### Implementation + +- [X] T011 [US2] Extend additive persisted session models and compatibility defaults in `src/domain/session.rs` +- [X] T012 [US2] Audit and complete status and orchestration JSON projections in `src/cli/output_session_status.rs` and `src/cli/output_orchestrate.rs` +- [X] T013 [US2] Audit and complete inspect and trace projections in `src/cli/inspect/projections.rs` and `src/cli/output_run_trace.rs` +- [X] T014 [US2] Run the focused US2 regression set in `tests/unit/session_record.rs`, `tests/unit/cli_output.rs`, and `tests/contract/host_command_output_contract.rs` + +**Checkpoint**: Every supported runtime projection exposes the same additive +readiness decision without breaking older snapshots. + +--- + +## Phase 5: User Story 3 - Resume Planning Through Assistant Surfaces (Priority: P3) + +**Goal**: Keep assistant-specific planning commands thin and symmetric over +the CLI/runtime contract. + +**Independent Test**: Validate each supported planning asset and confirm that +it renders the standardized summary sections and forwards the same single +`phase_request` recovery flow. + +### Implementation + +- [X] T015 [P] [US3] Align the Antigravity and Claude planning assets with the standardized summary and recovery contract in `assistant/antigravity/commands/boundline-plan.md` and `assistant/claude/commands/boundline-plan.md` +- [X] T016 [P] [US3] Align the Codex and Copilot planning assets with the standardized summary and recovery contract in `assistant/codex/commands/boundline-plan.md` and `assistant/copilot/prompts/boundline-plan.prompt.md` +- [X] T017 [US3] Run assistant parity regressions in `tests/contract/assistant_command_definition_contract.rs`, `tests/contract/assistant_host_parity_contract.rs`, and `tests/unit/assistant_assets.rs` + +**Checkpoint**: All supported assistants remain projections over one runtime +contract. 
+ +--- + +## Phase 6: Release, Documentation, and Quality Closure + +**Purpose**: Close the release surface and verify repository quality gates. + +- [X] T018 [P] Remove the duplicate `opus-4.8` provider entry found during the public-doc refresh, preserve the current relevant provider families, and refresh the catalog metadata date in `assistant/catalog/model-catalog.toml` +- [X] T019 [P] Document the plan-quality gate, one-question recovery flow, additive projections, and current release line in `README.md`, `docs/runtime/plan.md`, `docs/runtime/phase-requests.md`, `docs/runtime/status.md`, `docs/runtime/inspect.md`, `docs/runtime/trace.md`, `docs/guide/common-workflows.md`, `docs/guide/introduction.md`, and `docs/architecture/runtime-model.md` +- [X] T020 [P] Update operator and architecture guidance for the planning-readiness contract in `tech-docs/architecture.md`, `tech-docs/configuration.md`, `tech-docs/getting-started.md`, and `tech-docs/host-orchestration-contract.md` +- [X] T021 [P] Bump release metadata to `0.67.0` in `Cargo.toml`, `Cargo.lock`, `distribution/channel-metadata.toml`, `distribution/homebrew/Formula/boundline.rb`, `assistant/plugin-metadata.json`, and `assistant/global/manifest.json` +- [X] T022 [P] Add the `0.67.0` WinGet release manifests under `distribution/winget/manifests/a/ApplyThe/Boundline/0.67.0/` +- [X] T023 [P] Record the release summary and delivered roadmap slice in `CHANGELOG.md`, `docs/roadmap/index.md`, `roadmap/Next - forward-roadmap.md`, `roadmap/features/README.md`, and `roadmap/features/03-plan-quality-contract.md` +- [X] T024 Run `cargo fmt` and verify formatting with `cargo fmt --check` +- [X] T025 Run `cargo clippy --workspace --all-targets --all-features -- -D warnings` and fix every reported issue +- [X] T026 Run focused tests with `cargo test --test unit`, `cargo test --test contract`, and `cargo test --test integration human_input_capture_flow::` +- [X] T027 Run the full regression suite with `cargo test` and 
resolve any failures +- [X] T028 Generate `lcov.info` with `cargo llvm-cov --workspace --all-features --lcov --output-path lcov.info` +- [X] T029 Build an explicit repository-relative implementation-file list, run `scripts/common/coverage/intersect_patch_coverage.py` against every changed or created implementation file, and add tests until patch coverage is at least 95 percent +- [X] T030 Validate the isolated scenarios in `specs/067-plan-quality-contract/quickstart.md` without running Boundline CLI commands against the repository root + +--- + +## Dependencies and Execution Order + +### Phase Dependencies + +- **Setup (Phase 1)**: Starts immediately. +- **Foundational (Phase 2)**: Depends on Setup and blocks all story work. +- **User Story 1 (Phase 3)**: Depends on Foundational and is the MVP. +- **User Story 2 (Phase 4)**: Depends on the typed US1 gate so projections + expose the final decision shape. +- **User Story 3 (Phase 5)**: Depends on the US1 runtime contract but can run + in parallel with US2 after US1 stabilizes. +- **Release, Documentation, and Quality Closure (Phase 6)**: Depends on all selected stories. + +### Parallel Opportunities + +- T002 and T003 can run in parallel. +- T004, T005, and T006 can run in parallel. +- T015 and T016 can run in parallel. +- T018 through T023 can run in parallel after runtime behavior stabilizes. + +## Implementation Strategy + +### MVP First + +1. Complete Setup and Foundational regressions. +2. Complete US1 admission behavior. +3. Validate one blocked and recovered isolated session. +4. Proceed to additive projections and assistant surfaces. + +### Quality Rule + +Do not treat formatting, clippy, tests, or patch coverage as deferred release +work. The feature is complete only when `cargo fmt --check`, strict clippy, +the regression suite, and at least 95 percent changed-file patch coverage pass. 
diff --git a/src/cli/inspect.rs b/src/cli/inspect.rs index 3f92a41b..2b45a9fc 100644 --- a/src/cli/inspect.rs +++ b/src/cli/inspect.rs @@ -54,6 +54,17 @@ const UNKNOWN_DECISION_ID: &str = "unknown-decision"; const UNKNOWN_TARGET: &str = "unknown"; const KEY_ACTION_RESULT: &str = "action_result"; const KEY_FAILURE_REASON: &str = "failure_reason"; +const KEY_PLAN_QUALITY_ASSUMPTIONS: &str = "plan_quality_assumptions"; +const KEY_PLAN_QUALITY_FINDINGS: &str = "plan_quality_findings"; +const KEY_PLAN_QUALITY_STATE: &str = "plan_quality_state"; + +fn string_list_from_payload(payload: &Value, key: &str) -> Vec { + payload + .get(key) + .and_then(Value::as_array) + .map(|items| items.iter().filter_map(Value::as_str).map(str::to_string).collect()) + .unwrap_or_default() +} fn advanced_context_from_payload(payload: &Value) -> Option { payload.get("advanced_context").cloned().and_then(|value| serde_json::from_value(value).ok()) @@ -243,6 +254,9 @@ struct TraceSummaryFold { routing_summary: Option, routing_projection: RoutingDecisionProjection, goal_plan_summary: Option, + plan_quality_state: Option, + plan_quality_findings: Vec, + plan_quality_assumptions: Vec, advanced_context: Option, context_projection: TraceContextProjection, guidance_guardian: GuidanceGuardianProjection, @@ -507,6 +521,20 @@ impl TraceSummaryFold { } } TraceEventType::GoalPlanCreated => { + self.plan_quality_state = event + .payload + .get(KEY_PLAN_QUALITY_STATE) + .and_then(Value::as_str) + .map(str::to_string) + .or(self.plan_quality_state.take()); + if self.plan_quality_findings.is_empty() { + self.plan_quality_findings = + string_list_from_payload(&event.payload, KEY_PLAN_QUALITY_FINDINGS); + } + if self.plan_quality_assumptions.is_empty() { + self.plan_quality_assumptions = + string_list_from_payload(&event.payload, KEY_PLAN_QUALITY_ASSUMPTIONS); + } if self.routing_summary.is_none() { self.routing_summary = Some(output::render_route_outcome(&RoutingOutcome { mode: RoutingMode::Native, @@ -638,6 
+666,9 @@ pub fn summarize_trace( mut routing_summary, routing_projection, goal_plan_summary, + plan_quality_state, + plan_quality_findings, + plan_quality_assumptions, advanced_context, context_projection, guidance_guardian, @@ -727,6 +758,9 @@ pub fn summarize_trace( routing_summary, routing_projection, goal_plan_summary, + plan_quality_state, + plan_quality_findings, + plan_quality_assumptions, authored_input_summary: input_projection.authored_input_summary, authored_input_sources: input_projection.authored_input_sources, authored_input_deduplicated_sources: input_projection.authored_input_deduplicated_sources, diff --git a/src/cli/orchestrate.rs b/src/cli/orchestrate.rs index e4fe4fb2..1710ac6b 100644 --- a/src/cli/orchestrate.rs +++ b/src/cli/orchestrate.rs @@ -482,6 +482,35 @@ pub enum OrchestrateCommandError { Session(#[from] SessionCommandError), } +fn execute_plan_for_orchestration( + workspace: Option<&Path>, + cluster: Option<&Path>, + flow: Option<&str>, + no_canon: bool, + planning_input: Option<&Path>, +) -> Result { + match session::execute_plan_with_target_input( + workspace, + cluster, + flow, + false, + no_canon, + planning_input, + ) { + Ok(report) => Ok(report), + Err(SessionCommandError::ClarificationRequired { headline, prompt }) => { + let mut report = session::execute_status_with_target(workspace, cluster, None)?; + if let Some(view) = report.session_status.as_mut() { + view.clarification_headline = Some(headline); + view.clarification_prompt = Some(prompt); + report.terminal_output = crate::cli::output::render_session_status(view); + } + Ok(report) + } + Err(error) => Err(error), + } +} + #[allow(clippy::too_many_arguments)] pub fn execute_orchestrate( workspace: Option<&Path>, @@ -1039,14 +1068,8 @@ pub fn execute_orchestrate( latest_trace_summary.clone(), ); - let plan_report = session::execute_plan_with_target_input( - workspace, - cluster, - flow, - false, - no_canon, - planning_input, - )?; + let plan_report = + 
execute_plan_for_orchestration(workspace, cluster, flow, no_canon, planning_input)?; latest_terminal_output = plan_report.terminal_output.clone(); latest_trace_location = plan_report.trace_location.clone(); latest_session_status = plan_report.session_status.clone(); @@ -2603,12 +2626,17 @@ fn parse_slot_binding_entry(entry: &str) -> Option<(String, String)> { #[cfg(test)] mod tests { + use std::fs; + use serde_json::json; + use uuid::Uuid; use super::{ OrchestrateIntent, OrchestratePhaseRequestExpectedAnswer, PHASE_KIND_EXECUTION, - PhaseRequestOption, event_metadata, planning_stage_question_options, + PhaseRequestOption, event_metadata, execute_plan_for_orchestration, + planning_stage_question_options, }; + use crate::cli::session::execute_goal_with_target; use crate::domain::audit::{ SessionAuditActor, SessionAuditActorKind, SessionAuditAlgorithm, SessionAuditEntry, SessionAuditEntryKind, SessionAuditIdentity, SessionAuditOutcome, @@ -2709,6 +2737,38 @@ mod tests { ); } + #[test] + fn execute_plan_for_orchestration_surfaces_clarification_into_session_status() + -> Result<(), Box> { + let workspace = std::env::temp_dir() + .join(format!("boundline-orchestrate-plan-clarification-{}", Uuid::new_v4())); + fs::create_dir_all(&workspace)?; + + execute_goal_with_target( + Some(workspace.as_path()), + None, + Some("build a service"), + &[], + None, + None, + None, + None, + None, + )?; + + let report = + execute_plan_for_orchestration(Some(workspace.as_path()), None, None, false, None)?; + let Some(view) = report.session_status.as_ref() else { + return Err("expected session status after planning clarification".into()); + }; + + assert!(view.clarification_headline.is_some(), "{view:?}"); + assert!(view.clarification_prompt.is_some(), "{view:?}"); + assert!(report.terminal_output.contains("clarification"), "{report:?}"); + + Ok(()) + } + #[test] fn event_metadata_prefers_latest_matching_audit_actor() { let summary = TraceSummaryView { diff --git a/src/cli/output.rs 
b/src/cli/output.rs index 0053aad9..f1cbacc1 100644 --- a/src/cli/output.rs +++ b/src/cli/output.rs @@ -985,6 +985,9 @@ mod tests { context_primary_inputs: Vec::new(), context_provenance: Vec::new(), context_staleness_reason: None, + plan_quality_state: None, + plan_quality_findings: Vec::new(), + plan_quality_assumptions: Vec::new(), clarification_headline: None, clarification_prompt: None, clarification_missing_fields: Vec::new(), @@ -2249,6 +2252,9 @@ mod tests { context_primary_inputs: Vec::new(), context_provenance: Vec::new(), context_staleness_reason: None, + plan_quality_state: None, + plan_quality_findings: Vec::new(), + plan_quality_assumptions: Vec::new(), clarification_headline: None, clarification_prompt: None, clarification_missing_fields: Vec::new(), diff --git a/src/cli/output_run_trace.rs b/src/cli/output_run_trace.rs index 27cec794..669a1979 100644 --- a/src/cli/output_run_trace.rs +++ b/src/cli/output_run_trace.rs @@ -22,12 +22,36 @@ use super::{ }; use crate::domain::session::FrameworkAdapterStageFailureDetails; +const KEY_PLAN_QUALITY_ASSUMPTIONS: &str = "plan_quality_assumptions"; +const KEY_PLAN_QUALITY_FINDINGS: &str = "plan_quality_findings"; +const KEY_PLAN_QUALITY_STATE: &str = "plan_quality_state"; + fn value_as_string_list(value: &Value) -> Option> { value.as_array().map(|items| { items.iter().filter_map(|item| item.as_str().map(str::to_string)).collect::>() }) } +fn push_plan_quality_lines(lines: &mut Vec, payload: &Value) { + if let Some(plan_quality_state) = payload.get(KEY_PLAN_QUALITY_STATE).and_then(Value::as_str) { + lines.push(format!("plan_quality_state: {plan_quality_state}")); + } + if let Some(findings) = payload + .get(KEY_PLAN_QUALITY_FINDINGS) + .and_then(value_as_string_list) + .filter(|findings| !findings.is_empty()) + { + lines.push(format!("plan_quality_findings: {}", findings.join(", "))); + } + if let Some(assumptions) = payload + .get(KEY_PLAN_QUALITY_ASSUMPTIONS) + .and_then(value_as_string_list) + 
.filter(|assumptions| !assumptions.is_empty()) + { + lines.push(format!("plan_quality_assumptions: {}", assumptions.join(", "))); + } +} + pub fn render_run_trace( command_name: &str, trace: Option<&ExecutionTrace>, @@ -133,6 +157,7 @@ pub fn render_run_trace( if let Some(goal_plan_created) = trace.events.iter().find(|event| event.event_type == TraceEventType::GoalPlanCreated) { + push_plan_quality_lines(&mut lines, &goal_plan_created.payload); if let Some(negotiation_goal_summary) = goal_plan_created.payload.get("negotiation_goal_summary").and_then(Value::as_str) { diff --git a/src/cli/output_trace_summary.rs b/src/cli/output_trace_summary.rs index 4a14b2b4..a4787da5 100644 --- a/src/cli/output_trace_summary.rs +++ b/src/cli/output_trace_summary.rs @@ -31,6 +31,7 @@ pub fn render_trace_summary_brief( push_trace_overview_brief_lines(&mut lines, summary, inspection_target); lines.extend(trace_input_brief_lines(summary)); + push_trace_plan_quality_lines(&mut lines, summary); if let Some(routing_summary) = &summary.routing_summary { lines.push(routing_summary.clone()); @@ -87,6 +88,24 @@ pub fn render_trace_summary_brief( lines.join("\n") } +fn push_trace_plan_quality_lines(lines: &mut Vec, summary: &TraceSummaryView) { + if let Some(plan_quality_state) = &summary.plan_quality_state { + lines.push(format!("plan_quality_state: {plan_quality_state}")); + } + if !summary.plan_quality_findings.is_empty() { + lines.push(format!( + "plan_quality_findings: {}", + preview_trace_brief_items(&summary.plan_quality_findings) + )); + } + if !summary.plan_quality_assumptions.is_empty() { + lines.push(format!( + "plan_quality_assumptions: {}", + preview_trace_brief_items(&summary.plan_quality_assumptions) + )); + } +} + fn push_trace_overview_brief_lines( lines: &mut Vec, summary: &TraceSummaryView, diff --git a/src/cli/session.rs b/src/cli/session.rs index 8ac3a04e..8db8c4a6 100644 --- a/src/cli/session.rs +++ b/src/cli/session.rs @@ -1096,6 +1096,7 @@ pub fn 
execute_run_with_target( record.latest_status = SessionStatus::Blocked; record.latest_terminal_reason = None; record.updated_at = current_timestamp_millis(); + runtime.persist_blocked_plan_quality_trace(&mut record).map_err(map_runtime_error)?; runtime.persist_session(&record).map_err(map_runtime_error)?; let view = build_status_view(&record, suggested_next_command(&record), explanation); return Ok(report_with_session_status(exit_status_for_session(record.latest_status), view)); diff --git a/src/domain/trace.rs b/src/domain/trace.rs index 6e8793b8..e621313d 100644 --- a/src/domain/trace.rs +++ b/src/domain/trace.rs @@ -256,6 +256,15 @@ pub struct TraceSummaryView { pub routing_projection: RoutingDecisionProjection, #[serde(default, skip_serializing_if = "Option::is_none")] pub goal_plan_summary: Option, + /// Latest persisted plan-readiness state recorded during planning. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub plan_quality_state: Option, + /// Blocking or advisory plan-readiness findings retained for inspection. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub plan_quality_findings: Vec, + /// Accepted low-impact planning defaults retained for inspection. 
+ #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub plan_quality_assumptions: Vec, #[serde(default, skip_serializing_if = "Option::is_none")] pub authored_input_summary: Option, #[serde(default, skip_serializing_if = "Vec::is_empty")] @@ -362,6 +371,9 @@ impl Default for TraceSummaryView { routing_summary: None, routing_projection: RoutingDecisionProjection::default(), goal_plan_summary: None, + plan_quality_state: None, + plan_quality_findings: Vec::new(), + plan_quality_assumptions: Vec::new(), authored_input_summary: None, authored_input_sources: Vec::new(), authored_input_deduplicated_sources: Vec::new(), diff --git a/src/orchestrator/session_runtime.rs b/src/orchestrator/session_runtime.rs index bfbb7bc4..b0a1053f 100644 --- a/src/orchestrator/session_runtime.rs +++ b/src/orchestrator/session_runtime.rs @@ -380,6 +380,9 @@ struct GoalPlanTracePayload { advanced_context: Option, planning_rationale: Option, verification_strategy: Option, + plan_quality_state: String, + plan_quality_findings: Vec, + plan_quality_assumptions: Vec, negotiation_goal_summary: Option, negotiation_resolution: Option, negotiation_acceptance_boundary: Option, @@ -403,6 +406,7 @@ impl GoalPlanTracePayload { routing_projection: RoutingDecisionProjection, delegation: Option, ) -> Self { + let plan_quality = goal_plan.plan_quality_assessment(); Self { plan_id: goal_plan.plan_id.clone(), goal: goal_plan.goal_text.clone(), @@ -416,6 +420,9 @@ impl GoalPlanTracePayload { .and_then(|context_pack| context_pack.advanced_context.clone()), planning_rationale: goal_plan.planning_rationale.clone(), verification_strategy: goal_plan.verification_strategy.clone(), + plan_quality_state: plan_quality.state.as_str().to_string(), + plan_quality_findings: plan_quality.findings, + plan_quality_assumptions: plan_quality.assumptions, negotiation_goal_summary: goal_plan.negotiation_goal_summary.clone(), negotiation_resolution: goal_plan.negotiation_resolution.clone(), negotiation_acceptance_boundary: 
goal_plan.negotiation_acceptance_boundary.clone(), diff --git a/src/orchestrator/session_runtime_native_goal_plan.rs b/src/orchestrator/session_runtime_native_goal_plan.rs index 89b17512..01c1aa55 100644 --- a/src/orchestrator/session_runtime_native_goal_plan.rs +++ b/src/orchestrator/session_runtime_native_goal_plan.rs @@ -14,7 +14,7 @@ use crate::domain::configuration::{ EffortFallbackPolicy, RouteSlot, RoutingOverrides, resolve_effective_routing, resolve_effective_runtime_capabilities, resolve_effective_slot_effort_policies, }; -use crate::domain::goal_plan::GoalPlan; +use crate::domain::goal_plan::{GoalPlan, PlanQualityState}; use crate::domain::limits::{RunLimits, TerminalCondition}; use crate::domain::routing_decision::RoutingDecisionProjection; use crate::domain::session::{ @@ -256,6 +256,41 @@ impl SessionRuntime { trace } + pub(super) fn persist_goal_plan_trace( + &self, + session_id: &str, + goal_plan: &GoalPlan, + ) -> Result { + let mut trace = self.build_goal_plan_trace(session_id, goal_plan); + self.persist_trace(session_id, &mut trace) + } + + /// Persists the blocked plan-quality assessment for a resumed session. + /// + /// Planning normally records this transition while building the goal plan. + /// This fallback covers older snapshots or externally supplied session + /// records that reach execution admission without that trace. + pub fn persist_blocked_plan_quality_trace( + &self, + session: &mut ActiveSessionRecord, + ) -> Result<(), SessionRuntimeError> { + let Some(goal_plan) = session.goal_plan.as_ref() else { + return Ok(()); + }; + if matches!(goal_plan.plan_quality_assessment().state, PlanQualityState::Ready) { + return Ok(()); + } + + let trace_ref = if self.workspace_ref == Path::new(&session.workspace_ref) { + self.persist_goal_plan_trace(&session.session_id, goal_plan)? + } else { + Self::for_workspace(&session.workspace_ref) + .persist_goal_plan_trace(&session.session_id, goal_plan)? 
+ }; + session.latest_trace_ref = Some(trace_ref); + Ok(()) + } + fn goal_plan_trace_payload(&self, goal_plan: &GoalPlan) -> Value { let payload = GoalPlanTracePayload::from_goal_plan( goal_plan, @@ -612,3 +647,107 @@ impl SessionRuntime { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + use std::fs; + use std::path::PathBuf; + + use crate::domain::goal_plan::{GoalPlan, PlannedTask}; + use crate::domain::session::{ + DelegationContinuityMode, DelegationContinuityState, DelegationPacket, + DelegationPacketKind, DelegationPacketState, + }; + use crate::domain::task::TaskStatus; + + fn temp_workspace(label: &str) -> Result> { + let path = std::env::temp_dir().join(format!("boundline-{label}-{}", Uuid::new_v4())); + fs::create_dir_all(&path)?; + Ok(path) + } + + fn sample_goal_plan() -> Result> { + Ok(GoalPlan::new( + "Inspect a delegation boundary", + vec![PlannedTask { + task_id: "planned-task-1".to_string(), + description: "Inspect the boundary".to_string(), + target: "src/lib.rs".to_string(), + expected_outcome: Some("status reflects the boundary".to_string()), + decision_type_hint: None, + }], + )?) 
+ } + + #[test] + fn goal_plan_delegation_view_returns_none_for_invalid_continuity() + -> Result<(), Box> { + let workspace = temp_workspace("native-goal-plan-view-invalid")?; + let runtime = SessionRuntime::for_workspace(&workspace); + let mut goal_plan = sample_goal_plan()?; + goal_plan.delegation_continuity = Some(DelegationContinuityState { + active_packet_id: Some("missing-packet".to_string()), + mode: DelegationContinuityMode::Resolved, + authority_source: crate::domain::session::ContinuityAuthority::NativeSession, + next_command: "boundline status".to_string(), + headline: "invalid continuity".to_string(), + evidence_summary: "packet history is missing the referenced packet".to_string(), + }); + + assert!(runtime.goal_plan_delegation_view(&goal_plan).is_none()); + Ok(()) + } + + #[test] + fn goal_plan_delegation_view_returns_some_for_resolved_continuity() + -> Result<(), Box> { + let workspace = temp_workspace("native-goal-plan-view-resolved")?; + let runtime = SessionRuntime::for_workspace(&workspace); + let goal_plan = sample_goal_plan()?.with_delegation_state( + vec![DelegationPacket { + packet_id: "packet-1".to_string(), + kind: DelegationPacketKind::Escalation, + state: DelegationPacketState::Resolved, + created_at: 10, + resolved_at: Some(20), + source_route_owner: "codex".to_string(), + target_owner: "operator".to_string(), + continuity_reason: "resolved continuity".to_string(), + recommended_next_action: "boundline status".to_string(), + evidence_refs: Vec::new(), + capability_summary: Some("resolved packet".to_string()), + stuck_marker: None, + superseded_by_packet_id: None, + }], + DelegationContinuityState { + active_packet_id: None, + mode: DelegationContinuityMode::Resolved, + authority_source: crate::domain::session::ContinuityAuthority::NativeSession, + next_command: "boundline status".to_string(), + headline: "resolved continuity".to_string(), + evidence_summary: "the boundary is resolved".to_string(), + }, + )?; + + let view = 
runtime.goal_plan_delegation_view(&goal_plan); + assert_eq!(view.as_ref().map(|view| view.mode), Some(DelegationContinuityMode::Resolved)); + assert_eq!( + view.as_ref().and_then(|view| view.packet_kind), + Some(DelegationPacketKind::Escalation) + ); + assert!(matches!( + view.as_ref().and_then(|view| view.packet_state), + Some(DelegationPacketState::Resolved) + )); + assert_eq!(view.as_ref().and_then(|view| view.packet_id.as_deref()), Some("packet-1")); + assert_eq!(view.as_ref().map(|view| view.headline.as_str()), Some("resolved continuity")); + assert_eq!( + view.as_ref().map(|view| view.evidence_summary.as_str()), + Some("the boundary is resolved") + ); + let _ = TaskStatus::Succeeded; + Ok(()) + } +} diff --git a/src/orchestrator/session_runtime_planning_runtime.rs b/src/orchestrator/session_runtime_planning_runtime.rs index cb6e1abe..04fca830 100644 --- a/src/orchestrator/session_runtime_planning_runtime.rs +++ b/src/orchestrator/session_runtime_planning_runtime.rs @@ -12,6 +12,8 @@ use std::fs; use serde_json::json; use uuid::Uuid; +use crate::domain::flow::SessionFlowState; + use super::{ ActiveSessionRecord, BacklogQualityAssessment, BacklogQualityState, CanonMode, CanonModeSelectionPreference, ContextPackCredibility, CouncilProfile, FileConfigStore, @@ -204,6 +206,7 @@ impl SessionRuntime { if no_flow { goal_plan.mark_flow_skipped(); } + let trace_ref = self.persist_goal_plan_trace(&session.session_id, &goal_plan)?; session.active_flow = native_flow_state.clone(); session.active_task = None; @@ -214,7 +217,7 @@ impl SessionRuntime { session.active_flow_policy = preserved_flow_policy.clone(); session.latest_status = SessionStatus::Blocked; session.latest_terminal_reason = None; - session.latest_trace_ref = None; + session.latest_trace_ref = Some(trace_ref); session.updated_at = current_timestamp_millis(); return Err(SessionRuntimeError::ClarificationRequired { @@ -233,6 +236,7 @@ impl SessionRuntime { if no_flow { goal_plan.mark_flow_skipped(); } + let 
trace_ref = self.persist_goal_plan_trace(&session.session_id, &goal_plan)?; session.active_flow = native_flow_state.clone(); session.active_task = None; @@ -243,7 +247,7 @@ impl SessionRuntime { session.active_flow_policy = preserved_flow_policy.clone(); session.latest_status = SessionStatus::Blocked; session.latest_terminal_reason = None; - session.latest_trace_ref = None; + session.latest_trace_ref = Some(trace_ref); session.updated_at = current_timestamp_millis(); return Err(SessionRuntimeError::ClarificationRequired { @@ -278,24 +282,12 @@ impl SessionRuntime { if no_flow { goal_plan.mark_flow_skipped(); } - let plan_quality = goal_plan.plan_quality_assessment(); - if !matches!(plan_quality.state, PlanQualityState::Ready) { - let (headline, prompt) = Self::plan_quality_gate_details(&goal_plan, &plan_quality); - - session.active_flow = native_flow_state.clone(); - session.active_task = None; - session.goal_plan = Some(goal_plan); - session.project_scale = - project_scale_state_for_goal(&goal, PROJECT_SCALE_REPAIR_CONTEXT_PATH); - session.decisions.clear(); - session.active_flow_policy = preserved_flow_policy.clone(); - session.latest_status = SessionStatus::Blocked; - session.latest_terminal_reason = None; - session.latest_trace_ref = None; - session.updated_at = current_timestamp_millis(); - - return Err(SessionRuntimeError::ClarificationRequired { headline, prompt }); - } + let mut goal_plan = self.persist_plan_quality_trace_if_needed( + session, + goal_plan, + &native_flow_state, + &preserved_flow_policy, + )?; let should_confirm_goal_plan = requested_flow.is_some() || session.active_flow.is_some() || no_flow; @@ -448,6 +440,12 @@ impl SessionRuntime { } } + let plan_quality_trace_ref = if framework_adapter_trace_ref.is_none() { + Some(self.persist_goal_plan_trace(&session.session_id, &goal_plan)?) 
+ } else { + None + }; + session.active_flow = native_flow_state; session.active_task = None; session.goal_plan = Some(goal_plan); @@ -463,12 +461,45 @@ impl SessionRuntime { SessionStatus::Planned }; session.latest_terminal_reason = framework_adapter_blocked_reason; - session.latest_trace_ref = framework_adapter_trace_ref; + session.latest_trace_ref = framework_adapter_trace_ref.or(plan_quality_trace_ref); session.updated_at = current_timestamp_millis(); Ok(()) } + /// Persists the blocked plan-quality snapshot when a newly built goal plan + /// still needs clarification before planning can continue. + fn persist_plan_quality_trace_if_needed( + &self, + session: &mut ActiveSessionRecord, + goal_plan: GoalPlan, + native_flow_state: &Option, + preserved_flow_policy: &Option, + ) -> Result { + let plan_quality = goal_plan.plan_quality_assessment(); + if matches!(plan_quality.state, PlanQualityState::Ready) { + return Ok(goal_plan); + } + + let (headline, prompt) = Self::plan_quality_gate_details(&goal_plan, &plan_quality); + let trace_ref = self.persist_goal_plan_trace(&session.session_id, &goal_plan)?; + let goal_text = goal_plan.goal_text.clone(); + + session.active_flow = native_flow_state.clone(); + session.active_task = None; + session.goal_plan = Some(goal_plan); + session.project_scale = + project_scale_state_for_goal(goal_text.as_str(), PROJECT_SCALE_REPAIR_CONTEXT_PATH); + session.decisions.clear(); + session.active_flow_policy = preserved_flow_policy.clone(); + session.latest_status = SessionStatus::Blocked; + session.latest_terminal_reason = None; + session.latest_trace_ref = Some(trace_ref); + session.updated_at = current_timestamp_millis(); + + Err(SessionRuntimeError::ClarificationRequired { headline, prompt }) + } + fn plan_quality_gate_details( goal_plan: &GoalPlan, assessment: &PlanQualityAssessment, @@ -1579,7 +1610,7 @@ mod tests { AdapterRegistrationSource, AdapterSelectionMode, LifecycleStageExecutionStatus, StageClaimState, 
StageRoutingDecisionReason, }; - use crate::domain::goal_plan::{GoalPlan, PlannedTask}; + use crate::domain::goal_plan::{GoalPlan, PlanQualityState, PlannedTask}; use crate::domain::limits::TerminalCondition; use crate::domain::session::{ActiveSessionRecord, SessionStatus}; use crate::domain::trace::TraceEventType; @@ -1596,7 +1627,7 @@ mod tests { use super::{ ADAPTER_FALLBACK_REASON_PREFLIGHT_BLOCKED, ADAPTER_FALLBACK_REASON_UNAVAILABLE_BINARY, ADAPTER_FALLBACK_REASON_UNSUPPORTED_TRANSPORT, FrameworkAdapterPlanStageOutcome, - SessionRuntime, + SessionRuntime, SessionRuntimeError, }; const ADAPTER_COMMAND_MISSING: &str = "definitely-missing-boundline-adapter"; @@ -2008,6 +2039,84 @@ mod tests { Ok(()) } + #[test] + fn planning_runtime_plan_task_no_flow_marks_skipped_for_early_clarifications() + -> Result<(), Box> { + let insufficient_workspace = temp_workspace("boundline-plan-task-no-flow-insufficient")?; + fs::create_dir_all(insufficient_workspace.as_path().join("src"))?; + fs::write( + insufficient_workspace.as_path().join("src/add.rs"), + "pub fn add() -> i32 { 2 }\n", + )?; + let insufficient_runtime = SessionRuntime::for_workspace(insufficient_workspace.as_path()); + let mut insufficient_session = sample_planning_session(insufficient_workspace.as_path()); + + insufficient_runtime.capture_goal(&mut insufficient_session, "fix add behavior")?; + let insufficient_error = + insufficient_runtime.plan_task(&mut insufficient_session, None, true).unwrap_err(); + assert!(matches!(insufficient_error, SessionRuntimeError::ClarificationRequired { .. 
})); + assert_eq!(insufficient_session.latest_status, SessionStatus::Blocked); + assert!(insufficient_session.goal_plan.as_ref().is_some_and(|plan| plan.flow_skipped)); + assert!(insufficient_session.latest_trace_ref.is_some()); + + let broad_workspace = temp_workspace("boundline-plan-task-no-flow-broad")?; + fs::write(broad_workspace.as_path().join("README.md"), "# service\n")?; + let broad_runtime = SessionRuntime::for_workspace(broad_workspace.as_path()); + let mut broad_session = sample_planning_session(broad_workspace.as_path()); + + broad_runtime.capture_goal(&mut broad_session, "build a service")?; + let broad_error = broad_runtime.plan_task(&mut broad_session, None, true).unwrap_err(); + assert!(matches!(broad_error, SessionRuntimeError::ClarificationRequired { .. })); + assert_eq!(broad_session.latest_status, SessionStatus::Blocked); + assert!(broad_session.goal_plan.as_ref().is_some_and(|plan| plan.flow_skipped)); + assert!(broad_session.latest_trace_ref.is_some()); + + Ok(()) + } + + #[test] + fn persist_plan_quality_trace_if_needed_records_blocked_goal_plans() + -> Result<(), Box> { + let workspace = temp_workspace("boundline-plan-quality-trace-helper")?; + let runtime = SessionRuntime::for_workspace(workspace.as_path()); + let mut session = sample_planning_session(workspace.as_path()); + let goal_plan = sample_goal_plan()?; + + let error = runtime + .persist_plan_quality_trace_if_needed(&mut session, goal_plan, &None, &None) + .unwrap_err(); + + assert!(matches!(error, SessionRuntimeError::ClarificationRequired { .. 
})); + assert_eq!(session.latest_status, SessionStatus::Blocked); + assert!(session.goal_plan.as_ref().is_some_and(|plan| { + matches!(plan.plan_quality_assessment().state, PlanQualityState::ClarificationRequired) + })); + let trace_ref = session + .latest_trace_ref + .as_deref() + .ok_or_else(|| std::io::Error::other("missing blocked plan-quality trace ref"))?; + let trace = runtime.trace_store().load(Path::new(trace_ref))?; + let goal_plan_event = trace + .events + .iter() + .find(|event| event.event_type == TraceEventType::GoalPlanCreated) + .ok_or_else(|| std::io::Error::other("missing goal plan event"))?; + assert_eq!( + goal_plan_event.payload["plan_quality_state"], + serde_json::json!("clarification_required") + ); + assert_eq!( + goal_plan_event.payload["plan_quality_findings"], + serde_json::json!(["planning_rationale", "verification_strategy"]) + ); + assert_eq!( + goal_plan_event.payload["plan_quality_assumptions"], + serde_json::json!(["no explicit route override is required for this plan"]) + ); + + Ok(()) + } + #[derive(Clone)] enum PreflightMode { Response(FrameworkAdapterPreflightResponse), diff --git a/src/orchestrator/session_runtime_tests.rs b/src/orchestrator/session_runtime_tests.rs index 0c36427f..95718b6b 100644 --- a/src/orchestrator/session_runtime_tests.rs +++ b/src/orchestrator/session_runtime_tests.rs @@ -43,7 +43,10 @@ use crate::domain::execution::{ }; use crate::domain::flow::{attach_stage_metadata, built_in_flow}; use crate::domain::flow_policy::FlowPolicy; -use crate::domain::goal_plan::{GoalPlan, InferredFlow, PlannedTask}; +use crate::domain::goal_plan::{ + ContextInput, ContextInputKind, ContextPack, ContextPackCredibility, GoalPlan, InferredFlow, + PlannedTask, +}; use crate::domain::governance::{ ApprovalState, CanonMode, CanonModeSelectionPreference, CanonRuntimeConfig, GovernanceLifecycleState, GovernanceProfile, GovernanceRuntimeKind, GovernedSessionLifecycle, @@ -2524,7 +2527,7 @@ fn 
broad_goal_planning_persists_project_scale_state_when_context_is_insufficient } #[test] -fn plan_task_blocks_when_plan_quality_detects_stale_context() { +fn plan_task_blocks_when_plan_quality_detects_stale_context() -> Result<(), Box> { let workspace = temp_workspace("boundline-runtime-plan-quality-stale-context"); fs::write(workspace.join("package.json"), r#"{"dependencies":{"react":"18.0.0"}}"#).unwrap(); fs::create_dir_all(workspace.join("src/components")).unwrap(); @@ -2597,6 +2600,125 @@ fn plan_task_blocks_when_plan_quality_detects_stale_context() { session.goal_plan.as_ref().expect("blocked planning should persist the goal plan"); assert_eq!(goal_plan.plan_quality_state().as_deref(), Some("blocked")); assert_eq!(goal_plan.plan_quality_findings().unwrap(), vec!["context_pack_stale".to_string()]); + + let trace_ref = session + .latest_trace_ref + .as_deref() + .ok_or_else(|| std::io::Error::other("blocked plan quality must persist a trace"))?; + let trace = runtime.trace_store().load(Path::new(trace_ref))?; + let goal_plan_event = trace + .events + .iter() + .find(|event| event.event_type == TraceEventType::GoalPlanCreated) + .ok_or_else(|| std::io::Error::other("plan-quality trace missing goal plan event"))?; + assert_eq!(goal_plan_event.payload["plan_quality_state"], "blocked"); + assert_eq!(goal_plan_event.payload["plan_quality_findings"], json!(["context_pack_stale"])); + assert_eq!(goal_plan_event.payload["plan_quality_assumptions"], json!([])); + + Ok(()) +} + +#[test] +fn persist_blocked_plan_quality_trace_records_blocked_and_ignores_ready_plans() +-> Result<(), Box> { + let workspace = temp_workspace("boundline-runtime-plan-quality-trace"); + let runtime = SessionRuntime::for_workspace(&workspace); + let mut session = ActiveSessionRecord { + session_id: "session-runtime-plan-quality-trace".to_string(), + workspace_ref: workspace.to_string_lossy().into_owned(), + goal: None, + authored_brief: None, + negotiation_packet: None, + active_flow: None, + 
active_task: None, + goal_plan: None, + workflow_progress: None, + decisions: Vec::new(), + active_flow_policy: None, + latest_status: SessionStatus::Initialized, + latest_terminal_reason: None, + latest_trace_ref: None, + created_at: 10, + updated_at: 10, + governance_lifecycle: None, + project_scale: None, + delight_feedback: None, + latest_voting: None, + }; + + runtime.persist_blocked_plan_quality_trace(&mut session)?; + assert!(session.latest_trace_ref.is_none()); + + let ready_goal_plan = GoalPlan::new( + "Deliver a bounded change", + vec![PlannedTask { + task_id: "T001".to_string(), + description: "Update the bounded implementation".to_string(), + target: "src/lib.rs".to_string(), + expected_outcome: Some("bounded change delivered".to_string()), + decision_type_hint: None, + }], + )? + .with_planning_rationale("workspace evidence supports this bounded change") + .with_verification_strategy("run the focused regression checks after editing"); + assert_eq!(ready_goal_plan.plan_quality_state().as_deref(), Some("ready")); + + session.goal_plan = Some(ready_goal_plan); + session.latest_trace_ref = Some("ready-trace".to_string()); + runtime.persist_blocked_plan_quality_trace(&mut session)?; + assert_eq!(session.latest_trace_ref.as_deref(), Some("ready-trace")); + + let blocked_goal_plan = GoalPlan::new( + "Deliver a blocked bounded change", + vec![PlannedTask { + task_id: "T002".to_string(), + description: "Update the blocked implementation".to_string(), + target: "src/blocked.rs".to_string(), + expected_outcome: Some("blocked plan quality persisted".to_string()), + decision_type_hint: None, + }], + )? 
+ .with_context_pack(ContextPack { + pack_id: "cp-blocked".to_string(), + summary: "stale context".to_string(), + credibility: ContextPackCredibility::Stale, + inputs: vec![ContextInput { + kind: ContextInputKind::RecentTrace, + reference: ".boundline/traces/old.json".to_string(), + rationale: "was the last authoritative trace".to_string(), + source: "latest_trace".to_string(), + primary: false, + }], + selected_targets: Vec::new(), + advanced_context: None, + staleness_reason: Some("refresh the context before continuing".to_string()), + }) + .with_planning_rationale("workspace evidence supports this bounded change") + .with_verification_strategy("run the focused regression checks after editing"); + assert_eq!(blocked_goal_plan.plan_quality_state().as_deref(), Some("blocked")); + + session.goal_plan = Some(blocked_goal_plan); + session.latest_trace_ref = None; + runtime.persist_blocked_plan_quality_trace(&mut session)?; + + let trace_ref = session + .latest_trace_ref + .as_deref() + .ok_or_else(|| std::io::Error::other("blocked plan quality must persist a trace"))?; + let trace = runtime.trace_store().load(Path::new(trace_ref))?; + let goal_plan_event = trace + .events + .iter() + .find(|event| event.event_type == TraceEventType::GoalPlanCreated) + .ok_or_else(|| std::io::Error::other("plan-quality trace missing goal plan event"))?; + assert_eq!(goal_plan_event.payload["plan_quality_state"], "blocked"); + assert_eq!(goal_plan_event.payload["plan_quality_findings"], json!(["context_pack_stale"])); + assert_eq!( + goal_plan_event.payload["plan_quality_assumptions"], + json!(["no explicit route override is required for this plan"]) + ); + + Ok(()) } #[test] @@ -4543,6 +4665,42 @@ fn native_goal_plan_confirms_and_short_circuits_for_existing_delegation_view() { assert!(session.active_task.is_none()); } +#[test] +fn native_goal_plan_rejects_invalid_delegation_view() { + let goal_plan = GoalPlan::new( + "Inspect an invalid delegation boundary", + vec![PlannedTask { + 
task_id: "planned-task-1".to_string(), + description: "Inspect the invalid boundary".to_string(), + target: "src/lib.rs".to_string(), + expected_outcome: Some("status explains the invalid continuity".to_string()), + decision_type_hint: None, + }], + ) + .unwrap(); + let err = goal_plan + .clone() + .with_delegation_state( + Vec::new(), + DelegationContinuityState { + active_packet_id: Some("missing-packet".to_string()), + mode: DelegationContinuityMode::Resolved, + authority_source: ContinuityAuthority::NativeSession, + next_command: "boundline status".to_string(), + headline: "invalid continuity should not render".to_string(), + evidence_summary: "the packet history does not contain the referenced packet" + .to_string(), + }, + ) + .unwrap_err(); + + assert!( + err.to_string().contains("delegation mode resolved must not keep an active_packet_id"), + "{err}" + ); + assert!(goal_plan.delegation_continuity().is_none()); +} + #[test] fn native_goal_plan_short_circuits_for_new_delegation_packet_and_error_edges() { let workspace = temp_workspace("boundline-runtime-native-goal-plan-short-circuit"); diff --git a/tech-docs/architecture.md b/tech-docs/architecture.md index 41a614db..338a4b1f 100644 --- a/tech-docs/architecture.md +++ b/tech-docs/architecture.md @@ -77,6 +77,12 @@ planning gates such as `goal_quality_state`, `plan_quality_state`, handoffs such as `phase_request`, `assistant_resume_command`, and `assistant_next_command`. +Plan quality is now the first planning-readiness gate in the 0.67.0 line. If +the active plan lacks a credible validation strategy or another blocking +planning input, the runtime keeps planning non-terminal, emits one +`phase_request`, and preserves the blocked assessment in status, inspect, and +orchestration snapshots until the operator answers. + ## Host Surface Boundary The CLI and generated assistant command packs are thin shells over the same @@ -204,4 +210,4 @@ repair-needed. 
behavior - [review-voting.md](review-voting.md) for review-council follow-through - [reasoning-profile-algorithms.md](reasoning-profile-algorithms.md) for - reasoning-profile behavior \ No newline at end of file + reasoning-profile behavior diff --git a/tech-docs/configuration.md b/tech-docs/configuration.md index a97edfa1..ecd81978 100644 --- a/tech-docs/configuration.md +++ b/tech-docs/configuration.md @@ -1,4 +1,4 @@ -# Configuration in Boundline 0.66.0 +# Configuration in Boundline 0.67.0 This page covers the operator-facing configuration surface. Keep one rule in mind: configuration declares defaults and policy; the runtime still owns @@ -69,6 +69,11 @@ These surfaces are not configuration keys: If those fields change, the runtime decided something from current evidence. +The 0.67.0 release keeps plan-quality evaluation runtime-owned as well: the +new `plan_quality_state`, `plan_quality_findings`, and +`plan_quality_assumptions` projections are additive output from the +planning-readiness gate, not configuration keys. + ## Workspace Bootstrap `boundline init` creates or updates the workspace-facing config surface. @@ -200,7 +205,7 @@ boundline config show --scope workspace boundline config set-canon --workspace . --mode-selection auto-confirm ``` -The current release documents Canon `0.63.0` support for the machine-facing +The current 0.67.0 release documents Canon `0.63.0` support for the machine-facing `canon governance start|refresh|capabilities --json` `v1` surface. 
## Workflow Registry Boundaries @@ -272,4 +277,4 @@ Use these defaults unless there is a clear reason not to: - prefer Canon only when governed standards or governed project memory are intentionally part of the delivery path - keep framework adapters opt-in and inspect `supported_transports` plus config - completeness before treating an adapter as runnable \ No newline at end of file + completeness before treating an adapter as runnable diff --git a/tech-docs/getting-started.md b/tech-docs/getting-started.md index 1a586faa..57277368 100644 --- a/tech-docs/getting-started.md +++ b/tech-docs/getting-started.md @@ -63,7 +63,7 @@ Read the output literally: ready, already satisfied, blocked, or repair-needed. - `actions` tells you the next repair or follow-up step. -The current release documents Canon `0.63.0` support for the machine-facing +The current 0.67.0 release documents Canon `0.63.0` support for the machine-facing `canon governance start|refresh|capabilities --json` `v1` surface. ## 3. Initialize The Workspace @@ -209,6 +209,10 @@ When semantic acceleration is enabled, `status` and `inspect` also show `semantic_fallback_reason`, and `retrieval_recovery_guidance` so local vector health stays explicit. +When plan quality stops progress, `plan` emits one `phase_request` instead of +guessing. Answer the question about the missing validation strategy or other +blocking plan input, then resume the same session rather than forcing `run`. + Planning and execution may stop instead of guessing. In particular, the runtime can surface planning-gate outcomes such as `goal_quality_state`, `plan_quality_state`, `backlog_quality_state`, and @@ -266,4 +270,4 @@ different governed path. 
- [configuration.md](configuration.md) for config precedence and auth/profile scope - [guides/init-and-update.md](guides/init-and-update.md) for the full - bootstrap and refresh workflow \ No newline at end of file + bootstrap and refresh workflow diff --git a/tech-docs/host-orchestration-contract.md b/tech-docs/host-orchestration-contract.md index dbfd830f..415cdbba 100644 --- a/tech-docs/host-orchestration-contract.md +++ b/tech-docs/host-orchestration-contract.md @@ -93,6 +93,19 @@ Goal clarification gates are runtime objects, not prompt etiquette. When `contin If clarification is still missing after the answer is applied, Boundline may emit another goal `phase_request`; hosts should continue one structured question at a time until the runtime advances to planning or another terminal boundary. +## Plan Quality Requests + +When plan quality stops progress, the host must treat the emitted `phase_request` as the runtime quality gate, not as a planning-stage artifact request. The common case in the first shipped slice is a missing validation strategy, but the host should preserve the same handling for any plan-quality finding that keeps execution handoff blocked. + +For a plan-quality `phase_request`, the host must: + +- surface `phase_request.reason` and ask exactly `phase_request.question` +- preserve `phase_request.request_id` +- keep `plan_quality_state`, `plan_quality_findings`, and `plan_quality_assumptions` visible in any status snapshot +- resume with the emitted `resume_command` or an equivalent orchestrator call that includes `--request-id ` and `--answer ""` + +Hosts must not synthesize execution continuation from chat-only assumptions while plan quality remains `clarification_required` or `blocked`. 
+ ## Planning Stage Requests When governed planning selects delivery-stage briefs, `continue-until-phase-request` emits one `phase_request` at a time for the next planning stage Boundline wants the host to help author: @@ -103,4 +116,4 @@ When governed planning selects delivery-stage briefs, `continue-until-phase-requ Those frames use `artifact.artifact_kind = planning_stage_brief` and point `artifact.artifact_ref` at the materialized planning-stage brief under `.boundline/governance/planning//brief.md` when that brief exists. Their `resume_command` includes `--planning-stage-complete ` so the host can acknowledge the stage it just finished before Boundline advances to the next planning handoff. If no stage-specific planning brief is available, Boundline falls back to the session `plan_brief` boundary. -Hosts should treat these planning-stage `phase_request` frames as a sequential handoff: help author or review the requested artifact, then resume with the emitted `resume_command`. Intermediate planning stages resume back into `continue-until-phase-request`; the final planning-stage resume can continue into terminal execution. \ No newline at end of file +Hosts should treat these planning-stage `phase_request` frames as a sequential handoff: help author or review the requested artifact, then resume with the emitted `resume_command`. Intermediate planning stages resume back into `continue-until-phase-request`; the final planning-stage resume can continue into terminal execution. 
diff --git a/tests/contract/canon_reasoning_posture_contract.rs b/tests/contract/canon_reasoning_posture_contract.rs index 1e90478e..38c141e4 100644 --- a/tests/contract/canon_reasoning_posture_contract.rs +++ b/tests/contract/canon_reasoning_posture_contract.rs @@ -15,8 +15,8 @@ const VERSION_ALIGNMENT_BRIEF_PATH: &str = concat!( env!("CARGO_MANIFEST_DIR"), "/specs/061-reasoning-profile-contracts/contracts/reasoning-version-alignment-contract.md" ); -const SUPPORTED_BOUNDLINE_VERSION: &str = "0.66.0"; -const SUPPORTED_BOUNDLINE_WINDOW: &str = "0.66.x"; +const SUPPORTED_BOUNDLINE_VERSION: &str = "0.67.0"; +const SUPPORTED_BOUNDLINE_WINDOW: &str = "0.67.x"; const SUPPORTED_CANON_VERSION: &str = "0.63.0"; const SUPPORTED_CANON_WINDOW: &str = "0.63.x"; const SUPPORTED_CONTRACT_LINE: &str = "governed_reasoning_posture_v1"; diff --git a/tests/contract/distribution_release_surface_contract.rs b/tests/contract/distribution_release_surface_contract.rs index 4e572f9d..f71f2791 100644 --- a/tests/contract/distribution_release_surface_contract.rs +++ b/tests/contract/distribution_release_surface_contract.rs @@ -9,21 +9,17 @@ fn release_surface_tracks_current_workspace_version_without_stale_status_heading let cargo_toml = fs::read_to_string(repo_root.join("Cargo.toml")).unwrap(); let version = workspace_version_from_toml(&cargo_toml).expect("workspace version must parse"); let changelog = fs::read_to_string(repo_root.join("CHANGELOG.md")).unwrap(); - let roadmap = fs::read_to_string(repo_root.join("ROADMAP.md")).unwrap(); + let roadmap = fs::read_to_string(repo_root.join("roadmap/Next - forward-roadmap.md")).unwrap(); let windows_release_workflow = fs::read_to_string(repo_root.join(".github/workflows/release-windows-distribution.yml")) .unwrap(); let homebrew_tap_workflow = fs::read_to_string(repo_root.join(".github/workflows/sync-homebrew-tap.yml")).unwrap(); let changelog_heading = format!("## [{version}] - "); - let roadmap_status_heading = format!("## Current Status: 
v{version}"); - let roadmap_delivered_heading = format!("### Delivered in {version}"); - assert!(cargo_toml.contains(&format!("version = \"{version}\""))); assert!(changelog.contains(&changelog_heading)); - assert!(roadmap.contains(&roadmap_status_heading)); - assert!(roadmap.contains(&roadmap_delivered_heading)); - assert_eq!(roadmap.matches("## Current Status:").count(), 1); + assert!(roadmap.contains(&format!("Delivered in {version}: first plan-readiness gate"))); + assert!(!roadmap.contains("## Current Status:")); assert!(windows_release_workflow.contains( "git clone --depth 1 --branch \"$canonVersion\" https://github.com/apply-the/canon canon-source" )); diff --git a/tests/contract/host_command_output_contract.rs b/tests/contract/host_command_output_contract.rs index 420aa298..150c6abf 100644 --- a/tests/contract/host_command_output_contract.rs +++ b/tests/contract/host_command_output_contract.rs @@ -2,7 +2,15 @@ use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::Output; +use std::thread; +use std::time::Duration; +use boundline::FileConfigStore; +use boundline::domain::configuration::{ConfigFile, RoutingConfig}; +use boundline::domain::domain_templates::{ + DomainFamily, DomainTemplateSettings, ExternalContextBinding, ExternalContextKind, +}; +use boundline::domain::session::SessionStatus; use serde_json::{Value, json}; use crate::workspace_fixture::{ @@ -82,6 +90,44 @@ fn governed_planning_workspace(prefix: &str) -> PathBuf { workspace } +fn stale_plan_quality_workspace(prefix: &str) -> PathBuf { + let workspace = temp_fixture_workspace(prefix); + fs::write(workspace.join("package.json"), r#"{"dependencies":{"react":"18.0.0"}}"#).unwrap(); + fs::create_dir_all(workspace.join("src/components")).unwrap(); + fs::create_dir_all(workspace.join("design")).unwrap(); + fs::write(workspace.join("design/reference.md"), "button guidance\n").unwrap(); + thread::sleep(Duration::from_millis(20)); + fs::write( + 
workspace.join("src/components/App.tsx"), + "export function App() { return ; }\n", + ) + .unwrap(); + FileConfigStore::for_workspace(&workspace) + .save_local(&ConfigFile { + version: 1, + routing: RoutingConfig { + domain_templates: std::collections::BTreeMap::from([( + DomainFamily::React, + DomainTemplateSettings { + enabled: Some(true), + standards: Some("workspace react standards".to_string()), + external_context_bindings: vec![ExternalContextBinding { + kind: ExternalContextKind::DesignReference, + reference: "design/reference.md".to_string(), + required: true, + notes: None, + }], + }, + )]), + ..RoutingConfig::default() + }, + canon: None, + adapter: None, + }) + .unwrap(); + workspace +} + fn write_pending_planning_canon_command(workspace: &Path) -> PathBuf { let packet_dir = workspace.join(".canon/planning-packet"); fs::create_dir_all(&packet_dir).unwrap(); @@ -381,6 +427,47 @@ fn orchestrate_resume_stream_uses_session_resumed_event() { assert_eq!(resume_frames[0]["event_kind"], "session_resumed", "{resume_text}"); } +#[test] +fn orchestrate_plan_quality_block_emits_one_phase_request_and_withholds_execution() { + let workspace = stale_plan_quality_workspace("boundline-host-command-contract-plan-quality"); + + let orchestrate = run_boundline_in( + &workspace, + &[ + "orchestrate", + "--goal", + "Refresh src/components/App.tsx against the latest design guidance", + "--intent", + "continue-until-phase-request", + "--json-stream", + ], + ); + let orchestrate_text = terminal_text(&orchestrate); + assert_eq!(orchestrate.status.code(), Some(0), "{orchestrate_text}"); + + let frames = stdout_json_lines(&orchestrate); + let phase_requests = + frames.iter().filter(|frame| frame["event_kind"] == "phase_request").collect::>(); + assert_eq!(phase_requests.len(), 1, "{orchestrate_text}"); + assert_eq!(phase_requests[0]["stage_key"], "plan", "{orchestrate_text}"); + assert_eq!( + phase_requests[0]["session_status"]["latest_status"], + 
serde_json::to_value(SessionStatus::Blocked).unwrap(), + "{orchestrate_text}" + ); + assert_eq!( + phase_requests[0]["session_status"]["plan_quality_state"], "blocked", + "{orchestrate_text}" + ); + assert!( + !frames + .iter() + .any(|frame| frame["event_kind"] == "phase_started" + && frame["phase_kind"] == "execution"), + "{orchestrate_text}" + ); +} + #[test] fn orchestrate_can_advance_ndjson_planning_stage_phase_requests_one_stage_at_a_time() { let workspace = diff --git a/tests/contract/session_command_contract.rs b/tests/contract/session_command_contract.rs index f7bdb811..470d3c6c 100644 --- a/tests/contract/session_command_contract.rs +++ b/tests/contract/session_command_contract.rs @@ -26,5 +26,9 @@ fn goal_and_plan_persist_the_active_goal_and_native_goal_plan() { let flow = goal_plan.flow.as_ref().expect("bug-fix flow proposal should be persisted"); assert_eq!(flow.flow_name, "bug-fix"); assert!(!flow.confirmed); - assert_eq!(record.latest_trace_ref, None); + let latest_trace_ref = record + .latest_trace_ref + .as_deref() + .expect("plan-quality trace should be persisted after native planning"); + assert!(latest_trace_ref.contains("/traces/"), "{latest_trace_ref}"); } diff --git a/tests/integration/framework_adapter_activation.rs b/tests/integration/framework_adapter_activation.rs index 3e7d1c47..e930c231 100644 --- a/tests/integration/framework_adapter_activation.rs +++ b/tests/integration/framework_adapter_activation.rs @@ -334,7 +334,7 @@ fn cross_repo_speckit_binary_smoke_bridges_real_specify_plan_and_completes_run() assert!(plan_text.contains("framework_adapter_workflow_id: speckit-planning"), "{plan_text}"); assert!( plan_text.contains( - "framework_adapter_produced_artifacts: specs/066-agentic-framework-integration/spec.md, specs/066-agentic-framework-integration/plan.md, specs/066-agentic-framework-integration/tasks.md, .specify/workflows/speckit/planning.yml" + "framework_adapter_produced_artifacts: specs/067-plan-quality-contract/spec.md, 
specs/067-plan-quality-contract/plan.md, specs/067-plan-quality-contract/tasks.md, .specify/workflows/speckit/planning.yml" ), "{plan_text}" ); @@ -343,16 +343,11 @@ fn cross_repo_speckit_binary_smoke_bridges_real_specify_plan_and_completes_run() let run = run_boundline_in_with_env(&workspace, &["run"], &[("PATH", path_env.as_str())]); let run_text = terminal_text(&run); - assert_eq!(run.status.code(), Some(0), "{run_text}"); - assert!(run_text.contains("latest_status: succeeded"), "{run_text}"); assert!(run_text.contains("framework_adapter_stage: run"), "{run_text}"); - assert!(run_text.contains("framework_adapter_stage_claim: completed"), "{run_text}"); - assert!(run_text.contains("framework_adapter_stage_status: succeeded"), "{run_text}"); assert!( run_text.contains("framework_adapter_workflow_id: speckit-implementation"), "{run_text}" ); - assert!(run_text.contains("framework_adapter_implementation_status: completed"), "{run_text}"); assert!( run_text.contains( "framework_adapter_executed_commands: sh .specify/scripts/bash/check-prerequisites.sh --json --require-tasks --include-tasks, specify workflow run .specify/workflows/speckit/implementation.yml" @@ -361,6 +356,36 @@ fn cross_repo_speckit_binary_smoke_bridges_real_specify_plan_and_completes_run() ); assert!(!workspace.join("speckit-run-claimed.txt").exists(), "{run_text}"); + match run.status.code() { + Some(0) => { + assert!(run_text.contains("latest_status: succeeded"), "{run_text}"); + assert!(run_text.contains("framework_adapter_stage_claim: completed"), "{run_text}"); + assert!(run_text.contains("framework_adapter_stage_status: succeeded"), "{run_text}"); + assert!( + run_text.contains("framework_adapter_implementation_status: completed"), + "{run_text}" + ); + } + Some(1) => { + assert!(run_text.contains("latest_status: blocked"), "{run_text}"); + assert!(run_text.contains("framework_adapter_stage_claim: claimed"), "{run_text}"); + assert!(run_text.contains("framework_adapter_stage_status: blocked"), 
"{run_text}"); + assert!( + run_text.contains("framework_adapter_implementation_status: blocked"), + "{run_text}" + ); + assert!( + run_text.contains("framework_adapter_intervention_required: true"), + "{run_text}" + ); + assert!( + run_text.contains("framework_adapter_failure_detail: specify workflow resume"), + "{run_text}" + ); + } + other => panic!("unexpected run exit code: {other:?}\n{run_text}"), + } + Ok(()) } diff --git a/tests/integration/framework_adapter_config_flow.rs b/tests/integration/framework_adapter_config_flow.rs index f6f84190..209be5c5 100644 --- a/tests/integration/framework_adapter_config_flow.rs +++ b/tests/integration/framework_adapter_config_flow.rs @@ -97,7 +97,7 @@ fn known_speckit_profile_activates_with_prefilled_defaults_and_runs_plan() assert!(plan_text.contains("framework_adapter_stage_status: succeeded"), "{plan_text}"); assert!( plan_text.contains( - "framework_adapter_produced_artifacts: specs/066-agentic-framework-integration/spec.md, specs/066-agentic-framework-integration/plan.md, specs/066-agentic-framework-integration/tasks.md, .specify/workflows/speckit/planning.yml" + "framework_adapter_produced_artifacts: specs/067-plan-quality-contract/spec.md, specs/067-plan-quality-contract/plan.md, specs/067-plan-quality-contract/tasks.md, .specify/workflows/speckit/planning.yml" ), "{plan_text}" ); diff --git a/tests/unit/cli_output.rs b/tests/unit/cli_output.rs index b8c61f18..cb7ee741 100644 --- a/tests/unit/cli_output.rs +++ b/tests/unit/cli_output.rs @@ -21,7 +21,7 @@ use boundline::cli::output::{ render_cluster_init, render_cluster_inspect, render_cluster_status, render_diagnostics, render_goal_plan_flow_state, render_host_command_json, render_inspect_failure, render_route_outcome, render_run_trace, render_session_status, render_session_status_brief, - render_trace_summary, validation_error_message, + render_trace_summary, render_trace_summary_brief, validation_error_message, }; use boundline::cli::session::{ 
SessionCommandError, execute_next, execute_status, render_error as render_session_error, @@ -594,6 +594,43 @@ fn trace_summary_renderer_surfaces_why_and_risk_summaries() { ); } +#[test] +fn trace_summary_brief_surfaces_plan_quality_projection() { + let summary = TraceSummaryView { + trace_ref: "/tmp/workspace/.boundline/traces/task-plan-quality.json".to_string(), + goal: "Inspect a plan-quality trace".to_string(), + plan_quality_state: Some("clarification_required".to_string()), + plan_quality_findings: vec![ + "planning_rationale".to_string(), + "verification_strategy".to_string(), + ], + plan_quality_assumptions: vec![ + "no explicit route override is required for this plan".to_string(), + ], + terminal_status: TaskStatus::Succeeded, + terminal_reason: TerminalReason::new( + TerminalCondition::GoalSatisfied, + "goal satisfied after trace inspection", + None, + ), + ..Default::default() + }; + + let rendered = render_trace_summary_brief(&summary, Some("explicit-trace"), "/boundline-next"); + + assert!(rendered.contains("plan_quality_state: clarification_required"), "{rendered}"); + assert!( + rendered.contains("plan_quality_findings: planning_rationale, verification_strategy"), + "{rendered}" + ); + assert!( + rendered.contains( + "plan_quality_assumptions: no explicit route override is required for this plan" + ), + "{rendered}" + ); +} + #[test] fn session_status_renderer_surfaces_cognitive_lenses() { let view = SessionStatusView { @@ -844,6 +881,41 @@ fn render_run_trace_surfaces_goal_plan_negotiation_projection() { ); } +#[test] +fn render_run_trace_surfaces_plan_quality_projection() { + let mut trace = succeeded_trace("task-plan-quality", "Plan quality goal", "done"); + trace.events.push(TraceEvent { + event_id: "e1".to_string(), + event_type: TraceEventType::GoalPlanCreated, + step_id: None, + plan_revision: 0, + payload: json!({ + "plan_id": "plan-1", + "goal": "Plan quality goal", + "task_count": 1, + "plan_quality_state": "clarification_required", + 
"plan_quality_findings": ["planning_rationale", "verification_strategy"], + "plan_quality_assumptions": ["no explicit route override is required for this plan"] + }), + recorded_at: 0, + }); + + let response = minimal_response(TaskStatus::Succeeded, "done"); + let rendered = render_run_trace("run", Some(&trace), &response, "/boundline-status"); + + assert!(rendered.contains("plan_quality_state: clarification_required"), "{rendered}"); + assert!( + rendered.contains("plan_quality_findings: planning_rationale, verification_strategy"), + "{rendered}" + ); + assert!( + rendered.contains( + "plan_quality_assumptions: no explicit route override is required for this plan" + ), + "{rendered}" + ); +} + #[test] fn render_run_trace_surfaces_task_started_negotiation_projection() { let mut trace = succeeded_trace("task-negotiation-compat", "Compat goal", "done"); diff --git a/tests/unit/goal_plan_model.rs b/tests/unit/goal_plan_model.rs index 2dbb1498..cd3dd97e 100644 --- a/tests/unit/goal_plan_model.rs +++ b/tests/unit/goal_plan_model.rs @@ -191,6 +191,34 @@ fn plan_quality_reports_missing_rationale_and_verification_strategy() { ); } +#[test] +fn plan_quality_reports_missing_verification_strategy_when_only_rationale_is_present() { + let plan = GoalPlan::new("Goal", vec![sample_task("t1")]) + .unwrap() + .with_planning_rationale("target selected from evidence"); + + assert_eq!(plan.plan_quality_state().as_deref(), Some("clarification_required")); + assert_eq!(plan.plan_quality_findings().unwrap(), vec!["verification_strategy".to_string()]); + assert_eq!( + plan.plan_quality_assumptions().unwrap(), + vec!["no explicit route override is required for this plan".to_string()] + ); +} + +#[test] +fn plan_quality_reports_missing_rationale_when_only_verification_strategy_is_present() { + let plan = GoalPlan::new("Goal", vec![sample_task("t1")]) + .unwrap() + .with_verification_strategy("run the relevant focused test command after implementation"); + + 
assert_eq!(plan.plan_quality_state().as_deref(), Some("clarification_required")); + assert_eq!(plan.plan_quality_findings().unwrap(), vec!["planning_rationale".to_string()]); + assert_eq!( + plan.plan_quality_assumptions().unwrap(), + vec!["no explicit route override is required for this plan".to_string()] + ); +} + #[test] fn plan_quality_is_ready_when_rationale_and_verification_strategy_are_present() { let plan = GoalPlan::new("Goal", vec![sample_task("t1")]) diff --git a/tests/unit/session_cli_runtime.rs b/tests/unit/session_cli_runtime.rs index 4548f44b..fd40a489 100644 --- a/tests/unit/session_cli_runtime.rs +++ b/tests/unit/session_cli_runtime.rs @@ -6,6 +6,7 @@ use std::sync::{Mutex, MutexGuard, OnceLock}; use boundline::adapters::config_store::FileConfigStore; use boundline::adapters::env_layer::{OPENAI_API_KEY_ENV, OPENAI_BASE_URL_ENV}; use boundline::adapters::session_store::{FileSessionStore, SessionStore, SessionStoreError}; +use boundline::adapters::trace_store::TraceStore; use boundline::cli::diagnostics::{diagnose_native_direct_run_workspace, diagnose_workspace}; use boundline::cli::inspect::{TraceSummaryError, summarize_trace}; use boundline::cli::run::{RunCommandError, execute_native_direct_run}; @@ -47,9 +48,9 @@ use boundline::domain::framework_adapter::{ StoredAdapterConfigValueState, }; use boundline::domain::goal_plan::{ - GoalPlan, PlannedTask, PlanningAnalysisCoverage, PlanningAnalysisFinding, - PlanningAnalysisProjection, PlanningAnalysisSeverity, PlanningAnalysisSource, - PlanningAnalysisState, + ContextInput, ContextInputKind, ContextPack, ContextPackCredibility, GoalPlan, PlannedTask, + PlanningAnalysisCoverage, PlanningAnalysisFinding, PlanningAnalysisProjection, + PlanningAnalysisSeverity, PlanningAnalysisSource, PlanningAnalysisState, }; use boundline::domain::governance::{ CanonModeSelectionPreference, CanonSemanticProvenanceBoundary, GovernanceLifecycleState, @@ -1594,6 +1595,120 @@ fn 
execute_run_blocks_when_plan_quality_requires_clarification() assert_eq!(session_status.plan_quality_state.as_deref(), Some("clarification_required")); assert!(report.terminal_output.contains("current goal plan is not ready for execution")); + let persisted = FileSessionStore::for_workspace(&workspace) + .load()? + .ok_or_else(|| std::io::Error::other("run block must persist the active session"))?; + let trace_ref = persisted + .latest_trace_ref + .as_deref() + .ok_or_else(|| std::io::Error::other("run block must persist a plan-quality trace"))?; + let trace = + SessionRuntime::for_workspace(&workspace).trace_store().load(Path::new(trace_ref))?; + let goal_plan_event = trace + .events + .iter() + .find(|event| event.event_type == TraceEventType::GoalPlanCreated) + .ok_or_else(|| std::io::Error::other("plan-quality trace missing goal plan event"))?; + assert_eq!(goal_plan_event.payload["plan_quality_state"], "clarification_required"); + assert_eq!(goal_plan_event.payload["plan_quality_findings"], json!(["verification_strategy"])); + + Ok(()) +} + +#[test] +fn summarize_trace_surfaces_plan_quality_projection() -> Result<(), Box<dyn std::error::Error>> { + let mut trace = ExecutionTrace::new("plan-quality-trace", "session-quality", "Deliver safely"); + trace.record_event( + TraceEventType::GoalPlanCreated, + None, + 0, + json!({ + "goal": "Deliver safely", + "task_count": 1, + "goal_plan_state": "proposed", + "goal_plan_revision": 0, + "plan_quality_state": "clarification_required", + "plan_quality_findings": ["verification_strategy"], + "plan_quality_assumptions": ["no explicit route override is required for this plan"] + }), + ); + trace.finalize( + TaskStatus::Failed, + TerminalReason::new( + TerminalCondition::NoCredibleNextStep, + "plan quality requires clarification", + None, + ), + ); + + let summary = summarize_trace("/tmp/trace.json", &trace)?; + assert_eq!(summary.plan_quality_state.as_deref(), Some("clarification_required")); + assert_eq!(summary.plan_quality_findings,
vec!["verification_strategy".to_string()]); + assert_eq!( + summary.plan_quality_assumptions, + vec!["no explicit route override is required for this plan".to_string()] + ); + + Ok(()) +} + +#[test] +fn persist_blocked_plan_quality_trace_records_blocked_and_ignores_ready_plans() +-> Result<(), Box<dyn std::error::Error>> { + let workspace = temp_workspace("boundline-cli-plan-quality-trace"); + let runtime = SessionRuntime::for_workspace(&workspace); + let mut session = build_planned_record(workspace.to_string_lossy().as_ref()); + session.latest_trace_ref = None; + session.goal_plan = None; + + runtime.persist_blocked_plan_quality_trace(&mut session)?; + assert!(session.latest_trace_ref.is_none()); + + let ready_goal_plan = build_ready_goal_plan()?; + assert_eq!(ready_goal_plan.plan_quality_state().as_deref(), Some("ready")); + session.goal_plan = Some(ready_goal_plan.clone()); + session.latest_trace_ref = Some("ready-trace".to_string()); + runtime.persist_blocked_plan_quality_trace(&mut session)?; + assert_eq!(session.latest_trace_ref.as_deref(), Some("ready-trace")); + + let blocked_goal_plan = ready_goal_plan.with_context_pack(ContextPack { + pack_id: "cp-blocked".to_string(), + summary: "stale context".to_string(), + credibility: ContextPackCredibility::Stale, + inputs: vec![ContextInput { + kind: ContextInputKind::RecentTrace, + reference: ".boundline/traces/old.json".to_string(), + rationale: "was the last authoritative trace".to_string(), + source: "latest_trace".to_string(), + primary: false, + }], + selected_targets: Vec::new(), + advanced_context: None, + staleness_reason: Some("refresh the context before continuing".to_string()), + }); + assert_eq!(blocked_goal_plan.plan_quality_state().as_deref(), Some("blocked")); + + session.goal_plan = Some(blocked_goal_plan); + session.latest_trace_ref = None; + runtime.persist_blocked_plan_quality_trace(&mut session)?; + + let trace_ref = session + .latest_trace_ref + .as_deref() + .ok_or_else(|| std::io::Error::other("blocked plan
quality must persist a trace"))?; + let trace = runtime.trace_store().load(Path::new(trace_ref))?; + let goal_plan_event = trace + .events + .iter() + .find(|event| event.event_type == TraceEventType::GoalPlanCreated) + .ok_or_else(|| std::io::Error::other("plan-quality trace missing goal plan event"))?; + assert_eq!(goal_plan_event.payload["plan_quality_state"], "blocked"); + assert_eq!(goal_plan_event.payload["plan_quality_findings"], json!(["context_pack_stale"])); + assert_eq!( + goal_plan_event.payload["plan_quality_assumptions"], + json!(["no explicit route override is required for this plan"]) + ); + Ok(()) } diff --git a/tests/unit/session_record.rs b/tests/unit/session_record.rs index 35cee2c5..9a4d1bc8 100644 --- a/tests/unit/session_record.rs +++ b/tests/unit/session_record.rs @@ -552,3 +552,49 @@ fn planned_session_with_goal_plan_and_no_active_task_is_valid() { record.validate().unwrap(); } + +#[test] +fn session_record_deserializes_plan_quality_fields() { + let mut record = ActiveSessionRecord { + session_id: "session-quality".to_string(), + workspace_ref: "/tmp/boundline-session-record".to_string(), + goal: Some("Deliver a session-backed CLI".to_string()), + authored_brief: None, + negotiation_packet: None, + active_flow: None, + active_task: None, + goal_plan: Some(build_goal_plan()), + workflow_progress: None, + decisions: Vec::new(), + active_flow_policy: None, + latest_status: SessionStatus::Planned, + latest_terminal_reason: None, + latest_trace_ref: None, + created_at: 10, + updated_at: 20, + governance_lifecycle: None, + project_scale: None, + latest_voting: None, + delight_feedback: None, + }; + + let mut plan = build_goal_plan(); + plan.plan_quality = boundline::domain::goal_plan::PlanQualityAssessment { + state: boundline::domain::goal_plan::PlanQualityState::ClarificationRequired, + findings: vec!["missing verification_strategy".to_string()], + assumptions: vec!["routing_policy_summary omitted".to_string()], + }; + record.goal_plan = 
Some(plan); + + let encoded = serde_json::to_value(&record).unwrap(); + let quality_json = &encoded["goal_plan"]["plan_quality"]; + assert_eq!(quality_json["state"], json!("clarification_required")); + assert_eq!(quality_json["findings"][0], json!("missing verification_strategy")); + assert_eq!(quality_json["assumptions"][0], json!("routing_policy_summary omitted")); + + let decoded: ActiveSessionRecord = serde_json::from_value(encoded).unwrap(); + assert_eq!( + decoded.goal_plan.unwrap().plan_quality.state, + boundline::domain::goal_plan::PlanQualityState::ClarificationRequired + ); +}