diff --git a/.beads/.jsonl.lock b/.beads/.jsonl.lock deleted file mode 100644 index e69de29b..00000000 diff --git a/.claude/commands.json b/.claude/commands.json index aabde158..796e726a 100644 --- a/.claude/commands.json +++ b/.claude/commands.json @@ -92,6 +92,14 @@ "description": "Codebase analysis", "phase": "analysis" }, + "strataudit": { + "file": "skills/strataudit.md", + "cli": "sdp-strataudit run", + "mode": "hybrid", + "llm_subagents": ["analyst", "architect"], + "description": "Evidence-backed strategy traceability audit", + "phase": "analysis" + }, "feature": { "file": "skills/feature.md", "cli": "sdp plan", diff --git a/.gitignore b/.gitignore index 9fd963c0..29d7ea0c 100644 --- a/.gitignore +++ b/.gitignore @@ -62,8 +62,15 @@ archive/ !.sdp/evidence/ !.sdp/checkpoints/ .sdp/log/events.jsonl +.sdp/log/events.jsonl.lock +.sdp/state.json +.sdp/review_verdict.json +.sdp/tier_metrics.json sdp-plugin/.sdp/log/events.jsonl **/events.jsonl +**/events.jsonl.lock +.beads/.jsonl.lock +sdp-plugin/.beads/.jsonl.lock # Build output bin/ diff --git a/.opencode/commands/strataudit.md b/.opencode/commands/strataudit.md new file mode 100644 index 00000000..a6cda43b --- /dev/null +++ b/.opencode/commands/strataudit.md @@ -0,0 +1,35 @@ +--- +description: Evidence-backed strategy traceability audit over a document corpus; use when the user needs document-grounded alignment analysis across strategy, architecture, design, or implementation materials. Prefer an injected host-native runtime when available, otherwise use a configured OpenAI-compatible runtime; OpenRouter is the default network accelerator, not the only path. +agent: architect +--- + +# /strataudit — StratAudit + +## Overview + +This command implements the `strataudit` skill from the SDP workflow. + +See `/prompts/skills/strataudit/SKILL.md` for complete documentation. 
+ +## Usage + +```bash +/strataudit [arguments] +``` + +## Implementation + +The command delegates to the `strataudit` skill, which provides: + +- mode-based document-backed audit flow +- explicit runtime selection order +- structured artifact output +- trust-oriented failure and refusal behavior + +## Related + +- Skills: `prompts/skills/strataudit/SKILL.md` +- Reference: `docs/reference/strataudit-evidence-policy.md` +- Reference: `docs/reference/strataudit-runtime-policy.md` +- Reference: `docs/reference/strataudit-output-modes.md` +- Agents: `prompts/agents/architect.md` diff --git a/.sdp/checkpoints/F050-checkpoint.json b/.sdp/checkpoints/F050-checkpoint.json deleted file mode 100644 index 3b8f3c82..00000000 --- a/.sdp/checkpoints/F050-checkpoint.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "feature": "F050", - "agent_id": "agent-20260205-163000", - "status": "in_progress", - "completed_ws": [], - "current_ws": null, - "execution_plan": { - "phase_1_critical": [ - "00-050-01", # Go Project Setup - READY - "00-050-02", # TDD Runner - blocked by 01 - "00-050-09", # Drift Detector - blocked by 01 - "00-050-14", # Command Auto-Retry - READY (standalone) - "00-050-10", # Checkpoint - READY (standalone) - "00-050-11" # Orchestrator - blocked by 02,03,10 - ], - "phase_2_essential": [ - "00-050-03", # Beads Wrapper - "00-050-04", # CLI Commands - "00-050-05", # Quality Gates - "00-050-07" # Telemetry - ], - "phase_3_polish": [ - "00-050-06", # Quality Watcher - "00-050-08", # Telemetry Analyzer - "00-050-12", # CLI Polish - "00-050-13" # Python Removal - ] - }, - "started_at": "2026-02-05T16:30:00Z", - "beads_mapping": { - "00-050-01": "sdp-x8p", - "00-050-02": "sdp-gtw", - "00-050-03": "sdp-o8h", - "00-050-04": "sdp-645", - "00-050-05": "sdp-fv7", - "00-050-06": "sdp-8nw", - "00-050-07": "sdp-x05", - "00-050-08": "sdp-ofe", - "00-050-09": "sdp-vch", - "00-050-10": "sdp-0hs", - "00-050-11": "sdp-3pd", - "00-050-12": "sdp-asl", - "00-050-13": "sdp-4vl" - }, - 
"retry_strategy": { - "max_parallel_agents": 3, - "wave_execution": true, - "checkpoint_interval": "after_each_ws" - } -} diff --git a/.sdp/review_verdict.json b/.sdp/review_verdict.json deleted file mode 100644 index b02ce58e..00000000 --- a/.sdp/review_verdict.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "feature": "sdp-acbs", - "verdict": "APPROVED", - "timestamp": "2026-02-18T16:10:00Z", - "reviewers": { - "qa": "PASS", - "security": "PASS", - "devops": "PASS", - "sre": "PASS", - "techlead": "PASS", - "docs": "PASS" - }, - "summary": "Installer portability, build defaults, and init compatibility fixes reviewed by specialists; no blocking issues remain." -} diff --git a/.sdp/state.json b/.sdp/state.json deleted file mode 100644 index 7f87420b..00000000 --- a/.sdp/state.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "active_ws": null, - "activated_at": null, - "scope_files": null -} \ No newline at end of file diff --git a/.sdp/tier_metrics.json b/.sdp/tier_metrics.json deleted file mode 100644 index b0874bd2..00000000 --- a/.sdp/tier_metrics.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "WS-001": { - "ws_id": "WS-001", - "current_tier": "T2", - "total_attempts": 16, - "successful_attempts": 12, - "consecutive_failures": 0, - "last_updated": "2026-01-25T11:13:06.221147" - } -} \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 9401884c..491c5c6d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ Quick reference for using SDP CLI v0.9.8 with Claude Code. @vision "AI-powered task manager" # Strategic planning @reality --quick # Codebase analysis @feature "Add user authentication" # Plan feature -@build 00-001-01 # Execute workstream +@build 00-001-01 # Execute executable leaf workstream @review # Quality check ``` @@ -83,9 +83,9 @@ New project? 
**@ux** — UX research for user-facing features (standalone or auto-triggered by @feature) -**@oneshot** — Workstreams exist, want autonomous execution with checkpoint/resume +**@oneshot** — Workstream tree exists, want autonomous execution of ready leaf workstreams with checkpoint/resume -**@build** — Execute a single workstream (use instead of @oneshot for 1-2 WS) +**@build** — Execute a single executable leaf workstream (use instead of @oneshot for 1-2 leaf WS) --- @@ -103,7 +103,7 @@ New project? | `@ux` | UX research (mental model elicitation) | Planning | | `@design` | Workstream design (EnterPlanMode) | Planning | | `@oneshot` | Execution orchestrator (autonomous) | Execution | -| `@build` | Execute single workstream (TDD) | Execution | +| `@build` | Execute single executable leaf workstream (TDD) | Execution | | `@review` | Multi-agent quality review | Execution | | `@deploy` | Merge feature branch to main | Execution | @@ -166,14 +166,14 @@ New project? # 1. Plan feature @feature "Add payment processing" -# 2. Execute all workstreams +# 2. Execute all ready leaf workstreams @oneshot ``` ### Manual Flow (learning or debugging) ```bash -@build 00-050-01 # Execute one at a time +@build 00-050-01 # Execute one leaf at a time @build 00-050-02 @review # Review when done @deploy # Deploy @@ -188,8 +188,9 @@ New project? - [PROTOCOL.md](docs/PROTOCOL.md) 2. **Key concepts:** - - **Workstream (WS)**: Atomic task, one-shot execution - - **Feature**: 5-30 workstreams + - **Aggregate Workstream**: non-executable container or roll-up over 2+ leaf workstreams + - **Leaf Workstream**: atomic executable unit + - **Feature**: 5-30 workstreams total, but only leaves are directly executable - **Release**: 10-30 features 3. **Install Beads CLI** (task tracking): diff --git a/README.md b/README.md index f56aab28..57254933 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ Canonical prompt sources live in `prompts/`. 
Tool-specific directories such as ` | [QUICKSTART.md](docs/QUICKSTART.md) | Recommended first-success path | | [CLI_REFERENCE.md](docs/CLI_REFERENCE.md) | Current `sdp` command surfaces | | [PROTOCOL.md](docs/PROTOCOL.md) | Current protocol overview | +| [PRODUCT_CONTRACT.md](docs/PRODUCT_CONTRACT.md) | Product definition and mode policy | | [reference/README.md](docs/reference/README.md) | Reference index and legacy-doc status | | [.codex/INSTALL.md](.codex/INSTALL.md) | Codex-specific install notes | | [MANIFESTO.md](docs/MANIFESTO.md) | Vision and rationale | diff --git a/docs/PRODUCT_CONTRACT.md b/docs/PRODUCT_CONTRACT.md new file mode 100644 index 00000000..8553caa6 --- /dev/null +++ b/docs/PRODUCT_CONTRACT.md @@ -0,0 +1,304 @@ +# SDP Product Contract + +> **Version:** 1.0.0 +> **Status:** Stable +> **Last Updated:** 2026-04-18 + +## Overview + +This document is the **single source of truth** for SDP's product definition. It defines the two user paths, the stage model, control surfaces, and harness support policy. + +All other documentation (QUICKSTART.md, PROTOCOL.md) references this contract. If you find contradictions, this document takes precedence. + +--- + +## User Paths + +### Path 1: Local Mode (Default) + +**Target:** Individual developers working locally. + +**Entry Point:** `@feature` skill. + +**Flow:** + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ LOCAL MODE — DEFAULT PATH │ +├─────────────────────────────────────────────────────────────────────┤ +│ │ +│ 1. BOOTSTRAP │ +│ ├─ Install SDP: curl install.sh | sh │ +│ ├─ Run: sdp init --auto │ +│ └─ Result: .sdp/config.yml + project structure │ +│ │ +│ 2. INTAKE │ +│ ├─ Run: @feature "Add user authentication" │ +│ ├─ Agent asks: technical approach, UI/UX, testing, security │ +│ └─ Result: docs/intent/sdp-XXX.json + docs/drafts/beads-sdp-XXX.md │ +│ │ +│ 3. 
SHAPING │ +│ ├─ Run: @design beads-sdp-XXX │ +│ ├─ Agent explores codebase, creates workstreams │ +│ └─ Result: docs/workstreams/beads-sdp-XXX.md (5-30 leaf WS) │ +│ │ +│ 4. EXECUTION │ +│ ├─ Option A (Autonomous): @oneshot │ +│ │ ├─ Orchestrator executes all ready leaf WS │ +│ │ ├─ Saves checkpoints after each WS │ +│ │ └─ Resumes from interruption │ +│ │ │ +│ └─ Option B (Manual): @build 00-XXX-01 (repeat per WS) │ +│ ├─ Execute single leaf workstream with TDD │ +│ └─ Commit when complete │ +│ │ +│ 5. FINDINGS │ +│ ├─ Run: @review │ +│ ├─ Multi-agent quality review (6 agents) │ +│ └─ Result: APPROVED / CHANGES_REQUESTED │ +│ │ +│ 6. DELIVERY │ +│ ├─ Run: @deploy │ +│ ├─ Generate: PR, changelog, git tag │ +│ └─ Merge to main │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +**Control Surfaces:** +- **Primary:** Claude Code CLI (skills: @feature, @build, @review, @deploy) +- **Companion:** Beads CLI (bd ready, bd create, bd close) + +**Data Storage:** Local git repo + `.sdp/` directory. + +--- + +### Path 2: Operator Mode (Advanced) + +**Target:** Platform teams running SDP in CI/CD. + +**Entry Point:** `sdp-evidence` CLI binary. + +**Flow:** + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ OPERATOR MODE — ADVANCED PATH │ +├─────────────────────────────────────────────────────────────────────┤ +│ │ +│ 1. BOOTSTRAP │ +│ ├─ Install evidence CLI: go install github.com/fall-out-bug/sdp/sdp-evidence/cmd/sdp-evidence@latest │ +│ ├─ Configure CI: .github/workflows/sdp-evidence.yml │ +│ └─ Result: Evidence gate in PR pipeline │ +│ │ +│ 2. INTAKE │ +│ ├─ Same as Local Mode (@feature → @design) │ +│ └─ Or: Import workstreams from external tools │ +│ │ +│ 3. SHAPING │ +│ ├─ Same as Local Mode │ +│ └─ Or: Use Strataudit for corpus analysis │ +│ │ +│ 4. EXECUTION │ +│ ├─ Agents run in CI (OpenCode, Claude Code, etc.) 
│ +│ ├─ Evidence envelope emitted: .sdp/evidence/<id>.json │ +│ └─ Hash-chain provenance enforced │ +│ │ +│ 5. FINDINGS │ +│ ├─ Run: sdp-evidence validate .sdp/evidence/<id>.json │ +│ ├─ Validate: completeness, schema, hash-chain │ +│ └─ Result: VALID / INVALID + details │ +│ │ +│ 6. DELIVERY │ +│ ├─ PR gate: Evidence validation must pass │ +│ ├─ Run: sdp-evidence gate check │ +│ └─ Block merge if evidence missing or invalid │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +**Control Surfaces:** +- **Primary:** `sdp-evidence` CLI (validate, gate check) +- **Companion:** CI/CD system (GitHub Actions, GitLab CI, etc.) + +**Data Storage:** Evidence envelopes in `.sdp/evidence/` + git for provenance. + +--- + +## Stage Model + +SDP follows a six-stage model. Both paths use the same stages, but differ in control surfaces. + +| Stage | Purpose | Local Mode Trigger | Operator Mode Trigger | +|-------|---------|-------------------|----------------------| +| **Bootstrap** | Initialize project | `sdp init --auto` | `sdp-evidence init` | +| **Intake** | Gather requirements | `@feature` skill | Same (local dev) | +| **Shaping** | Plan workstreams | `@design` skill | Same (local dev) | +| **Execution** | Implement work | `@oneshot` or `@build` | CI agents + evidence emit | +| **Findings** | Quality validation | `@review` skill | `sdp-evidence validate` | +| **Delivery** | Ship to production | `@deploy` skill | PR gate + merge | + +**Key Principle:** Stages are sequential but can loop back (e.g., Findings → Execution if review fails). 
+ +--- + +## Control Surfaces + +### Primary Control Surface + +**Claude Code CLI** (recommended for Local Mode): + +```bash +@feature "Add X" # Intake + Shaping +@oneshot # Execution +@review # Findings +@deploy # Delivery +``` + +### Companion Control Surface + +**Beads CLI** (task tracking): + +```bash +bd ready # Find ready tasks +bd create --title="X" # Create task +bd close <id> # Close task +``` + +### Operator Control Surface + +**sdp-evidence CLI** (Operator Mode): + +```bash +sdp-evidence validate .sdp/evidence/<id>.json +sdp-evidence gate check +``` + +### Board Visibility + +**Evidence Dashboard** (planned): + +- View evidence envelopes across all features +- Filter by stage, agent, harness +- Trace hash-chain provenance + +### Quickstart Commands (planned) + +**CLI-based first experience:** + +```bash +sdp assess [project-path] # Read-only project assessment +sdp try "task description" # Try a task on temporary branch +sdp adopt # Adopt successful trial into SDP +``` + +These commands provide a lightweight entry point before full SDP setup. Currently in planning phase. + +--- + +## Harness Support Policy + +SDP is designed to work across multiple AI harnesses. Support levels: + +### Recommended + +| Harness | Version | Notes | +|---------|---------|-------| +| **Claude Code** | Latest | Primary target. Full skill support. | +| **OpenCode** | Latest | Full skill support via `.opencode/` adapters. | + +### Supported + +| Harness | Version | Notes | +|---------|---------|-------| +| **Cursor** | Latest | Skills via `.cursor/` adapters. | +| **Windsurf** | Latest | Skills via `.windsurf/` adapters. | + +**"Supported" means:** Skills load and execute, but some advanced features may be limited. + +### Compatible + +| Harness | Version | Notes | +|---------|---------|-------| +| **Copilot** | Latest | Protocol-compatible (use prompts/schemas manually). | +| **Zed** | Latest | Protocol-compatible (use prompts/schemas manually). 
| + +**"Compatible" means:** You can use SDP prompts/schemas, but no skill auto-loading. + +--- + +## Default Path Diagram + +This diagram is reusable across all SDP documentation: + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ BOOTSTRAP │────▶│ INTAKE │────▶│ SHAPING │────▶│ EXECUTION │ +│ │ │ │ │ │ │ │ +│ sdp init │ │ @feature │ │ @design │ │ @oneshot │ +│ │ │ │ │ │ │ @build │ +└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ + │ + ▼ + ┌──────────────┐ + │ FINDINGS │◀─────┐ + │ │ │ + │ @review │ │ + │ │ │ + └──────────────┘ │ + │ │ + ▼ │ + ┌──────────────┐ │ + │ DELIVERY │──────┘ (if CHANGES_REQUESTED) + │ │ + │ @deploy │ + │ │ + └──────────────┘ +``` + +--- + +## Contract Validity + +This contract is valid for: + +- **SDP Protocol:** v0.10.0+ +- **SDP CLI:** v0.9.8+ +- **Evidence CLI:** v1.0.0+ (when released) + +### Versioning Policy + +- **Major version bump:** Breaking change to stages, paths, or harness support. +- **Minor version bump:** New skills, new harness support, UX improvements. +- **Patch version bump:** Bug fixes, documentation updates. 
+ +--- + +## Related Documents + +| Document | Purpose | Link | +|----------|---------|------| +| **QUICKSTART.md** | Get started in 5 minutes | [Quick Start](QUICKSTART.md) | +| **PROTOCOL.md** | Full protocol specification | [Protocol](PROTOCOL.md) | +| **PRODUCT_VISION.md** | Product vision and positioning | [Vision](../PRODUCT_VISION.md) | +| **CLAUDE.md** | Claude Code integration guide | [Claude Guide](../CLAUDE.md) | + +--- + +## Change Log + +| Date | Version | Change | +|------|---------|--------| +| 2026-04-18 | 1.0.0 | Initial product contract (F097) | + +--- + +## Getting Help + +- **Documentation:** [docs/](../) +- **Issues:** [GitHub Issues](https://github.com/fall-out-bug/sdp/issues) +- **Community:** [OpenCode Ecosystem](https://github.com/kubeopencode) + +--- + +**End of Contract** diff --git a/docs/PROTOCOL.md b/docs/PROTOCOL.md index 47113f82..2286b672 100644 --- a/docs/PROTOCOL.md +++ b/docs/PROTOCOL.md @@ -2,6 +2,8 @@ This document describes the current public SDP model at a high level. For exact command behavior, use `sdp --help`. +For the product-level definition of Local Mode, Operator Mode, stages, and support policy, see [PRODUCT_CONTRACT.md](PRODUCT_CONTRACT.md). + ## What Ships Today SDP currently ships three layers: @@ -67,9 +69,10 @@ Use these sources in order: 1. `sdp --help` for CLI behavior 2. [README.md](../README.md) and [QUICKSTART.md](QUICKSTART.md) for onboarding -3. [CLI_REFERENCE.md](CLI_REFERENCE.md) for the current command map -4. [reference/skills.md](reference/skills.md) for prompt-surface layout -5. `prompts/` source files when you need exact prompt definitions +3. [PRODUCT_CONTRACT.md](PRODUCT_CONTRACT.md) for product definition and mode policy +4. [CLI_REFERENCE.md](CLI_REFERENCE.md) for the current command map +5. [reference/skills.md](reference/skills.md) for prompt-surface layout +6. 
`prompts/` source files when you need exact prompt definitions ## Legacy Note diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md index 8e98291f..cf0fbb05 100644 --- a/docs/QUICKSTART.md +++ b/docs/QUICKSTART.md @@ -4,6 +4,8 @@ Get from zero to a real first success in one repo. Use this guide when you are adopting SDP in your own project. If you are contributing to SDP itself, start with `DEVELOPMENT.md` and `CONTRIBUTING.md`. +> This guide follows Local Mode from [PRODUCT_CONTRACT.md](PRODUCT_CONTRACT.md). For the full product definition, see that document. + ## 0. Pick the Right Mode | Mode | Start here when | Requires | @@ -140,7 +142,7 @@ bd create --title="..." bd close ``` -Once Beads is in place, SDP also installs prompt surfaces such as `/feature`, `/build`, `/review`, and `/oneshot`. That mode assumes workstreams and operator discipline already exist; it is not required for a first run. +Once Beads is in place, SDP also installs prompt surfaces such as `/feature`, `/build`, `/review`, `/oneshot`, and `/strataudit`. That mode assumes workstreams and operator discipline already exist; it is not required for a first run. Important distinction: @@ -152,5 +154,6 @@ Important distinction: - Use [CLI_REFERENCE.md](CLI_REFERENCE.md) for current command behavior. - Use [PROTOCOL.md](PROTOCOL.md) for the current protocol overview. +- Use [PRODUCT_CONTRACT.md](PRODUCT_CONTRACT.md) for product definition and mode policy. - Use [reference/README.md](reference/README.md) for the reference index and legacy-doc status. - Use [MANIFESTO.md](MANIFESTO.md) for vision and rationale. diff --git a/docs/reference/skills.md b/docs/reference/skills.md index 7626ce90..908f9ea3 100644 --- a/docs/reference/skills.md +++ b/docs/reference/skills.md @@ -26,11 +26,12 @@ Do not treat adapter directories as the source of truth. Edit `prompts/`. 
| `/feature` | `prompts/commands/feature.md`, `prompts/skills/feature/SKILL.md` | Planning entry point | | `/idea` | `prompts/commands/idea.md`, `prompts/skills/idea/SKILL.md` | Requirements capture | | `/design` | `prompts/commands/design.md`, `prompts/skills/design/SKILL.md` | Workstream planning | -| `/build` | `prompts/commands/build.md`, `prompts/skills/build/SKILL.md` | Single-workstream execution | +| `/build` | `prompts/commands/build.md`, `prompts/skills/build/SKILL.md` | Single leaf-workstream execution | | `/review` | `prompts/commands/review.md`, `prompts/skills/review/SKILL.md` | Review and verdict loop | | `/oneshot` | `prompts/commands/oneshot.md`, `prompts/skills/oneshot/SKILL.md` | Outer-loop feature execution | | `/deploy` | `prompts/commands/deploy.md`, `prompts/skills/deploy/SKILL.md` | Prompt-level release handoff surface | | `/beads` | `prompts/commands/beads.md`, `prompts/skills/beads/SKILL.md` | Beads task-tracker integration | +| `/strataudit` | `prompts/commands/strataudit.md`, `prompts/skills/strataudit/SKILL.md` | Evidence-backed strategy traceability audit | | `/debug`, `/hotfix`, `/bugfix`, `/issue` | matching files under `prompts/commands/` and `prompts/skills/` | Investigation and fix flows | ## Current Operating Reality @@ -51,4 +52,5 @@ Important distinction: - [../CLI_REFERENCE.md](../CLI_REFERENCE.md) for current CLI surfaces - [../PROTOCOL.md](../PROTOCOL.md) for the current protocol overview +- [../PRODUCT_CONTRACT.md](../PRODUCT_CONTRACT.md) for product definition and mode policy - [../../prompts/README.md](../../prompts/README.md) for prompt layout diff --git a/docs/reference/strataudit-evidence-policy.md b/docs/reference/strataudit-evidence-policy.md new file mode 100644 index 00000000..e2e55246 --- /dev/null +++ b/docs/reference/strataudit-evidence-policy.md @@ -0,0 +1,48 @@ +# StratAudit Evidence Policy + +StratAudit must distinguish verified evidence from derived and inferred claims. 
+ +## Authority Order + +From strongest to weakest: + +1. exact document text, quote, or span +2. document metadata and local context such as section or level +3. derived but inspectable artifacts such as coverage tables or saved traces +4. model-assisted inference over inspected evidence +5. analyst prose and executive summary + +Lower layers may summarize higher layers. Lower layers may not overrule them. + +## Claim Classes + +| Class | Meaning | Allowed wording | +|------|---------|-----------------| +| `verified` | backed by inspectable source text or span plus context | "verified", "shown in", "documented in" | +| `derived` | computed from inspectable artifacts with a clear denominator | "derived from artifacts", "coverage shows" | +| `inferred` | model or analyst synthesis built on evidence but not directly quoted | "inferred", "likely", "suggests" | +| `unsupported` | evidence is missing, ambiguous, or below trust bar | do not make the claim | + +## Hard Rules + +- never fabricate quotes, spans, or source locations +- never call similarity alone a verified trace +- never translate entity names in the evidence layer unless explicitly requested +- never publish a coverage percentage without saying what the denominator is +- never let the executive summary sound stronger than the strongest underlying claim class + +## Minimum Evidence Bundle + +Every substantive claim should expose or be traceable to: + +- source document or artifact +- quote/span, trace row, or coverage table +- claim class: `verified`, `derived`, or `inferred` +- caveat when provenance is partial + +## Downgrade and Refusal Rules + +- if source text is missing, downgrade `verified` to `inferred` or refuse +- if the denominator is unclear, refuse to claim numeric coverage +- if the runtime produced plausible prose but no inspectable evidence, mark it `unsupported` +- if the user requests broader certainty than the evidence allows, say so directly diff --git 
a/docs/reference/strataudit-output-modes.md b/docs/reference/strataudit-output-modes.md new file mode 100644 index 00000000..dda5116a --- /dev/null +++ b/docs/reference/strataudit-output-modes.md @@ -0,0 +1,31 @@ +# StratAudit Output Modes + +The portable skill should not behave like one vague "run the audit" instruction. +Different user intents need different outputs and different trust boundaries. + +## Modes + +| Mode | Primary question | Inputs | Must emit | Must not claim | +|------|------------------|--------|-----------|----------------| +| `corpus-audit` | "What corpus do we actually have and is it usable?" | corpus root or source dirs | corpus inventory, exclusions, per-level coverage, trust caveats | final strategic alignment verdict | +| `traceability-audit` | "What links to what and where are the gaps?" | corpus or reusable artifacts | entities, traces, findings, trust caveats | verified traces without inspectable support | +| `coverage-audit` | "What is covered, by level and by document?" | corpus or reusable artifacts | coverage table with explicit denominators | percentages without denominator or caveat | +| `evidence-pack` | "Show me the proof behind the claims." | existing `.strataudit` artifacts preferred | quote/span references, trace rows, grouped findings, caveats | executive confidence beyond the evidence pack | +| `report-redraft` | "Make the report better without changing truth." 
| existing evidence pack or prior artifacts | rewritten sections and explicit unchanged trust boundaries | new entities, new traces, or upgraded certainty | + +## Default Routing + +- unknown corpus quality -> `corpus-audit` +- direct alignment or trace question -> `traceability-audit` +- explicit coverage question -> `coverage-audit` +- request for proof or trace drill-down -> `evidence-pack` +- request to rewrite an existing audit report -> `report-redraft` + +## Output Discipline + +Every mode should expose: + +- artifact paths or artifact references +- selected runtime or artifact-only path +- key caveats +- what is not claimed diff --git a/docs/reference/strataudit-runtime-policy.md b/docs/reference/strataudit-runtime-policy.md new file mode 100644 index 00000000..5fd992fc --- /dev/null +++ b/docs/reference/strataudit-runtime-policy.md @@ -0,0 +1,47 @@ +# StratAudit Runtime Policy + +StratAudit is runtime-neutral. The skill must choose a runtime by capability and +trust needs, not by vendor preference. + +## Runtime Order + +1. harness-injected host-native runtime +2. configured OpenAI-compatible runtime +3. OpenRouter as the default network enhancer/fallback +4. 
no runtime at all, permitted only for artifact-only modes + +## Capability Requirements By Mode + +| Mode | Required capability | +|------|---------------------| +| `corpus-audit` | text extraction and enough reasoning to summarize corpus quality; can reuse artifacts if they already exist | +| `traceability-audit` | structured extraction, embeddings, and conservative verification | +| `coverage-audit` | inspectable coverage artifacts or enough runtime support to produce them | +| `evidence-pack` | no runtime if artifacts already exist; otherwise same floor as the audit that produced them | +| `report-redraft` | no runtime if rewriting from existing artifacts only; must not introduce new claims | + +## Selection Rules + +- prefer host-native models when they meet the capability bar +- prefer artifact reuse over recomputation when the user is redrafting or packaging evidence +- do not silently fall back from a stronger mode to a weaker one +- if the runtime cannot support the requested mode, fail closed with an explicit explanation + +## CLI Boundary + +`sdp-strataudit` can resolve configured network runtimes. It cannot create a +host-native runtime by itself. + +That means: + +- harnesses such as Cursor, Codex, Claude, or OpenCode can inject native runtimes when available +- OpenRouter remains useful as a capability amplifier, not as the only valid execution path + +## Must Be Reported + +Every run should state: + +- selected mode +- runtime class used +- whether artifacts were reused or regenerated +- any trust caveat caused by runtime limits diff --git a/prompts/agents/orchestrator.md b/prompts/agents/orchestrator.md index 53c8a8a8..88a129b3 100644 --- a/prompts/agents/orchestrator.md +++ b/prompts/agents/orchestrator.md @@ -43,19 +43,21 @@ See [GIT_SAFETY.md](../.claude/GIT_SAFETY.md) for full guidelines. ## Role -Execute all workstreams of a feature autonomously, managing dependencies, handling errors, and ensuring quality. 
+Execute all ready executable leaf workstreams of a feature autonomously, +managing dependencies, handling errors, and ensuring quality. ## Core Responsibilities 1. **Planning** - - Identify all workstreams for the feature - - Build dependency graph (from WS files or Beads) + - Identify the full workstream tree for the feature + - Separate aggregate/container workstreams from executable leaf workstreams + - Build the leaf execution dependency graph (from WS files or Beads) - Determine optimal execution order (topological sort) 2. **Execution** - - Execute each WS using `@build` skill + - Execute each ready leaf WS using `@build` skill - @build handles: Beads status + TDD + quality gates + commit - - Update checkpoint after each completed WS + - Update checkpoint after each completed leaf WS - **CRITICAL: Continue immediately to next WS without stopping** - **DO NOT ask user for decision after each batch** - **DO NOT provide progress summary until ALL complete** @@ -68,15 +70,15 @@ Execute all workstreams of a feature autonomously, managing dependencies, handli 4. 
**Quality Assurance** - Verify all Acceptance Criteria met - Ensure coverage ≥ 80% - - Run @review after all WS complete + - Run @review after all executable leaf WS complete - Run @deploy if @review approved ## Decision Making ### Autonomous Decisions (No Human Needed) -- **Execution order**: Based on dependency graph -- **Which @build to call**: Use ws_id (e.g., `@build 00-050-01`) +- **Execution order**: Based on the executable leaf dependency graph +- **Which @build to call**: Use a leaf ws_id (e.g., `@build 00-050-01`) - **Retries**: Retry failed WS up to 2 times - **Implementation**: @build handles all implementation details - **Minor fixes**: Linter errors, type hints, imports @@ -98,17 +100,18 @@ Input: Feature ID (F050) - Detect Beads: `bd --version` + `.beads/` exists - Glob workstreams: docs/workstreams/backlog/00-050-*.md - If Beads enabled: Read .beads-sdp-mapping.jsonl - - Build dependency graph (check "Dependencies:" in each WS) + - Compile workstream tree: aggregate vs leaf + - Build leaf dependency graph (check frontmatter parentage + dependencies) - Create checkpoint: .oneshot/{feature_id}-checkpoint.json ↓ -2. Loop: While WS remaining - - Find ready WS (all dependencies satisfied) +2. Loop: While executable leaf WS remain + - Find ready leaf WS (all dependencies satisfied; aggregate parents do not dispatch) - Execute: @build {ws_id} - If Beads: Beads IN_PROGRESS → TDD → quality → Beads CLOSED → commit - If no Beads: TDD → quality → commit - Update checkpoint with completed ws_id (SILENTLY, no user interaction) - Report progress with timestamp (CONTINUE immediately, do not stop) - - **DO NOT STOP until ALL workstreams complete OR CRITICAL blocker** + - **DO NOT STOP until ALL executable leaf workstreams complete OR CRITICAL blocker** ↓ 3. Final Review - Execute: @review {feature_id} @@ -123,8 +126,8 @@ Input: Feature ID (F050) **CRITICAL EXECUTION RULES:** -1. 
**Continuous Execution**: Execute ALL workstreams in ONE session - - ✅ Update checkpoint after each WS (transparent, no stop) +1. **Continuous Execution**: Execute ALL ready executable leaf workstreams in ONE session + - ✅ Update checkpoint after each leaf WS (transparent, no stop) - ❌ DO NOT stop after each batch - ❌ DO NOT ask "What would you like me to do?" - ✅ Continue immediately to next WS @@ -132,7 +135,7 @@ Input: Feature ID (F050) 2. **Only Stop For:** - ⛔ CRITICAL blocker (circular deps, scope overflow) - ⛔ Quality gate failure after 2 retries - - ✅ ALL workstreams complete (then provide summary) + - ✅ ALL executable leaf workstreams complete (then provide summary) 3. **Checkpoint Behavior:** - Save checkpoint: `.oneshot/{feature_id}-checkpoint.json` @@ -146,7 +149,7 @@ Input: Feature ID (F050) When Beads is **enabled** (`bd --version` works, `.beads/` exists): ```bash -# @build does this for each WS: +# @build does this for each executable leaf WS: bd update {beads_id} --status in_progress # Execute TDD cycle bd close {beads_id} --reason "WS completed" @@ -157,7 +160,7 @@ git commit When Beads is **NOT enabled**: ```bash -# @build does this for each WS: +# @build does this for each executable leaf WS: # Execute TDD cycle git commit ``` @@ -176,7 +179,7 @@ You don't need to call bd commands directly — @build handles detection automat ## Quality Standards -Every WS must pass: +Every executable leaf WS must pass: | Check | Requirement | |-------|-------------| @@ -204,21 +207,21 @@ You work with **any language** — @build skill is language-agnostic: **LOG progress updates BUT continue execution immediately:** ```markdown -[15:23] Executing 00-050-01: Workstream Parser (MEDIUM, 0 deps) +[15:23] Executing 00-050-01: Leaf Workstream Parser (MEDIUM, 0 deps) [15:23] → Running @build 00-050-01... [15:45] ✅ COMPLETE (22m, 85% coverage, commit: a1b2c3d) [15:45] Checkpoint updated: 1/18 complete [15:45] → Continuing to next WS: 00-050-02... 
``` -**DO NOT STOP after each WS. Continue immediately.** +**DO NOT STOP after each leaf WS. Continue immediately.** ### Success (Final Summary Only) ```markdown ## ✅ Feature F050 COMPLETE -**All 18 workstreams executed in 3h 45m** +**All 18 executable leaf workstreams executed in 3h 45m** Coverage: 84.5% Tests: 87/87 passing @@ -230,7 +233,7 @@ Status: completed Ready for: @review F050 (then @deploy F050 if approved) ``` -**ONLY provide final summary when ALL workstreams complete.** +**ONLY provide final summary when ALL executable leaf workstreams complete.** ### Issues (Log and Continue) @@ -284,7 +287,7 @@ Create `.oneshot/{feature_id}-checkpoint.json`: } ``` -Update checkpoint after **each completed workstream** (transparently, without stopping). +Update checkpoint after **each completed executable leaf workstream** (transparently, without stopping). ## When to Stop and Ask User @@ -303,7 +306,7 @@ Update checkpoint after **each completed workstream** (transparently, without st - Linter errors after retry - Architecture violations -4. **ALL Workstreams Complete** +4. **ALL Executable Leaf Workstreams Complete** - Checkpoint status: "completed" - Provide final summary - Ask user for UAT @@ -311,15 +314,15 @@ Update checkpoint after **each completed workstream** (transparently, without st **DO NOT STOP for:** - ❌ After each batch of workstreams - ❌ After each checkpoint save -- ❌ After successful workstream completion +- ❌ After successful leaf workstream completion - ❌ For progress reports - ❌ For non-critical errors -**Rule of Thumb:** If workstream completed successfully (even after retry), continue immediately to next. If CRITICAL blocker, stop and escalate. +**Rule of Thumb:** If a leaf workstream completed successfully (even after retry), continue immediately to the next ready leaf. If CRITICAL blocker, stop and escalate. ## Key Principles -1. **Continuous Execution**: Execute ALL workstreams in ONE session without stopping +1. 
**Continuous Execution**: Execute ALL ready executable leaf workstreams in ONE session without stopping - ✅ Update checkpoints transparently (no user interaction) - ✅ Log progress with timestamps - ❌ DO NOT stop after each batch @@ -328,7 +331,7 @@ Update checkpoint after **each completed workstream** (transparently, without st 3. **Transparency**: Log all actions with timestamps, but continue execution 4. **Fail fast**: Stop ONLY at CRITICAL blockers, save checkpoint, escalate 5. **Follow specs**: Implement exactly what's specified, no "improvements" -6. **Use @build**: Don't implement directly — @build handles TDD + quality + Beads +6. **Use @build**: Don't implement directly — @build handles TDD + quality + Beads for executable leaf workstreams ## Context Files @@ -343,7 +346,7 @@ Read before starting: Invoke when: - User calls `@oneshot F050` - User wants autonomous feature execution -- Feature has 5-30 workstreams +- Feature has 5-30 workstreams total; only leaves are directly executable Don't use for: - Single WS execution (use `@build` directly) @@ -353,7 +356,7 @@ Don't use for: ## Success Criteria Feature is complete when: -- All WS executed (checkpoint status: "completed") +- All executable leaf WS executed (checkpoint status: "completed") - All quality gates passed - @review verdict: APPROVED - @deploy executed (merged feature branch to main) diff --git a/prompts/skills/build/SKILL.md b/prompts/skills/build/SKILL.md index 0b2868d0..84db1f4d 100644 --- a/prompts/skills/build/SKILL.md +++ b/prompts/skills/build/SKILL.md @@ -1,6 +1,6 @@ --- name: build -description: Execute ONE workstream with TDD, guard enforcement, and ws-verdict output +description: Execute ONE executable leaf workstream with TDD, guard enforcement, and ws-verdict output cli: sdp guard activate llm: Spawn subagents for TDD cycle version: 8.2.0 @@ -16,18 +16,19 @@ changes: # build > **CLI:** `sdp guard activate ` (scope enforcement) -> **LLM:** Execute one workstream following TDD 
discipline +> **LLM:** Execute one executable leaf workstream following TDD discipline -Execute **this ONE workstream**. After commit, **STOP**. Continuation is the orchestrator's job (@oneshot / sdp orchestrate). +Execute **this ONE executable leaf workstream**. After commit, **STOP**. +Continuation is the orchestrator's job (@oneshot / sdp orchestrate). -**Batch syntax:** `/build 00-053-16..25` (or `/build 00-053-16 00-053-17 … 00-053-25`) — run workstreams sequentially. Stop on first failure. Report: N done, M failed. +**Batch syntax:** `/build 00-053-16..25` (or `/build 00-053-16 00-053-17 … 00-053-25`) — run leaf workstreams sequentially. Stop on first failure. Report: N done, M failed. --- ## CRITICAL RULES 1. **CHECK EXISTING CODE FIRST** — Run `@reality --quick` or grep before starting new features. Output `existing_work_summary` in ws-verdict — **required**. Short summary: files/functions/risks found before implementation. -2. **ONE WORKSTREAM** — Execute this workstream only. After commit, STOP. Do not start the next WS. +2. **ONE EXECUTABLE LEAF** — Execute this workstream only if it is a leaf. If the target is an aggregate/container workstream, STOP and hand control back to `@oneshot` or target a child leaf explicitly. After commit, STOP. Do not start the next WS. 3. **USE SPAWN OR DO IT YOURSELF** — If spawn available, use it. If not, implement manually. 4. **POST-COMPACTION RECOVERY** — After context compaction, run `bd ready` to find your task. Never drift to side tasks. 5. **MODERN GO FOR GO CODE** — When touched files are Go, load `@go-modern` and prefer safe stdlib modernizations before inventing helpers. @@ -64,6 +65,10 @@ When user invokes `@build 00-067-01`: sdp guard activate 00-067-01 ``` + Read the workstream frontmatter before doing real work. If `ws_kind` exists and + is not `leaf`, STOP with a clear error: aggregate/container workstreams are not + direct execution targets. + 2. 
**TDD cycle** (spawn subagents if available, else do yourself): - Implementer: RED → GREEN → REFACTOR per AC. **Orchestrator contract:** Emit phase markers so orchestrator can parse: `TDD:RED` (writing failing test), `TDD:GREEN` (test passes), `TDD:REFACTOR` (cleanup). One marker per phase. - Spec Reviewer: Verify each AC with evidence @@ -118,7 +123,8 @@ Evidence lifecycle (create/patch `.sdp/evidence/*.json`) is orchestrator or post ## Beads Integration - **Before:** `bd update {beads_id} --status in_progress` -- **Success:** Run `bd close {beads_id} --reason "WS completed"` for each bead in WS frontmatter (e.g. `Feature: (sdp_dev-hryg)` or `## Beads` list). Resolve beads from `.beads-sdp-mapping.jsonl` by `sdp_id`, or from WS body (`Feature: … (beads_id)`, `Bead:`, `Beads:`). +- **Leaf-only dispatch:** only executable leaf workstreams may carry an open `primary` Beads issue. +- **Success:** Run `bd close {beads_id} --reason "WS completed"` for each bound leaf issue in WS frontmatter (for example `## Beads` with `primary:` and `finding:` roles). Resolve beads from `.beads-sdp-mapping.jsonl` by `sdp_id`, or from the WS body. 
- **Failure:** `bd update {beads_id} --status blocked` --- diff --git a/prompts/skills/feature/SKILL.md b/prompts/skills/feature/SKILL.md index ad808615..3655b6f4 100644 --- a/prompts/skills/feature/SKILL.md +++ b/prompts/skills/feature/SKILL.md @@ -1,6 +1,6 @@ --- name: feature -description: Feature planning orchestrator (discovery -> idea -> ux -> design -> workstreams) +description: Feature planning orchestrator (discovery -> idea -> ux -> design -> workstream tree) version: 8.0.0 depends_on: "@discovery v1" changes: @@ -43,14 +43,29 @@ For each deliverable in the feature, create a workstream file: docs/workstreams/backlog/00-FFF-SS.md ``` +Use one of two shapes: + +- **Leaf workstream** — directly executable contract slice +- **Aggregate workstream** — non-executable container or roll-up over `2+` leaf workstreams + +Only leaf workstreams are direct `@build` targets. + **Workstream file format:** ```markdown -# 00-FFF-SS: Feature Name — Step Description +--- +ws_id: 00-FFF-SS +feature_id: FFFF +status: open +priority: P1 +size: M +depends_on: [] +ws_kind: leaf|aggregate +parent_ws_id: null|00-FFF-SS +dispatch_lifecycle: active +--- -Feature: FFFF (sdp_dev-XXXX) -Phase: N -Status: Backlog +# 00-FFF-SS: Feature Name — Step Description ## Goal @@ -61,9 +76,10 @@ One paragraph: what this workstream does and why. - path/to/file/or/dir (exact files or directory prefixes this WS touches) - ... -## Dependencies +## Beads -- 00-FFF-S1: prior workstream (if any) +- primary: sdplab-XXXX # leaf only +- finding: sdplab-YYYY # optional on leaf or aggregate ## Acceptance Criteria @@ -73,9 +89,16 @@ One paragraph: what this workstream does and why. - [ ] go test ./... 
passes ``` +Rules: + +- `aggregate` must not have a `primary` Beads issue +- `leaf` may have one open `primary` +- use `parent_ws_id` only when a leaf belongs to an aggregate +- maximum nesting depth is one aggregate layer + ### Step C: Create Beads Issues -For each workstream created: +For each executable leaf workstream created: ```bash bd create --title="WS FFF-SS: Short title" --type=task ``` @@ -85,12 +108,15 @@ Update `.beads-sdp-mapping.jsonl`: {"sdp_id":"00-FFF-SS","beads_id":"sdp_dev-XXXX","updated_at":"2026-..."} ``` -### Step D: Validate Counts +Aggregate workstreams do not get a `primary` execution issue. If an aggregate needs +tracking for roll-up risk, use a `finding` issue instead. + +### Step D: Validate Shapes ```bash -echo "Mapping: $(wc -l < .beads-sdp-mapping.jsonl)" -echo "Backlog: $(ls docs/workstreams/backlog/*.md | wc -l)" -# Must be equal +echo "Leaves with primary: $(rg -l '^- primary:' docs/workstreams/backlog/00-FFF-*.md | wc -l)" +echo "Mappings: $(rg -c '"sdp_id":"00-FFF-' .beads-sdp-mapping.jsonl)" +# Primary mappings must match executable leaf workstreams, not total backlog files ``` ### Step E: Report @@ -99,7 +125,7 @@ Output: - Feature ID + number of workstreams created - Workstream file names - Beads issue IDs -- Ready-to-run command: `@build 00-FFF-01` or `@oneshot F0FF` +- Ready-to-run command: first leaf `@build 00-FFF-01` or `@oneshot F0FF` --- @@ -138,7 +164,8 @@ Read scope files. grep/rg for conflicts. Categorize: FILE CONFLICT, DATA BOUNDAR ### Step 4: Verify Outputs -Check discovery brief, idea spec, ux output, workstreams exist. +Check discovery brief, idea spec, ux output, workstreams exist, and that direct +execution targets are leaf workstreams rather than aggregates. --- @@ -156,5 +183,5 @@ The user is only asked to annotate if they want to (not required). 
- @idea — Requirements - @ux — UX research - @design — Workstream planning -- @build — Execute single workstream -- @oneshot — Execute all workstreams for a feature +- @build — Execute single executable leaf workstream +- @oneshot — Execute all ready leaf workstreams for a feature diff --git a/prompts/skills/strataudit/SKILL.md b/prompts/skills/strataudit/SKILL.md new file mode 100644 index 00000000..53b36457 --- /dev/null +++ b/prompts/skills/strataudit/SKILL.md @@ -0,0 +1,87 @@ +--- +name: strataudit +description: Use when the user needs evidence-backed alignment analysis, traceability gaps, coverage, or a source-grounded evidence pack across strategy, architecture, design, or implementation documents in a real corpus. +version: 1.1.0 +--- + +# @strataudit - Strategy Traceability Audit + +Run a document-backed strategy audit over a real corpus or existing `.strataudit` +artifacts. This skill is for evidence-backed audit work, not free-form strategy prose. + +## Use When + +- the user needs alignment analysis across strategic and delivery documents +- the answer must be backed by extracted entities, traces, findings, or saved artifacts +- the user needs one of these modes: `corpus-audit`, `traceability-audit`, `coverage-audit`, `evidence-pack`, `report-redraft` + +## Do Not Use When + +- the user wants only a short narrative summary +- there is no accessible corpus and no existing artifacts +- the task is brainstorming or roadmap generation from scratch +- the problem is operational debugging rather than document traceability + +## Safety Guards + +- only rely on real document text or saved audit artifacts +- do not fabricate quotes, traces, or initiatives +- preserve source language in the evidence layer unless explicitly asked to derive display text +- similarity alone is never enough to call a trace verified +- if provenance is weak, downgrade the claim or refuse to make it + +## Audit Modes + +| Mode | Use when | Must emit | +|------|----------|-----------| +| 
`corpus-audit` | corpus quality and ingest readiness are unclear | corpus inventory, exclusions, level coverage, caveats | +| `traceability-audit` | the user wants cross-level alignment and missing links | traces, findings, caveats | +| `coverage-audit` | the user asks what is and is not covered | coverage summary with explicit denominators | +| `evidence-pack` | the user wants inspectable proof behind claims | source-backed findings, trace tables, caveats | +| `report-redraft` | the user wants a better report from existing artifacts | rewritten sections with unchanged trust boundaries | + +Start with `corpus-audit` when corpus quality is unknown. Use `report-redraft` +only when an evidence pack or prior audit artifacts already exist. + +## Runtime Order + +1. use an injected host-native runtime from the harness when available +2. otherwise use a configured OpenAI-compatible runtime +3. use OpenRouter as the default network enhancer/fallback +4. use artifact-only mode when the question can be answered from existing outputs +5. use `sdp-strataudit run` only as CLI fallback + +The CLI can resolve configured network runtimes. It cannot create a host-native runtime on its own. + +## Workflow + +1. choose the audit mode that matches the user's question +2. validate inputs and trust boundary +3. resolve runtime by policy or reuse existing artifacts +4. run ingest → extract → link → analyze → report, or inspect saved artifacts +5. 
return artifact paths plus trust caveats and what is not claimed + +## Refuse When + +- the user asks for verified alignment without inspectable provenance +- the corpus root is missing and there are no prior artifacts +- the requested mode needs runtime capabilities that are unavailable +- the requested summary is broader than the evidence pack supports + +## Output + +- `.strataudit/report.json` +- `.strataudit/report.html` +- `.strataudit/similarity_distribution.json` +- `.strataudit/strataudit.db` +- explicit runtime choice or artifact-only path +- key trust caveats and what is not claimed + +## References + +- `docs/QUICKSTART.md` +- `docs/reference/skills.md` +- `docs/reference/strataudit-evidence-policy.md` +- `docs/reference/strataudit-runtime-policy.md` +- `docs/reference/strataudit-output-modes.md` +- `sdp-strataudit run` diff --git a/schema/index.json b/schema/index.json index 06670746..2b98ec8c 100644 --- a/schema/index.json +++ b/schema/index.json @@ -19,6 +19,7 @@ { "id": "docs-findings", "path": "findings/docs-findings.schema.json", "title": "Documentation Findings Report" }, { "id": "handoff-analyst", "path": "handoff-analyst.schema.json", "title": "Analyst Handoff" }, { "id": "handoff-coder", "path": "handoff-coder.schema.json", "title": "Coder Handoff" }, - { "id": "handoff-reviewer", "path": "handoff-reviewer.schema.json", "title": "Reviewer Handoff" } + { "id": "handoff-reviewer", "path": "handoff-reviewer.schema.json", "title": "Reviewer Handoff" }, + { "id": "ux-metrics", "path": "ux-metrics.schema.json", "title": "SDP UX Metrics" } ] } diff --git a/schema/ux-metrics.schema.json b/schema/ux-metrics.schema.json new file mode 100644 index 00000000..239e3723 --- /dev/null +++ b/schema/ux-metrics.schema.json @@ -0,0 +1,153 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SDP UX Metrics", + "description": "User experience metrics for SDP CLI adoption journey (local-first, no cloud dependency)", + "type": "object", + 
"required": ["metric_type", "timestamp", "value"], + "properties": { + "metric_type": { + "type": "string", + "enum": [ + "time_to_first_value", + "step_abandon_rate", + "reset_uninstall_frequency", + "brownfield_init_completion", + "recovery_success_rate", + "second_session_return" + ], + "description": "Type of UX metric being measured" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "ISO 8601 timestamp when the metric was recorded" + }, + "value": { + "oneOf": [ + {"type": "number"}, + {"type": "boolean"}, + {"type": "string"} + ], + "description": "Metric value (duration in ms, percentage, boolean, or enum)" + }, + "session_id": { + "type": "string", + "description": "Unique session identifier for correlation" + }, + "project_type": { + "type": "string", + "enum": ["greenfield", "brownfield", "unknown"], + "description": "Project type for brownfield-specific metrics" + }, + "step_name": { + "type": "string", + "description": "Workflow step name (e.g., 'assess', 'try', 'adopt')" + }, + "step_number": { + "type": "integer", + "minimum": 1, + "description": "Step number in the workflow sequence" + }, + "context": { + "type": "object", + "description": "Additional contextual information", + "properties": { + "command": {"type": "string"}, + "error": {"type": "string"}, + "feature_id": {"type": "string"}, + "workstream_id": {"type": "string"}, + "exit_reason": {"type": "string"}, + "recovery_type": {"type": "string"}, + "init_phase": {"type": "string"} + } + } + }, + "definitions": { + "time_to_first_value": { + "description": "Time from 'sdp init' to first successful feature delivery (milliseconds)", + "type": "object", + "allOf": [{"$ref": "#/properties/value"}], + "properties": { + "value": { + "type": "number", + "minimum": 0, + "unit": "milliseconds" + } + } + }, + "step_abandon_rate": { + "description": "Percentage of sessions where user stops mid-workflow without completion", + "type": "object", + "allOf": [{"$ref": 
"#/properties/value"}], + "properties": { + "value": { + "type": "number", + "minimum": 0, + "maximum": 100, + "unit": "percentage" + } + } + }, + "reset_uninstall_frequency": { + "description": "How often users reset or uninstall (count per time period)", + "type": "object", + "allOf": [{"$ref": "#/properties/value"}], + "properties": { + "value": { + "type": "number", + "minimum": 0 + }, + "action": { + "type": "string", + "enum": ["reset", "uninstall", "config_clear"] + } + } + }, + "brownfield_init_completion": { + "description": "Percentage of brownfield projects completing init successfully", + "type": "object", + "allOf": [{"$ref": "#/properties/value"}], + "properties": { + "value": { + "type": "boolean" + }, + "project_type": { + "type": "string", + "enum": ["brownfield"] + }, + "init_phase": { + "type": "string", + "enum": ["discovery", "hook_install", "config_setup", "validation", "complete"] + } + } + }, + "recovery_success_rate": { + "description": "Percentage of successful error recovery attempts", + "type": "object", + "allOf": [{"$ref": "#/properties/value"}], + "properties": { + "value": { + "type": "boolean" + }, + "recovery_type": { + "type": "string", + "enum": ["auto_fix", "manual guidance", "docs link", "support_escalation"] + } + } + }, + "second_session_return": { + "description": "Whether user returns for a second session within 7 days", + "type": "object", + "allOf": [{"$ref": "#/properties/value"}], + "properties": { + "value": { + "type": "boolean" + }, + "days_since_first_session": { + "type": "number", + "minimum": 0 + } + } + } + } +} diff --git a/scripts/sync-skills-to-commands.js b/scripts/sync-skills-to-commands.js index 3b7bb8b8..71158a1e 100755 --- a/scripts/sync-skills-to-commands.js +++ b/scripts/sync-skills-to-commands.js @@ -53,6 +53,7 @@ function getAgentForSkill(skillName) { 'hotfix': 'builder', 'issue': 'planner', 'oneshot': 'orchestrator', + 'strataudit': 'architect', 'test': 'builder', 'codereview': 'reviewer' }; diff 
--git a/sdp-plugin/.beads/.jsonl.lock b/sdp-plugin/.beads/.jsonl.lock deleted file mode 100644 index e69de29b..00000000 diff --git a/sdp-plugin/cmd/sdp/adopt.go b/sdp-plugin/cmd/sdp/adopt.go new file mode 100644 index 00000000..3e71e91b --- /dev/null +++ b/sdp-plugin/cmd/sdp/adopt.go @@ -0,0 +1,194 @@ +package main + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "time" + + "github.com/fall-out-bug/sdp/internal/sdpinit" + "github.com/fall-out-bug/sdp/internal/telemetry" + "github.com/spf13/cobra" +) + +func adoptCmd() *cobra.Command { + var force bool + + cmd := &cobra.Command{ + Use: "adopt", + Short: "Adopt current changes into SDP", + Long: `Convert a successful 'sdp try' session into a full SDP setup: + - Creates .sdp/ directory structure (equivalent to 'sdp init') + - Creates .claude/settings.json with SDP skill configuration + - Commits both .sdp/ and .claude/ to git + - Preserves all code changes from the trial + +This is the next step after accepting a trial with 'sdp try --keep'.`, + Example: ` # Adopt current changes + sdp adopt + + # Force adopt even if .sdp exists + sdp adopt --force`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + startTime := time.Now() + projectPath := "." + + // Convert to absolute path + absPath, err := filepath.Abs(projectPath) + if err != nil { + return fmt.Errorf("failed to resolve path: %w", err) + } + + // Check if .sdp already exists and is initialized + sdpPath := filepath.Join(absPath, ".sdp") + claudeSettingsPath := filepath.Join(absPath, ".claude", "settings.json") + + sdpExists := false + alreadyInitialized := false + + if _, err := os.Stat(sdpPath); err == nil { + sdpExists = true + } + + if _, err := os.Stat(claudeSettingsPath); err == nil { + alreadyInitialized = true + } + + if alreadyInitialized && !force { + return fmt.Errorf("SDP already initialized. 
Use --force to reinitialize") + } + + if sdpExists && !force { + fmt.Println("⚠ .sdp directory exists but may not be fully initialized") + fmt.Println(" Use --force to reinitialize completely") + } + + // Initialize telemetry collector (after checks, UX metrics now go to user config dir) + uxMetrics, err := telemetry.NewUXMetricsCollector("") + if err != nil { + // Don't fail the command if telemetry fails + fmt.Fprintf(os.Stderr, "Warning: failed to initialize telemetry: %v\n", err) + } + + // Create .sdp/ directory structure before sdpinit (which only creates .claude/) + if err := createSDPDirectory(absPath); err != nil { + return fmt.Errorf("failed to create .sdp/ directory: %w", err) + } + fmt.Println("✓ .sdp/ directory created") + + // Run SDP init (creates .claude/ with settings, skills, agents) + fmt.Println("Adopting project into SDP...") + cfg := sdpinit.Config{ + ProjectType: "auto", + Force: force, + Headless: false, + } + if err := sdpinit.Run(cfg); err != nil { + return fmt.Errorf("failed to initialize SDP: %w", err) + } + + fmt.Println("✓ SDP structure created") + + // Commit the .sdp/ and .claude/ structure + fmt.Println("\nCommitting SDP structure...") + commitSuccess := true + if err := commitSDPStructure(); err != nil { + commitSuccess = false + fmt.Printf("⚠ Warning: failed to commit SDP structure: %v\n", err) + fmt.Println(" Please commit manually: git add .sdp/ .claude/ && git commit -m 'Initialize SDP'") + } else { + fmt.Println("✓ SDP structure committed") + } + + fmt.Println("\nNext steps:") + fmt.Println(" 1. Review the .sdp/ structure") + fmt.Println(" 2. 
Continue with SDP workflow: sdp plan 'your feature'") + + // Record telemetry + if uxMetrics != nil && commitSuccess { + duration := time.Since(startTime) + if err := uxMetrics.RecordAdoptComplete("unknown", duration); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to record telemetry: %v\n", err) + } + } + + return nil + }, + } + + cmd.Flags().BoolVar(&force, "force", false, "Reinitialize even if .sdp exists") + + return cmd +} + +// createSDPDirectory creates the .sdp/ directory structure with essential config files. +// This is separate from sdpinit.Run() which only creates .claude/. +func createSDPDirectory(projectPath string) error { + sdpDir := filepath.Join(projectPath, ".sdp") + + // Create .sdp/ subdirectories + dirs := []string{ + filepath.Join(sdpDir, "log"), + filepath.Join(sdpDir, "evidence"), + filepath.Join(sdpDir, "checkpoints"), + filepath.Join(sdpDir, "metrics"), + } + for _, dir := range dirs { + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("create %s: %w", dir, err) + } + } + + // Create .sdp/config.yml if it doesn't exist + configPath := filepath.Join(sdpDir, "config.yml") + if _, err := os.Stat(configPath); os.IsNotExist(err) { + configContent := `version: "1.0.0" +evidence: + enabled: true + log_path: ".sdp/log/events.jsonl" +` + if err := os.WriteFile(configPath, []byte(configContent), 0644); err != nil { + return fmt.Errorf("create config.yml: %w", err) + } + } + + // Create .sdp/guard-rules.yml if it doesn't exist + guardPath := filepath.Join(sdpDir, "guard-rules.yml") + if _, err := os.Stat(guardPath); os.IsNotExist(err) { + guardContent := `# SDP Guard Rules +# Controls which files can be edited per workstream +version: "1.0.0" +` + if err := os.WriteFile(guardPath, []byte(guardContent), 0644); err != nil { + return fmt.Errorf("create guard-rules.yml: %w", err) + } + } + + return nil +} + +// commitSDPStructure commits the .sdp/ and .claude/ structure to git +func commitSDPStructure() error { + // Add .sdp/ 
and .claude/ directories + addCmd := exec.Command("git", "add", ".sdp/", ".claude/") + if output, err := addCmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to add to git: %s: %w", string(output), err) + } + + // Check if there's anything to commit + statusCmd := exec.Command("git", "diff", "--cached", "--quiet") + if err := statusCmd.Run(); err == nil { + // No changes to commit (exit status 0 means no differences) + return fmt.Errorf("no changes to commit") + } + + // Commit the changes + commitCmd := exec.Command("git", "commit", "-m", "Initialize SDP structure") + if output, err := commitCmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to commit: %s: %w", string(output), err) + } + + return nil +} diff --git a/sdp-plugin/cmd/sdp/adopt_test.go b/sdp-plugin/cmd/sdp/adopt_test.go new file mode 100644 index 00000000..70bd10b6 --- /dev/null +++ b/sdp-plugin/cmd/sdp/adopt_test.go @@ -0,0 +1,234 @@ +package main + +import ( + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/fall-out-bug/sdp/internal/telemetry" +) + +func TestAdoptCmdAlreadyInitialized(t *testing.T) { + // Get original working directory + originalWd, _ := os.Getwd() + + // Create temp directory + tmpDir := t.TempDir() + + // Change to temp directory + t.Cleanup(func() { os.Chdir(originalWd) }) + if err := os.Chdir(tmpDir); err != nil { + t.Fatalf("Failed to chdir: %v", err) + } + + // Setup git repo + setupTestGitRepo(t, tmpDir) + + // Create .sdp and .claude directories to simulate initialized state + sdpDir := filepath.Join(tmpDir, ".sdp") + if err := os.MkdirAll(sdpDir, 0755); err != nil { + t.Fatalf("failed to create .sdp: %v", err) + } + + claudeDir := filepath.Join(tmpDir, ".claude") + if err := os.MkdirAll(claudeDir, 0755); err != nil { + t.Fatalf("failed to create .claude: %v", err) + } + if err := os.WriteFile(filepath.Join(claudeDir, "settings.json"), []byte("{}"), 0644); err != nil { + t.Fatalf("failed to create settings.json: %v", err) + 
} + + // Create command + cmd := adoptCmd() + cmd.SetArgs([]string{}) + + // Execute command - should fail + err := cmd.RunE(cmd, []string{}) + if err == nil { + t.Error("Expected error when already initialized, got nil") + } + if !strings.Contains(err.Error(), "SDP already initialized") { + t.Errorf("Expected error about already initialized, got: %v", err) + } +} + +func TestAdoptCmdWithForce(t *testing.T) { + // Get original working directory + originalWd, _ := os.Getwd() + + // Create temp directory + tmpDir := t.TempDir() + + // Change to temp directory + t.Cleanup(func() { os.Chdir(originalWd) }) + if err := os.Chdir(tmpDir); err != nil { + t.Fatalf("Failed to chdir: %v", err) + } + + // Setup git repo + setupTestGitRepo(t, tmpDir) + + // Create .sdp and .claude directories to simulate initialized state + sdpDir := filepath.Join(tmpDir, ".sdp") + if err := os.MkdirAll(sdpDir, 0755); err != nil { + t.Fatalf("failed to create .sdp: %v", err) + } + + claudeDir := filepath.Join(tmpDir, ".claude") + if err := os.MkdirAll(claudeDir, 0755); err != nil { + t.Fatalf("failed to create .claude: %v", err) + } + if err := os.WriteFile(filepath.Join(claudeDir, "settings.json"), []byte("{}"), 0644); err != nil { + t.Fatalf("failed to create settings.json: %v", err) + } + + // Create command with force flag + cmd := adoptCmd() + cmd.SetArgs([]string{}) + if err := cmd.Flags().Set("force", "true"); err != nil { + t.Fatalf("failed to set force flag: %v", err) + } + + // Execute command - should succeed (may fail on actual init, but shouldn't fail on already initialized check) + err := cmd.RunE(cmd, []string{}) + // We expect this might fail due to actual init issues, but NOT due to "already initialized" + if err != nil && strings.Contains(err.Error(), "already initialized") { + t.Errorf("Should not fail with 'already initialized' when using --force, got: %v", err) + } +} + +func TestAdoptTelemetry(t *testing.T) { + // Create temp directory + tmpDir := t.TempDir() + + // Setup 
git repo + setupTestGitRepo(t, tmpDir) + + // Create temp telemetry dir + telemetryDir := t.TempDir() + + // Create UX metrics collector with temp dir + uxMetrics, err := telemetry.NewUXMetricsCollector(telemetryDir) + if err != nil { + t.Fatalf("failed to create UX metrics collector: %v", err) + } + + // Record adopt complete + err = uxMetrics.RecordAdoptComplete("test-project", 100) + if err != nil { + t.Fatalf("failed to record adopt complete: %v", err) + } + + // Verify event was written + eventsFile := uxMetrics.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("failed to read events file: %v", err) + } + + content := string(data) + if !strings.Contains(content, "metric_type") { + t.Errorf("expected event to contain metric_type, got: %s", content) + } + if !strings.Contains(content, "time_to_first_value") { + t.Errorf("expected event to contain time_to_first_value, got: %s", content) + } + if !strings.Contains(content, "adopt") { + t.Errorf("expected event to contain adopt step, got: %s", content) + } +} + +func TestCommitSDPStructure(t *testing.T) { + tests := []struct { + name string + setupRepo func(t *testing.T, dir string) + wantErr bool + errContains string + }{ + { + name: "commits successfully", + setupRepo: func(t *testing.T, dir string) { + setupTestGitRepo(t, dir) + + // Create .sdp directory + sdpDir := filepath.Join(dir, ".sdp") + if err := os.MkdirAll(sdpDir, 0755); err != nil { + t.Fatalf("failed to create .sdp: %v", err) + } + + // Create .claude directory + claudeDir := filepath.Join(dir, ".claude") + if err := os.MkdirAll(claudeDir, 0755); err != nil { + t.Fatalf("failed to create .claude: %v", err) + } + + // Create a file in .sdp + if err := os.WriteFile(filepath.Join(sdpDir, "config.yml"), []byte("test: true"), 0644); err != nil { + t.Fatalf("failed to create config file: %v", err) + } + }, + wantErr: false, + }, + { + name: "fails when no SDP structure exists", + setupRepo: func(t *testing.T, dir string) 
{ + setupTestGitRepo(t, dir) + }, + wantErr: true, + errContains: "failed to add", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + tt.setupRepo(t, tmpDir) + + oldWd, _ := os.Getwd() + if err := os.Chdir(tmpDir); err != nil { + t.Fatalf("failed to chdir: %v", err) + } + defer os.Chdir(oldWd) + + err := commitSDPStructure() + + if (err != nil) != tt.wantErr { + t.Errorf("commitSDPStructure() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if tt.wantErr && tt.errContains != "" { + if err == nil { + t.Errorf("expected error containing %q, got nil", tt.errContains) + } else if !strings.Contains(err.Error(), tt.errContains) { + t.Errorf("error = %q, want error containing %q", err.Error(), tt.errContains) + } + } + }) + } +} + +// setupTestGitRepo creates a minimal git repository for testing +func setupTestGitRepo(t *testing.T, dir string) { + t.Helper() + + commands := [][]string{ + {"git", "init"}, + {"git", "config", "user.email", "test@example.com"}, + {"git", "config", "user.name", "Test User"}, + {"git", "checkout", "-b", "main"}, + {"sh", "-c", "echo test > README.md"}, + {"git", "add", "README.md"}, + {"git", "commit", "-m", "initial commit"}, + } + + for _, cmdArgs := range commands { + cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...) 
+ cmd.Dir = dir + if output, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git setup failed: %v: %s", err, string(output)) + } + } +} diff --git a/sdp-plugin/cmd/sdp/assess.go b/sdp-plugin/cmd/sdp/assess.go new file mode 100644 index 00000000..68466195 --- /dev/null +++ b/sdp-plugin/cmd/sdp/assess.go @@ -0,0 +1,226 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + "github.com/fall-out-bug/sdp/internal/assess" + "github.com/fall-out-bug/sdp/internal/telemetry" + "github.com/spf13/cobra" +) + +func assessCmd() *cobra.Command { + var outputPath string + var jsonOutput bool + + cmd := &cobra.Command{ + Use: "assess [project-path]", + Short: "Assess project without making changes", + Long: `Perform a read-only scan of the repository to detect: + - Programming language + - Frameworks and libraries + - Project structure + - Testing setup + - CI/CD configuration + - Monorepo patterns + +Outputs recommendations to stdout only. No files are created.`, + Example: ` # Assess current directory + sdp assess + + # Assess specific project + sdp assess /path/to/project + + # Output JSON + sdp assess --json`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + startTime := time.Now() + + // Determine project path + projectPath := "." 
+ if len(args) > 0 { + projectPath = args[0] + } + + // Convert to absolute path + absPath, err := filepath.Abs(projectPath) + if err != nil { + return fmt.Errorf("failed to resolve path: %w", err) + } + + // Check if path exists + if _, err := os.Stat(absPath); os.IsNotExist(err) { + return fmt.Errorf("path does not exist: %s", absPath) + } + + // Run assessment (read-only, no clean-state check needed) + result, err := assess.Assess(absPath) + if err != nil { + return fmt.Errorf("assessment failed: %w", err) + } + + // Output results + var outputWriter = os.Stdout + if outputPath != "" { + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer f.Close() + outputWriter = f + } + + if jsonOutput { + if err := printAssessmentJSONTo(result, outputWriter); err != nil { + return err + } + } else { + if err := printAssessmentTo(result, absPath, outputWriter); err != nil { + return err + } + } + + // Initialize telemetry collector (after assessment, to avoid creating files in assessed repos) + uxMetrics, err := telemetry.NewUXMetricsCollector("") + if err != nil { + // Don't fail the command if telemetry fails + fmt.Fprintf(os.Stderr, "Warning: failed to initialize telemetry: %v\n", err) + } + + // Record telemetry + if uxMetrics != nil { + duration := time.Since(startTime) + projectType := result.Language + if projectType == "" { + projectType = "unknown" + } + + if err := uxMetrics.RecordAssessComplete(projectType, duration); err != nil { + // Don't fail the command if telemetry fails + fmt.Fprintf(os.Stderr, "Warning: failed to record telemetry: %v\n", err) + } + } + + return nil + }, + } + + cmd.Flags().StringVarP(&outputPath, "output", "o", "", "Write output to file") + cmd.Flags().BoolVar(&jsonOutput, "json", false, "Output JSON format") + + return cmd +} + +func printAssessment(result *assess.Assessment, projectPath string) error { + return printAssessmentTo(result, projectPath, os.Stdout) +} + 
+func printAssessmentTo(result *assess.Assessment, projectPath string, w io.Writer) error { + fmt.Fprintln(w, "SDP Project Assessment") + fmt.Fprintln(w, "=====================") + fmt.Fprintf(w, "Project: %s\n\n", projectPath) + + // Language + fmt.Fprintf(w, "Language: %s\n", result.Language) + + // Frameworks + if len(result.Framework) > 0 { + fmt.Fprintf(w, "Frameworks: %s\n", strings.Join(result.Framework, ", ")) + } else { + fmt.Fprintln(w, "Frameworks: None detected") + } + + // Structure + if len(result.Structure) > 0 { + fmt.Fprintf(w, "Structure: %s\n", strings.Join(result.Structure, ", ")) + } else { + fmt.Fprintln(w, "Structure: standard") + } + + // Flags + fmt.Fprintf(w, "Monorepo: %v\n", result.IsMonorepo) + fmt.Fprintf(w, "Has Tests: %v\n", result.HasTests) + fmt.Fprintf(w, "Has CI: %v\n", result.HasCI) + + // Recommendations + fmt.Fprintln(w, "\nRecommendations") + fmt.Fprintln(w, "--------------") + + if len(result.Recommendations) == 0 { + fmt.Fprintln(w, "No recommendations - project looks good!") + } else { + for _, rec := range result.Recommendations { + priorityIcon := "ℹ" + if rec.Priority == "high" { + priorityIcon = "⚠" + } else if rec.Priority == "medium" { + priorityIcon = "→" + } + + fmt.Fprintf(w, "%s [%s] %s\n", priorityIcon, rec.Category, rec.Title) + fmt.Fprintf(w, " %s\n\n", rec.Message) + } + } + + return nil +} + +func printAssessmentJSON(result *assess.Assessment) error { + return printAssessmentJSONTo(result, os.Stdout) +} + +func printAssessmentJSONTo(result *assess.Assessment, w io.Writer) error { + // Define a JSON-serializable structure + type JSONRecommendation struct { + Category string `json:"category"` + Title string `json:"title"` + Message string `json:"message"` + Priority string `json:"priority"` + } + + type JSONAssessment struct { + Language string `json:"language"` + Frameworks []string `json:"frameworks"` + Structure []string `json:"structure"` + IsMonorepo bool `json:"is_monorepo"` + HasTests bool 
`json:"has_tests"` + HasCI bool `json:"has_ci"` + Recommendations []JSONRecommendation `json:"recommendations"` + } + + // Convert recommendations to JSON format + jsonRecs := make([]JSONRecommendation, len(result.Recommendations)) + for i, rec := range result.Recommendations { + jsonRecs[i] = JSONRecommendation{ + Category: rec.Category, + Title: rec.Title, + Message: rec.Message, + Priority: rec.Priority, + } + } + + jsonResult := JSONAssessment{ + Language: result.Language, + Frameworks: result.Framework, + Structure: result.Structure, + IsMonorepo: result.IsMonorepo, + HasTests: result.HasTests, + HasCI: result.HasCI, + Recommendations: jsonRecs, + } + + // Marshal to JSON with proper escaping + data, err := json.MarshalIndent(jsonResult, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal assessment to JSON: %w", err) + } + + fmt.Fprintln(w, string(data)) + return nil +} diff --git a/sdp-plugin/cmd/sdp/main.go b/sdp-plugin/cmd/sdp/main.go index 49064420..0cda4d7b 100644 --- a/sdp-plugin/cmd/sdp/main.go +++ b/sdp-plugin/cmd/sdp/main.go @@ -143,6 +143,9 @@ directory (.claude/, .cursor/, .opencode/, or .codex/).`, rootCmd.AddCommand(initCmd()) rootCmd.AddCommand(doctorCmd()) + rootCmd.AddCommand(assessCmd()); + rootCmd.AddCommand(tryCmd()); + rootCmd.AddCommand(adoptCmd()); rootCmd.AddCommand(hooksCmd()) rootCmd.AddCommand(guardCmd()) rootCmd.AddCommand(collisionCmd()) diff --git a/sdp-plugin/cmd/sdp/metrics.go b/sdp-plugin/cmd/sdp/metrics.go index 3e6b0dd9..55ee64c8 100644 --- a/sdp-plugin/cmd/sdp/metrics.go +++ b/sdp-plugin/cmd/sdp/metrics.go @@ -72,6 +72,9 @@ func metricsCollectCmd() *cobra.Command { sdp metrics collect --watermark`, RunE: func(cmd *cobra.Command, args []string) error { // Default output path: .sdp/metrics/latest.json + + // Ensure metrics directory exists only when actually collecting + initMetricsDir() if outputPath == "" { outputPath = ".sdp/metrics/latest.json" } diff --git a/sdp-plugin/cmd/sdp/try.go 
b/sdp-plugin/cmd/sdp/try.go new file mode 100644 index 00000000..d1d4faab --- /dev/null +++ b/sdp-plugin/cmd/sdp/try.go @@ -0,0 +1,175 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/fall-out-bug/sdp/internal/telemetry" + "github.com/fall-out-bug/sdp/internal/trial" + "github.com/spf13/cobra" +) + +func tryCmd() *cobra.Command { + var discard bool + var keep bool + + cmd := &cobra.Command{ + Use: "try \"task description\"", + Short: "Try a task on a temporary branch", + Long: `Plan a bounded task for trial execution (dry-run mode): + - Creates temporary branch (sdp-try-{timestamp}) + - Generates a plan for the requested task + - Shows results for review + - On accept: keeps branch, suggests 'sdp adopt' + - On discard: deletes branch, returns to original state + +This provides a zero-commitment first experience with SDP.`, + Example: ` # Try a task + sdp try "Add user authentication" + + # Try and discard if not satisfied + sdp try "Refactor API" --discard + + # Try and keep for adoption + sdp try "Add tests" --keep`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + startTime := time.Now() + taskDescription := args[0] + projectPath := "." 
+ + // Convert to absolute path + absPath, err := filepath.Abs(projectPath) + if err != nil { + return fmt.Errorf("failed to resolve path: %w", err) + } + + // Create trial session + t, err := trial.NewTrial(absPath, taskDescription) + if err != nil { + return fmt.Errorf("failed to create trial: %w", err) + } + + // Verify clean state + clean, err := t.VerifyClean() + if err != nil { + return fmt.Errorf("failed to verify clean state: %w", err) + } + if !clean { + return fmt.Errorf("working directory not clean - commit or stash changes first") + } + + // Initialize telemetry collector (after clean-state check, UX metrics now go to user config dir) + uxMetrics, err := telemetry.NewUXMetricsCollector("") + if err != nil { + // Don't fail the command if telemetry fails + fmt.Fprintf(os.Stderr, "Warning: failed to initialize telemetry: %v\n", err) + } + + // Start trial + fmt.Printf("Starting trial on branch: %s\n", t.BranchName) + fmt.Printf("Task: %s\n\n", taskDescription) + + if err := t.Start(); err != nil { + return fmt.Errorf("failed to start trial: %w", err) + } + + fmt.Println("✓ Trial branch created") + + // Execute task + fmt.Println("\nExecuting task...") + result, err := t.Execute() + if err != nil { + // Record discard telemetry on execution failure + if uxMetrics != nil { + _ = uxMetrics.RecordTryDiscard("unknown", "execution_failure", 1) + } + return fmt.Errorf("execution failed: %w", err) + } + + // Show results + fmt.Printf("\nExecution completed in %v\n", result.Duration.Round(time.Second)) + fmt.Printf("Result: %s\n", result.Message) + + // Determine outcome + var outcome string + var stepNumber int + + // Handle flags + if discard { + fmt.Println("\nDiscarding trial...") + outcome = "user_discarded" + stepNumber = 2 + if err := t.Discard(); err != nil { + return err + } + } else if keep { + fmt.Println("\nKeeping trial...") + outcome = "user_accepted" + stepNumber = 2 + if err := t.Accept(); err != nil { + return err + } + } else { + // Interactive 
prompt + fmt.Println("\nWhat would you like to do?") + fmt.Println(" [1] Accept - Keep branch and adopt changes") + fmt.Println(" [2] Discard - Delete branch and restore original state") + fmt.Print("Choice: ") + + reader := bufio.NewReader(os.Stdin) + choice, _ := reader.ReadString('\n') + choice = strings.TrimSpace(choice) + + switch choice { + case "1", "a", "accept": + outcome = "user_accepted" + stepNumber = 2 + if err := t.Accept(); err != nil { + return err + } + case "2", "d", "discard": + outcome = "user_discarded" + stepNumber = 2 + if err := t.Discard(); err != nil { + return err + } + default: + fmt.Println("Invalid choice. Discarding trial.") + outcome = "invalid_choice" + stepNumber = 2 + if err := t.Discard(); err != nil { + return err + } + } + } + + // Record telemetry + if uxMetrics != nil { + duration := time.Since(startTime) + if outcome == "user_accepted" && result.Success { + // Record successful completion + if err := uxMetrics.RecordTryComplete("unknown", duration); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to record telemetry: %v\n", err) + } + } else { + // Record discard + if err := uxMetrics.RecordTryDiscard("unknown", outcome, stepNumber); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to record telemetry: %v\n", err) + } + } + } + + return nil + }, + } + + cmd.Flags().BoolVar(&discard, "discard", false, "Discard trial after execution") + cmd.Flags().BoolVar(&keep, "keep", false, "Keep trial after execution") + + return cmd +} diff --git a/sdp-plugin/internal/assess/assess.go b/sdp-plugin/internal/assess/assess.go new file mode 100644 index 00000000..6fefa369 --- /dev/null +++ b/sdp-plugin/internal/assess/assess.go @@ -0,0 +1,325 @@ +package assess + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// Recommendation represents a single recommendation for the project +type Recommendation struct { + Category string + Title string + Message string + Priority string +} + +// Assessment represents the 
complete assessment of a project +type Assessment struct { + Language string + Framework []string + Structure []string + IsMonorepo bool + HasTests bool + HasCI bool + Recommendations []Recommendation +} + +// Assess performs a read-only scan of the repository +func Assess(projectPath string) (*Assessment, error) { + result := &Assessment{ + Framework: []string{}, + Structure: []string{}, + Recommendations: []Recommendation{}, + } + + // Detect language + lang, err := detectLanguage(projectPath) + if err != nil { + return nil, fmt.Errorf("failed to detect language: %w", err) + } + result.Language = lang + + // Detect framework + frameworks := detectFramework(projectPath, lang) + result.Framework = frameworks + + // Detect structure + structure := detectStructure(projectPath) + result.Structure = structure + + // Detect if monorepo + result.IsMonorepo = detectMonorepo(projectPath) + + // Detect tests + result.HasTests = detectTests(projectPath) + + // Detect CI + result.HasCI = detectCI(projectPath) + + // Generate recommendations + generateRecommendations(result) + + return result, nil +} + +// detectLanguage identifies the primary programming language +func detectLanguage(projectPath string) (string, error) { + detectors := []struct { + files []string + language string + }{ + {[]string{"go.mod"}, "Go"}, + {[]string{"package.json"}, "Node.js/TypeScript"}, + {[]string{"requirements.txt", "pyproject.toml", "setup.py", "Pipfile"}, "Python"}, + {[]string{"Cargo.toml"}, "Rust"}, + {[]string{"pom.xml", "build.gradle"}, "Java"}, + {[]string{"Gemfile"}, "Ruby"}, + {[]string{"composer.json"}, "PHP"}, + {[]string{"*.csproj", "*.sln"}, "C#"}, + } + + for _, detector := range detectors { + for _, file := range detector.files { + if strings.Contains(file, "*") { + matches, _ := filepath.Glob(filepath.Join(projectPath, file)) + if len(matches) > 0 { + return detector.language, nil + } + } else { + if _, err := os.Stat(filepath.Join(projectPath, file)); err == nil { + return 
// appendFrameworkMatches appends to found the display name of every marker
// whose needle occurs in manifest, preserving the order of markers. Each
// marker is a {needle, display name} pair.
func appendFrameworkMatches(found []string, manifest string, markers [][2]string) []string {
	for _, m := range markers {
		if strings.Contains(manifest, m[0]) {
			found = append(found, m[1])
		}
	}
	return found
}

// detectFramework identifies frameworks by searching the dependency manifest
// of the given language for well-known package names.
//
// language is one of the names returned by detectLanguage; for languages
// without framework markers, or when no marker matches, the result is the
// single placeholder entry "None detected". Unreadable manifests are treated
// as containing no frameworks.
func detectFramework(projectPath, language string) []string {
	frameworks := []string{}

	switch language {
	case "Go":
		if content, err := os.ReadFile(filepath.Join(projectPath, "go.mod")); err == nil {
			frameworks = appendFrameworkMatches(frameworks, string(content), [][2]string{
				{"github.com/gin-gonic/gin", "Gin"},
				{"github.com/gorilla/mux", "Gorilla Mux"},
				// NOTE(review): standard-library packages are not listed in
				// go.mod, so this marker is unlikely to ever match; kept for
				// backward compatibility.
				{"net/http", "net/http (stdlib)"},
			})
		}

	case "Node.js/TypeScript":
		if content, err := os.ReadFile(filepath.Join(projectPath, "package.json")); err == nil {
			frameworks = appendFrameworkMatches(frameworks, string(content), [][2]string{
				{"\"react\"", "React"},
				{"\"vue\"", "Vue"},
				{"\"next\"", "Next.js"},
				{"\"express\"", "Express"},
				// Angular ships as scoped packages ("@angular/core", ...), so
				// the bare key "@angular" never appears in package.json.
				{"\"@angular/core\"", "Angular"},
			})
		}

	case "Python":
		// Consult both manifests: a project may keep an empty or partial
		// requirements.txt next to a pyproject.toml that lists the real
		// dependencies.
		var manifests strings.Builder
		for _, name := range []string{"requirements.txt", "pyproject.toml"} {
			if content, err := os.ReadFile(filepath.Join(projectPath, name)); err == nil {
				manifests.Write(content)
				manifests.WriteByte('\n')
			}
		}

		// Python distribution names are case-insensitive ("Django==4.2"),
		// so match against the lower-cased manifest text.
		frameworks = appendFrameworkMatches(frameworks, strings.ToLower(manifests.String()), [][2]string{
			{"django", "Django"},
			{"flask", "Flask"},
			{"fastapi", "FastAPI"},
		})
	}

	if len(frameworks) == 0 {
		frameworks = append(frameworks, "None detected")
	}

	return frameworks
}

// detectStructure returns the well-known top-level directories that exist in
// projectPath, in a fixed order. The result is empty (never nil) when none
// are present.
func detectStructure(projectPath string) []string {
	structures := []string{}

	dirs := []string{
		"src", "cmd", "internal", "pkg", "lib", "app",
		"components", "pages", "services", "handlers", "models", "utils",
		"tests", "test", "__tests__", "spec",
	}

	for _, dir := range dirs {
		info, err := os.Stat(filepath.Join(projectPath, dir))
		if err == nil && info.IsDir() {
			structures = append(structures, dir)
		}
	}

	return structures
}

// detectMonorepo reports whether projectPath looks like a monorepo: it either
// contains one of the conventional workspace directories or one of the
// workspace/submodule marker files.
func detectMonorepo(projectPath string) bool {
	// Directory indicators only count when they really are directories.
	for _, dir := range []string{"packages", "apps", "services", "workspaces"} {
		if info, err := os.Stat(filepath.Join(projectPath, dir)); err == nil && info.IsDir() {
			return true
		}
	}

	// Marker files count as soon as they exist. The previous single-list
	// check accepted non-directories only when the name started with ".",
	// which made lerna.json and pnpm-workspace.yaml undetectable.
	for _, file := range []string{".gitmodules", "pnpm-workspace.yaml", "lerna.json"} {
		if _, err := os.Stat(filepath.Join(projectPath, file)); err == nil {
			return true
		}
	}

	return false
}
// isDependencyDir reports whether a directory name conventionally holds
// version-control data or third-party code rather than project sources.
func isDependencyDir(name string) bool {
	switch name {
	case ".git", "node_modules", "vendor", ".venv", "venv":
		return true
	}
	return false
}

// detectTests reports whether the project has tests: either a conventional
// top-level test directory exists, or some file in the tree carries a
// well-known test-file suffix.
//
// The tree is walked with filepath.WalkDir because filepath.Glob does not
// support "**". Version-control and dependency directories below the root are
// skipped so that third-party test files are not mistaken for project tests.
func detectTests(projectPath string) bool {
	for _, dir := range []string{"tests", "test", "__tests__", "spec"} {
		if info, err := os.Stat(filepath.Join(projectPath, dir)); err == nil && info.IsDir() {
			return true
		}
	}

	suffixes := []string{"_test.go", "_test.py", ".test.ts", ".test.js", ".spec.ts", ".spec.js"}
	found := false

	// The walk error is deliberately discarded: detection is best-effort and
	// unreadable entries are simply treated as containing no tests.
	_ = filepath.WalkDir(projectPath, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			// d may be nil here (e.g. the root could not be read); returning
			// nil keeps walking whatever is still reachable.
			return nil
		}
		if d.IsDir() {
			// Never skip the root itself, even if it happens to be called
			// "vendor" or similar.
			if path != projectPath && isDependencyDir(d.Name()) {
				return filepath.SkipDir
			}
			return nil
		}

		name := d.Name()
		for _, suf := range suffixes {
			if strings.HasSuffix(name, suf) {
				found = true
				return filepath.SkipAll // first hit is enough
			}
		}
		return nil
	})

	return found
}

// detectCI reports whether projectPath contains a well-known CI configuration
// file or directory.
func detectCI(projectPath string) bool {
	ciIndicators := []string{
		".github", ".gitlab-ci.yml", ".circleci",
		".travis.yml", "jenkins.yml", "Jenkinsfile", "azure-pipelines.yml",
	}

	for _, ci := range ciIndicators {
		if _, err := os.Stat(filepath.Join(projectPath, ci)); err == nil {
			return true
		}
	}

	return false
}
Consider setting up GitHub Actions or similar.", + Priority: "medium", + }) + } + + // Language-specific recommendations + switch result.Language { + case "Go": + result.Recommendations = append(result.Recommendations, Recommendation{ + Category: "tooling", + Title: "Use go.mod", + Message: "Ensure dependencies are managed via go.mod", + Priority: "low", + }) + case "Node.js/TypeScript": + result.Recommendations = append(result.Recommendations, Recommendation{ + Category: "tooling", + Title: "Use package.json", + Message: "Ensure dependencies are managed via package.json", + Priority: "low", + }) + case "Python": + result.Recommendations = append(result.Recommendations, Recommendation{ + Category: "tooling", + Title: "Use Virtual Environment", + Message: "Consider using venv or pyenv for dependency isolation", + Priority: "medium", + }) + } + + // Monorepo recommendations + if result.IsMonorepo { + result.Recommendations = append(result.Recommendations, Recommendation{ + Category: "structure", + Title: "Monorepo Detected", + Message: "This appears to be a monorepo. 
SDP can work with monorepos - consider using workspaces.", + Priority: "low", + }) + } +} diff --git a/sdp-plugin/internal/telemetry/consent.go b/sdp-plugin/internal/telemetry/consent.go index d3175d88..dff3434b 100644 --- a/sdp-plugin/internal/telemetry/consent.go +++ b/sdp-plugin/internal/telemetry/consent.go @@ -77,36 +77,38 @@ func GrantConsent(configPath string, enabled bool) error { // AskForConsent prompts user for telemetry consent (interactive) // Returns true if user consented, false otherwise func AskForConsent() (bool, error) { - fmt.Println("\n" + strings.Repeat("=", 60)) - fmt.Println("📊 Telemetry Consent") - fmt.Println(strings.Repeat("=", 60)) - fmt.Println() - fmt.Println("SDP can collect anonymous usage statistics") - fmt.Println("to improve quality and reliability.") - fmt.Println() - fmt.Println("🔒 What is collected:") - fmt.Println(" • Commands (@build, @review, @oneshot)") - fmt.Println(" • Command execution duration") - fmt.Println(" • Success/failure of execution") - fmt.Println() - fmt.Println("❌ What is NOT collected:") - fmt.Println(" • PII (names, email, usernames)") - fmt.Println(" • Code content") - fmt.Println(" • File paths") - fmt.Println(" • Data stays local (not transmitted)") - fmt.Println() - fmt.Println("📜 Privacy policy: docs/PRIVACY.md") - fmt.Println() + // Write consent banner to stderr to avoid corrupting structured stdout (e.g., --json) + w := os.Stderr + fmt.Fprintln(w, "\n"+strings.Repeat("=", 60)) + fmt.Fprintln(w, "📊 Telemetry Consent") + fmt.Fprintln(w, strings.Repeat("=", 60)) + fmt.Fprintln(w) + fmt.Fprintln(w, "SDP can collect anonymous usage statistics") + fmt.Fprintln(w, "to improve quality and reliability.") + fmt.Fprintln(w) + fmt.Fprintln(w, "🔒 What is collected:") + fmt.Fprintln(w, " • Commands (@build, @review, @oneshot)") + fmt.Fprintln(w, " • Command execution duration") + fmt.Fprintln(w, " • Success/failure of execution") + fmt.Fprintln(w) + fmt.Fprintln(w, "❌ What is NOT collected:") + fmt.Fprintln(w, " 
• PII (names, email, usernames)") + fmt.Fprintln(w, " • Code content") + fmt.Fprintln(w, " • File paths") + fmt.Fprintln(w, " • Data stays local (not transmitted)") + fmt.Fprintln(w) + fmt.Fprintln(w, "📜 Privacy policy: docs/PRIVACY.md") + fmt.Fprintln(w) reader := bufio.NewReader(os.Stdin) for { - fmt.Print("Help improve SDP? (y/n): ") + fmt.Fprint(w, "Help improve SDP? (y/n): ") input, err := reader.ReadString('\n') if err != nil { // Non-interactive environment (e.g., script) - fmt.Println("\n(non-interactive mode: telemetry disabled)") + fmt.Fprintln(w, "\n(non-interactive mode: telemetry disabled)") return false, nil } @@ -114,19 +116,19 @@ func AskForConsent() (bool, error) { switch input { case "y", "yes": - fmt.Println("\n✓ Thank you! Your contribution helps improve SDP.") - fmt.Println(" You can disable anytime with:") - fmt.Println(" sdp telemetry disable") + fmt.Fprintln(w, "\n✓ Thank you! Your contribution helps improve SDP.") + fmt.Fprintln(w, " You can disable anytime with:") + fmt.Fprintln(w, " sdp telemetry disable") return true, nil case "n", "no": - fmt.Println("\n✓ Telemetry disabled.") - fmt.Println(" You can enable later with:") - fmt.Println(" sdp telemetry enable") + fmt.Fprintln(w, "\n✓ Telemetry disabled.") + fmt.Fprintln(w, " You can enable later with:") + fmt.Fprintln(w, " sdp telemetry enable") return false, nil default: - fmt.Println("Please enter 'y' or 'n'") + fmt.Fprintln(w, "Please enter 'y' or 'n'") } } } diff --git a/sdp-plugin/internal/telemetry/export.go b/sdp-plugin/internal/telemetry/export.go index 42bfed9e..ce28e5c7 100644 --- a/sdp-plugin/internal/telemetry/export.go +++ b/sdp-plugin/internal/telemetry/export.go @@ -85,3 +85,58 @@ func (c *Collector) ExportCSV(exportPath string) error { return nil } + +// ExportUXMetrics exports UX metrics from events.jsonl to a JSON file +func (c *Collector) ExportUXMetrics(exportPath string) error { + c.mu.Lock() + defer c.mu.Unlock() + + // Read all events from file + events, err := 
c.readEvents() + if err != nil { + return fmt.Errorf("failed to read events: %w", err) + } + + // Filter only UX metric events + uxEvents := make([]map[string]any, 0) + for _, event := range events { + if event.Type == EventTypeUXMetric { + uxEvents = append(uxEvents, event.Data) + } + } + + // Marshal to JSON array + data, err := json.MarshalIndent(uxEvents, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal UX metrics: %w", err) + } + + // Write to export file (restricted permissions for telemetry data) + if err := os.WriteFile(exportPath, data, 0600); err != nil { + return fmt.Errorf("failed to write export file: %w", err) + } + + return nil +} + +// GetUXMetrics retrieves all UX metrics events +func (c *Collector) GetUXMetrics() ([]map[string]any, error) { + c.mu.Lock() + defer c.mu.Unlock() + + // Read all events from file + events, err := c.readEvents() + if err != nil { + return nil, fmt.Errorf("failed to read events: %w", err) + } + + // Filter only UX metric events + uxEvents := make([]map[string]any, 0) + for _, event := range events { + if event.Type == EventTypeUXMetric { + uxEvents = append(uxEvents, event.Data) + } + } + + return uxEvents, nil +} diff --git a/sdp-plugin/internal/telemetry/types.go b/sdp-plugin/internal/telemetry/types.go index 85ad7ad2..62b7e1f0 100644 --- a/sdp-plugin/internal/telemetry/types.go +++ b/sdp-plugin/internal/telemetry/types.go @@ -13,13 +13,24 @@ const ( EventTypeWSStart EventType = "ws_start" EventTypeWSComplete EventType = "ws_complete" EventTypeQualityGateResult EventType = "quality_gate_result" + + // UX metric events for measuring adoption journey + EventTypeUXMetric EventType = "ux_metric" + EventTypeAssessComplete EventType = "assess_complete" + EventTypeTryComplete EventType = "try_complete" + EventTypeTryDiscard EventType = "try_discard" + EventTypeAdoptComplete EventType = "adopt_complete" + EventTypeReset EventType = "reset" + EventTypeUninstall EventType = "uninstall" ) // IsValid checks if the 
// UXMetricType names the kind of UX measurement carried by a UXMetricEvent.
type UXMetricType string

// Known UX metric types. The string values are the ones written to the
// "metric_type" field of serialized events.
const (
	UXMetricTimeToFirstValue         UXMetricType = "time_to_first_value"
	UXMetricStepAbandonRate          UXMetricType = "step_abandon_rate"
	UXMetricResetUninstallFrequency  UXMetricType = "reset_uninstall_frequency"
	UXMetricBrownfieldInitCompletion UXMetricType = "brownfield_init_completion"
	UXMetricRecoverySuccessRate      UXMetricType = "recovery_success_rate"
	UXMetricSecondSessionReturn      UXMetricType = "second_session_return"
)

// UXMetricEvent represents a single UX metric measurement. Optional fields
// are omitted from the JSON encoding when they hold their zero value.
type UXMetricEvent struct {
	MetricType  UXMetricType   `json:"metric_type"`
	Timestamp   time.Time      `json:"timestamp"`
	Value       any            `json:"value"` // Can be number, bool, or string
	SessionID   string         `json:"session_id,omitempty"`
	ProjectType string         `json:"project_type,omitempty"` // "greenfield", "brownfield", "unknown"
	StepName    string         `json:"step_name,omitempty"`    // e.g., "assess", "try", "adopt"
	StepNumber  int            `json:"step_number,omitempty"`
	Context     map[string]any `json:"context,omitempty"`
}

// IsValid reports whether mt is one of the known UX metric types.
func (mt UXMetricType) IsValid() bool {
	switch mt {
	case UXMetricTimeToFirstValue, UXMetricStepAbandonRate,
		UXMetricResetUninstallFrequency, UXMetricBrownfieldInitCompletion,
		UXMetricRecoverySuccessRate, UXMetricSecondSessionReturn:
		return true
	default:
		return false
	}
}
// UXMetricsCollector manages UX metrics collection. It appends events to a
// JSON-lines file and tags them with a per-collector session ID.
type UXMetricsCollector struct {
	eventsFile string
	mu         sync.Mutex
	sessionID  string
}

// uxMetricsDir returns the directory that holds the UX metrics file, creating
// it if necessary. A non-empty sdpDir is used as-is; otherwise the "sdp"
// directory below os.UserConfigDir is used.
func uxMetricsDir(sdpDir string) (string, error) {
	if sdpDir != "" {
		// Use provided directory (useful for testing with temp dirs)
		if err := os.MkdirAll(sdpDir, 0755); err != nil {
			return "", fmt.Errorf("failed to create SDP directory: %w", err)
		}
		return sdpDir, nil
	}

	configDir, err := os.UserConfigDir()
	if err != nil {
		return "", fmt.Errorf("failed to get user config directory: %w", err)
	}

	sdpConfigDir := filepath.Join(configDir, "sdp")
	if err := os.MkdirAll(sdpConfigDir, 0755); err != nil {
		return "", fmt.Errorf("failed to create SDP config directory: %w", err)
	}
	return sdpConfigDir, nil
}

// NewUXMetricsCollector creates a new UX metrics collector.
//
// UX metrics are stored in ux-metrics.jsonl inside the user's config
// directory (os.UserConfigDir()/sdp) rather than in the project-local .sdp/
// directory, to avoid polluting assessed repositories. If sdpDir is non-empty
// it is used as the storage directory instead (useful for testing).
//
// The metrics file is created with mode 0600 if it does not exist; existing
// content is left untouched. Each collector gets a fresh session ID derived
// from the current time.
func NewUXMetricsCollector(sdpDir string) (*UXMetricsCollector, error) {
	dir, err := uxMetricsDir(sdpDir)
	if err != nil {
		return nil, err
	}
	eventsFile := filepath.Join(dir, "ux-metrics.jsonl")

	// Create or verify the UX metrics file. The handle is only needed for
	// that check and must be closed right away; dropping it leaks a file
	// descriptor per collector.
	f, err := os.OpenFile(eventsFile, os.O_CREATE|os.O_WRONLY, 0600)
	if err != nil {
		return nil, fmt.Errorf("failed to create UX metrics file: %w", err)
	}
	if err := f.Close(); err != nil {
		return nil, fmt.Errorf("failed to close UX metrics file: %w", err)
	}

	return &UXMetricsCollector{
		eventsFile: eventsFile,
		sessionID:  fmt.Sprintf("session_%d", time.Now().UnixNano()),
	}, nil
}
StepName: "try", + StepNumber: 2, + Context: map[string]any{ + "duration_ms": duration.Milliseconds(), + }, + } + return ux.RecordMetric(uxEvent) +} + +// RecordTryDiscard records abandonment during try phase +func (ux *UXMetricsCollector) RecordTryDiscard(projectType, reason string, stepNumber int) error { + uxEvent := UXMetricEvent{ + MetricType: UXMetricStepAbandonRate, // Track abandonment + Timestamp: time.Now(), + Value: false, + ProjectType: projectType, + StepName: "try", + StepNumber: stepNumber, + Context: map[string]any{ + "exit_reason": reason, + }, + } + return ux.RecordMetric(uxEvent) +} + +// RecordAdoptComplete records completion of adopt phase +func (ux *UXMetricsCollector) RecordAdoptComplete(projectType string, duration time.Duration) error { + uxEvent := UXMetricEvent{ + MetricType: UXMetricTimeToFirstValue, // Adopt completes the journey + Timestamp: time.Now(), + Value: duration.Milliseconds(), + ProjectType: projectType, + StepName: "adopt", + StepNumber: 3, + Context: map[string]any{ + "duration_ms": duration.Milliseconds(), + }, + } + return ux.RecordMetric(uxEvent) +} + +// RecordReset records a reset event +func (ux *UXMetricsCollector) RecordReset(reason string) error { + uxEvent := UXMetricEvent{ + MetricType: UXMetricResetUninstallFrequency, + Timestamp: time.Now(), + Value: 1, + Context: map[string]any{ + "action": "reset", + "exit_reason": reason, + }, + } + return ux.RecordMetric(uxEvent) +} + +// RecordUninstall records an uninstall event +func (ux *UXMetricsCollector) RecordUninstall(reason string) error { + uxEvent := UXMetricEvent{ + MetricType: UXMetricResetUninstallFrequency, + Timestamp: time.Now(), + Value: 1, + Context: map[string]any{ + "action": "uninstall", + "exit_reason": reason, + }, + } + return ux.RecordMetric(uxEvent) +} + +// RecordTimeToFirstValue records time from init to first successful feature +func (ux *UXMetricsCollector) RecordTimeToFirstValue(duration time.Duration) error { + uxEvent := UXMetricEvent{ + 
MetricType: UXMetricTimeToFirstValue, + Timestamp: time.Now(), + Value: duration.Milliseconds(), + Context: map[string]any{ + "duration_ms": duration.Milliseconds(), + }, + } + return ux.RecordMetric(uxEvent) +} + +// RecordBrownfieldInitCompletion records brownfield init completion status +func (ux *UXMetricsCollector) RecordBrownfieldInitCompletion(success bool, phase string, details map[string]any) error { + uxEvent := UXMetricEvent{ + MetricType: UXMetricBrownfieldInitCompletion, + Timestamp: time.Now(), + Value: success, + ProjectType: "brownfield", + Context: make(map[string]any), + } + + if phase != "" { + uxEvent.Context["init_phase"] = phase + } + + // Merge additional details + for k, v := range details { + uxEvent.Context[k] = v + } + + return ux.RecordMetric(uxEvent) +} + +// RecordRecoveryAttempt records a recovery attempt and its success +func (ux *UXMetricsCollector) RecordRecoveryAttempt(success bool, recoveryType string) error { + uxEvent := UXMetricEvent{ + MetricType: UXMetricRecoverySuccessRate, + Timestamp: time.Now(), + Value: success, + Context: map[string]any{ + "recovery_type": recoveryType, + "success": success, + }, + } + return ux.RecordMetric(uxEvent) +} + +// RecordSecondSessionReturn records whether user returned for a second session +func (ux *UXMetricsCollector) RecordSecondSessionReturn(daysSinceFirst int) error { + uxEvent := UXMetricEvent{ + MetricType: UXMetricSecondSessionReturn, + Timestamp: time.Now(), + Value: daysSinceFirst > 0, + Context: map[string]any{ + "days_since_first_session": daysSinceFirst, + }, + } + return ux.RecordMetric(uxEvent) +} + +// appendEvent appends an event to the events file +func (ux *UXMetricsCollector) appendEvent(event Event) error { + // Marshal event to JSON + data, err := json.Marshal(event) + if err != nil { + return fmt.Errorf("failed to marshal event: %w", err) + } + + // Append to file with secure permissions + ux.mu.Lock() + defer ux.mu.Unlock() + + file, err := os.OpenFile(ux.eventsFile, 
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) + if err != nil { + return fmt.Errorf("failed to open events file: %w", err) + } + defer file.Close() + + if _, err := file.Write(append(data, '\n')); err != nil { + return fmt.Errorf("failed to write event: %w", err) + } + + return nil +} + +// GetEventsFile returns the path to the events file +func (ux *UXMetricsCollector) GetEventsFile() string { + return ux.eventsFile +} + +// GetSessionID returns the current session ID +func (ux *UXMetricsCollector) GetSessionID() string { + return ux.sessionID +} diff --git a/sdp-plugin/internal/telemetry/ux_metrics_test.go b/sdp-plugin/internal/telemetry/ux_metrics_test.go new file mode 100644 index 00000000..5a0de94e --- /dev/null +++ b/sdp-plugin/internal/telemetry/ux_metrics_test.go @@ -0,0 +1,621 @@ +package telemetry + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + "time" +) + +func TestUXMetricsCollector(t *testing.T) { + // Create temporary directory for testing + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + // Create UX metrics collector + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + // Verify collector is initialized + if collector.eventsFile == "" { + t.Error("Events file path is empty") + } + if collector.sessionID == "" { + t.Error("Session ID is empty") + } + + // Verify events file was created + if _, err := os.Stat(collector.eventsFile); os.IsNotExist(err) { + t.Error("Events file was not created") + } +} + +func TestRecordTimeToFirstValue(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + duration := 5 * time.Minute + err = collector.RecordTimeToFirstValue(duration) + if err != nil { + t.Fatalf("Failed to record time to first value: %v", err) + } + + // Verify event 
was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + if event.Type != EventTypeUXMetric { + t.Errorf("Expected event type %s, got %s", EventTypeUXMetric, event.Type) + } + + // Check metric type in data + metricType, ok := event.Data["metric_type"].(string) + if !ok || metricType != string(UXMetricTimeToFirstValue) { + t.Errorf("Expected metric type %s, got %v", UXMetricTimeToFirstValue, metricType) + } +} + +func TestRecordAssessComplete(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + duration := 2 * time.Minute + err = collector.RecordAssessComplete("greenfield", duration) + if err != nil { + t.Fatalf("Failed to record assess complete: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check step name + stepName, ok := event.Data["step_name"].(string) + if !ok || stepName != "assess" { + t.Errorf("Expected step name 'assess', got %v", stepName) + } + + // Check project type + projectType, ok := event.Data["project_type"].(string) + if !ok || projectType != "greenfield" { + t.Errorf("Expected project type 'greenfield', got %v", projectType) + } +} + +func TestRecordTryComplete(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics 
collector: %v", err) + } + + duration := 10 * time.Minute + err = collector.RecordTryComplete("brownfield", duration) + if err != nil { + t.Fatalf("Failed to record try complete: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check step name + stepName, ok := event.Data["step_name"].(string) + if !ok || stepName != "try" { + t.Errorf("Expected step name 'try', got %v", stepName) + } + + // Check step number + stepNumber, ok := event.Data["step_number"].(float64) + if !ok || int(stepNumber) != 2 { + t.Errorf("Expected step number 2, got %v", stepNumber) + } +} + +func TestRecordTryDiscard(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + err = collector.RecordTryDiscard("brownfield", "user_exited", 2) + if err != nil { + t.Fatalf("Failed to record try discard: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check metric_type + metricType, ok := event.Data["metric_type"].(string) + if !ok || metricType != "step_abandon_rate" { + t.Errorf("Expected metric_type 'step_abandon_rate', got %v", metricType) + } + + // Check exit reason in context + context, ok := event.Data["context"].(map[string]interface{}) + if !ok { + t.Fatalf("Expected context to be a map, got %T", event.Data["context"]) + } + exitReason, ok := 
context["exit_reason"].(string) + if !ok || exitReason != "user_exited" { + t.Errorf("Expected exit reason 'user_exited', got %v", exitReason) + } + + // Check value (should be false) + value, ok := event.Data["value"].(bool) + if !ok || value != false { + t.Errorf("Expected value false, got %v", value) + } +} + +func TestRecordAdoptComplete(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + duration := 30 * time.Minute + err = collector.RecordAdoptComplete("greenfield", duration) + if err != nil { + t.Fatalf("Failed to record adopt complete: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check step name + stepName, ok := event.Data["step_name"].(string) + if !ok || stepName != "adopt" { + t.Errorf("Expected step name 'adopt', got %v", stepName) + } + + // Check step number + stepNumber, ok := event.Data["step_number"].(float64) + if !ok || int(stepNumber) != 3 { + t.Errorf("Expected step number 3, got %v", stepNumber) + } +} + +func TestRecordReset(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + err = collector.RecordReset("configuration_error") + if err != nil { + t.Fatalf("Failed to record reset: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := 
json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check metric type + metricType, ok := event.Data["metric_type"].(string) + if !ok || metricType != string(UXMetricResetUninstallFrequency) { + t.Errorf("Expected metric type %s, got %v", UXMetricResetUninstallFrequency, metricType) + } + + // Check action in context + context, ok := event.Data["context"].(map[string]interface{}) + if !ok { + t.Fatalf("Expected context to be a map, got %T", event.Data["context"]) + } + action, ok := context["action"].(string) + if !ok || action != "reset" { + t.Errorf("Expected action 'reset', got %v", action) + } +} + +func TestRecordUninstall(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + err = collector.RecordUninstall("not_suitable") + if err != nil { + t.Fatalf("Failed to record uninstall: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check action in context + context, ok := event.Data["context"].(map[string]interface{}) + if !ok { + t.Fatalf("Expected context to be a map, got %T", event.Data["context"]) + } + action, ok := context["action"].(string) + if !ok || action != "uninstall" { + t.Errorf("Expected action 'uninstall', got %v", action) + } +} + +func TestRecordBrownfieldInitCompletion(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + details := map[string]any{ + "project_size": "large", + 
"language": "go", + } + + err = collector.RecordBrownfieldInitCompletion(true, "complete", details) + if err != nil { + t.Fatalf("Failed to record brownfield init completion: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check project type + projectType, ok := event.Data["project_type"].(string) + if !ok || projectType != "brownfield" { + t.Errorf("Expected project type 'brownfield', got %v", projectType) + } + + // Check init phase in context + context, ok := event.Data["context"].(map[string]interface{}) + if !ok { + t.Fatalf("Expected context to be a map, got %T", event.Data["context"]) + } + initPhase, ok := context["init_phase"].(string) + if !ok || initPhase != "complete" { + t.Errorf("Expected init phase 'complete', got %v", initPhase) + } + + // Check value (should be true) + value, ok := event.Data["value"].(bool) + if !ok || value != true { + t.Errorf("Expected value true, got %v", value) + } + + // Check additional details in context + projectSize, ok := context["project_size"].(string) + if !ok || projectSize != "large" { + t.Errorf("Expected project size 'large', got %v", projectSize) + } +} + +func TestRecordRecoveryAttempt(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + err = collector.RecordRecoveryAttempt(true, "auto_fix") + if err != nil { + t.Fatalf("Failed to record recovery attempt: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var 
event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check recovery type in context + context, ok := event.Data["context"].(map[string]interface{}) + if !ok { + t.Fatalf("Expected context to be a map, got %T", event.Data["context"]) + } + recoveryType, ok := context["recovery_type"].(string) + if !ok || recoveryType != "auto_fix" { + t.Errorf("Expected recovery type 'auto_fix', got %v", recoveryType) + } + + // Check success value + success, ok := event.Data["value"].(bool) + if !ok || success != true { + t.Errorf("Expected success true, got %v", success) + } +} + +func TestRecordSecondSessionReturn(t *testing.T) { + tempDir := t.TempDir() + sdpDir := filepath.Join(tempDir, ".sdp") + + collector, err := NewUXMetricsCollector(sdpDir) + if err != nil { + t.Fatalf("Failed to create UX metrics collector: %v", err) + } + + err = collector.RecordSecondSessionReturn(3) + if err != nil { + t.Fatalf("Failed to record second session return: %v", err) + } + + // Verify event was written + eventsFile := collector.GetEventsFile() + data, err := os.ReadFile(eventsFile) + if err != nil { + t.Fatalf("Failed to read events file: %v", err) + } + + var event Event + if err := json.Unmarshal(data, &event); err != nil { + t.Fatalf("Failed to unmarshal event: %v", err) + } + + // Check days since first session in context + context, ok := event.Data["context"].(map[string]interface{}) + if !ok { + t.Fatalf("Expected context to be a map, got %T", event.Data["context"]) + } + daysSinceFirst, ok := context["days_since_first_session"].(float64) + if !ok || int(daysSinceFirst) != 3 { + t.Errorf("Expected 3 days since first session, got %v", daysSinceFirst) + } + + // Check value (should be true since days > 0) + value, ok := event.Data["value"].(bool) + if !ok || value != true { + t.Errorf("Expected value true, got %v", value) + } +} + +func TestUXMetricTypeValidation(t *testing.T) { + tests := []struct { + name string 
+ metric UXMetricType + valid bool + }{ + {"time_to_first_value", UXMetricTimeToFirstValue, true}, + {"step_abandon_rate", UXMetricStepAbandonRate, true}, + {"reset_uninstall_frequency", UXMetricResetUninstallFrequency, true}, + {"brownfield_init_completion", UXMetricBrownfieldInitCompletion, true}, + {"recovery_success_rate", UXMetricRecoverySuccessRate, true}, + {"second_session_return", UXMetricSecondSessionReturn, true}, + {"invalid_metric", UXMetricType("invalid"), false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.metric.IsValid() + if got != tt.valid { + t.Errorf("UXMetricType.IsValid() = %v, want %v", got, tt.valid) + } + }) + } +} + +func TestEventTypeValidationWithUXTypes(t *testing.T) { + tests := []struct { + name string + etype EventType + valid bool + }{ + {"ux_metric", EventTypeUXMetric, true}, + {"assess_complete", EventTypeAssessComplete, true}, + {"try_complete", EventTypeTryComplete, true}, + {"try_discard", EventTypeTryDiscard, true}, + {"adopt_complete", EventTypeAdoptComplete, true}, + {"reset", EventTypeReset, true}, + {"uninstall", EventTypeUninstall, true}, + {"invalid_type", EventType("invalid"), false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.etype.IsValid() + if got != tt.valid { + t.Errorf("EventType.IsValid() = %v, want %v", got, tt.valid) + } + }) + } +} + +func TestExportUXMetrics(t *testing.T) { + tempDir := t.TempDir() + + // Create a collector with events file + eventsFile := filepath.Join(tempDir, "events.jsonl") + collector, err := NewCollector(eventsFile, true) + if err != nil { + t.Fatalf("Failed to create collector: %v", err) + } + + // Record some UX metric events + collector.Record(Event{ + Type: EventTypeUXMetric, + Timestamp: time.Now(), + Data: map[string]any{ + "metric_type": string(UXMetricTimeToFirstValue), + "value": int64(300000), + }, + }) + + collector.Record(Event{ + Type: EventTypeUXMetric, + Timestamp: time.Now(), + Data: 
map[string]any{ + "metric_type": string(UXMetricStepAbandonRate), + "value": 15.5, + }, + }) + + // Export UX metrics + exportPath := filepath.Join(tempDir, "ux_metrics_export.json") + err = collector.ExportUXMetrics(exportPath) + if err != nil { + t.Fatalf("Failed to export UX metrics: %v", err) + } + + // Verify export file exists + if _, err := os.Stat(exportPath); os.IsNotExist(err) { + t.Error("Export file was not created") + } + + // Read and verify export + data, err := os.ReadFile(exportPath) + if err != nil { + t.Fatalf("Failed to read export file: %v", err) + } + + var exportedMetrics []map[string]any + if err := json.Unmarshal(data, &exportedMetrics); err != nil { + t.Fatalf("Failed to unmarshal exported metrics: %v", err) + } + + if len(exportedMetrics) != 2 { + t.Errorf("Expected 2 exported metrics, got %d", len(exportedMetrics)) + } +} + +func TestGetUXMetrics(t *testing.T) { + tempDir := t.TempDir() + + // Create a collector with events file + eventsFile := filepath.Join(tempDir, "events.jsonl") + collector, err := NewCollector(eventsFile, true) + if err != nil { + t.Fatalf("Failed to create collector: %v", err) + } + + // Record mixed events (UX and non-UX) + collector.Record(Event{ + Type: EventTypeCommandStart, + Timestamp: time.Now(), + Data: map[string]any{ + "command": "test", + }, + }) + + collector.Record(Event{ + Type: EventTypeUXMetric, + Timestamp: time.Now(), + Data: map[string]any{ + "metric_type": string(UXMetricTimeToFirstValue), + "value": int64(300000), + }, + }) + + // Get UX metrics + uxMetrics, err := collector.GetUXMetrics() + if err != nil { + t.Fatalf("Failed to get UX metrics: %v", err) + } + + if len(uxMetrics) != 1 { + t.Errorf("Expected 1 UX metric, got %d", len(uxMetrics)) + } + + // Verify the metric + metricType, ok := uxMetrics[0]["metric_type"].(string) + if !ok || metricType != string(UXMetricTimeToFirstValue) { + t.Errorf("Expected metric type %s, got %v", UXMetricTimeToFirstValue, metricType) + } +} + diff --git 
// ---------------------------------------------------------------------------
// Source: sdp-plugin/internal/trial/trial.go (package trial)
// ---------------------------------------------------------------------------

// Trial represents a trial execution session: a temporary git branch created
// in ProjectPath on which a task is tried out.
type Trial struct {
	BranchName      string    // name of the temporary trial branch
	OriginalBranch  string    // branch that was checked out when the trial was created
	ProjectPath     string    // working directory used for every git command
	TaskDescription string    // free-form description of the task being tried
	StartTime       time.Time // when the trial was created; basis for reported durations
}

// TrialResult represents the result of a trial execution.
type TrialResult struct {
	Success  bool
	Message  string
	Changes  []string
	Duration time.Duration
}

// NewTrial creates a new trial session for the repository at projectPath.
//
// It records the currently checked-out branch (as reported by
// `git rev-parse --abbrev-ref HEAD`) and derives a branch name of the form
// "sdp-try-YYYYMMDD-HHMMSS". No branch is created until Start is called.
// It returns an error if the current branch cannot be determined.
func NewTrial(projectPath, taskDescription string) (*Trial, error) {
	originalBranch, err := getCurrentBranch(projectPath)
	if err != nil {
		return nil, fmt.Errorf("failed to get current branch: %w", err)
	}

	// Read the clock once so the branch name and StartTime agree.
	now := time.Now()

	return &Trial{
		BranchName:      fmt.Sprintf("sdp-try-%s", now.Format("20060102-150405")),
		OriginalBranch:  originalBranch,
		ProjectPath:     projectPath,
		TaskDescription: taskDescription,
		StartTime:       now,
	}, nil
}

// Start creates the temporary branch and checks it out.
func (t *Trial) Start() error {
	if err := t.createBranch(); err != nil {
		return fmt.Errorf("failed to create branch: %w", err)
	}
	return nil
}

// Execute runs the task as a dry-run planner: it classifies the task
// description and returns a structured execution plan without touching the
// codebase.
//
// An empty or whitespace-only description yields a result with Success set
// to false. The returned error is always nil.
func (t *Trial) Execute() (*TrialResult, error) {
	if strings.TrimSpace(t.TaskDescription) == "" {
		return &TrialResult{
			Success:  false,
			Message:  "Task description cannot be empty",
			Changes:  []string{},
			Duration: time.Since(t.StartTime),
		}, nil
	}

	plan := t.createExecutionPlan()

	// Measure once so the duration shown in Changes matches Duration.
	elapsed := time.Since(t.StartTime)

	return &TrialResult{
		Success: true,
		Message: fmt.Sprintf("Dry-run plan created for: %s\n\n%s", t.TaskDescription, plan),
		Changes: []string{
			fmt.Sprintf("Branch: %s", t.BranchName),
			fmt.Sprintf("Task: %s", t.TaskDescription),
			fmt.Sprintf("Plan:\n%s", plan),
			fmt.Sprintf("Duration: %v", elapsed.Round(time.Millisecond)),
		},
		Duration: elapsed,
	}, nil
}

// createExecutionPlan classifies the task by case-insensitive substring
// matching on its description and renders the matching step list as text.
// Categories are tested in a fixed order (feature, bug fix, refactoring,
// test) and the first match wins; anything else is a "General Task".
func (t *Trial) createExecutionPlan() string {
	taskDesc := strings.ToLower(t.TaskDescription)
	mentions := func(words ...string) bool {
		for _, w := range words {
			if strings.Contains(taskDesc, w) {
				return true
			}
		}
		return false
	}

	var (
		taskType string
		steps    []string
	)

	switch {
	case mentions("add", "create", "implement"):
		taskType = "Feature Addition"
		steps = []string{
			"1. Analyze existing codebase structure",
			"2. Identify relevant files and dependencies",
			"3. Create/modify implementation files",
			"4. Add/update tests",
			"5. Update documentation",
		}
	case mentions("fix", "bug"):
		taskType = "Bug Fix"
		steps = []string{
			"1. Reproduce and identify the issue",
			"2. Locate problematic code",
			"3. Implement fix",
			"4. Add regression tests",
			"5. Verify fix resolves issue",
		}
	case mentions("refactor", "clean"):
		taskType = "Refactoring"
		steps = []string{
			"1. Analyze current implementation",
			"2. Identify refactoring opportunities",
			"3. Apply refactoring changes",
			"4. Ensure tests pass",
			"5. Update documentation if needed",
		}
	case mentions("test"):
		taskType = "Test Addition"
		steps = []string{
			"1. Identify untested code paths",
			"2. Design test cases",
			"3. Implement tests",
			"4. Verify coverage",
			"5. Document test scenarios",
		}
	default:
		taskType = "General Task"
		steps = []string{
			"1. Understand requirements",
			"2. Analyze affected components",
			"3. Implement changes",
			"4. Test and verify",
			"5. Document changes",
		}
	}

	var plan strings.Builder
	fmt.Fprintf(&plan, "Task Type: %s\n", taskType)
	plan.WriteString("Proposed Execution Steps:\n")
	for _, step := range steps {
		fmt.Fprintf(&plan, " %s\n", step)
	}
	plan.WriteString("\nNote: This is a dry-run plan. No actual changes have been made.")
	plan.WriteString("\nUse 'sdp adopt' to convert this trial into a full SDP setup.")

	return plan.String()
}

// Accept keeps the trial branch and prints the suggested next steps to
// standard output. It always returns nil.
func (t *Trial) Accept() error {
	fmt.Printf("✓ Trial accepted. Branch '%s' kept for adoption.\n", t.BranchName)
	fmt.Println("\nNext steps:")
	fmt.Println(" 1. Review the changes: git diff")
	fmt.Println(" 2. Run 'sdp adopt' to convert to full SDP setup")
	// Name the branch to return to: a bare `git checkout` leaves HEAD on the
	// trial branch, so the merge that follows would not land anywhere else.
	fmt.Printf(" 3. Or merge manually: git checkout %s && git merge %s\n", t.OriginalBranch, t.BranchName)
	return nil
}

// Discard checks out the original branch and then force-deletes the trial
// branch. It stops at the first git command that fails.
func (t *Trial) Discard() error {
	if err := checkoutBranch(t.ProjectPath, t.OriginalBranch); err != nil {
		return fmt.Errorf("failed to checkout original branch: %w", err)
	}

	if err := deleteBranch(t.ProjectPath, t.BranchName); err != nil {
		return fmt.Errorf("failed to delete trial branch: %w", err)
	}

	fmt.Printf("✓ Trial discarded. Repository restored to original state.\n")
	return nil
}

// VerifyClean reports whether `git status --porcelain` prints nothing for
// the project, i.e. there are no uncommitted changes.
func (t *Trial) VerifyClean() (bool, error) {
	cmd := exec.Command("git", "status", "--porcelain")
	cmd.Dir = t.ProjectPath
	output, err := cmd.Output()
	if err != nil {
		return false, fmt.Errorf("failed to check git status: %w", err)
	}

	return len(strings.TrimSpace(string(output))) == 0, nil
}

// getCurrentBranch returns the output of `git rev-parse --abbrev-ref HEAD`
// run in projectPath, with surrounding whitespace trimmed.
func getCurrentBranch(projectPath string) (string, error) {
	cmd := exec.Command("git", "rev-parse", "--abbrev-ref", "HEAD")
	cmd.Dir = projectPath
	output, err := cmd.Output()
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(output)), nil
}

// gitCombinedOutput runs git with args in dir. On failure it returns an error
// that carries git's combined stdout/stderr and wraps the underlying exec
// error, so callers can still inspect it with errors.As (e.g. *exec.ExitError).
func gitCombinedOutput(dir string, args ...string) error {
	cmd := exec.Command("git", args...)
	cmd.Dir = dir
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("%s: %w", strings.TrimSpace(string(output)), err)
	}
	return nil
}

// createBranch creates and checks out the trial branch.
func (t *Trial) createBranch() error {
	if err := gitCombinedOutput(t.ProjectPath, "checkout", "-b", t.BranchName); err != nil {
		return fmt.Errorf("failed to create branch: %w", err)
	}
	return nil
}

// checkoutBranch checks out the specified branch.
func checkoutBranch(projectPath, branchName string) error {
	if err := gitCombinedOutput(projectPath, "checkout", branchName); err != nil {
		return fmt.Errorf("failed to checkout branch: %w", err)
	}
	return nil
}

// deleteBranch force-deletes (`git branch -D`) the specified branch.
func deleteBranch(projectPath, branchName string) error {
	if err := gitCombinedOutput(projectPath, "branch", "-D", branchName); err != nil {
		return fmt.Errorf("failed to delete branch: %w", err)
	}
	return nil
}