diff --git a/.agents/plugins/marketplace.json b/.agents/plugins/marketplace.json index f8f301c..38da4b8 100644 --- a/.agents/plugins/marketplace.json +++ b/.agents/plugins/marketplace.json @@ -6,7 +6,7 @@ "plugins": [ { "name": "frontend-skills", - "version": "4.11.0", + "version": "4.11.1", "description": "React/TypeScript frontend + Go backend enforcement harness. 92 wired hooks, 101 scripts, 66 skills (includes /diagnose feedback-loop-first 6-phase debugger, /triage multi-tracker (gh + acli) state machine, green-not-done warnings gate via `test-warning-check` + `ci-warning-audit`, /steelman anti-sycophancy guard, /snyk-ux-security with JS+Go tracks, exploitability-triage first gate, top-level-first upgrade ladder, `govulncheck`), 9 agents, 5 routines. 21 LLM failure modes (7 Karpathy single-agent + 14 MAST multi-agent, Cemri et al. NeurIPS 2025). OWASP+STRIDE, Core Web Vitals gate, bun/yarn lockfile parity, MCP ban with CLI redirect, worktree isolation, agent-browser integration. Opus 4.7 tuned. POSIX-friendly.", "source": { "source": "local", @@ -17,7 +17,7 @@ "authentication": "ON_INSTALL" }, "category": "Development", - "x-updatedAt": "2026-05-26", + "x-updatedAt": "2026-06-10", "x-includes": { "instructions": [ "CLAUDE.md", @@ -26,6 +26,7 @@ "settings": "settings.json" }, "x-changelog": { + "4.11.1": "2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.", "4.11.0": "2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66.", "4.10.6": "2026-05-25 -- Codex marketplace install fix: expose root plugin through root plugin path so Codex marketplace source path is a plugin directory while retaining root-relative plugin packaging.", "4.10.5": "2026-05-25 -- Vendor remaining mattpocock/skills into repo, caveman-compress new skill docs, add vendoring evals, and fix detached-HEAD branch-safety eval coverage.", diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index b955c91..18165e6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,12 +11,12 @@ { "name": "frontend-skills", "description": "React/TypeScript frontend + Go backend enforcement harness. 92 wired hooks (101 scripts) enforce patterns on every edit. Green != done: `test-warning-check` + `ci-warning-audit` block passing tests with DeprecationWarning / React `act()` / unhandled rejection noise (local + `gh run view --log` on green CI). 66 skills from TDD through CI-green PR, including /diagnose feedback-loop-first 6-phase debugger + /triage multi-tracker (gh + acli) state machine + /steelman anti-sycophancy guard + /snyk-ux-security JS+Go per-path vuln sweep (exploitability-triage first gate via `bun why`/`go mod why`/`govulncheck`; top-level direct dep bump first, parent dep second, resolutions/overrides/replace last resort only; React 18 pin + changelog-walked majors + no-deferral escalation; Go `go get -u` + `go mod tidy` + `govulncheck` verify). 21 LLM failure modes enforced (7 Karpathy single-agent + 14 MAST multi-agent, Cemri et al. NeurIPS 2025). OWASP + STRIDE + snyk/bun audit. Core Web Vitals perf gate. bun.lock + yarn.lock parity enforced. Worktree isolation + branch safety. MCP ban with CLI redirect (~20x savings). Agent-browser integration (~91% token reduction). 9 agents (3-hat plan review + karpathy reference), 5 routines. Opus 4.7 tuned. POSIX-friendly.", - "version": "4.11.0", + "version": "4.11.1", "source": { "source": "github", "repo": "redpanda-data/ui-harness" }, - "x-updatedAt": "2026-05-26", + "x-updatedAt": "2026-06-10", "x-includes": { "instructions": [ "CLAUDE.md", @@ -25,6 +25,7 @@ "settings": "settings.json" }, "x-changelog": { + "4.11.1": "2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.", "4.11.0": "2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66.", "4.10.6": "2026-05-25 -- Codex marketplace install fix: expose root plugin through root plugin path so Codex marketplace source path is a plugin directory while retaining root-relative plugin packaging.", "4.10.5": "2026-05-25 -- Vendor remaining mattpocock/skills into repo, caveman-compress new skill docs, add vendoring evals, and fix detached-HEAD branch-safety eval coverage.", diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index bd1dda3..2193746 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "frontend-skills", "description": "React/TypeScript frontend + Go backend enforcement harness. 92 wired hooks across 101 scripts enforce patterns on every edit. Green != done: `test-warning-check` surfaces warnings on passing test/lint/type runs (DeprecationWarning, React `act()`, unhandled rejection, `@ts-ignore`); `ci-warning-audit` Stop-hook scans `gh run view --log` on green CI for deprecations / console warnings / skipped tests. 66 skills from TDD through CI-green PR, including /steelman anti-sycophancy guard + /snyk-ux-security per-path vuln sweep (JS + Go ecosystems; exploitability-triage first gate via `bun why`/`go mod why`/`govulncheck`; top-level direct dep bump first, parent dep bump second, resolutions/overrides/replace as last resort only with follow-up TODO to remove; React 18 pin + changelog-walked major bumps + no-deferral escalation; Go `snyk test --file=go.mod` + `go get -u` + `go mod tidy` + `govulncheck` verify). Session exit blocked while PR review threads remain unresolved. TypeScript escape hatches blocked at Edit. tsconfig strictness weakening blocked. Worktree isolation + branch safety hook-enforced. bun.lock + yarn.lock parity enforced (Snyk IO doesn't parse bun.lock). 21 LLM failure modes enforced: 7 Karpathy single-agent + 14 MAST multi-agent (Cemri et al. NeurIPS 2025). OWASP + STRIDE subset + snyk/bun audit. Core Web Vitals perf gate. MCP ban with CLI redirect (~20x token savings). Agent-browser wrap (~91% token reduction for AI browsing). 3-hat plan review (product/engineering/design). 9 agents (adds plan hats + karpathy reference), 5 routines. Opus 4.7 tuned, POSIX-friendly.", - "version": "4.11.0", + "version": "4.11.1", "author": { "name": "Redpanda Data" }, @@ -100,7 +100,7 @@ "./agents/self-reviewer.md", "./agents/verifier.md" ], - "x-updatedAt": "2026-05-26", + "x-updatedAt": "2026-06-10", "x-includes": { "instructions": [ "CLAUDE.md", @@ -110,6 +110,7 @@ "manifestSource": "skill-manifest.json" }, "x-changelog": { + "4.11.1": "2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.", "4.11.0": "2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66.", "4.10.6": "2026-05-25 -- Codex marketplace install fix: expose root plugin through root plugin path so Codex marketplace source path is a plugin directory while retaining root-relative plugin packaging.", "4.10.5": "2026-05-25 -- Vendor remaining mattpocock/skills into repo, caveman-compress new skill docs, add vendoring evals, and fix detached-HEAD branch-safety eval coverage.", diff --git a/.claude/hooks/intent-detect.sh b/.claude/hooks/intent-detect.sh index ffbe731..0f0bd46 100755 --- a/.claude/hooks/intent-detect.sh +++ b/.claude/hooks/intent-detect.sh @@ -113,17 +113,18 @@ if [ -n "$_pr_number" ]; then fi # ── Scope-lock: prefer committing to current feature branch ───── -# Auto-detected from branch state, not prompt keywords. +# Add only when some workflow directive already fired. Pure questions should +# stay silent even on feature branches. -_current_branch=$(git branch --show-current 2>/dev/null || true) -case "$_current_branch" in - main|master|develop|"") ;; - *) - if [ -n "$directives" ]; then +if [ -n "$directives" ]; then + _current_branch=$(git branch --show-current 2>/dev/null || true) + case "$_current_branch" in + main|master|develop|"") ;; + *) directives="$directives\n[SCOPE-LOCK] On feature branch '$_current_branch'. Prefer committing here. Ask before creating new branches or PRs unless explicitly instructed." - fi - ;; -esac + ;; + esac +fi # ── Risk tier (informs auto mode confidence) ──────────────────── # low: tests, components, refactoring — fully guarded by hooks diff --git a/.claude/settings.json b/.claude/settings.json index fe253f9..da88065 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -164,6 +164,13 @@ "args": [ "session-end.sh" ] + }, + { + "type": "command", + "command": ".claude/hooks/run-hook.sh", + "args": [ + "metrics-summary-stop.sh" + ] } ] } diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index 1d963a2..bf0805c 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "frontend-skills", - "version": "4.11.0", + "version": "4.11.1", "description": "React/TypeScript frontend + Go backend enforcement harness. 92 wired hooks (101 scripts) enforce patterns on every edit. Green != done: `test-warning-check` (local Bash PostToolUse) + `ci-warning-audit` (Stop, scans `gh run view --log`) block passing tests with DeprecationWarning / React `act()` / unhandled rejection / `@ts-ignore` noise. 66 skills from TDD through CI-green PR, including /diagnose feedback-loop-first 6-phase debugger + /triage multi-tracker (gh + acli) state machine + /steelman anti-sycophancy guard + /snyk-ux-security JS+Go per-path vuln sweep (exploitability-triage first gate, top-level-first upgrade ladder, React 18 pin, `govulncheck` for Go). 21 LLM failure modes (7 Karpathy single-agent + 14 MAST multi-agent, Cemri et al. NeurIPS 2025), OWASP + STRIDE, Core Web Vitals gate, bun.lock + yarn.lock parity, worktree isolation + branch safety, MCP ban with CLI redirect (~20x savings), agent-browser integration (~91% token reduction). 9 agents (3 plan-review hats + karpathy reference), 5 routines. Single-source manifest codegen. Opus 4.7 tuned. POSIX-friendly.", "author": { "name": "Redpanda Data", @@ -44,7 +44,7 @@ "Review this PR and fix all findings" ] }, - "x-updatedAt": "2026-05-26", + "x-updatedAt": "2026-06-10", "x-includes": { "instructions": [ "CLAUDE.md", @@ -53,6 +53,7 @@ "settings": "settings.json" }, "x-changelog": { + "4.11.1": "2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.", "4.11.0": "2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66.", "4.10.6": "2026-05-25 -- Codex marketplace install fix: expose root plugin through root plugin path so Codex marketplace source path is a plugin directory while retaining root-relative plugin packaging.", "4.10.5": "2026-05-25 -- Vendor remaining mattpocock/skills into repo, caveman-compress new skill docs, add vendoring evals, and fix detached-HEAD branch-safety eval coverage.", diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c36568..334b3f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 4.11.1 + +2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200. + ## 4.11.0 2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66. Full visual-review eval suite: 79/79. diff --git a/README.md b/README.md index 19902dc..52173a5 100644 --- a/README.md +++ b/README.md @@ -10,20 +10,27 @@ Hooks enforce patterns real-time, skills guide workflow, orchestration layer ens ## Install -Run inside [Claude Code](https://docs.anthropic.com/en/docs/claude-code) session (start with `claude` in terminal): +Claude Code 2.1.157+ auto-loads plugins from `~/.claude/skills/`. No marketplace needed. + +Still a plugin: `.claude-plugin/plugin.json` is what lets the harness bundle hooks, agents, and skills together. We just keep it in the skills directory instead of installing from a marketplace. ```bash -/plugin marketplace add redpanda-data/ui-harness +mkdir -p ~/.claude/skills +git clone https://github.com/redpanda-data/ui-harness ~/.claude/skills/frontend-skills ``` + +Start or restart [Claude Code](https://docs.anthropic.com/en/docs/claude-code) from any project: + ```bash -/plugin install frontend-skills@ui-harness +claude ``` + +Skills, hooks, agents activate automatically. If Claude Code was already running: + ```bash /reload-plugins ``` -Three commands. Skills, hooks, agents activate immediately. Done. - **Recommended: rtk** (output-compression proxy, ~60-90% token savings on git/cargo/test/gh): ```bash @@ -36,17 +43,37 @@ Harness fail-open -- skip safe; SessionStart nudge remind if miss. **Update** (pull latest): ```bash -/plugin install frontend-skills --force +git -C ~/.claude/skills/frontend-skills pull --ff-only ``` -Restart Claude Code session so hooks reload from new cache. +Restart Claude Code session or run `/reload-plugins` so hooks reload. **Verify:** ```bash -bash "$(ls -d ~/.claude/plugins/cache/ui-harness/frontend-skills/*/ | tail -1)scripts/verify-install.sh" +claude plugin list | grep 'frontend-skills.*skills-dir' +bash ~/.claude/skills/frontend-skills/scripts/verify-install.sh +``` + +
+Legacy: marketplace install (Claude Code <=2.1.156) + +Use this only if skills-directory plugins are not available. + +```bash +/plugin marketplace add redpanda-data/ui-harness +/plugin install frontend-skills@ui-harness +/reload-plugins +``` + +Update legacy marketplace install: + +```bash +/plugin install frontend-skills --force ``` +
+
Codex (OpenAI) -- install as Codex plugin @@ -73,7 +100,7 @@ codex plugin add frontend-skills@ui-harness Or pin a release: ```bash -codex plugin marketplace add redpanda-data/ui-harness --ref v4.10.2 +codex plugin marketplace add redpanda-data/ui-harness --ref v4.11.1 codex plugin marketplace upgrade ui-harness codex plugin add frontend-skills@ui-harness ``` @@ -415,7 +442,7 @@ Featured skill moments -- each from an actual session: | Cross-session learning | No | Manual edit | No | No | **Yes (Phase 6 Compound -> `.claude/rules/`)** | | Opinionated stack | N/A | N/A | Agnostic | Varies | **React + TanStack + ConnectRPC + Bun** | | Config surface | 0 | Low | Low | Medium | **Medium (14 setup skills, env vars)** | -| Setup cost | 0 | ~30 min prompt writing | One `/install` | Varies | **3 commands** | +| Setup cost | 0 | ~30 min prompt writing | One `/install` | Varies | **One clone into `~/.claude/skills/`** | **TL;DR:** If your stack matches (React + Bun/TypeScript + modern patterns), the deterministic enforcement is worth the opinionation. If not, fork the hook scripts and keep the lifecycle skills. @@ -480,11 +507,11 @@ The fastest way to believe it: reproduce the core claim in your terminal. **Prereq:** Claude Code installed, fresh repo. -**1. Install the plugin** +**1. Install the plugin from your skills directory** ```bash -/plugin marketplace add redpanda-data/ui-harness -/plugin install frontend-skills@ui-harness -/reload-plugins +mkdir -p ~/.claude/skills +git clone https://github.com/redpanda-data/ui-harness ~/.claude/skills/frontend-skills +claude ``` **2. Ask Claude to write a banned pattern** @@ -566,7 +593,7 @@ No. Redpanda-specific rules live in a **separate** kit (`redpanda-frontend-kit`)
How do I customize or remove a hook? -Every hook is a bash script in `.claude/hooks/` -- inspect, edit, delete. Plugin install places them in `~/.claude/plugins/cache/ui-harness/frontend-skills//.claude/hooks/`. Override per-project by copying to `/.claude/hooks/` (takes precedence). Env vars control most behavior: `HOOK_VERBOSITY=terse`, `REACT_RULES_BAN_USEEFFECT=1`, `ORCHESTRATION_STRICT=0`, etc. See [Configuration](#configuration). +Every hook is a bash script in `~/.claude/skills/frontend-skills/.claude/hooks/` -- inspect, edit, delete. Override per-project by copying to `/.claude/hooks/` (takes precedence). Env vars control most behavior: `HOOK_VERBOSITY=terse`, `REACT_RULES_BAN_USEEFFECT=1`, `ORCHESTRATION_STRICT=0`, etc. See [Configuration](#configuration).
@@ -753,7 +780,7 @@ New to AI-assisted dev? Start here. **Day 1 (30 min):** 1. Install (see [Install](#install) above) -2. Run `bash "$(ls -d ~/.claude/plugins/cache/ui-harness/frontend-skills/*/ | tail -1)scripts/verify-install.sh"` confirm all wired +2. Run `bash ~/.claude/skills/frontend-skills/scripts/verify-install.sh` confirm all wired 3. Pick real ticket from backlog -- not toy problem **First prompt:** @@ -1446,4 +1473,4 @@ Prior art, techniques, related work that informed design decisions in this harne ### State of the art - [State of Playwright AI Ecosystem 2026](https://currents.dev/posts/state-of-playwright-ai-ecosystem-in-2026) -- [Agent Browser vs Puppeteer & Playwright (Webfuse)](https://www.webfuse.com/blog/agent-browser-vs-puppeteer-and-playwright) \ No newline at end of file +- [Agent Browser vs Puppeteer & Playwright (Webfuse)](https://www.webfuse.com/blog/agent-browser-vs-puppeteer-and-playwright) diff --git a/agents/adversarial-reviewer.md b/agents/adversarial-reviewer.md index fc38072..ead13ce 100644 --- a/agents/adversarial-reviewer.md +++ b/agents/adversarial-reviewer.md @@ -1,6 +1,6 @@ --- name: adversarial-reviewer -description: Constructs failure scenarios and stress-tests implementations. Asks "what breaks this?" not "does this look right?" Gated: runs only when diff_lines > 200 OR any prior reviewer returned a CRITICAL finding OR diff touches auth/security paths. Outputs structured JSON findings per findings-schema.md. +description: 'Constructs failure scenarios and stress-tests implementations. Asks "what breaks this?" not "does this look right?" Gated: runs only when diff_lines > 200 OR any prior reviewer returned a CRITICAL finding OR diff touches auth/security paths. Outputs structured JSON findings per findings-schema.md.' model: opus allowed-tools: Read, Grep, Glob, Bash(git diff *), Bash(git log *) --- diff --git a/evals/test-claude-skills-dir-install.sh b/evals/test-claude-skills-dir-install.sh new file mode 100644 index 0000000..22490b0 --- /dev/null +++ b/evals/test-claude-skills-dir-install.sh @@ -0,0 +1,46 @@ +# Evals for Claude Code v2.1.157+ skills-directory plugin install. + +README_INSTALL_BLOCK=$(awk '/^## Install/{flag=1} /^
/{flag=0} flag{print}' "$REPO_ROOT/README.md") + +if printf '%s\n' "$README_INSTALL_BLOCK" | grep -qE 'git clone https://github.com/redpanda-data/ui-harness \$HOME/\.claude/skills/frontend-skills|git clone https://github.com/redpanda-data/ui-harness ~/\.claude/skills/frontend-skills'; then + echo " PASS README primary Claude install uses skills-directory clone" + PASS=$((PASS + 1)) +else + echo " FAIL README primary Claude install should use ~/.claude/skills/frontend-skills clone" + FAIL=$((FAIL + 1)) + ERRORS="$ERRORS\n FAIL: README primary Claude install not simplified" +fi + +if printf '%s\n' "$README_INSTALL_BLOCK" | grep -q '/plugin marketplace add\|/plugin install'; then + echo " FAIL README primary Claude install still requires marketplace commands" + FAIL=$((FAIL + 1)) + ERRORS="$ERRORS\n FAIL: README primary Claude install still mentions marketplace" +else + echo " PASS README primary Claude install has no marketplace commands" + PASS=$((PASS + 1)) +fi + +run_content_eval "$REPO_ROOT/README.md" 'Claude Code 2\.1\.157\+' \ + "README calls out Claude Code 2.1.157+ no-marketplace path" +run_content_eval "$REPO_ROOT/README.md" 'Still a plugin' \ + "README clarifies plugin manifest still needed for hooks and agents" +run_content_eval "$REPO_ROOT/README.md" 'Legacy: marketplace install' \ + "README keeps legacy marketplace fallback discoverable" +run_content_eval "$REPO_ROOT/README.md" 'claude plugin list' \ + "README verify step checks Claude sees skills-dir plugin" + +tmp_home=$(mktemp -d) +mkdir -p "$tmp_home/.claude/skills" +ln -s "$REPO_ROOT" "$tmp_home/.claude/skills/frontend-skills" +verify_output=$(HOME="$tmp_home" bash "$REPO_ROOT/scripts/verify-install.sh" 2>&1 || true) +rm -rf "$tmp_home" + +if printf '%s\n' "$verify_output" | grep -q -- '--- Install Mode: skills-dir-plugin ---'; then + echo " PASS verify-install detects skills-directory plugin install" + PASS=$((PASS + 1)) +else + echo " FAIL verify-install should detect skills-directory plugin install" + echo " output: $(printf '%s\n' "$verify_output" | head -5 | tr '\n' ' ')" + FAIL=$((FAIL + 1)) + ERRORS="$ERRORS\n FAIL: verify-install misses skills-dir plugin" +fi diff --git a/evals/test-version-metadata.sh b/evals/test-version-metadata.sh new file mode 100755 index 0000000..0473ef8 --- /dev/null +++ b/evals/test-version-metadata.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Version metadata eval + +EXPECTED_VERSION="4.11.1" +EXPECTED_DATE="2026-06-10" + +json_get() { + local file="$1" + local expr="$2" + python3 - "$file" "$expr" <<'PY' +import json, sys +file, expr = sys.argv[1], sys.argv[2] +data = json.load(open(file)) +value = eval(expr, {}, {"data": data}) +print(value) +PY +} + +assert_eq() { + local actual="$1" + local expected="$2" + local description="$3" + if [ "$actual" = "$expected" ]; then + echo " PASS $description" + PASS=$((PASS + 1)) + else + echo " FAIL $description (expected: $expected, got: $actual)" + FAIL=$((FAIL + 1)) + ERRORS="$ERRORS\n FAIL: $description" + fi +} + +assert_file_contains() { + local file="$1" + local pattern="$2" + local description="$3" + if grep -qF -- "$pattern" "$file"; then + echo " PASS $description" + PASS=$((PASS + 1)) + else + echo " FAIL $description (missing: $pattern)" + FAIL=$((FAIL + 1)) + ERRORS="$ERRORS\n FAIL: $description" + fi +} + +assert_eq "$(json_get skill-manifest.json 'data["version"]')" "$EXPECTED_VERSION" "skill manifest version is current" + +for file in .claude-plugin/plugin.json .codex-plugin/plugin.json; do + assert_eq "$(json_get "$file" 'data["version"]')" "$EXPECTED_VERSION" "$file version is current" + assert_eq "$(json_get "$file" 'data["x-updatedAt"]')" "$EXPECTED_DATE" "$file updated date is current" + assert_eq "$(json_get "$file" 'str("4.11.1" in data["x-changelog"]).lower()')" "true" "$file changelog includes current version" +done + +for file in .claude-plugin/marketplace.json .agents/plugins/marketplace.json; do + assert_eq "$(json_get "$file" 'data["plugins"][0]["version"]')" "$EXPECTED_VERSION" "$file plugin version is current" + assert_eq "$(json_get "$file" 'data["plugins"][0]["x-updatedAt"]')" "$EXPECTED_DATE" "$file plugin updated date is current" + assert_eq "$(json_get "$file" 'str("4.11.1" in data["plugins"][0]["x-changelog"]).lower()')" "true" "$file changelog includes current version" +done + +assert_file_contains CHANGELOG.md "## 4.11.1" "changelog includes current release" +assert_file_contains README.md "--ref v4.11.1" "README pinned install uses current release tag" diff --git a/evals/test-worktree-isolation.sh b/evals/test-worktree-isolation.sh index 3f235e4..f5fc91a 100644 --- a/evals/test-worktree-isolation.sh +++ b/evals/test-worktree-isolation.sh @@ -56,7 +56,10 @@ _run_bs() { local stderr_file stderr_file=$(mktemp) local exit_code=0 - echo "$1" | bash "$HOOKS/branch-safety-check.sh" 2>"$stderr_file" > /dev/null || exit_code=$? + ( + cd "${_BS_REPO:-$REPO_ROOT}" + echo "$1" | bash "$HOOKS/branch-safety-check.sh" + ) 2>"$stderr_file" > /dev/null || exit_code=$? _last_stderr=$(cat "$stderr_file") _last_exit=$exit_code rm -f "$stderr_file" diff --git a/hooks/hooks.json b/hooks/hooks.json index fedb420..947c6b1 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -122,6 +122,10 @@ { "type": "command", "command": "f=\"${CLAUDE_PLUGIN_ROOT}/.claude/hooks/session-end.sh\"; [ -x \"$f\" ] && exec \"$f\"; exit 0" + }, + { + "type": "command", + "command": "f=\"${CLAUDE_PLUGIN_ROOT}/.claude/hooks/metrics-summary-stop.sh\"; [ -x \"$f\" ] && exec \"$f\"; exit 0" } ] } diff --git a/scripts/verify-install.sh b/scripts/verify-install.sh index 06e9256..5413fdb 100755 --- a/scripts/verify-install.sh +++ b/scripts/verify-install.sh @@ -37,21 +37,36 @@ $JSON_MODE || echo "=== Skills & Hooks Installation Verification ===" $JSON_MODE || echo "" # ── Detect installation mode ──────────────────────────────────── -# Plugin install: hooks live in plugin cache, wired via hooks.json +# skills-dir plugin (Claude Code >=2.1.157): repo lives under ~/.claude/skills/ +# Plugin cache install (legacy marketplace): hooks live in plugin cache # Manual install: hooks copied to consumer .claude/hooks/ PLUGIN_ROOT="" INSTALL_MODE="manual" +INSTALL_SOURCE="manual" -# Check if installed as a plugin (pick latest version, not first) -for dir in "$HOME/.claude/plugins/cache/ui-harness/frontend-skills"/*/; do - if [ -f "${dir}hooks/hooks.json" ]; then - PLUGIN_ROOT="$dir" +# Prefer the new skills-directory plugin path. Claude Code loads any folder +# under ~/.claude/skills/ that has .claude-plugin/plugin.json, no marketplace. +for dir in "$HOME/.claude/skills/frontend-skills" "$HOME/.claude/skills/ui-harness"; do + if [ -f "${dir}/.claude-plugin/plugin.json" ] && [ -f "${dir}/hooks/hooks.json" ]; then + PLUGIN_ROOT="${dir%/}/" INSTALL_MODE="plugin" + INSTALL_SOURCE="skills-dir-plugin" fi done -$JSON_MODE || echo "--- Install Mode: $INSTALL_MODE ---" +# Fallback: legacy marketplace/plugin-cache install (pick latest version). +if [ -z "$PLUGIN_ROOT" ]; then + for dir in "$HOME/.claude/plugins/cache/ui-harness/frontend-skills"/*/; do + if [ -f "${dir}hooks/hooks.json" ]; then + PLUGIN_ROOT="$dir" + INSTALL_MODE="plugin" + INSTALL_SOURCE="plugin-cache" + fi + done +fi + +$JSON_MODE || echo "--- Install Mode: $INSTALL_SOURCE ---" # ── 1. Version info ─────────────────────────────────────────── @@ -250,8 +265,8 @@ fi $JSON_MODE || echo "" $JSON_MODE || echo "--- Hook Wiring ---" -# All 8 hook events used by the harness -HOOK_EVENTS=("SessionStart" "PostCompact" "UserPromptSubmit" "PreToolUse" "PostToolUse" "SubagentStart" "SubagentStop" "Stop") +# All Claude hook events used by the harness. +HOOK_EVENTS=("SessionStart" "UserPromptSubmit" "PostCompact" "PreCompact" "PostToolUseFailure" "FileChanged" "WorktreeCreate" "SessionEnd" "PreToolUse" "PostToolUse" "SubagentStart" "SubagentStop" "Stop") if [ "$INSTALL_MODE" = "plugin" ]; then # Plugin mode: check hooks/hooks.json @@ -455,17 +470,21 @@ if [ -n "$REMOTE" ]; then $JSON_MODE || echo "" $JSON_MODE || echo "--- Version Check (remote: $REMOTE) ---" - # Check if any hook is a symlink pointing to a skills repo skills_repo="" - for hook in ".claude/hooks/react-rules-check.sh" ".claude/hooks/enforce-toolchain.sh"; do - if [ -L "$hook" ]; then - target=$(readlink "$hook" 2>/dev/null || true) - if echo "$target" | grep -q "skills"; then - skills_repo=$(echo "$target" | sed 's|/setup-.*||;s|/shared/.*||') - break + if [ "$INSTALL_SOURCE" = "skills-dir-plugin" ] && [ -d "${PLUGIN_ROOT%/}/.git" ]; then + skills_repo="${PLUGIN_ROOT%/}" + else + # Manual installs may symlink hook scripts back to a skills repo. + for hook in ".claude/hooks/react-rules-check.sh" ".claude/hooks/enforce-toolchain.sh"; do + if [ -L "$hook" ]; then + target=$(readlink "$hook" 2>/dev/null || true) + if echo "$target" | grep -q "skills"; then + skills_repo=$(echo "$target" | sed 's|/setup-.*||;s|/shared/.*||') + break + fi fi - fi - done + done + fi if [ -n "$skills_repo" ] && [ -d "$skills_repo/.git" ]; then local_hash=$(cd "$skills_repo" && git rev-parse HEAD 2>/dev/null || echo "unknown") diff --git a/shared/intent-detect.sh b/shared/intent-detect.sh index ee6143e..c02b3a1 100755 --- a/shared/intent-detect.sh +++ b/shared/intent-detect.sh @@ -104,17 +104,18 @@ if [ -n "$_pr_number" ]; then fi # ── Scope-lock: prefer committing to current feature branch ───── -# Auto-detected from branch state, not prompt keywords. +# Add only when some workflow directive already fired. Pure questions should +# stay silent even on feature branches. -_current_branch=$(git branch --show-current 2>/dev/null || true) -case "$_current_branch" in - main|master|develop|"") ;; - *) - if [ -n "$directives" ]; then +if [ -n "$directives" ]; then + _current_branch=$(git branch --show-current 2>/dev/null || true) + case "$_current_branch" in + main|master|develop|"") ;; + *) directives="$directives\n[SCOPE-LOCK] On feature branch '$_current_branch'. Prefer committing here. Ask before creating new branches or PRs unless explicitly instructed." - fi - ;; -esac + ;; + esac +fi # ── Risk tier (informs auto mode confidence) ──────────────────── # low: tests, components, refactoring — fully guarded by hooks diff --git a/skill-manifest.json b/skill-manifest.json index 8edc75e..fb2305f 100644 --- a/skill-manifest.json +++ b/skill-manifest.json @@ -1,6 +1,6 @@ { "$comment": "Source of truth for all hook configurations. Generates .claude/settings.json, hooks/hooks.json, .claude-plugin/plugin.json counts, .codex-plugin/plugin.json counts, .agents/plugins/marketplace.json, .claude-plugin/marketplace.json. Run scripts/generate-hook-configs.sh to regenerate.", - "version": "4.11.0", + "version": "4.11.1", "hooks": { "SessionStart": { "": [ @@ -53,7 +53,8 @@ }, "SessionEnd": { "": [ - "session-end.sh" + "session-end.sh", + "metrics-summary-stop.sh" ] }, "PreToolUse": {