diff --git a/.agents/plugins/marketplace.json b/.agents/plugins/marketplace.json
index f8f301c..38da4b8 100644
--- a/.agents/plugins/marketplace.json
+++ b/.agents/plugins/marketplace.json
@@ -6,7 +6,7 @@
"plugins": [
{
"name": "frontend-skills",
- "version": "4.11.0",
+ "version": "4.11.1",
"description": "React/TypeScript frontend + Go backend enforcement harness. 92 wired hooks, 101 scripts, 66 skills (includes /diagnose feedback-loop-first 6-phase debugger, /triage multi-tracker (gh + acli) state machine, green-not-done warnings gate via `test-warning-check` + `ci-warning-audit`, /steelman anti-sycophancy guard, /snyk-ux-security with JS+Go tracks, exploitability-triage first gate, top-level-first upgrade ladder, `govulncheck`), 9 agents, 5 routines. 21 LLM failure modes (7 Karpathy single-agent + 14 MAST multi-agent, Cemri et al. NeurIPS 2025). OWASP+STRIDE, Core Web Vitals gate, bun/yarn lockfile parity, MCP ban with CLI redirect, worktree isolation, agent-browser integration. Opus 4.7 tuned. POSIX-friendly.",
"source": {
"source": "local",
@@ -17,7 +17,7 @@
"authentication": "ON_INSTALL"
},
"category": "Development",
- "x-updatedAt": "2026-05-26",
+ "x-updatedAt": "2026-06-10",
"x-includes": {
"instructions": [
"CLAUDE.md",
@@ -26,6 +26,7 @@
"settings": "settings.json"
},
"x-changelog": {
+ "4.11.1": "2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.",
"4.11.0": "2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66.",
"4.10.6": "2026-05-25 -- Codex marketplace install fix: expose root plugin through root plugin path so Codex marketplace source path is a plugin directory while retaining root-relative plugin packaging.",
"4.10.5": "2026-05-25 -- Vendor remaining mattpocock/skills into repo, caveman-compress new skill docs, add vendoring evals, and fix detached-HEAD branch-safety eval coverage.",
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index b955c91..18165e6 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -11,12 +11,12 @@
{
"name": "frontend-skills",
"description": "React/TypeScript frontend + Go backend enforcement harness. 92 wired hooks (101 scripts) enforce patterns on every edit. Green != done: `test-warning-check` + `ci-warning-audit` block passing tests with DeprecationWarning / React `act()` / unhandled rejection noise (local + `gh run view --log` on green CI). 66 skills from TDD through CI-green PR, including /diagnose feedback-loop-first 6-phase debugger + /triage multi-tracker (gh + acli) state machine + /steelman anti-sycophancy guard + /snyk-ux-security JS+Go per-path vuln sweep (exploitability-triage first gate via `bun why`/`go mod why`/`govulncheck`; top-level direct dep bump first, parent dep second, resolutions/overrides/replace last resort only; React 18 pin + changelog-walked majors + no-deferral escalation; Go `go get -u` + `go mod tidy` + `govulncheck` verify). 21 LLM failure modes enforced (7 Karpathy single-agent + 14 MAST multi-agent, Cemri et al. NeurIPS 2025). OWASP + STRIDE + snyk/bun audit. Core Web Vitals perf gate. bun.lock + yarn.lock parity enforced. Worktree isolation + branch safety. MCP ban with CLI redirect (~20x savings). Agent-browser integration (~91% token reduction). 9 agents (3-hat plan review + karpathy reference), 5 routines. Opus 4.7 tuned. POSIX-friendly.",
- "version": "4.11.0",
+ "version": "4.11.1",
"source": {
"source": "github",
"repo": "redpanda-data/ui-harness"
},
- "x-updatedAt": "2026-05-26",
+ "x-updatedAt": "2026-06-10",
"x-includes": {
"instructions": [
"CLAUDE.md",
@@ -25,6 +25,7 @@
"settings": "settings.json"
},
"x-changelog": {
+ "4.11.1": "2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.",
"4.11.0": "2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66.",
"4.10.6": "2026-05-25 -- Codex marketplace install fix: expose root plugin through root plugin path so Codex marketplace source path is a plugin directory while retaining root-relative plugin packaging.",
"4.10.5": "2026-05-25 -- Vendor remaining mattpocock/skills into repo, caveman-compress new skill docs, add vendoring evals, and fix detached-HEAD branch-safety eval coverage.",
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index bd1dda3..2193746 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
{
"name": "frontend-skills",
"description": "React/TypeScript frontend + Go backend enforcement harness. 92 wired hooks across 101 scripts enforce patterns on every edit. Green != done: `test-warning-check` surfaces warnings on passing test/lint/type runs (DeprecationWarning, React `act()`, unhandled rejection, `@ts-ignore`); `ci-warning-audit` Stop-hook scans `gh run view --log` on green CI for deprecations / console warnings / skipped tests. 66 skills from TDD through CI-green PR, including /steelman anti-sycophancy guard + /snyk-ux-security per-path vuln sweep (JS + Go ecosystems; exploitability-triage first gate via `bun why`/`go mod why`/`govulncheck`; top-level direct dep bump first, parent dep bump second, resolutions/overrides/replace as last resort only with follow-up TODO to remove; React 18 pin + changelog-walked major bumps + no-deferral escalation; Go `snyk test --file=go.mod` + `go get -u` + `go mod tidy` + `govulncheck` verify). Session exit blocked while PR review threads remain unresolved. TypeScript escape hatches blocked at Edit. tsconfig strictness weakening blocked. Worktree isolation + branch safety hook-enforced. bun.lock + yarn.lock parity enforced (Snyk IO doesn't parse bun.lock). 21 LLM failure modes enforced: 7 Karpathy single-agent + 14 MAST multi-agent (Cemri et al. NeurIPS 2025). OWASP + STRIDE subset + snyk/bun audit. Core Web Vitals perf gate. MCP ban with CLI redirect (~20x token savings). Agent-browser wrap (~91% token reduction for AI browsing). 3-hat plan review (product/engineering/design). 9 agents (adds plan hats + karpathy reference), 5 routines. Opus 4.7 tuned, POSIX-friendly.",
- "version": "4.11.0",
+ "version": "4.11.1",
"author": {
"name": "Redpanda Data"
},
@@ -100,7 +100,7 @@
"./agents/self-reviewer.md",
"./agents/verifier.md"
],
- "x-updatedAt": "2026-05-26",
+ "x-updatedAt": "2026-06-10",
"x-includes": {
"instructions": [
"CLAUDE.md",
@@ -110,6 +110,7 @@
"manifestSource": "skill-manifest.json"
},
"x-changelog": {
+ "4.11.1": "2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.",
"4.11.0": "2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66.",
"4.10.6": "2026-05-25 -- Codex marketplace install fix: expose root plugin through root plugin path so Codex marketplace source path is a plugin directory while retaining root-relative plugin packaging.",
"4.10.5": "2026-05-25 -- Vendor remaining mattpocock/skills into repo, caveman-compress new skill docs, add vendoring evals, and fix detached-HEAD branch-safety eval coverage.",
diff --git a/.claude/hooks/intent-detect.sh b/.claude/hooks/intent-detect.sh
index ffbe731..0f0bd46 100755
--- a/.claude/hooks/intent-detect.sh
+++ b/.claude/hooks/intent-detect.sh
@@ -113,17 +113,18 @@ if [ -n "$_pr_number" ]; then
fi
# ── Scope-lock: prefer committing to current feature branch ─────
-# Auto-detected from branch state, not prompt keywords.
+# Add only when some workflow directive already fired. Pure questions should
+# stay silent even on feature branches.
-_current_branch=$(git branch --show-current 2>/dev/null || true)
-case "$_current_branch" in
- main|master|develop|"") ;;
- *)
- if [ -n "$directives" ]; then
+if [ -n "$directives" ]; then
+ _current_branch=$(git branch --show-current 2>/dev/null || true)
+ case "$_current_branch" in
+ main|master|develop|"") ;;
+ *)
directives="$directives\n[SCOPE-LOCK] On feature branch '$_current_branch'. Prefer committing here. Ask before creating new branches or PRs unless explicitly instructed."
- fi
- ;;
-esac
+ ;;
+ esac
+fi
# ── Risk tier (informs auto mode confidence) ────────────────────
# low: tests, components, refactoring — fully guarded by hooks
diff --git a/.claude/settings.json b/.claude/settings.json
index fe253f9..da88065 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -164,6 +164,13 @@
"args": [
"session-end.sh"
]
+ },
+ {
+ "type": "command",
+ "command": ".claude/hooks/run-hook.sh",
+ "args": [
+ "metrics-summary-stop.sh"
+ ]
}
]
}
diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json
index 1d963a2..bf0805c 100644
--- a/.codex-plugin/plugin.json
+++ b/.codex-plugin/plugin.json
@@ -1,6 +1,6 @@
{
"name": "frontend-skills",
- "version": "4.11.0",
+ "version": "4.11.1",
"description": "React/TypeScript frontend + Go backend enforcement harness. 92 wired hooks (101 scripts) enforce patterns on every edit. Green != done: `test-warning-check` (local Bash PostToolUse) + `ci-warning-audit` (Stop, scans `gh run view --log`) block passing tests with DeprecationWarning / React `act()` / unhandled rejection / `@ts-ignore` noise. 66 skills from TDD through CI-green PR, including /diagnose feedback-loop-first 6-phase debugger + /triage multi-tracker (gh + acli) state machine + /steelman anti-sycophancy guard + /snyk-ux-security JS+Go per-path vuln sweep (exploitability-triage first gate, top-level-first upgrade ladder, React 18 pin, `govulncheck` for Go). 21 LLM failure modes (7 Karpathy single-agent + 14 MAST multi-agent, Cemri et al. NeurIPS 2025), OWASP + STRIDE, Core Web Vitals gate, bun.lock + yarn.lock parity, worktree isolation + branch safety, MCP ban with CLI redirect (~20x savings), agent-browser integration (~91% token reduction). 9 agents (3 plan-review hats + karpathy reference), 5 routines. Single-source manifest codegen. Opus 4.7 tuned. POSIX-friendly.",
"author": {
"name": "Redpanda Data",
@@ -44,7 +44,7 @@
"Review this PR and fix all findings"
]
},
- "x-updatedAt": "2026-05-26",
+ "x-updatedAt": "2026-06-10",
"x-includes": {
"instructions": [
"CLAUDE.md",
@@ -53,6 +53,7 @@
"settings": "settings.json"
},
"x-changelog": {
+ "4.11.1": "2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.",
"4.11.0": "2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66.",
"4.10.6": "2026-05-25 -- Codex marketplace install fix: expose root plugin through root plugin path so Codex marketplace source path is a plugin directory while retaining root-relative plugin packaging.",
"4.10.5": "2026-05-25 -- Vendor remaining mattpocock/skills into repo, caveman-compress new skill docs, add vendoring evals, and fix detached-HEAD branch-safety eval coverage.",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6c36568..334b3f0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
# Changelog
+## 4.11.1
+
+2026-06-10 -- Simplify Claude Code v2.1.157+ installation: primary Claude install now clones the repo into `~/.claude/skills/frontend-skills` and verifies `frontend-skills@skills-dir` through `claude plugin list`, with marketplace install retained as a legacy fallback. `verify-install.sh` detects skills-directory plugins, drops stale cache namespace fallback, and keeps clean-HOME smoke coverage. Moves metrics summary to `SessionEnd`, fixes adversarial reviewer frontmatter validation, and hardens branch-safety and scope-lock evals for detached worktrees and pure questions. Full eval suite: 2200/2200.
+
## 4.11.0
2026-05-26 -- Add `/visual-review` skill for browser-based frontend QA before PRs. The workflow runs standalone and is wired into `/go`, `/commit-push`, `/commit-push-pr`, self-reviewer, and code-reviewer for frontend diffs, requiring visual review evidence or an explicit skip reason. Ships platform/environment fingerprinting (browser, user agent, platform, viewport, visualViewport, DPR, media preferences, locale/direction), a platform risk map, Web Weekly-informed visual/a11y/perf checks, and 79 eval assertions. Skills 65 -> 66. Full visual-review eval suite: 79/79.
diff --git a/README.md b/README.md
index 19902dc..52173a5 100644
--- a/README.md
+++ b/README.md
@@ -10,20 +10,27 @@ Hooks enforce patterns real-time, skills guide workflow, orchestration layer ens
## Install
-Run inside [Claude Code](https://docs.anthropic.com/en/docs/claude-code) session (start with `claude` in terminal):
+Claude Code 2.1.157+ auto-loads plugins from `~/.claude/skills/`. No marketplace needed.
+
+Still a plugin: `.claude-plugin/plugin.json` is what lets the harness bundle hooks, agents, and skills together. We just keep it in the skills directory instead of installing from a marketplace.
```bash
-/plugin marketplace add redpanda-data/ui-harness
+mkdir -p ~/.claude/skills
+git clone https://github.com/redpanda-data/ui-harness ~/.claude/skills/frontend-skills
```
+
+Start or restart [Claude Code](https://docs.anthropic.com/en/docs/claude-code) from any project:
+
```bash
-/plugin install frontend-skills@ui-harness
+claude
```
+
+Skills, hooks, agents activate automatically. If Claude Code was already running:
+
```bash
/reload-plugins
```
-Three commands. Skills, hooks, agents activate immediately. Done.
-
**Recommended: rtk** (output-compression proxy, ~60-90% token savings on git/cargo/test/gh):
```bash
@@ -36,17 +43,37 @@ Harness fail-open -- skip safe; SessionStart nudge remind if miss.
**Update** (pull latest):
```bash
-/plugin install frontend-skills --force
+git -C ~/.claude/skills/frontend-skills pull --ff-only
```
-Restart Claude Code session so hooks reload from new cache.
+Restart Claude Code session or run `/reload-plugins` so hooks reload.
**Verify:**
```bash
-bash "$(ls -d ~/.claude/plugins/cache/ui-harness/frontend-skills/*/ | tail -1)scripts/verify-install.sh"
+claude plugin list | grep 'frontend-skills.*skills-dir'
+bash ~/.claude/skills/frontend-skills/scripts/verify-install.sh
+```
+
+<details>
+<summary>Legacy: marketplace install (Claude Code <=2.1.156)</summary>
+
+Use this only if skills-directory plugins are not available.
+
+```bash
+/plugin marketplace add redpanda-data/ui-harness
+/plugin install frontend-skills@ui-harness
+/reload-plugins
+```
+
+Update legacy marketplace install:
+
+```bash
+/plugin install frontend-skills --force
```
+
+</details>
Codex (OpenAI) -- install as Codex plugin
@@ -73,7 +100,7 @@ codex plugin add frontend-skills@ui-harness
Or pin a release:
```bash
-codex plugin marketplace add redpanda-data/ui-harness --ref v4.10.2
+codex plugin marketplace add redpanda-data/ui-harness --ref v4.11.1
codex plugin marketplace upgrade ui-harness
codex plugin add frontend-skills@ui-harness
```
@@ -415,7 +442,7 @@ Featured skill moments -- each from an actual session:
| Cross-session learning | No | Manual edit | No | No | **Yes (Phase 6 Compound -> `.claude/rules/`)** |
| Opinionated stack | N/A | N/A | Agnostic | Varies | **React + TanStack + ConnectRPC + Bun** |
| Config surface | 0 | Low | Low | Medium | **Medium (14 setup skills, env vars)** |
-| Setup cost | 0 | ~30 min prompt writing | One `/install` | Varies | **3 commands** |
+| Setup cost | 0 | ~30 min prompt writing | One `/install` | Varies | **One clone into `~/.claude/skills/`** |
**TL;DR:** If your stack matches (React + Bun/TypeScript + modern patterns), the deterministic enforcement is worth the opinionation. If not, fork the hook scripts and keep the lifecycle skills.
@@ -480,11 +507,11 @@ The fastest way to believe it: reproduce the core claim in your terminal.
**Prereq:** Claude Code installed, fresh repo.
-**1. Install the plugin**
+**1. Install the plugin from your skills directory**
```bash
-/plugin marketplace add redpanda-data/ui-harness
-/plugin install frontend-skills@ui-harness
-/reload-plugins
+mkdir -p ~/.claude/skills
+git clone https://github.com/redpanda-data/ui-harness ~/.claude/skills/frontend-skills
+claude
```
**2. Ask Claude to write a banned pattern**
@@ -566,7 +593,7 @@ No. Redpanda-specific rules live in a **separate** kit (`redpanda-frontend-kit`)
How do I customize or remove a hook?
-Every hook is a bash script in `.claude/hooks/` -- inspect, edit, delete. Plugin install places them in `~/.claude/plugins/cache/ui-harness/frontend-skills//.claude/hooks/`. Override per-project by copying to `/.claude/hooks/` (takes precedence). Env vars control most behavior: `HOOK_VERBOSITY=terse`, `REACT_RULES_BAN_USEEFFECT=1`, `ORCHESTRATION_STRICT=0`, etc. See [Configuration](#configuration).
+Every hook is a bash script in `~/.claude/skills/frontend-skills/.claude/hooks/` -- inspect, edit, delete. Override per-project by copying to `<project>/.claude/hooks/` (takes precedence). Env vars control most behavior: `HOOK_VERBOSITY=terse`, `REACT_RULES_BAN_USEEFFECT=1`, `ORCHESTRATION_STRICT=0`, etc. See [Configuration](#configuration).
@@ -753,7 +780,7 @@ New to AI-assisted dev? Start here.
**Day 1 (30 min):**
1. Install (see [Install](#install) above)
-2. Run `bash "$(ls -d ~/.claude/plugins/cache/ui-harness/frontend-skills/*/ | tail -1)scripts/verify-install.sh"` confirm all wired
+2. Run `bash ~/.claude/skills/frontend-skills/scripts/verify-install.sh` confirm all wired
3. Pick real ticket from backlog -- not toy problem
**First prompt:**
@@ -1446,4 +1473,4 @@ Prior art, techniques, related work that informed design decisions in this harne
### State of the art
- [State of Playwright AI Ecosystem 2026](https://currents.dev/posts/state-of-playwright-ai-ecosystem-in-2026)
-- [Agent Browser vs Puppeteer & Playwright (Webfuse)](https://www.webfuse.com/blog/agent-browser-vs-puppeteer-and-playwright)
\ No newline at end of file
+- [Agent Browser vs Puppeteer & Playwright (Webfuse)](https://www.webfuse.com/blog/agent-browser-vs-puppeteer-and-playwright)
diff --git a/agents/adversarial-reviewer.md b/agents/adversarial-reviewer.md
index fc38072..ead13ce 100644
--- a/agents/adversarial-reviewer.md
+++ b/agents/adversarial-reviewer.md
@@ -1,6 +1,6 @@
---
name: adversarial-reviewer
-description: Constructs failure scenarios and stress-tests implementations. Asks "what breaks this?" not "does this look right?" Gated: runs only when diff_lines > 200 OR any prior reviewer returned a CRITICAL finding OR diff touches auth/security paths. Outputs structured JSON findings per findings-schema.md.
+description: 'Constructs failure scenarios and stress-tests implementations. Asks "what breaks this?" not "does this look right?" Gated: runs only when diff_lines > 200 OR any prior reviewer returned a CRITICAL finding OR diff touches auth/security paths. Outputs structured JSON findings per findings-schema.md.'
model: opus
allowed-tools: Read, Grep, Glob, Bash(git diff *), Bash(git log *)
---
diff --git a/evals/test-claude-skills-dir-install.sh b/evals/test-claude-skills-dir-install.sh
new file mode 100644
index 0000000..22490b0
--- /dev/null
+++ b/evals/test-claude-skills-dir-install.sh
@@ -0,0 +1,46 @@
+# Evals for Claude Code v2.1.157+ skills-directory plugin install.
+# Primary install block: from "## Install" up to the collapsed legacy marketplace section.
+README_INSTALL_BLOCK=$(awk '/^## Install/{flag=1} /^<details>|Legacy: marketplace install/{flag=0} flag{print}' "$REPO_ROOT/README.md")
+
+if printf '%s\n' "$README_INSTALL_BLOCK" | grep -qE 'git clone https://github.com/redpanda-data/ui-harness \$HOME/\.claude/skills/frontend-skills|git clone https://github.com/redpanda-data/ui-harness ~/\.claude/skills/frontend-skills'; then
+  echo " PASS README primary Claude install uses skills-directory clone"
+  PASS=$((PASS + 1))
+else
+  echo " FAIL README primary Claude install should use ~/.claude/skills/frontend-skills clone"
+  FAIL=$((FAIL + 1))
+  ERRORS="$ERRORS\n FAIL: README primary Claude install not simplified"
+fi
+# ERE alternation (-E) instead of BRE \| (a GNU extension) keeps this check portable.
+if printf '%s\n' "$README_INSTALL_BLOCK" | grep -qE '/plugin marketplace add|/plugin install'; then
+  echo " FAIL README primary Claude install still requires marketplace commands"
+  FAIL=$((FAIL + 1))
+  ERRORS="$ERRORS\n FAIL: README primary Claude install still mentions marketplace"
+else
+  echo " PASS README primary Claude install has no marketplace commands"
+  PASS=$((PASS + 1))
+fi
+
+run_content_eval "$REPO_ROOT/README.md" 'Claude Code 2\.1\.157\+' \
+  "README calls out Claude Code 2.1.157+ no-marketplace path"
+run_content_eval "$REPO_ROOT/README.md" 'Still a plugin' \
+  "README clarifies plugin manifest still needed for hooks and agents"
+run_content_eval "$REPO_ROOT/README.md" 'Legacy: marketplace install' \
+  "README keeps legacy marketplace fallback discoverable"
+run_content_eval "$REPO_ROOT/README.md" 'claude plugin list' \
+  "README verify step checks Claude sees skills-dir plugin"
+# Clean-HOME smoke test: symlink the repo into a throwaway skills dir, then run verify-install.
+tmp_home=$(mktemp -d)
+mkdir -p "$tmp_home/.claude/skills"
+ln -s "$REPO_ROOT" "$tmp_home/.claude/skills/frontend-skills"
+verify_output=$(HOME="$tmp_home" bash "$REPO_ROOT/scripts/verify-install.sh" 2>&1 || true)
+rm -rf "$tmp_home"
+
+if printf '%s\n' "$verify_output" | grep -q -- '--- Install Mode: skills-dir-plugin ---'; then
+  echo " PASS verify-install detects skills-directory plugin install"
+  PASS=$((PASS + 1))
+else
+  echo " FAIL verify-install should detect skills-directory plugin install"
+  echo " output: $(printf '%s\n' "$verify_output" | head -5 | tr '\n' ' ')"
+  FAIL=$((FAIL + 1))
+  ERRORS="$ERRORS\n FAIL: verify-install misses skills-dir plugin"
+fi
diff --git a/evals/test-version-metadata.sh b/evals/test-version-metadata.sh
new file mode 100755
index 0000000..0473ef8
--- /dev/null
+++ b/evals/test-version-metadata.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Version metadata eval
+
+EXPECTED_VERSION="4.11.1"
+EXPECTED_DATE="2026-06-10"
+
+json_get() {
+  # Print the value of Python expression $2 evaluated against the parsed JSON file $1 (bound to `data`).
+  python3 - "$1" "$2" <<'PY'
+import json, sys
+file, expr = sys.argv[1], sys.argv[2]
+with open(file, encoding="utf-8") as fh:
+    data = json.load(fh)
+# NOTE(review): eval is tolerable only because every expr is a literal written in this script.
+print(eval(expr, {}, {"data": data}))
+PY
+}
+
+assert_eq() {
+  # Tally one equality check into the PASS / FAIL / ERRORS accumulators (defined outside this file).
+  #   $1 actual value, $2 expected value, $3 human-readable label
+  local got="$1" want="$2" label="$3"
+  if [ "$got" != "$want" ]; then
+    echo " FAIL $label (expected: $want, got: $got)"
+    FAIL=$((FAIL + 1))
+    ERRORS="$ERRORS\n FAIL: $label"
+    return 0
+  fi
+  echo " PASS $label"
+  PASS=$((PASS + 1))
+}
+
+assert_file_contains() {
+  # Tally PASS when file $1 contains the fixed string $2 (grep -F, no regex), otherwise FAIL.
+  #   $1 file path, $2 literal text to look for, $3 human-readable label
+  local target="$1" needle="$2" label="$3"
+  if ! grep -qF -- "$needle" "$target"; then
+    echo " FAIL $label (missing: $needle)"
+    FAIL=$((FAIL + 1))
+    ERRORS="$ERRORS\n FAIL: $label"
+    return 0
+  fi
+  echo " PASS $label"
+  PASS=$((PASS + 1))
+}
+
+assert_eq "$(json_get skill-manifest.json 'data["version"]')" "$EXPECTED_VERSION" "skill manifest version is current"
+# Plugin manifests: version, date, and changelog key must all match the expected release.
+for file in .claude-plugin/plugin.json .codex-plugin/plugin.json; do
+  assert_eq "$(json_get "$file" 'data["version"]')" "$EXPECTED_VERSION" "$file version is current"
+  assert_eq "$(json_get "$file" 'data["x-updatedAt"]')" "$EXPECTED_DATE" "$file updated date is current"
+  assert_eq "$(json_get "$file" "str(\"$EXPECTED_VERSION\" in data[\"x-changelog\"]).lower()")" "true" "$file changelog includes current version"
+done
+# Marketplace manifests carry the same metadata under plugins[0].
+for file in .claude-plugin/marketplace.json .agents/plugins/marketplace.json; do
+  assert_eq "$(json_get "$file" 'data["plugins"][0]["version"]')" "$EXPECTED_VERSION" "$file plugin version is current"
+  assert_eq "$(json_get "$file" 'data["plugins"][0]["x-updatedAt"]')" "$EXPECTED_DATE" "$file plugin updated date is current"
+  assert_eq "$(json_get "$file" "str(\"$EXPECTED_VERSION\" in data[\"plugins\"][0][\"x-changelog\"]).lower()")" "true" "$file changelog includes current version"
+done
+# Docs are derived from EXPECTED_VERSION too, so bumping the constant cannot leave stale checks passing.
+assert_file_contains CHANGELOG.md "## $EXPECTED_VERSION" "changelog includes current release"
+assert_file_contains README.md "--ref v$EXPECTED_VERSION" "README pinned install uses current release tag"
diff --git a/evals/test-worktree-isolation.sh b/evals/test-worktree-isolation.sh
index 3f235e4..f5fc91a 100644
--- a/evals/test-worktree-isolation.sh
+++ b/evals/test-worktree-isolation.sh
@@ -56,7 +56,10 @@ _run_bs() {
local stderr_file
stderr_file=$(mktemp)
local exit_code=0
- echo "$1" | bash "$HOOKS/branch-safety-check.sh" 2>"$stderr_file" > /dev/null || exit_code=$?
+ (
+ cd "${_BS_REPO:-$REPO_ROOT}"
+ echo "$1" | bash "$HOOKS/branch-safety-check.sh"
+ ) 2>"$stderr_file" > /dev/null || exit_code=$?
_last_stderr=$(cat "$stderr_file")
_last_exit=$exit_code
rm -f "$stderr_file"
diff --git a/hooks/hooks.json b/hooks/hooks.json
index fedb420..947c6b1 100644
--- a/hooks/hooks.json
+++ b/hooks/hooks.json
@@ -122,6 +122,10 @@
{
"type": "command",
"command": "f=\"${CLAUDE_PLUGIN_ROOT}/.claude/hooks/session-end.sh\"; [ -x \"$f\" ] && exec \"$f\"; exit 0"
+ },
+ {
+ "type": "command",
+ "command": "f=\"${CLAUDE_PLUGIN_ROOT}/.claude/hooks/metrics-summary-stop.sh\"; [ -x \"$f\" ] && exec \"$f\"; exit 0"
}
]
}
diff --git a/scripts/verify-install.sh b/scripts/verify-install.sh
index 06e9256..5413fdb 100755
--- a/scripts/verify-install.sh
+++ b/scripts/verify-install.sh
@@ -37,21 +37,36 @@ $JSON_MODE || echo "=== Skills & Hooks Installation Verification ==="
$JSON_MODE || echo ""
# ── Detect installation mode ────────────────────────────────────
-# Plugin install: hooks live in plugin cache, wired via hooks.json
+# skills-dir plugin (Claude Code >=2.1.157): repo lives under ~/.claude/skills/
+# Plugin cache install (legacy marketplace): hooks live in plugin cache
# Manual install: hooks copied to consumer .claude/hooks/
PLUGIN_ROOT=""
INSTALL_MODE="manual"
+INSTALL_SOURCE="manual"
-# Check if installed as a plugin (pick latest version, not first)
-for dir in "$HOME/.claude/plugins/cache/ui-harness/frontend-skills"/*/; do
- if [ -f "${dir}hooks/hooks.json" ]; then
- PLUGIN_ROOT="$dir"
+# Prefer the new skills-directory plugin path; first match wins. Claude Code loads any folder under ~/.claude/skills/ that has .claude-plugin/plugin.json, no marketplace.
+for dir in "$HOME/.claude/skills/frontend-skills" "$HOME/.claude/skills/ui-harness"; do
+ if [ -f "${dir}/.claude-plugin/plugin.json" ] && [ -f "${dir}/hooks/hooks.json" ]; then
+ PLUGIN_ROOT="${dir%/}/"
INSTALL_MODE="plugin"
+ INSTALL_SOURCE="skills-dir-plugin"
+ break
fi
done
-$JSON_MODE || echo "--- Install Mode: $INSTALL_MODE ---"
+# Fallback: legacy marketplace/plugin-cache install (pick latest version).
+if [ -z "$PLUGIN_ROOT" ]; then
+ for dir in "$HOME/.claude/plugins/cache/ui-harness/frontend-skills"/*/; do
+ if [ -f "${dir}hooks/hooks.json" ]; then
+ PLUGIN_ROOT="$dir"
+ INSTALL_MODE="plugin"
+ INSTALL_SOURCE="plugin-cache"
+ fi
+ done
+fi
+
+$JSON_MODE || echo "--- Install Mode: $INSTALL_SOURCE ---"
# ── 1. Version info ───────────────────────────────────────────
@@ -250,8 +265,8 @@ fi
$JSON_MODE || echo ""
$JSON_MODE || echo "--- Hook Wiring ---"
-# All 8 hook events used by the harness
-HOOK_EVENTS=("SessionStart" "PostCompact" "UserPromptSubmit" "PreToolUse" "PostToolUse" "SubagentStart" "SubagentStop" "Stop")
+# All Claude hook events used by the harness.
+HOOK_EVENTS=("SessionStart" "UserPromptSubmit" "PostCompact" "PreCompact" "PostToolUseFailure" "FileChanged" "WorktreeCreate" "SessionEnd" "PreToolUse" "PostToolUse" "SubagentStart" "SubagentStop" "Stop")
if [ "$INSTALL_MODE" = "plugin" ]; then
# Plugin mode: check hooks/hooks.json
@@ -455,17 +470,21 @@ if [ -n "$REMOTE" ]; then
$JSON_MODE || echo ""
$JSON_MODE || echo "--- Version Check (remote: $REMOTE) ---"
- # Check if any hook is a symlink pointing to a skills repo
skills_repo=""
- for hook in ".claude/hooks/react-rules-check.sh" ".claude/hooks/enforce-toolchain.sh"; do
- if [ -L "$hook" ]; then
- target=$(readlink "$hook" 2>/dev/null || true)
- if echo "$target" | grep -q "skills"; then
- skills_repo=$(echo "$target" | sed 's|/setup-.*||;s|/shared/.*||')
- break
+ if [ "$INSTALL_SOURCE" = "skills-dir-plugin" ] && [ -d "${PLUGIN_ROOT%/}/.git" ]; then
+ skills_repo="${PLUGIN_ROOT%/}"
+ else
+ # Manual installs may symlink hook scripts back to a skills repo.
+ for hook in ".claude/hooks/react-rules-check.sh" ".claude/hooks/enforce-toolchain.sh"; do
+ if [ -L "$hook" ]; then
+ target=$(readlink "$hook" 2>/dev/null || true)
+ if echo "$target" | grep -q "skills"; then
+ skills_repo=$(echo "$target" | sed 's|/setup-.*||;s|/shared/.*||')
+ break
+ fi
fi
- fi
- done
+ done
+ fi
if [ -n "$skills_repo" ] && [ -d "$skills_repo/.git" ]; then
local_hash=$(cd "$skills_repo" && git rev-parse HEAD 2>/dev/null || echo "unknown")
diff --git a/shared/intent-detect.sh b/shared/intent-detect.sh
index ee6143e..c02b3a1 100755
--- a/shared/intent-detect.sh
+++ b/shared/intent-detect.sh
@@ -104,17 +104,18 @@ if [ -n "$_pr_number" ]; then
fi
# ── Scope-lock: prefer committing to current feature branch ─────
-# Auto-detected from branch state, not prompt keywords.
+# Add only when some workflow directive already fired. Pure questions should
+# stay silent even on feature branches.
-_current_branch=$(git branch --show-current 2>/dev/null || true)
-case "$_current_branch" in
- main|master|develop|"") ;;
- *)
- if [ -n "$directives" ]; then
+if [ -n "$directives" ]; then
+ _current_branch=$(git branch --show-current 2>/dev/null || true)
+ case "$_current_branch" in
+ main|master|develop|"") ;;
+ *)
directives="$directives\n[SCOPE-LOCK] On feature branch '$_current_branch'. Prefer committing here. Ask before creating new branches or PRs unless explicitly instructed."
- fi
- ;;
-esac
+ ;;
+ esac
+fi
# ── Risk tier (informs auto mode confidence) ────────────────────
# low: tests, components, refactoring — fully guarded by hooks
diff --git a/skill-manifest.json b/skill-manifest.json
index 8edc75e..fb2305f 100644
--- a/skill-manifest.json
+++ b/skill-manifest.json
@@ -1,6 +1,6 @@
{
"$comment": "Source of truth for all hook configurations. Generates .claude/settings.json, hooks/hooks.json, .claude-plugin/plugin.json counts, .codex-plugin/plugin.json counts, .agents/plugins/marketplace.json, .claude-plugin/marketplace.json. Run scripts/generate-hook-configs.sh to regenerate.",
- "version": "4.11.0",
+ "version": "4.11.1",
"hooks": {
"SessionStart": {
"": [
@@ -53,7 +53,8 @@
},
"SessionEnd": {
"": [
- "session-end.sh"
+ "session-end.sh",
+ "metrics-summary-stop.sh"
]
},
"PreToolUse": {