diff --git a/.claude/hooks/deny-dangerous.sh b/.claude/hooks/deny-dangerous.sh
new file mode 100755
index 0000000..b2c331d
--- /dev/null
+++ b/.claude/hooks/deny-dangerous.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# PreToolUse hook: deny dangerous commands
+# Exit 2 = block with message. Exit 0 = allow.
+# Omits -e so every check runs and the final exit 0 is always reached.
+
+set -uo pipefail
+
+INPUT=$(cat)
+
+# Extract command — try jq first, fall back to grep+sed if jq is missing
+if command -v jq &>/dev/null; then
+    COMMAND=$(echo "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null)
+else
+    COMMAND=$(echo "$INPUT" | grep -o '"command"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/^"command"[[:space:]]*:[[:space:]]*"//;s/"$//' 2>/dev/null || true)
+fi
+
+[[ -z "$COMMAND" ]] && exit 0
+
+# rm -rf without explicit path scoping
+if echo "$COMMAND" | grep -qE 'rm\s+-[a-zA-Z]*r[a-zA-Z]*f|rm\s+-[a-zA-Z]*f[a-zA-Z]*r' ; then
+    if echo "$COMMAND" | grep -qE 'rm\s+-rf\s+/\s|rm\s+-rf\s+/$|rm\s+-rf\s+~|rm\s+-rf\s+\.\s|rm\s+-rf\s+\*'; then
+        echo "BLOCKED: rm -rf with dangerous target. Use a specific path instead." >&2
+        exit 2
+    fi
+fi
+
+# Force push (allow --force-with-lease)
+if echo "$COMMAND" | grep -qE 'git\s+push\s+.*--force' && ! echo "$COMMAND" | grep -qF 'force-with-lease'; then
+    echo "BLOCKED: git push --force. Use --force-with-lease instead." >&2
+    exit 2
+fi
+
+# Push to main/master/production
+if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin\s+)?(main|master|production)\b'; then
+    echo "BLOCKED: direct push to main/master/production. Use a feature branch and PR." >&2
+    exit 2
+fi
+
+# chmod 777
+if echo "$COMMAND" | grep -qE 'chmod\s+777'; then
+    echo "BLOCKED: chmod 777 is overly permissive. Use specific permissions (755, 644, etc.)." >&2
+    exit 2
+fi
+
+# Pipe to shell
+if echo "$COMMAND" | grep -qE '(curl|wget)\s.*\|\s*(bash|sh|zsh)'; then
+    echo "BLOCKED: pipe-to-shell pattern. Download first, inspect, then execute." >&2
+    exit 2
+fi
+
+# .env modifications
+if echo "$COMMAND" | grep -qE '(>|>>|tee|sed\s+-i|vim|nano|cat\s+>)\s*\.env'; then
+    echo "BLOCKED: .env file modification. Edit .env files manually." >&2
+    exit 2
+fi
+
+# Skip commit hooks
+if echo "$COMMAND" | grep -qE 'git\s+commit\s+.*--no-verify|git\s+commit\s+.*-n\b'; then
+    echo "BLOCKED: --no-verify skips safety hooks. Fix the hook failure instead." >&2
+    exit 2
+fi
+
+# Direct edits to CONFIGURATION block values (template placeholders)
+if echo "$COMMAND" | grep -qE 'sed\s.*CONFIGURATION|awk\s.*CONFIGURATION'; then
+    echo "BLOCKED: CONFIGURATION blocks are template placeholders. Do not modify values directly — users override via environment variables." >&2
+    exit 2
+fi
+
+exit 0
diff --git a/.claude/hooks/stop-lint.sh b/.claude/hooks/stop-lint.sh
new file mode 100755
index 0000000..527c379
--- /dev/null
+++ b/.claude/hooks/stop-lint.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Stop hook: stack-adaptive lint check after every Claude turn
+# MUST exit 0 even when errors found (non-zero = infinite fix loops)
+
+# Infinite loop guard
+if [[ "${STOP_HOOK_ACTIVE:-}" == "1" ]]; then exit 0; fi
+export STOP_HOOK_ACTIVE=1
+
+repo_root=$(git rev-parse --show-toplevel 2>/dev/null) || exit 0
+cd "$repo_root" || exit 0
+
+# Check for modified .sh files
+changed_sh=$(git diff --name-only 2>/dev/null | grep '\.sh$' || true)
+[[ -z "$changed_sh" ]] && exit 0
+
+# Syntax check
+while IFS= read -r f; do
+    [[ -f "$repo_root/$f" ]] || continue
+    output=$(bash -n "$repo_root/$f" 2>&1) || {
+        echo "Syntax error in $f:" >&2
+        echo "$output" >&2
+    }
+done <<< "$changed_sh"
+
+# Shellcheck (if available)
+if command -v shellcheck &>/dev/null; then
+    while IFS= read -r f; do
+        [[ -f "$repo_root/$f" ]] || continue
+        output=$(shellcheck -x -S warning "$repo_root/$f" 2>&1) || {
+            echo "Shellcheck issues in $f:" >&2
+            echo "$output" | head -20 >&2
+        }
+    done <<< "$changed_sh"
+fi
+
+exit 0
diff --git a/.claude/settings.json b/.claude/settings.json
index afc79f6..b331bb1 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -1,12 +1,18 @@
 {
+  "permissions": {
+    "deny": [
+      "Bash(*git commit*)",
+      "Bash(*git push*)"
+    ]
+  },
   "hooks": {
-    "PostToolUse": [
+    "PreToolUse": [
       {
-        "matcher": "Edit|Write",
+        "matcher": "Bash",
         "hooks": [
           {
             "type": "command",
-            "command": "if [[ \"$CLAUDE_FILE_PATH\" == *.sh ]]; then shellcheck \"$CLAUDE_FILE_PATH\" 2>&1 | head -20; fi"
+            "command": "bash \"$(git rev-parse --show-toplevel)/.claude/hooks/deny-dangerous.sh\""
           }
         ]
       }
@@ -16,7 +22,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "repo_root=$(git rev-parse --show-toplevel 2>/dev/null) && for f in $(git diff --name-only 2>/dev/null); do [[ \"$f\" == *.sh ]] && bash -n \"$repo_root/$f\" 2>&1; done; echo 'syntax check complete'"
+            "command": "bash \"$(git rev-parse --show-toplevel)/.claude/hooks/stop-lint.sh\""
           }
         ]
       }
diff --git a/.claude/skills/audit/SKILL.md b/.claude/skills/audit/SKILL.md
new file mode 100644
index 0000000..7c94ea4
--- /dev/null
+++ b/.claude/skills/audit/SKILL.md
@@ -0,0 +1,72 @@
+# /audit - Four-Pass Shell Audit
+
+Use this for structured audits of scripts, directories, or workflow changes.
+
+## Rules
+
+- Run all four passes in order.
+- Every finding must cite `script:line` evidence.
+- **MUST NOT propose fixes.** Audit first; remediation only if the human asks later.
+
+## Pass 1: Discovery
+
+Scan the target scripts and log candidate findings with `script:line` evidence.
+
+Audit for:
+- unquoted variables in dangerous contexts
+- missing error handling around external commands
+- hardcoded paths
+- secret exposure or credential leakage
+- unsafe patterns such as `eval` or unvalidated input
+- missing strict mode, unless explicitly documented as an exception
+- inconsistent logging paradigm compared with sibling scripts
+- helper-source pattern mismatches
+
+## Pass 2: Verification
+
+Re-read each finding in context and confirm it is real.
+
+- read surrounding functions, not just the flagged line
+- check whether the pattern is intentional
+- remove false positives
+- check `docs/footguns.md` for documented exceptions
+
+## Pass 3: Severity Ranking
+
+Rank verified findings in this order:
+- `Security`
+- `Correctness`
+- `Portability`
+- `Style`
+
+Use the highest applicable severity. Do not inflate lower-risk findings.
+
+## Pass 4: Fabrication Gate
+
+For every remaining finding, ask:
+- did I fabricate this?
+- did I verify it against actual code?
+- did I skip a conflicting file or exception?
+
+Remove anything that fails this check.
+
+## Output Format
+
+```md
+## Audit: [target]
+
+### Security
+- `script:line` finding and evidence
+
+### Correctness
+- `script:line` finding and evidence
+
+### Portability
+- `script:line` finding and evidence
+
+### Style
+- `script:line` finding and evidence
+
+### Removed During Verification
+- finding removed and why
+```
diff --git a/.claude/skills/review/SKILL.md b/.claude/skills/code-review/SKILL.md
similarity index 97%
rename from .claude/skills/review/SKILL.md
rename to .claude/skills/code-review/SKILL.md
index 2431413..b223957 100644
--- a/.claude/skills/review/SKILL.md
+++ b/.claude/skills/code-review/SKILL.md
@@ -1,4 +1,4 @@
-# /review - Shell Script Code Review
+# /code-review - Shell Script Code Review
 
 Review shell scripts for correctness, convention compliance, and potential issues.
 
diff --git a/.claude/skills/debug-investigate/SKILL.md b/.claude/skills/debug-investigate/SKILL.md
new file mode 100644
index 0000000..43d5a76
--- /dev/null
+++ b/.claude/skills/debug-investigate/SKILL.md
@@ -0,0 +1,68 @@
+# /debug-investigate - Diagnosis-First Shell Debugging
+
+Use this when a shell script is failing, behaviour is inconsistent, or the root cause is unknown.
+
+## Hard Gate
+
+**If you want to "just try something" before tracing the execution path, STOP.**
+
+Do not propose or apply fixes until the diagnosis is written and the human reviews it.
+
+## Workflow
+
+1. Read the entry script end-to-end. Identify the failing path before touching code.
+2. Trace the execution path across source chains:
+   - entry script -> sourced helper -> caller-specific function
+   - `_common.sh` or `_aws-common.sh` exports, defaults, and helper calls
+   - pipes, command substitutions, subshells, and conditional branches
+3. Track variable propagation:
+   - where variables are set
+   - where they are exported
+   - where they are consumed after sourcing another file
+4. Check exit-code handling carefully:
+   - `set -e` interactions with pipes and subshells
+   - command substitutions masking failures
+   - `||` fallback paths and intentional non-zero returns
+5. Check shell-specific hazards:
+   - quoting and word splitting
+   - glob expansion
+   - array vs string assumptions
+   - platform differences: WSL vs native bash vs Git Bash
+6. Verify helper-source patterns:
+   - `lib/ai-cli/` uses same-directory `_common.sh`
+   - `lib/stacks/` uses parent traversal `../_common.sh`
+   - these are NOT interchangeable
+7. Check `docs/footguns.md` for matching traps before concluding.
+
+## Diagnosis Output Template
+
+```md
+## Diagnosis
+
+**Symptom:** what the user observed
+**Entry script:** `script:line`
+**Execution path:** `script:line` -> `script:line` -> `script:line`
+**Variable flow:** where key variables are set, exported, and consumed
+**Exit-code path:** where failure is triggered, masked, or propagated
+**Evidence:** `script:line` references that prove the diagnosis
+**Platform notes:** WSL / native bash / Git Bash differences, if relevant
+**Related footguns:** matching entries from docs/footguns.md, if any
+**Blast radius:** what else could be affected
+```
+
+## Special Attention
+
+- `set -e` behaviour around pipes, subshells, and command substitutions
+- variable scope across `source` boundaries
+- quoting problems that only fail with spaces or globs
+- platform-specific command resolution
+- shared helper changes that affect multiple domains
+
+## After Review
+
+Once the human approves the diagnosis, propose the minimal fix and verify it with:
+- `bash -n`
+- `shellcheck`
+- `bats tests/ --recursive`
+
+If two fix attempts fail, stop and report what was tried and why it failed.
diff --git a/.claude/skills/preflight/SKILL.md b/.claude/skills/preflight/SKILL.md
index 8c05612..2862aed 100644
--- a/.claude/skills/preflight/SKILL.md
+++ b/.claude/skills/preflight/SKILL.md
@@ -4,32 +4,49 @@ Run validation checks on all modified shell scripts before declaring work comple
 
 ## Instructions
 
+### MUST (cannot skip)
+
 1. **Find all modified `.sh` files** in the current working tree:
    ```bash
    git diff --name-only HEAD 2>/dev/null
    git diff --name-only --cached 2>/dev/null
    git ls-files --others --exclude-standard '*.sh' 2>/dev/null
    ```
-   Combine and deduplicate the results. Only process `.sh` files.
+   Combine and deduplicate. Only process `.sh` files.
 
-2. **Run `bash -n` on each modified script** to catch syntax errors. Report any failures.
+2. **Run `bash -n`** on each modified script. Report any failures.
 
-3. **Run `shellcheck` on each modified script.** Report warnings and errors. Fix all errors before declaring complete.
+3. **Run `shellcheck -x`** on each modified script. Fix all errors before declaring complete.
 
-4. **Verify each script has the correct shebang and strict mode:**
+4. **Verify shebang and strict mode:**
    - `#!/usr/bin/env bash` on line 1
    - `set -euo pipefail` near the top
-   - Exception: scripts that intentionally omit `-e` (e.g., `verify.sh`, `gpu-check.sh`) — note these as acceptable
+   - Exception: scripts listed in `docs/footguns.md` strict mode exceptions — note as acceptable
 
-5. **Verify each user-facing script has `-h`/`--help` support** via a `show_help()` function.
+5. **Verify `-h`/`--help`** via `show_help()` on user-facing scripts.
 
-6. **Report results** in this format:
-   ```
-   ## Preflight Results
+### SHOULD (skip only with reason)
 
-   | Script | bash -n | shellcheck | shebang | strict mode | help flag |
-   |--------|---------|------------|---------|-------------|-----------|
-   | path   | ✅/❌   | ✅/❌ (N)  | ✅/❌   | ✅/❌       | ✅/❌/N/A |
-   ```
+6. **Run `bats tests/ --recursive`** — full test suite.
+
+7. **Check executable bit** — all `.sh` files should be `chmod +x`.
+
+8. **Check logging paradigm** matches sibling scripts in the same directory.
+
+### MAY (skip during debugging)
+
+9. **Dependency audit** — check for outdated or insecure dependencies in scripts that install tools.
+
+## Output Format
+
+```
+## Preflight Results
+
+| Script | bash -n | shellcheck | shebang | strict mode | help flag |
+|--------|---------|------------|---------|-------------|-----------|
+| path   | ✅/❌   | ✅/❌ (N)  | ✅/❌   | ✅/❌       | ✅/❌/N/A |
+
+Bats: ✅/❌ (N tests)
+```
 
-7. **If any checks fail**, fix the issues and re-run the failing checks. Do not declare complete until all checks pass.
+If any MUST checks fail, fix the issues and re-run. Do not declare complete until all MUST items pass.
diff --git a/.claude/skills/research/SKILL.md b/.claude/skills/research/SKILL.md
new file mode 100644
index 0000000..0fcebdb
--- /dev/null
+++ b/.claude/skills/research/SKILL.md
@@ -0,0 +1,58 @@
+# /research - Deep Read for Shell Script Collections
+
+Use this when the human wants understanding before planning or implementation.
+
+## Hard Gate
+
+Produce `research.md` output only.
+
+Do **NOT** proceed to planning or implementation until the human reviews the research and approves the next step.
+
+## Required Sections
+
+### Files Involved
+
+- list the entry scripts
+- list sourced dependencies such as `_common.sh` or `_aws-common.sh`
+- note tests, docs, or dashboard consumers that shape behaviour
+
+### Execution Flow
+
+- trace the path from the entry point through sourced files
+- note where key variables are set vs consumed
+- call out pipes, subshells, or command substitutions that change control flow
+
+### Boundaries Touched
+
+- identify which `lib/` domains are involved
+- identify which shared helper files are sourced
+- call out cross-domain dependencies, if any
+- note CONFIGURATION block contracts or public script interfaces
+
+### Risks / Gotchas
+
+- provide at least 3 concrete risks
+- each risk must include `script:line` evidence
+- pay special attention to:
+  - cross-domain dependencies
+  - CONFIGURATION block contracts
+  - logging paradigm consistency with sibling scripts
+
+## Research Standard
+
+- read the real files before writing
+- distinguish observed facts from inference
+- prefer execution-path detail over generic summary
+- load `docs/footguns.md` when boundaries or shared helpers are involved
+
+## Output Skeleton
+
+```md
+## Files Involved
+
+## Execution Flow
+
+## Boundaries Touched
+
+## Risks / Gotchas
+```
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..f99d65c
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,3 @@
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_DEFAULT_REGION=
diff --git a/.github/instructions/aws.instructions.md b/.github/instructions/aws.instructions.md
index 1869b9f..d9885c6 100644
--- a/.github/instructions/aws.instructions.md
+++ b/.github/instructions/aws.instructions.md
@@ -4,15 +4,17 @@ applyTo: "lib/aws/**"
 
 # aws Domain
 
-AWS infrastructure wrappers. All scripts are **templates** — users copy them into a project and fill in the CONFIGURATION block.
+AWS infrastructure wrappers. Scripts are **templates** — users copy them into a project and fill in the CONFIGURATION block.
 
 ## Script Pattern
 
-All aws scripts are self-contained (no shared library). Each defines:
+AWS scripts source `_aws-common.sh` for shared helpers (auth, .env loading, color constants, require_cmd). Each script defines:
 1. `set -euo pipefail`
-2. `# ---- CONFIGURATION ----` block with AWS_PROFILE, AWS_REGION, PROJECT_NAME, and resource-specific vars
-3. Inline color constants (RED, GREEN, YELLOW, BLUE, CYAN, BOLD, NC)
-4. Inline logging functions
+2. `# ---- CONFIGURATION ----` block with AWS_PROFILE_NAME, AWS_REGION, and resource-specific vars
+3. `source "$SCRIPT_DIR/_aws-common.sh"` for shared colors, auth, and helpers
+4. `_aws-common.sh` is an **Ask First** boundary — changes affect all AWS scripts
+
+**Note:** `_aws-common.sh` provides `require_cmd`, `require_unix`, `require_modern_bash`, `ensure_aws_cli`, `require_aws_auth`, `load_env_file`, and color constants. Scripts that need `jq` or `bc` call `require_cmd` themselves.
 
 ## Logging Style
 
@@ -36,7 +38,7 @@ error()   { echo -e "${RED}[tag]${NC} $*"; exit 1; }
 ## CONFIGURATION Block Variables
 
 Typical variables across aws scripts:
-- `AWS_PROFILE`, `AWS_REGION` — always present
+- `AWS_PROFILE_NAME`, `AWS_REGION` — always present (set before sourcing `_aws-common.sh`)
 - `PROJECT_NAME` — used to derive resource names
 - `APP_ID`, `BRANCH_NAME` — Amplify scripts
 - `SECRET_PREFIX`, `REQUIRED_SECRETS` — Secrets Manager scripts
diff --git a/.github/workflows/context-validation.yml b/.github/workflows/context-validation.yml
new file mode 100644
index 0000000..a1a0dd1
--- /dev/null
+++ b/.github/workflows/context-validation.yml
@@ -0,0 +1,60 @@
+name: AI Context Validation
+
+on:
+  pull_request:
+    paths:
+      - 'AGENTS.md'
+      - 'CLAUDE.md'
+      - '.claude/**'
+      - '.github/instructions/**'
+      - '.github/workflows/context-validation.yml'
+      - 'docs/**'
+      - 'scripts/**'
+      - 'codex-evals/**'
+      - 'agent-evals/**'
+
+jobs:
+  validate:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Validate Codex workflow assets
+        run: |
+          chmod +x scripts/*.sh
+          ./scripts/context-validate.sh
+
+      - name: Validate Claude workflow assets
+        run: |
+          claude_lines=$(wc -l < CLAUDE.md)
+          echo "CLAUDE.md: ${claude_lines} lines"
+          if [ "${claude_lines}" -gt 150 ]; then
+            echo "::error::CLAUDE.md exceeds 150 line hard ceiling (${claude_lines} lines)"
+            exit 1
+          elif [ "${claude_lines}" -gt 100 ]; then
+            echo "::warning::CLAUDE.md exceeds 100 line target for libraries (${claude_lines} lines)"
+          fi
+
+          errors=0
+          while IFS= read -r -d '' skill_dir; do
+            if [ ! -f "${skill_dir}/SKILL.md" ]; then
+              echo "::error::Missing SKILL.md in ${skill_dir}"
+              errors=$((errors + 1))
+            fi
+          done < <(find .claude/skills -mindepth 1 -maxdepth 1 -type d -print0 | sort -z)
+
+          exit "${errors}"
+
+      - name: Local CLAUDE files stay short
+        run: |
+          errors=0
+          while IFS= read -r -d '' file; do
+            lines=$(wc -l < "${file}")
+            if [ "${lines}" -gt 20 ]; then
+              echo "::error::${file} exceeds 20 lines (${lines} lines)"
+              errors=$((errors + 1))
+            fi
+          done < <(find . -name 'CLAUDE.md' -not -path './CLAUDE.md' -not -path './.git/*' -print0 | sort -z)
+
+          exit "${errors}"
diff --git a/.gitignore b/.gitignore
index c9120fa..95af285 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,9 @@ __pycache__/
 .venv/
 .claude/plans/
 .claude/memory/
+.claude/settings.local.json
+tasks/todo.md
+tasks/handoff.md
 *.bak
 .terraform/
 *.tfstate*
diff --git a/00-1-ai-workflow-ARTICLE-prime_v1.3.md b/00-1-ai-workflow-ARTICLE-prime_v1.3.md
new file mode 100644
index 0000000..02e849c
--- /dev/null
+++ b/00-1-ai-workflow-ARTICLE-prime_v1.3.md
@@ -0,0 +1,242 @@
+# Stop Writing Rules. Build a Workflow.
+
+**How I taught Claude Code to stop guessing and start following a loop.**
+
+---
+
+Every task follows five steps: **READ → CLASSIFY → ACT → VERIFY → LOG.** That's the entire system. The rest of this article explains why each step exists, what broke before it did, and how to set it up for your project in under an hour.
+
+Most public CLAUDE.md files are a wall of rules: "never do X, always do Y, don't forget Z." That's a list, not a workflow. And the research backs this up — frontier models reliably follow 150-200 instructions before performance degrades uniformly. More rules doesn't mean better compliance. It means worse compliance across the board.
+
+So instead of more rules, I built a loop.
+
+---
+
+## What Keeps Going Wrong
+
+I've been using Claude Code daily for six months across projects ranging from a multi-stack Tauri desktop app (TypeScript + Rust) to an ambient medical scribe (PHP + Python + NeMo GPU + Mercure) to a single-language PHP library. The failure modes are identical regardless of stack:
+
+**Claude fabricates codebase facts.** I asked about a dependency. Claude confidently told me it was a local path dependency. I checked the package manifest — it was installed from a registry. Claude never read the file. It guessed, and it guessed wrong. → **READ forces the agent to look at the code before talking about it.**
+
+**Claude can't tell questions from instructions.** I asked "did you also improve the Claude Code setup in this project?" Claude answered "No" — then a validation hook rejected the response for "asking permission instead of implementing." Nobody asked it to implement anything. → **CLASSIFY makes the agent declare its mode before acting. Questions get answers, not implementations.**
+
+**Claude declares victory early.** Tests pass, but the old function name still appears in three files because nobody grepped for it after the rename. → **VERIFY runs checks after every meaningful change, and the Definition of Done has six explicit gates — not just "tests green."**
+
+**Claude drifts between modes silently.** You ask it to explain something. Halfway through, it starts editing files. You ask it to plan. It reads four files, reads four more, reads four more — planning loop, zero output. → **ACT defines what each mode means and requires explicit state transitions.**
+
+None of these are model failures. They're workflow failures. Claude is capable of doing all of this correctly — it just needs structure that makes the right behaviour the default behaviour.
+
+---
+
+## The Five-Step Loop
+
+**READ** forces the agent to look at the code before talking about it. For multi-layer apps, read both sides of a boundary before changing either. For libraries, read tests alongside implementation. Never fabricate — if you haven't read it, say so.
+
+**CLASSIFY** makes the agent declare what mode it's in (Plan, Implement, Explain, Debug, Review) and what complexity level it's dealing with — before it touches anything. Mode transitions must be explicit: "Switching to Implement mode because the plan is approved." Silent drift is the #1 source of planning loops and premature fixes.
+
+**ACT** defines what each mode actually means. Implement = write code within 2-3 turns. Explain = no code changes unless asked. Debug = diagnosis with file:line evidence first, fixes only after human reviews findings. If you catch the agent reading a 4th file without writing anything in Implement mode, something's wrong.
+
+**VERIFY** runs tests after every meaningful change, not just at the end. Two-level stop-the-line escalation: isolated failures get noted and continued past; cross-boundary or security failures get a full stop with diagnosis. Two corrections on the same approach = cut your losses and rewind.
+
+**LOG** captures what went wrong in two complementary files: `docs/lessons.md` for behavioural mistakes ("assumed API contract without reading frontend") and `docs/footguns.md` for architectural landmines ("auth nonce spans 4 components; breaking any one silently breaks login"). These are loaded contextually, not every session.
+
+The footguns file seeds itself during setup. The implementation prompt tells Claude Code to read the actual codebase and find real cross-domain coupling — not invent hypothetical ones. On the PHP library, it found six: normalization pipeline order dependencies, binary dictionary cache version coupling, regex error suppression in the XSS scorer, and three more — all with file:line evidence. On the medical scribe, it found eight — despite the project being at milestone 1. Multi-stack projects create coupling early; most of the scribe's footguns came from unvalidated cross-layer contracts and in-memory state management, not feature complexity. These aren't boilerplate warnings. They're the sharp edges a new contributor would hit on day two.
+
+The footguns file also feeds a second mechanism: **local CLAUDE.md files.** A file at `src/auth/CLAUDE.md` is automatically loaded whenever Claude works in that directory — no explicit loading required. When a footgun maps to a specific directory, a one-line summary is propagated to that directory's local CLAUDE.md. Put the guardrail where the danger is, not in a file the agent might skip.
+
+One limitation: not every footgun maps cleanly to a directory. The medical scribe had a WebSocket URL mismatch spanning `.env`, `docker-compose.yml`, and a Twig template — three root-level files across three layers. No single subdirectory qualifies for a local CLAUDE.md. Some footguns are cross-cutting configuration, not module-specific. The central footguns.md catches these; the propagation rule doesn't.
+
+---
+
+## Where It Lives
+
+The loop sits in a layered system where only the first layer loads every session:
+
+```
+Layer 1 — Runtime (CLAUDE.md, ~100-120 lines)       ← loads every session
+Layer 2 — Local Context (directory-level CLAUDE.md)  ← auto-loads per directory
+Layer 3 — Skills (3-5 focused slash commands)        ← loads on demand
+Layer 4 — Playbooks (planning workflows)             ← loads on demand
+Layer 5 — Evaluation (agent evals, CI validation)     ← loads on demand
+```
+
+100-120 lines. That's the entire always-loaded instruction set. Everything else loads when needed. This matters because auto-generated context files reduce success rates by ~3% while increasing inference cost by over 20%. The system prompt consumes ~50 of the model's ~150-200 instruction budget — so CLAUDE.md gets roughly 100-150 effective instructions. Spend them wisely.
+
+### The Guidelines Split
+
+Most projects accumulate two instruction files: a project-specific CLAUDE.md and a shared coding standards file (often `.github/instructions/ai-agent-guidelines.instructions.md`). These MUST NOT overlap. I learned this the hard way — my Tauri app had a Definition of Done in CLAUDE.md _and_ in the guidelines file, with subtly different gates. The agent followed whichever it read last.
+
+The clean split: **CLAUDE.md owns workflow** (the execution loop, autonomy tiers, DoD, log files, router table). **The guidelines file owns engineering practices** (coding patterns, communication style, testing strategy, error handling templates). If a rule would be identical across every project, it belongs in guidelines. If it changes per project, it belongs in CLAUDE.md.
+
+```
+❌ Overlap — agent follows whichever it reads last:
+   CLAUDE.md DoD: "tests green, preflight passes, logs updated"
+   Guidelines DoD: "tests pass, rollback strategy exists, verification story"
+
+✅ Clean split:
+   CLAUDE.md DoD: 6 project-specific gates (tests, preflight, Ask First, logs, notes, grep)
+   Guidelines: testing *strategy* (unit for logic, integration for boundaries) — not DoD gates
+```
+
+The reduction varies by how much overlap existed. The PHP library's guidelines went from 47 to 39 lines — a modest 17% trim where only the DoD section overlapped. The medical scribe's guidelines went from 95 to 51 lines — a 46% reduction — because the file had a full architecture section, a 7-point cross-layer checklist, stop-the-line rules, and core workflow rules that all belonged in CLAUDE.md. The bigger your existing guidelines file, the more overlap you'll find.
+
+### Conditional Loading
+
+Claude Code supports `.github/instructions/` files with `applyTo` frontmatter that controls when they load. A file with `applyTo: "**/*.ts"` only loads when Claude touches TypeScript files. Rust conventions only load for `.rs` files. In practice this means a multi-stack project doesn't burn instruction budget on irrelevant language rules:
+
+```yaml
+# .github/instructions/rust.instructions.md
+---
+applyTo: "**/*.rs"
+---
+# Rust conventions for this project
+- Use parking_lot for mutexes, tokio for async
+- Never .unwrap() in Tauri commands — propagate errors with context
+```
+
+That file is invisible when you're working on TypeScript. Free context savings.
+
+---
+
+## The Hook Saga (or: How I Wasted a Day on Prompt Engineering)
+
+The most interesting failure in this system was the anti-rationalisation hook. The idea was simple: after every Claude response, send it to Haiku for independent assessment. Does the response actually complete the work, or is it rationalising — calling things "pre-existing," deferring to follow-ups nobody asked for, listing problems without fixing them?
+
+Six versions. One day.
+
+| Version | What I Tried                      | What Broke                                                     |
+| ------- | --------------------------------- | -------------------------------------------------------------- |
+| v0.1    | Single paragraph, no intent check | False positives on every question                              |
+| v0.2    | Hook infrastructure                | Exit codes, infinite loop guard — no prompt iteration          |
+| v0.3    | Keyword matching for user intent  | Haiku can't see the user message — it only gets the response   |
+| v0.4    | Response-pattern detection        | Haiku returned prose instead of JSON                           |
+| v0.5    | Two-step flow with JSON preamble  | Claude's own "Want me to fix?" offer triggered false match     |
+| v0.6    | Pasted content detection          | Best version, but JSON schema fragile across reimplementations |
+
+The fundamental problem: **prompt-type Stop hooks only see the assistant's response.** They can't read the conversation. They can't see what the user asked. Intent detection is always inferred, never observed.
+
+The moment that killed it: I asked "did you also improve the Claude Code setup in this project?" Claude correctly answered "No — want me to?" The hook rejected this as "asking permission instead of implementing." The terminal showed the same wrong rejection three times — once as the hook output, once as a framework echo, once in the summary wrapper. One false positive, displayed as emphatic consensus.
+
+The conclusion: **deterministic command hooks for mechanical enforcement, CLAUDE.md rules for behavioural guidance.** A `deny-dangerous.sh` PreToolUse hook that blocks `rm -rf`, force pushes, and pipe-to-shell patterns works 100% of the time. A prompt hook trying to judge whether work is complete works maybe 70% of the time — and the 30% failure rate erodes trust faster than the 70% success rate builds it.
+
+This is worth knowing because if you're building agent workflows, you will be tempted to build this exact hook. Save yourself the day.
+
+---
+
+## What You Get
+
+The system breaks down into three adoption tiers:
+
+| Tier         | What                                                     | When                                     |
+| ------------ | -------------------------------------------------------- | ---------------------------------------- |
+| **Minimal**  | CLAUDE.md + deny-dangerous hook                          | Getting started, solo project            |
+| **Standard** | + 3-5 skills + stop/format hooks + local CLAUDE.md files | Active development, team project         |
+| **Full**     | + agent evals + CI validation + permission profiles + ADRs | Long-lived project with incident history |
+
+The key pieces:
+
+**Autonomy tiers** — not everything needs permission, not everything is free. Tests and linting? Always run. Public API changes, database migrations, dependency additions? Stop and ask first, with a micro-checklist (which boundary, did you read the related code, what's the rollback command). Delete test files to make builds pass? Never.
+
+Note: the autonomy tiers in CLAUDE.md are behavioural guidance. The actual tool-level permissions live in `.claude/settings.local.json` — you'll build this allowlist over time as you approve commands repeatedly. Start empty, grow organically.
+
+**Definition of Done** — six explicit gates. Tests green, preflight passes, no unapproved boundary changes, logs updated if you tripped, working notes current, old patterns grepped after renames. The agent can't say "task complete" until all six are true.
+
+**Stack-adaptive hooks** — define your stack once, hooks adapt. The build verification hook checks `git diff` for modified file types and only runs relevant checks. A Rust change runs `cargo fmt --check`. A PHP change runs `php -l`. A TypeScript change runs `tsc --noEmit`. No changes, no checks.
+
+**Project-specific deny rules** — beyond the universal blocks (rm -rf, force push), add blocks for files that must be modified through tooling. A PHP library with binary-encoded dictionaries? Block direct edits to `.bin` files — the encoder script is the only safe path. Generated code? Block direct edits — the generator is the source of truth. GPU model files too large for direct edit? Block `.nemo` files. Infrastructure changes without a plan? Block `terraform apply` without a preceding `terraform plan`. The categories expand with the project.
+
+---
+
+## Adapting to Your Project
+
+The system adapts to project shape. I've run it on three projects: a multi-stack Tauri desktop app (TypeScript + Rust), a zero-dependency PHP library, and a four-layer medical scribe (PHP + Python + NeMo GPU + Mercure + Terraform). Same plan, same prompts, different outputs.
+
+The adaptation is real, not cosmetic. The PHP library's Ask First boundaries name specific classes (`SusFormDetector`, `SusAssessment`, `SusFactor`), specific data files (`profanity_words.bin`, `safe_names.json`), and the binary encoding pipeline. The Tauri app's boundaries name auth, routing, deployment, and cross-layer changes. The medical scribe's boundaries name eight items including PHP↔Python API contracts, the NeMo GPU singleton, Mercure topics, and Strands agent model provider switching. The structure is identical; every detail is project-specific.
+
+The deny-dangerous hook adapts too. All three projects block the universal patterns (rm -rf, force push, pipe-to-shell). The PHP library adds blocks for binary dictionary files. The Tauri app adds blocks for .env modifications. The medical scribe adds blocks for `.nemo` model files and `terraform apply` without a plan — a different category of project-specific protection than the other two.
+
+Getting CLAUDE.md under the line target depends on the starting point. The PHP library (Prompt A — new file) took 127 lines on first pass and required two compression passes. The medical scribe (Prompt B — existing CLAUDE.md migration) landed at 114 lines on first pass, well under the 120 target. The difference: Prompt B migrates domain content to `docs/domain-reference.md` first, which clears the canvas. The entire line budget goes to the execution loop instead of fighting existing content for space. If your project has a content-heavy CLAUDE.md, Prompt B may be the easier path.
+
+| Aspect                  | App (Tauri)                                   | Library (PHP)                               | App (Medical Scribe)                                              |
+| ----------------------- | --------------------------------------------- | ------------------------------------------- | ----------------------------------------------------------------- |
+| CLAUDE.md               | 121 lines                                     | 99 lines                                    | 118 lines                                                         |
+| Skills                  | 6 (core + /research, /code-review, /review)   | 3 (core only)                               | 5 (2 updated, 3 new)                                             |
+| Footguns seeded         | 14                                            | 6                                           | 8                                                                 |
+| Agent evals             | 5 (from real incidents)                       | 5 (from git history)                        | 5 (3 real incidents, 2 common modes)                             |
+| Ask First boundaries    | Auth, routing, deployment, API contracts, DB  | Public API, dependencies, data/config files | 8 (PHP↔Python contracts, NeMo, Mercure, Docker, env, Terraform) |
+| Local CLAUDE.md files   | Planned (14 footguns suggest qualifying dirs) | None needed (flat structure)                | 2 (strands_agents/, infra/)                                      |
+| Permission profiles     | 3 (frontend/backend/infra)                    | None (single language)                      | 3 (php-backend/python-agent/infrastructure)                      |
+| Guidelines file         | 258 lines (ownership split pending)           | 47 → 39 lines (17% reduction)               | 95 → 51 lines (46% reduction)                                    |
+| Prompt path             | N/A (pre-dates prompts)                       | A (new file)                                | B (existing migration)                                            |
+| Original CLAUDE.md      | N/A                                           | N/A                                         | 145 lines → 85 lines in domain-reference.md                      |
+| Compression struggle    | Yes                                           | Yes (127 → 99)                              | No (114 first pass)                                               |
+| Implementation sessions | Built over weeks                              | 2 sessions (generate + fix)                 | 1 session (all phases)                                            |
+
+For libraries, skip `/research` (the default READ step handles single-domain codebases) and `/code-review` (the default Review mode is sufficient). Don't create skills that won't earn their place — the skill justification test in the plan requires each skill to have a distinct artefact, a hard workflow gate, a special failure mode, or a repeatable structured output.
+
+The medical scribe revealed a gap in the prompts: it already had three skills and two hooks before implementation. Phase 1b was update-and-extend rather than create-from-scratch. Phase 1c replaced inline hook commands with external scripts. Prompt B handles CLAUDE.md migration, but the prompts don't explicitly address pre-existing skills and hooks. If your project has partial tooling already, expect Phase 1b/1c to adapt rather than create.
+
+Another finding from the scribe: even at milestone 1 with only 11 commits, three of five agent evals came from real git history — an EventSource ordering bug, a Docker volume mount mismatch, and an audio format assumption. The plan expected new projects to lean on common failure modes for seeding evals. Multi-stack projects generate qualifying incidents early because the cross-layer boundaries create failure opportunities before the features do.
+
+---
+
+## How to Set It Up
+
+Two files do all the work. Give them to Claude Code and it builds the system for your project.
+
+### Step 1: Audit your existing files
+
+If you have an `ai-agent-guidelines.instructions.md` or similar shared standards file, remove any execution loop, DoD, stop-the-line, or working memory content. Those now live in CLAUDE.md. Keep engineering practices, communication style, and templates.
+
+### Step 2: Add the plan files
+
+Copy these two files into your project:
+
+- `ai-workflow-improvement-plan-prime.md` — the full system design
+- `ai-workflow-implement-prompts-prime.md` — the implementation prompts
+
+Both files are available in the [ai-planning-playbook](https://github.com/blundergoat/ai-planning-playbook) repo.
+
+### Step 3: Choose your starting point
+
+**New project (no existing CLAUDE.md):** Open the implementation prompts file. Copy the Phase 0 prompt. Edit the stack definition. Run it. You'll get a CLAUDE.md, a deny-dangerous hook, and a settings.json — the Minimal tier.
+
+**Existing project:** Skip Phase 0. The prompts have two variants for Phase 1a — one for new files, one that migrates existing CLAUDE.md domain content to a reference doc before building the workflow system on top. Note: the migration may be incomplete on the first pass. On the PHP library, Code Style and PHP Requirements sections were silently dropped because they partially overlapped with content in the guidelines file. Audit the migration output against the original before continuing.
+
+### Step 4: Iterate
+
+Run Phase 1a, 1b, 1c in order. Each creates a focused set of files. Don't run them all at once — the whole point of splitting them was to stay within the instruction budget. Budget a second pass after Phase 1a to verify nothing was dropped during compression — though Prompt B projects may not need it if the domain content migration frees up enough budget.
+
+Run Phase 2 for the Full tier: agent eval regression tests, RFC 2119 priority markers, CI validation. You can run it immediately after Phase 1 — the medical scribe ran all phases in a single session — though waiting gives you more real incidents to seed evals from. On the PHP library, Phase 2 found five agent evals from real git history — safe-name false positives, evasion patterns, threshold calibration drift. Even a library with no "production incidents" has a git history worth mining.
+
+---
+
+## What's Deliberately Left Out
+
+**Prompt hooks for semantic judgement.** I tried. Six versions. It doesn't work. See the hook saga above.
+
+**Rigid enforcement of every rule.** The plan uses RFC 2119 language (MUST/SHOULD/MAY) to make priority visible. The execution loop, autonomy tiers, and definition of done are MUST. Log hygiene is SHOULD. The structural debt trigger is MAY. Not everything is equally important.
+
+**Global git config changes.** The plan recommends gitleaks for pre-commit secret scanning, but installing it requires `git config --global core.hooksPath` which affects every repo on your machine. That's a manual step documented in a setup guide, not something an AI agent should do.
+
+**My project's specific examples.** The plan's examples are illustrative. Replace them with incidents from your own codebase. The principles are universal; the examples are mine.
+
+---
+
+## The Quarterly Shrink
+
+The system is designed to get smaller over time, not larger. The plan includes a quarterly audit: re-count CLAUDE.md lines, check for stale rules, ask "if I removed this, would the model still do the right thing?" Rules that once helped become constraints as models improve.
+
+The learning loop files (lessons.md, footguns.md) have their own hygiene: max 15 active lessons, pattern promotion when 3+ share a theme, entries archived after 30 days untriggered. The goal is a system that teaches itself out of needing individual entries.
+
+---
+
+## TL;DR
+
+Build a five-step loop, enforce it with deterministic hooks, keep it under 120 lines.
+
+---
+
+_Written with AI-assisted cognitive framework ([GOAT System](https://blundergoat.com)). The ideas are mine; the AI helps me articulate them clearly._
+
+_The implementation files are available in the [ai-planning-playbook](https://github.com/blundergoat/ai-planning-playbook) repo._
diff --git a/00-1-ai-workflow-article-cross-agent-section.md b/00-1-ai-workflow-article-cross-agent-section.md
new file mode 100644
index 0000000..6bdec3a
--- /dev/null
+++ b/00-1-ai-workflow-article-cross-agent-section.md
@@ -0,0 +1,61 @@
+## Same Project, Different Agent
+
+The medical scribe was the first project to get both implementations: Claude Code (v1.2 plan) and Codex (adapted prompt). Same codebase, same execution loop concept, different agent mechanics. The comparison is direct.
+
+### What Maps Cleanly
+
+The core system transfers without modification: the five-step loop, autonomy tiers, Definition of Done, footguns file, lessons file, router table, essential commands, and the eval concept. These aren't Claude Code features — they're workflow patterns that work on any agent that reads a root instruction file.
+
+Both agents produced similar footgun counts from the same codebase (8 from Claude Code, 7 from Codex) with overlapping findings: the Mercure silent publish failure, the three independent session state buckets, the NeMo GPU singleton, the DynamoDB provisioned-but-unused gap. The convergence suggests the footgun-seeding approach works regardless of which agent does the reading.
+
+### What Had No Equivalent
+
+| Claude Code feature | Codex replacement | Trade-off |
+|---|---|---|
+| PreToolUse hooks (deny-dangerous) | `scripts/deny-dangerous.sh` as policy documentation | Claude Code blocks `rm -rf` before it executes. Codex documents the policy for review and CI but cannot prevent the command from running. |
+| Stop hooks (lint after every turn) | Preflight script, run manually or in CI | Claude Code catches formatting issues continuously. Codex catches them at checkpoints. |
+| PostToolUse hooks (auto-format) | Nothing — manual or preflight | No auto-formatting on edit. |
+| Local CLAUDE.md (directory auto-load) | Centralised footguns.md + router references | Claude Code loads warnings automatically when entering a directory. Codex requires the agent to check the router table. |
+| Slash commands (/preflight, /debug) | Playbook files in `docs/codex-playbooks/` | Same content, different loading mechanism. |
+| Permission profiles (.claude/profiles/) | Behavioural guidance in AGENTS.md only | No tool-level scoping. |
+| /compact, /insights | No equivalent | Codex context is per-task, not per-session. No session management needed — but no session learning either. |
+
+The hooks gap is the fundamental difference. Claude Code has three layers of enforcement: behavioural guidance in CLAUDE.md, deterministic hooks that block commands before execution, and stop hooks that run checks after every turn. Codex has one layer: behavioural guidance in AGENTS.md. The deny-dangerous script exists, but it's a policy document — inspectable, auditable, referenced from preflight and CI — not a runtime interceptor.
+
+### What's Better Without Hooks
+
+No hooks isn't purely a loss. Five things work better in the Codex version:
+
+**No false positives.** The hook saga documented six versions of a prompt-based stop hook, all of which produced false positives that eroded trust. Codex sidesteps this entirely — there's no mechanism to produce false positives because there's no semantic enforcement mechanism.
+
+**Inspectable policy.** `deny-dangerous.sh` is a plain shell script committed to the repo. Anyone can read it, diff it, argue about it. Claude Code's deny hook is the same, but the stop and format hooks involve JSON configuration in `.claude/settings.json` that's less transparent.
+
+**Reused existing infrastructure.** Codex extended the project's existing `preflight-checks.sh` rather than creating parallel hook machinery. The deny policy became step 3 of the existing preflight script. Claude Code's hooks exist alongside preflight, creating two enforcement paths.
+
+**Deterministic validation.** `scripts/context-validate.sh` checks that AGENTS.md references exist, playbooks have required sections, and footguns have evidence. Claude Code's CI workflow does similar checks, but Codex's version is a local script you can run anytime — no CI pipeline required.
+
+**Committed overlap report.** When Codex applied the guidelines ownership split, it created a persistent `guidelines-ownership-split.md` documenting what was removed and why. Claude Code's split happens in a chat session and the reasoning evaporates when the session ends.
+
+### What's Worse Without Hooks
+
+The enforcement gap is real and shows up in six places:
+
+**No runtime blocking.** If Codex decides to run `rm -rf /`, nothing stops it. AGENTS.md says "Never do this." The deny-dangerous script documents the policy. But neither intercepts the command. Claude Code's PreToolUse hook blocks it before execution — 100% of the time, mechanically, regardless of whether the agent read the rules.
+
+**No automatic stop-the-line.** Claude Code's stop hook runs `php -l` or `cargo fmt --check` after every turn. If there's a syntax error, the agent sees it immediately. Codex only catches these at preflight checkpoints — meaning errors can accumulate between checks.
+
+**Ask First is behavioural only.** In Claude Code, the Ask First micro-checklist is reinforced by the stop-the-line hook — if a cross-boundary change breaks something, the hook catches it. In Codex, Ask First relies entirely on the agent choosing to follow the rule.
+
+**No directory-level warnings.** Claude Code auto-loads a local CLAUDE.md when entering `strands_agents/` or `infra/`. Codex has no confirmed equivalent — the footguns are centralised, not positioned where the danger is.
+
+**No permission lanes.** Claude Code's permission profiles restrict which files a frontend session can edit. Codex has no tool-level scoping — every session has access to everything.
+
+**No session compaction.** Claude Code's `/compact` and context management tools help with long sessions. Codex's per-task context model avoids this problem differently — each task starts fresh — but loses continuity between tasks.
+
+### The Honest Summary
+
+The system's core — the execution loop, autonomy tiers, definition of done, learning loop — is agent-agnostic. It works on both. The enforcement layer is where they diverge. Claude Code enforces mechanically (hooks block commands, format files, check syntax). Codex enforces culturally (AGENTS.md rules, policy scripts, preflight checks, CI).
+
+For solo developers who trust their agent and verify with preflight, the Codex model is sufficient. For teams, long-lived projects, or codebases where a single bad command has high blast radius, Claude Code's hooks provide a safety net that behavioural guidance alone can't match.
+
+The workflow system is portable. The enforcement model is not.
diff --git a/00-1-ai-workflow-codex-workflow-implement-prompt_v1.3.md b/00-1-ai-workflow-codex-workflow-implement-prompt_v1.3.md
new file mode 100644
index 0000000..3a2e0dc
--- /dev/null
+++ b/00-1-ai-workflow-codex-workflow-implement-prompt_v1.3.md
@@ -0,0 +1,191 @@
+# Codex Workflow Implementation Prompt
+
+Give this to Codex. Prefer a single session. If the repo is too large for one clean pass, finish the foundation first and report the split explicitly instead of bluffing completeness.
+
+---
+
+## Context Prompt
+
+Paste this first:
+
+```text
+I have an AI workflow system designed for Claude Code that I want to adapt
+for Codex. The core idea: instead of a wall of rules, give the agent a
+5-step execution loop (READ -> CLASSIFY -> ACT -> VERIFY -> RECORD) with
+autonomy tiers, a definition of done, and a learning loop.
+
+Read these files for the full system design:
+- 00-1-ai-workflow-improvement-plan-prime_v1.3.md
+  (the plan - 5-layer architecture; if your copy was renamed, use
+  ai-workflow-improvement-plan.md)
+- ai-workflow-ARTICLE-prime.md (real implementation data from 3 projects)
+
+Now adapt this system for Codex. NOT a copy - a Codex-native implementation
+that respects how Codex actually works. Key differences from Claude Code:
+
+CODEX MECHANICS (respect these):
+- AGENTS.md is the root instruction file (not CLAUDE.md)
+- No slash commands - use playbook .md files in docs/codex-playbooks/
+- No hooks system - use AGENTS.md rules + verification scripts
+- apply_patch for edits (not Edit/Write tool)
+- Codex may run in cloud sandboxes or local constrained shells depending on client.
+  Design for least privilege either way.
+- No /compact, no /clear, no /insights - context is per-task
+- No .claude/ directory structure
+- No settings.json or profiles
+
+WHAT TO BUILD (in this order):
+
+1. AGENTS.md (root runtime file)
+   - Keep it concise. Do not fetishize a line count, but keep the runtime
+     file short with referenced docs for detail.
+   - Default execution loop: READ -> CLASSIFY -> ACT -> VERIFY -> RECORD
+     - READ: read relevant files first, never fabricate. Include bad/good example
+     - CLASSIFY: declare mode (Answer, Plan, Implement, Debug, Review) +
+       complexity. Question vs directive disambiguation. State declaration.
+     - ACT: mode-constrained behaviour table. Anti-planning-loop rule.
+       Anti-BDUF guard with bad/good example.
+     - VERIFY: run tests after meaningful changes. Two-level escalation
+       (isolated -> note and continue; cross-boundary -> full stop + diagnosis).
+       Two failed approaches on same fix = stop and report.
+     - RECORD: docs/lessons.md (behavioural mistakes) + docs/footguns.md
+       (architectural landmines). Context-based loading rules.
+   - Autonomy tiers: Always / Ask First / Never
+     - Adapt Ask First boundaries for THIS project
+     - Include micro-checklist for Ask First items
+     - Never: delete tests, modify secrets, make commits unless asked,
+       no destructive git operations
+   - Definition of Done: 6 gates (tests green, verification passes,
+     no unapproved boundary changes, logs updated if tripped, notes current,
+     grep after renames)
+   - Router table: pointers to playbooks, docs, evals
+   - Essential commands for this project
+
+   If AGENTS.md already exists:
+   - preserve project-specific identity and essential commands
+   - preserve any repo-specific safety rules unless they clearly conflict with
+     the new ownership split
+   - migrate domain reference material (architecture, design patterns,
+     conventions) into docs/architecture.md or docs/domain-reference.md
+   - report what was moved, what was kept, and why
+   - then rebuild the execution loop on top
+
+2. Guidelines ownership split
+   - If a coding-standards or guidelines file exists, audit for overlap
+   - AGENTS.md owns: execution loop, autonomy tiers, DoD, log files, router
+   - Guidelines file owns: engineering practices, coding patterns, testing
+     strategy, communication style
+   - Remove overlap from guidelines. Before editing, produce a
+     before/after overlap report listing every line or section to be removed
+     and why. Do not auto-remove without this diff.
+   - Do not rewrite unrelated docs or repo policy files outside this ownership split.
+
+3. Docs seed files (create ALL of these - no implied files)
+   - docs/lessons.md - format header, empty Entries/Patterns sections
+   - docs/footguns.md - read the actual codebase for real cross-domain
+     footguns. Seed with real ones only. Include file:line evidence.
+   - docs/architecture.md - short overview (under 100 lines): what the
+     system does, components, data flows, constraints, trade-offs
+   - tasks/todo.md - empty runtime file for working notes during tasks
+   - tasks/handoff.md - empty runtime file with handoff template
+     (Status, Current State, Key Decisions, Known Risks, Next Step)
+
+4. Codex playbooks (docs/codex-playbooks/)
+   Create these as standalone .md files the agent reads on demand:
+
+   - preflight.md - mechanical verification with priority markers.
+     MUST: build + lint + type-check when applicable.
+     SHOULD: full test suite, formatter.
+     Include dependency audit step.
+   - research.md - deep-read template: Files Involved, Request Flow,
+     Boundaries Touched, Risks/Gotchas (min 3 with file:line evidence).
+     Hard gate: no planning until human reviews output.
+   - debug-investigate.md - diagnosis-first. "If you want to just try
+     something before tracing the code path, STOP." Diagnosis output
+     template with file:line evidence. No fixes until human reviews.
+   - audit.md - 4-pass: Discovery -> Verification -> Prioritisation ->
+     Self-Check ("did I fabricate this?"). MUST NOT propose fixes.
+   - code-review.md - structured review with priority markers and
+     autonomy tier awareness.
+
+   Skip research.md and code-review.md for single-domain libraries.
+
+5. Verification scripts (scripts/)
+   - scripts/preflight-checks.sh - runs build, lint, test for the stack.
+     Exit non-zero on failure.
+   - scripts/context-validate.sh - checks AGENTS.md references exist,
+     playbook files have required sections, and docs/footguns.md contains
+     real evidence-backed entries or explicitly states "none confirmed yet".
+   - scripts/deny-dangerous.sh - codifies the deny policy for
+     human/agent review, preflight, and CI. It does NOT intercept
+     commands automatically - Codex has no hook system. Reference
+     this script from AGENTS.md rules and preflight checks.
+     Document blocks for: rm -rf (unscoped), force push, .env edits,
+     no-verify commits. Add project-specific blocks for files that
+     must be modified through tooling.
+
+6. Codex evals (codex-evals/)
+   Create a README.md explaining what evals are and how to use them.
+
+   Search git history for real incidents:
+   git log --oneline --all | grep -iE 'fix|revert|bug|broke|regression'
+
+   For each, create codex-evals/[incident-name].md with:
+   - Origin: real-history | synthetic-seed
+   - Bug description
+   - Single replay prompt
+   - Expected outcome
+   - Failure mode tested
+
+   If fewer than 5 real incidents, add from these common failure modes:
+   - Question answered without code changes (CLASSIFY test)
+   - Rename followed by grep for old pattern (VERIFY test)
+   - Ask First boundary respected (autonomy test)
+   - Debug diagnosis before fix attempt (ACT test)
+   - Two failed approaches triggers stop (VERIFY test)
+
+VERIFICATION:
+- AGENTS.md exists and is concise
+- All docs seed files exist (including tasks/todo.md and tasks/handoff.md)
+- All playbook files exist with required sections
+- Verification scripts are executable and run without errors
+- Footguns are real (from codebase) with file:line evidence, or
+  docs/footguns.md explicitly states "none confirmed yet"
+- Evals reference real incidents where possible
+- Router table in AGENTS.md points to all created files
+- Report: AGENTS.md line count, number of playbooks, number of footguns,
+  number of evals, guidelines file reduction (if applicable)
+```
+
+---
+
+## After Codex Runs - Human Checklist
+
+- [ ] AGENTS.md: does the execution loop read naturally, not like a copy of CLAUDE.md?
+- [ ] Footguns: are they real? Check file:line references against actual code.
+- [ ] Guidelines split: diff the before/after. Was anything useful dropped?
+- [ ] Evals: do the replay prompts test what they claim to test?
+- [ ] Verification scripts: run each one manually. Do they pass?
+- [ ] Router table: click every reference. Do the files exist?
+- [ ] Ask First boundaries: are they specific to THIS project, not generic?
+
+---
+
+## What This Intentionally Does Not Include
+
+- **Hooks / automatic interception.** Codex has no hooks system. The
+  deny-dangerous script codifies policy for review and CI - it does not
+  block commands at runtime. AGENTS.md rules are behavioural guidance,
+  not mechanical enforcement. This is the biggest capability gap vs
+  Claude Code. Accept it and design around it: strong rules + preflight
+  validation + CI checks.
+- **Permission profiles.** Codex's sandbox model is different. Scoping is via
+  AGENTS.md rules, not JSON profile files.
+- **Local AGENTS.md files.** Directory-level auto-loading of instruction
+  files has not been confirmed in Codex docs as of March 2026. Treat this
+  as an implementation assumption. Put module warnings in docs/footguns.md
+  and reference them from AGENTS.md's router table.
+- **Slash commands.** Playbook files serve the same purpose - the agent reads
+  them when the task matches. Reference them in AGENTS.md's router table.
+- **Strict line count.** Codex's context model is per-task, not per-session.
+  Keep AGENTS.md concise for clarity, not for a token budget ceiling.
diff --git a/00-1-ai-workflow-design-rationale-prime_v1.3.md b/00-1-ai-workflow-design-rationale-prime_v1.3.md
new file mode 100644
index 0000000..1ce5838
--- /dev/null
+++ b/00-1-ai-workflow-design-rationale-prime_v1.3.md
@@ -0,0 +1,412 @@
+# AI Workflow Design Rationale — Prime Edition
+
+**Companion to:** `ai-workflow-improvement-plan-prime.md` (v1.2)
+**Purpose:** Per-section "problem it solves" context and source attributions for every design decision in the plan.
+
+---
+
+## Sources Referenced
+
+| Short Name          | Full Reference                                                                                   |
+| ------------------- | ------------------------------------------------------------------------------------------------ |
+| HumanLayer          | HumanLayer's CLAUDE.md research — instruction budgets, auto-generated context performance impact |
+| Philipp Schmid      | Philipp Schmid — frontier model instruction following limits (~150-200 effective instructions)   |
+| GitHub 2,500-repo   | GitHub's 2,500-repo agents.md analysis — tool mention effect (160x usage uplift)                 |
+| awslabs/aidlc       | awslabs/aidlc-workflows — structured agent lifecycle patterns                                    |
+| Oruç                | Ömer Faruk Oruç's claude.md — execution loop and mode classification patterns                    |
+| Trail of Bits       | Trail of Bits claude-code-config — deny-dangerous patterns, security hardening                   |
+| Boris Tane          | Boris Tane's Claude Code workflow — session management, handoff protocols                        |
+| Microsoft AutoDev   | Microsoft AutoDev paper — autonomous agent guardrails and verification loops                     |
+| Propel              | Propel's codebase structuring guide — context loading strategies                                 |
+| BlunderGOAT SBAO    | BlunderGOAT — SBAO planning methodology                                                          |
+| BlunderGOAT Scanner | BlunderGOAT — SEO Scanner case study (PHP library implementation)                                |
+| BlunderGOAT CC      | BlunderGOAT — Claude Code Insights article                                                       |
+| BlunderGOAT PBYP    | BlunderGOAT — Plan Before You Prompt article                                                     |
+
+---
+
+## High-Level: System Architecture
+
+```mermaid
+flowchart TB
+    subgraph ALWAYS["ALWAYS LOADED (every session)"]
+        L1["<b>Layer 1 — Runtime</b><br/>CLAUDE.md ~100-120 lines<br/>Execution Loop · Autonomy Tiers · DoD · Router Table"]
+        HOOKS["<b>Enforcement Hooks</b><br/>deny-dangerous (PreToolUse)<br/>stop-lint (Stop) · format-file (PostToolUse)"]
+    end
+
+    subgraph AUTO["AUTO-LOADED (per directory)"]
+        L2["<b>Layer 2 — Local Context</b><br/>Directory-level CLAUDE.md files<br/>Footgun summaries · Module conventions · Cross-boundary warnings"]
+    end
+
+    subgraph DEMAND["ON-DEMAND (via router table / slash commands)"]
+        L3["<b>Layer 3 — Skills</b><br/>/preflight · /debug-investigate · /audit<br/>/research (apps) · /code-review (apps)"]
+        L4["<b>Layer 4 — Playbooks</b><br/>Mob Elaboration · SBAO Planning · Milestone Planning"]
+    end
+
+    subgraph CICD["CI / REGRESSION"]
+        L5["<b>Layer 5 — Evaluation</b><br/>Agent Evals (replay suite) · CI Validation (context file checks)"]
+    end
+
+    GUIDE["<b>Guidelines File</b><br/>ai-agent-guidelines.instructions.md<br/>Engineering practices · Coding patterns · Testing strategy<br/><i>No overlap with CLAUDE.md</i>"]
+
+    L1 -- "router table" --> L3
+    L1 -- "router table" --> L4
+    L1 -- "router table" --> L5
+    L1 -. "footgun propagation" .-> L2
+    L1 -- "registered in settings.json" --> HOOKS
+    L1 -. "ownership split" .-> GUIDE
+
+    style ALWAYS fill:#1a3a1a,color:#ccc
+    style AUTO fill:#1a2a4a,color:#ccc
+    style DEMAND fill:#2a2a2a,color:#ccc
+    style CICD fill:#3a1a1a,color:#ccc
+```
+
+---
+
+## Low-Level: Execution Loop
+
+```mermaid
+flowchart TD
+    START([Task Received]) --> READ
+
+    READ["<b>READ</b><br/>Read relevant files first<br/>Both sides of boundaries<br/>Never fabricate codebase facts"]
+    READ --> CLASSIFY
+
+    CLASSIFY["<b>CLASSIFY</b><br/>Complexity: Hotfix | Standard | System | Infra<br/>Mode: Plan | Implement | Explain | Debug | Review"]
+    CLASSIFY --> QD{Question<br/>or Directive?}
+
+    QD -- "Question<br/>(what, which, how does, whats next)" --> ANSWER([Answer the question<br/>Do NOT implement])
+    QD -- "Directive<br/>(add, fix, build, wire, do it)" --> DECLARE["Declare State<br/><i>State: MODE | Goal: one line | Exit: condition</i>"]
+
+    DECLARE --> ACT
+
+    subgraph ACT["<b>ACT</b> (mode-constrained)"]
+        direction LR
+        PLAN["<b>Plan</b><br/>Produce artefact<br/>No app code<br/>Exit: LGTM"]
+        IMPL["<b>Implement</b><br/>Code in 2-3 turns<br/>4th read = stop<br/>exploring"]
+        EXPL["<b>Explain</b><br/>Walkthrough only<br/>No code changes<br/>unless asked"]
+        DBG["<b>Debug</b><br/>Diagnosis first<br/>file:line evidence<br/>No premature fixes"]
+        REV["<b>Review</b><br/>Investigate first<br/>Never blindly<br/>apply suggestions"]
+    end
+
+    ACT --> VERIFY
+
+    VERIFY["<b>VERIFY</b><br/>Run tests after each meaningful change"]
+    VERIFY --> LEVEL{Failure<br/>level?}
+
+    LEVEL -- "Pass" --> DOD{Definition of Done<br/>All 6 gates met?}
+    LEVEL -- "Level 1: isolated<br/>(flaky test, unrelated failure)" --> L1NOTE["Note in Working Notes<br/>Continue with caution"]
+    L1NOTE --> ACT
+    LEVEL -- "Level 2: cross-boundary<br/>or security failure" --> L2STOP["FULL STOP<br/>Preserve error output<br/>Write diagnosis with file:line<br/>Wait for human review"]
+
+    L2STOP --> TWICE{Two corrections<br/>on same issue?}
+    TWICE -- "No" --> ACT
+    TWICE -- "Yes" --> CUT["Cut losses<br/>Rewind / git revert / /clear + handoff"]
+
+    DOD -- "No" --> ACT
+    DOD -- "Yes" --> LOG
+
+    LOG["<b>LOG</b><br/>lessons.md — agent behavioural mistakes<br/>footguns.md — cross-domain architectural traps<br/>confusion-log.md — structural navigation difficulty"]
+    LOG --> PROPAGATE{"Footgun maps to<br/>specific directory?"}
+    PROPAGATE -- "Yes" --> LOCAL["Propagate one-line summary<br/>to local CLAUDE.md"]
+    PROPAGATE -- "No" --> DONE
+    LOCAL --> DONE([Task Complete])
+
+    style READ fill:#1a3a1a,color:#ccc
+    style CLASSIFY fill:#1a2a4a,color:#ccc
+    style ACT fill:#2a2a2a,color:#ccc
+    style VERIFY fill:#1a3a1a,color:#ccc
+    style LOG fill:#3a2a1a,color:#ccc
+```
+
+---
+
+## System Architecture (5 Layers)
+
+**Problem it solves:** Loading all instructions every session wastes context budget and degrades compliance. Projects accumulate rules, playbooks, skills, and docs — if everything loads at once, the model gets worse at following _all_ of it.
+
+**Key evidence:**
+
+- Auto-generated context files reduce success rates by ~3% while increasing inference cost by 20%+ (HumanLayer)
+- Frontier models follow ~150-200 instructions reliably; Claude Code's system prompt consumes ~50, leaving ~100-150 for CLAUDE.md (Philipp Schmid, HumanLayer)
+- Tools mentioned in AGENTS.md are used 160x more often than unmentioned ones — the router table is the highest-signal section (GitHub 2,500-repo)
+
+**Design decision:** Only Layer 1 (CLAUDE.md, ~100-120 lines) loads every session. Everything else loads on demand via the router table, slash commands, or automatic directory-level loading. This keeps the always-loaded budget small while making everything else discoverable.
+
+**Why 5 layers, not 3 or 7:** Each layer has a distinct loading trigger — always (L1), automatic per-directory (L2), on-demand by user (L3/L4), or CI/regression (L5). Fewer layers would combine different loading semantics. More would create layers with no meaningful distinction.
+
+---
+
+## Guidelines Ownership Split
+
+**Problem it solves:** Projects with both CLAUDE.md and a shared guidelines file (e.g., `ai-agent-guidelines.instructions.md`) end up with overlapping rules — two different Definitions of Done, two different testing strategies. The agent follows whichever it reads last, creating inconsistent behaviour.
+
+**Source:** Direct experience on a Tauri app where CLAUDE.md had a DoD ("tests green, preflight passes, logs updated") and the guidelines file had a different DoD ("tests pass, rollback strategy exists, verification story"). The agent alternated between them unpredictably.
+
+**Design decision:** Clean ownership boundary. CLAUDE.md owns workflow (execution loop, autonomy tiers, DoD, logs, router). Guidelines owns engineering practices (coding patterns, communication style, testing strategy, error handling). Test: if a rule would be identical across every project, it belongs in guidelines. If it changes per project, it belongs in CLAUDE.md.
+
+**Evidence it works:** Applying the split to a PHP library shrunk the guidelines file from 47 to 39 lines — the DoD section ("Before Marking Done") was the overlap. What survived was the right shape for a shared file.
+
+---
+
+## Layer 2: Local CLAUDE.md Files
+
+**Problem it solves:** `docs/footguns.md` is a central index the agent must explicitly load. If it doesn't load the file, it doesn't see the warnings. A local CLAUDE.md at `src/auth/CLAUDE.md` is read automatically whenever Claude works in that directory — no explicit loading required.
+
+**Source:** Claude Code's automatic CLAUDE.md loading behaviour (reads CLAUDE.md in the working directory plus all ancestor directories up to the project root).
+
+**Design decision:** Footgun entries that map to a specific directory are propagated (not moved) as one-line summaries to that directory's local CLAUDE.md. The central footguns.md remains the source of truth; local files are read-time copies for automatic loading. Put the guardrail where the danger is, not in a file the agent might skip.
+
+**Guard against over-creation:** Only create local files when a directory has 2+ footgun entries, is an Ask First boundary, or has conventions differing from the project default. Most directories don't qualify. A flat-structure library rarely needs any.
+
+---
+
+## Project Shape: App vs Library
+
+**Problem it solves:** A one-size-fits-all plan wastes instruction budget on irrelevant content. A PHP library doesn't need `/research` (single-domain, the READ step suffices), permission profiles (single language), or `confusion-log.md` (single-domain confusion is rare). An app with 14 footguns across TypeScript and Rust needs all of these.
+
+**Source:** Cross-referencing two real implementations — a Tauri app (121-line CLAUDE.md, 6 skills, 14 footguns) and a PHP library (99-line CLAUDE.md, 3 skills, 6 footguns). Same plan, same prompts, meaningfully different outputs. (BlunderGOAT Scanner, BlunderGOAT CC)
+
+**Design decision:** Every section in the plan that differs by project shape includes explicit app vs library guidance. The adaptation table in the plan makes this visible in one place.
+
+---
+
+## Skill Justification Test
+
+**Problem it solves:** Skill proliferation. Early versions of the plan had 8+ skills. Each skill consumes instruction budget when loaded and creates maintenance burden. Most didn't earn their place — they were templates, not workflows.
+
+**Source:** Direct experience. Four skills were downgraded during the v0.8 revision after failing the justification test.
+
+**Design decision:** A skill must have at least one of: a distinct artefact, a hard workflow gate, a special failure mode, or a repeatable structured output. The plan documents which skills passed and which were downgraded to sections within other files. This prevents future skill sprawl.
+
+| Former Skill        | Why it failed                                          | Where it went                       |
+| ------------------- | ------------------------------------------------------ | ----------------------------------- |
+| `/annotation-cycle` | No distinct artefact — it's a planning refinement step | Section in mob elaboration playbook |
+| `/sbao-synthesis`   | Template, not a workflow with gates                    | Section in SBAO planning playbook   |
+| `/review-triage`    | Normal review behaviour, not a distinct mode           | Review branch of the ACT step       |
+| `/revert-rescope`   | Tactic (2 sentences), not a workflow                   | Paragraph in VERIFY/stop-the-line   |
+
+---
+
+## Instruction Budget Constraint
+
+**Problem it solves:** More instructions doesn't mean better compliance. It means worse compliance across the board. Degradation is uniform, not sequential — the model doesn't just ignore rules at the bottom; it gets worse at following all of them equally.
+
+**Sources:** HumanLayer (auto-generated context data), Philipp Schmid (instruction following limits), GitHub 2,500-repo analysis (tool mention uplift)
+
+**Design decision:** Hard line target (100 for libraries, 120 for apps, never over 150). Cut priority list for when you go over. "Never cut" list for the three things that matter most: execution loop, autonomy tiers, definition of done. Code examples beat prose — one snippet communicates more per token than three paragraphs.
+
+**Why these specific targets:** The PHP library's first pass produced 127 lines (27 over the 100-line target). Compression got it to 99. The Tauri app stabilised at 121. Both are well under the 150 hard ceiling, leaving headroom for the system prompt's ~50 instruction overhead.
+
+---
+
+## Section 1.1: Default Execution Loop
+
+Each step exists because a specific failure mode is common enough to warrant structural prevention.
+
+### READ
+
+**Problem:** Claude fabricates codebase facts. It guesses file contents, dependency versions, API contracts, and configuration values without reading the actual files. The guesses are confident and often plausible, making them hard to catch.
+
+**Source:** Direct experience — asked about a dependency, Claude said it was a local path dependency, it was actually installed from a package registry. It never read the manifest. (BlunderGOAT CC)
+
+**Design decision:** READ is the first step, not optional. For multi-layer apps, read both sides of a boundary before changing either. For libraries, read tests alongside implementation. The "never fabricate" rule is reinforced with a concrete example showing what fabrication looks like vs what reading-first looks like.
+
+### CLASSIFY
+
+**Problem:** Two distinct failures. (1) Claude can't distinguish questions from directives — "did you also improve X?" gets treated as "improve X." (2) Claude drifts between modes silently — you ask it to explain something, halfway through it starts editing files.
+
+**Source:** The question/directive confusion was exposed by the anti-rationalisation hook (see Appendix A in the plan). A correct "No — want me to?" answer was rejected as "asking permission instead of implementing." The mode drift was observed repeatedly across both app and library work. (BlunderGOAT CC, Oruç)
+
+**Design decision:** CLASSIFY forces two declarations before any action: complexity level (Hotfix / Standard / System / Infra) and mode (Plan / Implement / Explain / Debug / Review). Mode transitions must be explicit. The question vs directive disambiguation rule exists specifically because this confusion was the trigger for a full day of wasted hook engineering.
+
+### ACT
+
+**Problem:** Planning loops and premature fixes. In Plan mode, Claude reads file after file without producing an artefact. In Debug mode, Claude starts fixing before understanding the bug. In Explain mode, Claude edits code nobody asked it to edit.
+
+**Source:** Direct observation across multiple sessions. The "4th file read without writing = stop exploring" heuristic was calibrated from repeated planning loops where Claude read 8-12 files, produced nothing, and ran out of context. (Oruç, Microsoft AutoDev)
+
+**Design decision:** Each mode has explicit behaviour constraints in a table. State declaration is mandatory ("State: [MODE] | Goal: [one line] | Exit: [condition]"). Switching modes requires an explicit statement with a reason. The anti-BDUF guard prevents premature abstraction (creating interfaces with one implementation, building configurability nobody asked for).
+
+### VERIFY
+
+**Problem:** Claude declares victory early. Tests pass, but the old function name still appears in three files because nobody grepped after the rename. Or tests pass but behaviour subtly shifted in a way the tests don't cover.
+
+**Source:** Direct experience — post-rename grep finding stale references was the specific incident that led to DoD gate #6. The stop-the-line escalation levels come from incident response patterns. (awslabs/aidlc, Microsoft AutoDev)
+
+**Design decision:** Tests after every meaningful change, not just at the end. Two-level escalation: Level 1 (isolated, note and continue) vs Level 2 (cross-boundary or security, full stop). The "two corrections on same issue = cut losses" rule prevents infinite fix loops — if the approach keeps changing direction, rewind rather than iterate.
+
+### LOG
+
+**Problem:** The agent repeats the same mistakes across sessions. Without a learning loop, every conversation starts from zero — same fabrications, same mode drift, same early victory declarations.
+
+**Source:** Direct experience building two projects over weeks. The same lesson was learned 3-4 times before being written down. The two-file split (lessons.md for agent behaviour, footguns.md for architectural landmines) emerged because they serve different purposes and load at different times. (BlunderGOAT CC)
+
+**Design decision:** Two complementary files with distinct scopes. lessons.md captures agent behavioural mistakes. footguns.md captures cross-domain architectural traps. confusion-log.md (apps only) captures structural navigation difficulty. Context-based loading rules prevent wasting budget on irrelevant log content. Max 15 active lessons with pattern promotion prevents unbounded growth.
+
+---
+
+## Section 1.2: Autonomy Tiers
+
+**Problem it solves:** All-or-nothing permission models. Either the agent can do everything (dangerous) or must ask for everything (slow). Most actions are safe and reversible; a few are dangerous and irreversible. The tiers match the permission level to the risk.
+
+**Source:** Trail of Bits claude-code-config (deny patterns for dangerous commands), awslabs/aidlc (structured agent lifecycle with approval gates)
+
+**Design decision:** Three tiers — Always (tests, lint, read, write within scope), Ask First (boundary-crossing changes with micro-checklist), Never (delete tests, modify secrets, push main). The micro-checklist for Ask First items forces the agent to prove it has read the related code, checked for footguns, and knows the rollback command before proceeding.
+
+**Why a micro-checklist, not just "ask first":** Asking "can I change the auth middleware?" without context forces the human to investigate. The checklist front-loads the investigation to the agent, making the human's approval decision informed rather than a rubber stamp.
+
+---
+
+## Section 1.3: Definition of Done
+
+**Problem it solves:** "Done" means different things in different contexts. Without explicit gates, the agent says "task complete" after tests pass — even if old patterns remain after a rename, logs weren't updated, or a boundary was crossed without approval.
+
+**Source:** Repeated incidents where "tests green" was treated as done. The six gates were accumulated from real failures: gate #6 (grep after rename) came from a specific incident where three files still referenced an old function name. (BlunderGOAT CC)
+
+**Design decision:** Six gates, all must be true. No shortcuts. The agent cannot say "task complete" until it can confirm all six. This is a MUST-level rule that is never cut during compression.
+
+---
+
+## Section 1.4: Working Memory and Handoffs
+
+**Problem it solves:** Context window fills up during multi-turn tasks. The agent loses track of what it's done, what's left, and what decisions were made. When a session ends mid-task, the next session starts from scratch.
+
+**Source:** Boris Tane's Claude Code workflow (handoff protocols), direct experience with context exhaustion on 10+ turn tasks.
+
+**Design decision:** Working Notes in tasks/todo.md for 5+ turn tasks. Context escalation ladder (/compact → split → /clear). Handoff template with five sections (Status, Current State, Key Decisions, Known Risks, Next Step). The escalation ladder prevents the common failure of running out of context without a recovery plan.
+
+---
+
+## Phase 1 Skills
+
+### /preflight
+
+**Problem:** Shipping broken builds. The agent finishes work and says "done" without running the full check suite. Individual checks (just tests, just lint) miss issues that the full pipeline catches.
+
+**Design decision:** Mechanical, repeatable structured output with RFC 2119 priorities. MUST items (type-check, lint, compile) cannot be skipped. SHOULD items (full test suite, formatter) can be skipped with reason. The skill produces a structured pass/fail report, not prose.
+
+### /debug-investigate
+
+**Problem:** Agents guess fixes before understanding the bug. The instinct is to "just try something" — swap a value, add a null check, toggle a flag. This works ~30% of the time and creates confusing diffs the other 70%.
+
+**Source:** Microsoft AutoDev paper (diagnosis before intervention), direct experience with premature fix attempts that obscured the root cause.
+
+**Design decision:** Hard gate — diagnosis with file:line evidence first, fixes only after human reviews findings. The explicit "If you want to 'just try something' before tracing the code path, STOP" instruction exists because this failure mode is nearly universal.
+
+### /audit
+
+**Problem:** Fabricated findings. Audits are high-stakes — false positives erode trust, false negatives create risk. LLMs are reliably bad at distinguishing real findings from plausible-sounding ones they invented.
+
+**Design decision:** Four-pass structure with an explicit fabrication gate at pass 4. Discovery → Verification (re-read each finding) → Prioritisation → Self-Check ("did I fabricate this?"). MUST NOT propose fixes — the audit's job is to find issues, not solve them.
+
+### /research (apps only)
+
+**Problem:** Planning without understanding the codebase. The agent proposes an architecture or approach based on assumptions about how the code works, then discovers midway through implementation that the assumptions were wrong.
+
+**Design decision:** Hard gate — produce research.md with files involved, request flow, boundaries touched, and risks/gotchas (minimum 3 with file:line evidence). No planning until human reviews. Libraries skip this because the READ step is sufficient for single-domain codebases.
+
+### /code-review (apps only)
+
+**Problem:** Rubber-stamp reviews. Without structure, the agent says "looks good" or lists trivial style issues while missing architectural concerns.
+
+**Design decision:** Structured output with RFC 2119 constraints and autonomy tier awareness. The reviewer must identify which boundaries are touched and what the blast radius of the change is.
+
+---
+
+## Phase 1 Hooks
+
+### deny-dangerous.sh (PreToolUse)
+
+**Problem:** CLAUDE.md "never" rules work ~70% of the time. A rule saying "never use rm -rf" is behavioural guidance — the model might follow it, might not. A PreToolUse hook that blocks `rm -rf` before it executes works 100% of the time.
+
+**Source:** Trail of Bits claude-code-config (deny patterns and exit code strategy)
+
+**Design decision:** Deterministic enforcement at the tool level. Exit 2 with an error message telling Claude what to do instead (not just "blocked"). Project-specific deny rules for files that must be modified through tooling (binary dictionaries, generated code, lock files).
+
+### stop-lint.sh (Stop hook)
+
+**Problem:** Formatting and lint issues accumulate during a session. Without continuous checking, the agent produces a batch of violations that are harder to fix after the fact.
+
+**Source:** Direct experience, BlunderGOAT CC
+
+**Design decision:** Stack-adaptive — check `git diff` for modified file types, run only relevant checks. MUST exit 0 even on errors (non-zero exit causes infinite fix loops — this was a hard-won lesson). Infinite loop guard. Exclude slow checks (>10s) — those go in /preflight.
+
+**Why exit 0 on errors:** Stop hooks run after every Claude turn. A non-zero exit tells Claude "something failed, fix it." Claude tries to fix it. The hook runs again. If the fix doesn't clear the error, Claude loops forever. Exit 0 with errors to stderr makes the feedback informational, not imperative.
+
+### format-file.sh (PostToolUse)
+
+**Problem:** Manual formatting after every edit is tedious and error-prone. The agent's edits don't match the project's formatting conventions, creating noisy diffs.
+
+**Design decision:** Automatic formatting on every Edit/Write. Format by file extension using the project's configured formatter. Silence failures — formatting is best-effort, not a gate.
+
+### Anti-rationalisation hook (removed)
+
+**Problem it tried to solve:** The agent declaring victory without completing work — calling issues "pre-existing," deferring to follow-ups nobody asked for, listing problems without fixing them.
+
+**Why it failed:** Prompt-type Stop hooks only see the assistant's response. They cannot read the conversation. Intent detection is always inferred, never observed. Six versions in one day, each failing in a different way. The false positive rate (~30%) eroded trust faster than the success rate (~70%) built it.
+
+**Source:** Direct experience — one full day of iteration. Documented in Appendix A of the plan and "The Hook Saga" section of the article.
+
+**Design decision:** Removed entirely. Deterministic command hooks for mechanical enforcement. CLAUDE.md rules for behavioural guidance. Prompt hooks for semantic judgement are structurally unsound with current hook architecture.
+
+---
+
+## Phase 1 Security Hardening
+
+**Problem it solves:** AI agents can execute arbitrary shell commands. Without deny rules, a hallucinated or misinterpreted instruction could delete files, push to production, or expose secrets.
+
+**Source:** Trail of Bits claude-code-config (comprehensive deny pattern analysis)
+
+**Design decision:** Defence in depth. Layer 1: deny-dangerous PreToolUse hook (deterministic blocks). Layer 2: gitleaks pre-commit scanning (manual setup — documented, not executed, because it requires global git config changes). Layer 3: dependency audit in /preflight skill. The manual setup for gitleaks is deliberate — `git config --global core.hooksPath` affects every repo on the machine, which is not something an AI agent should change.
+
+---
+
+## Phase 2: Agent Evals
+
+**Problem it solves:** CLAUDE.md changes can silently regress agent behaviour. Adding a new rule, removing an old one, or tweaking wording can cause previously-correct behaviour to break. Without regression testing, these regressions are discovered in production work — the worst possible time.
+
+**Source:** Direct experience — behavioural regressions after CLAUDE.md edits on the Tauri app. A rule change that improved one workflow broke another. (BlunderGOAT CC)
+
+**Design decision:** An `agent-evals/` directory with flat .md files, each containing a replay prompt from a real incident. When CLAUDE.md or skills change, replay the prompts and verify the agent still handles them correctly. Start with real incidents; for projects with no incident history, seed 1-2 from common stack failure modes and replace with real ones as they occur.
+
+**Why flat files, not folders:** The initial design used one folder per eval. In practice, each eval is a single .md file with no supporting assets. The folder structure added navigation friction with no benefit.
+
+---
+
+## Phase 2: RFC 2119 Pass
+
+**Problem it solves:** All rules treated as equally important. The agent can't distinguish between "you MUST run tests" and "you MAY skip the formatter during debugging." Without priority markers, the model allocates equal attention to everything — and when budget is tight, it drops important rules as readily as optional ones.
+
+**Source:** RFC 2119 (standard for priority language in technical specifications), applied to AI agent instructions.
+
+**Design decision:** MUST for the execution loop, autonomy tiers, and definition of done. SHOULD for log hygiene, working memory, session handoffs, footgun propagation. MAY for structural debt trigger, communication when blocked. Applied in the same pass as prose compression — two birds, one edit.
+
+---
+
+## Phase 2: Permission Profiles
+
+**Problem it solves:** Different team roles need different permission scopes. A frontend developer shouldn't be editing Rust backend files. An infrastructure engineer shouldn't be changing React components. Without profiles, the agent has full access regardless of who's using it.
+
+**Source:** Claude Code's native `--profile` flag support.
+
+**Design decision:** Apps only — libraries with a single language rarely need role-scoped permissions. Each profile restricts Edit and Bash permissions to relevant file patterns. Always allows Read everywhere — restricting reads prevents the agent from understanding context.
+
+---
+
+## Adoption Tiers
+
+**Problem it solves:** The full system is too much for a new project or a solo developer. Trying to implement everything at once creates setup fatigue and maintenance burden for features that aren't needed yet.
+
+**Source:** Direct experience — the Tauri app built up the system over weeks. The PHP library implemented it in 2 sessions. Different starting points, different tier needs.
+
+**Design decision:** Three tiers with clear "when to use" guidance. Minimal (CLAUDE.md + deny-dangerous hook) for getting started. Standard (+ skills + hooks + local CLAUDE.md files) for active development. Full (+ agent evals + CI + profiles) for long-lived projects with incident history. Each tier is self-contained — you don't need to plan for the next tier while implementing the current one.
+
+---
+
+## Quarterly Audit
+
+**Problem it solves:** The system accumulates rules that outlive their usefulness. A footgun that was critical six months ago may have been fixed in code. A lesson that was important when the agent was less capable may now be default behaviour.
+
+**Design decision:** Periodic re-count, stale rule check, and the question: "if I removed this, would the model still do the right thing?" Rules that once helped become constraints as models improve. The system is designed to get smaller over time, not larger.
diff --git a/00-1-ai-workflow-human-instructions_v1.3.md b/00-1-ai-workflow-human-instructions_v1.3.md
new file mode 100644
index 0000000..a98c242
--- /dev/null
+++ b/00-1-ai-workflow-human-instructions_v1.3.md
@@ -0,0 +1,145 @@
+# AI Workflow Improvement — Human Instructions
+
+**Version:** 1.1 | 2026-03-14
+**Companion to:** `ai-workflow-improvement-plan-prime.md` (plan) and `ai-workflow-implement-prompts-prime.md` (prompts)
+
+---
+
+## Reading Order
+
+1. **This file** — how to start
+2. **The article** (`ai-workflow-ARTICLE-prime.md`) — why this exists, real implementation data
+3. **The prompts** (`ai-workflow-implement-prompts-prime.md`) — what you run
+4. **The plan** (`ai-workflow-improvement-plan-prime.md`) — full reference for every design decision
+5. **The rationale** (`ai-workflow-design-rationale-prime.md`) — deep dives on why each section exists
+
+## What This Is
+
+A system that gives Claude Code a 5-step execution loop (READ → CLASSIFY → ACT → VERIFY → LOG) instead of a wall of rules. Two files do the work — a design doc (the plan) and a set of prompts you feed to Claude Code. You run the prompts; Claude Code builds the system for your project.
+
+## Before You Start
+
+1. **Copy both files into your project root:**
+   - `ai-workflow-improvement-plan-prime.md`
+   - `ai-workflow-implement-prompts-prime.md`
+
+2. **Rename if needed.** The prompts reference `ai-workflow-improvement-plan-prime.md` by exact filename. If your copies have prefixes or version suffixes, rename them to match.
+
+3. **Audit your existing guidelines file.** If you have an `ai-agent-guidelines.instructions.md` (or similar), open the prompts file and read the "Before You Start: Guidelines Ownership Audit" section. Remove overlapping content from guidelines *manually* before running any prompts. This is the one step you do by hand.
+
+4. **Know your project shape.** You'll need to fill in blanks in the prompts:
+   - Is this an **APP** or a **LIBRARY**?
+   - Languages, build command, test command, lint command, format command
+
+## Implementation Order
+
+Run these in Claude Code. Copy each prompt from the prompts file, fill in the bracketed placeholders, paste into Claude Code.
+
+| Step | Prompt | What It Creates | Time |
+|------|--------|-----------------|------|
+| **Phase 0** | Phase 0 bootstrap | CLAUDE.md + deny-dangerous hook + settings.json | ~5 min |
+| **Phase 1a** | Prompt A (new) or Prompt B (existing CLAUDE.md) | CLAUDE.md, docs seed files, architecture.md, local CLAUDE.md files | ~15 min |
+| **Phase 1b** | Phase 1b — Skills | 3-5 skill files under `.claude/skills/` | ~10 min |
+| **Phase 1c** | Phase 1c — Enforcement | Hooks, CI workflow, gitignore additions | ~10 min |
+| **Phase 2** | Phase 2 | Agent evals, RFC 2119 pass, permission profiles | ~15 min |
+
+**Skip Phase 0** if you're running Phase 1 (Phase 0 is a minimal bootstrap for when you want just the basics).
+
+**Phase 2 can run immediately after Phase 1** — the medical scribe ran all phases in one session. Waiting gives you more real incidents for evals, but even early-stage projects with a short git history can seed useful evals.
+
+## Choosing Your Path
+
+```
+New project, no CLAUDE.md exists?
+  → Phase 0 (minimal) OR Phase 1a Prompt A (full)
+
+Existing project with a CLAUDE.md full of domain content?
+  → Phase 1a Prompt B (migrates domain content to docs/domain-reference.md)
+
+Just want the bare minimum to try it?
+  → Phase 0 only. Add skills and hooks later.
+```
+
+## What to Check After Each Phase
+
+**After Phase 1a:**
+- [ ] CLAUDE.md line count reported — under 120 (apps) or 100 (libraries)?
+- [ ] If Prompt B: open `docs/domain-reference.md` and verify nothing was silently dropped. Compare against the original CLAUDE.md
+- [ ] `docs/footguns.md` contains real footguns with file:line evidence, not hypothetical ones
+- [ ] Budget a second pass — agents aggressively cut content during compression. The anti-BDUF guard is commonly dropped then needed back
+
+**After Phase 1b:**
+- [ ] Router table in CLAUDE.md references all skill directories
+- [ ] Preflight checks pass
+
+**After Phase 1c:**
+- [ ] `.claude/settings.json` is valid JSON
+- [ ] Test the deny-dangerous hook: ask Claude Code to run `rm -rf /` — it should be blocked
+- [ ] Stop hook exits 0 even when it finds issues (non-zero = infinite loops)
+
+**After Phase 2:**
+- [ ] CLAUDE.md still under line target after RFC 2119 pass
+- [ ] Agent evals are from real incidents, not invented scenarios
+
+## The Adoption Tiers
+
+You don't have to do everything. Pick your tier:
+
+| Tier | What You Run | Good For |
+|------|-------------|----------|
+| **Minimal** | Phase 0 only | Trying it out, solo project |
+| **Standard** | Phase 1a + 1b + 1c | Active development |
+| **Full** | Phase 1 + Phase 2 | Long-lived project with incident history |
+
+## Ongoing Maintenance
+
+**Weekly:** Run `/insights` in Claude Code (analyses your recent session history for recurring patterns). Look for friction that could become a new rule or footgun.
+
+**When something breaks:** After Claude causes a bug, add it to `docs/lessons.md` (behavioural) or `docs/footguns.md` (architectural). If it's worth regression-testing, create an agent eval in `agent-evals/`.
+
+**Quarterly:** Re-count CLAUDE.md lines. Check for stale rules. Ask: "if I removed this, would the model still do the right thing?" Archive lessons not triggered in 30+ days.
+
+**When models improve:** The system is designed to shrink. Rules that compensated for model weaknesses become unnecessary. Delete them.
+
+## Common Gotchas
+
+- **Consider separate sessions per phase.** The prompts were split to stay within instruction budget. One session per phase is safest. If context budget allows (smaller codebases), running all phases sequentially in one session can work — the medical scribe did this successfully.
+- **The migration (Prompt B) drops content silently.** Sections that partially overlap with your guidelines file get cut without warning. Always diff.
+- **First-pass CLAUDE.md is usually over target.** Budget a compression pass. The plan has a cut priority list — essential commands go first, execution loop never gets cut.
+- **Hooks must use absolute paths.** All hook commands use `git rev-parse --show-toplevel`. Relative paths break when the working directory changes.
+- **Stop hooks must exit 0.** Even when they find errors. Non-zero exit codes trap Claude in infinite fix loops.
+- **Secret scanning is manual.** The `gitleaks` setup requires `git config --global` which affects all repos. Do it yourself, don't let Claude Code do it.
+
+## File Reference
+
+After full implementation, your project will have:
+
+```
+CLAUDE.md                              ← Layer 1: the loop (~100-120 lines)
+src/auth/CLAUDE.md (etc.)              ← Layer 2: local context (if qualifying dirs exist)
+.claude/skills/preflight/SKILL.md      ← Layer 3: skills
+.claude/skills/debug-investigate/SKILL.md
+.claude/skills/audit/SKILL.md
+.claude/skills/research/SKILL.md       ← apps only
+.claude/skills/code-review/SKILL.md    ← apps only
+.claude/hooks/deny-dangerous.sh        ← enforcement
+.claude/hooks/stop-lint.sh
+.claude/hooks/format-file.sh
+.claude/settings.json
+docs/lessons.md                        ← learning loop
+docs/footguns.md
+docs/confusion-log.md                  ← apps only
+docs/architecture.md
+docs/domain-reference.md               ← Prompt B path only
+docs/decisions/                        ← apps only
+tasks/handoff-template.md
+agent-evals/                           ← Phase 2
+.github/workflows/context-validation.yml  ← Phase 2
+```
+
+## Further Reading
+
+- **The plan** (`ai-workflow-improvement-plan-prime.md`) — full system design, rationale for every section, hook design patterns, security hardening details
+- **The article** (`ai-workflow-ARTICLE-prime.md`) — narrative version with real implementation data from two projects
+- **The playbook repo** ([ai-planning-playbook](https://github.com/blundergoat/ai-planning-playbook)) — planning prompts (mob elaboration, SBAO ranking, milestone planning) that feed into Phase 2 playbook updates
+- **Codex adaptation** (`codex-workflow-implement-prompt.md`) — implementation prompt for adapting the workflow system to OpenAI Codex
diff --git a/00-1-ai-workflow-implement-prompts-prime_v1.3.md b/00-1-ai-workflow-implement-prompts-prime_v1.3.md
new file mode 100644
index 0000000..6605dff
--- /dev/null
+++ b/00-1-ai-workflow-implement-prompts-prime_v1.3.md
@@ -0,0 +1,393 @@
+# AI Workflow Improvement Plan — Implementation Prompts
+
+**Implements:** `ai-workflow-improvement-plan-prime.md` (Prime edition, v1.3)
+
+> **Filename note:** The prompts reference `ai-workflow-improvement-plan-prime.md`. If your copy has a prefix or version suffix (e.g., `00-1-..._v1.0.md`), rename it to match before running the prompts.
+
+## Changelog
+
+| Version | Date       | Changes                                                                                                                                                                                                 |
+| ------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| v1.3    | 2026-03-14 | Added rollback instructions. Phase 2 timing made flexible. Version references aligned with plan v1.3                                                                                                    |
+| v1.2    | 2026-03-13 | Version references updated to v1.2. Prompt B inline reference clarified. Terminology aligned with plan                                                                                                  |
+| v1.1    | 2026-03-13 | Renamed golden tasks to agent evals. Flat file structure (`agent-evals/*.md`). Added README.md requirement                                                                                              |
+| v1.0    | 2026-03-11 | Guidelines ownership split step. Existing CLAUDE.md migration. Filename references aligned. Secret scanning made manual. Library vs app branching. Phase 1a scope reduced. Redundant guardrails removed |
+| v0.9    | 2026-03-09 | Added local CLAUDE.md files. Added docs/architecture.md and ADRs. Footgun propagation rule                                                                                                              |
+| v0.8    | 2026-03-09 | Split Phase 1 into 1a/1b/1c. Phase 0 bootstrap. Question/directive disambiguation                                                                                                                       |
+| v0.7    | 2026-03-08 | Removed anti-rationalisation hook. Security hardening                                                                                                                                                   |
+
+---
+
+## Before You Start: Guidelines Ownership Audit
+
+If your project has an `ai-agent-guidelines.instructions.md` file (or similar shared coding standards file), audit it FIRST. Remove any content that overlaps with what CLAUDE.md will own:
+
+**Remove from guidelines (CLAUDE.md will own these):**
+
+- Execution loop / workflow steps
+- Definition of Done
+- Stop-the-line rules
+- Working memory / context management conventions
+- Autonomy tiers or permission rules
+- Log file references (lessons.md, footguns.md)
+
+**Keep in guidelines (these stay):**
+
+- Operating principles (correctness over cleverness, smallest change, etc.)
+- Engineering best practices (API discipline, testing, type safety)
+- Communication style (concise, one question, verification story)
+- Error handling patterns (triage checklist, safe fallbacks, rollback)
+- Task management templates
+- Git hygiene
+
+Do this manually before running any prompts. The prompts assume the split is already clean.
+
+**Commit or stash first.** Prompt B overwrites CLAUDE.md and creates domain-reference.md. Run `git stash` or `git commit` before starting. If the output is wrong, `git checkout -- CLAUDE.md` restores the original.
+
+---
+
+## Phase 0 (New Project Bootstrap)
+
+Use ONLY when setting up a brand new project with no existing CLAUDE.md or workflow config.
+
+```
+I'm setting up AI workflow configuration for this project. The stack is:
+- Languages: [list your languages]
+- Build: [your build command]
+- Lint: [your lint command]
+- Test: [your test command]
+- Format: [your format command]
+
+Read ai-workflow-improvement-plan-prime.md. Generate the Minimal tier:
+1. CLAUDE.md (under 120 lines) with the default execution loop, autonomy
+   tiers, definition of done, and router table adapted to my project
+2. .claude/hooks/deny-dangerous.sh (PreToolUse hook blocking dangerous commands)
+3. .claude/settings.json with the deny-dangerous hook registered
+
+Do NOT create skills, profiles, agent evals, or CI workflows yet.
+After creating the files, count CLAUDE.md lines and report the count.
+```
+
+---
+
+## Phase 1
+
+Phase 1 is split into three prompts. Run them in order.
+
+### Phase 1a — Foundation
+
+**Choose your starting point:**
+
+- **No existing CLAUDE.md:** Use Prompt A below
+- **Existing CLAUDE.md with domain content:** Use Prompt B below (moves domain content to a reference doc, then builds the workflow CLAUDE.md)
+
+#### Prompt A — New CLAUDE.md (no existing file)
+
+```
+Read ai-workflow-improvement-plan-prime.md. This is our AI workflow
+improvement plan (Prime edition, v1.3). Phase 1 builds Layers 1–3
+(runtime, local context, and skills).
+
+This project is a [APP / LIBRARY]. The stack is:
+- Languages: [list]
+- Build: [command]
+- Test: [command]
+- Lint: [command]
+- Format: [command]
+
+Implement Phase 1a now.
+
+CLAUDE.md (Layer 1 — Runtime):
+1. Create CLAUDE.md. Target: under [120 for apps / 100 for libraries] lines.
+   Use ❌/✅ examples not prose. Structure:
+
+   a) Version header (v1.0 — YYYY-MM-DD)
+
+   b) Default Execution Loop: READ → CLASSIFY → ACT → VERIFY → LOG
+      - READ: read relevant files first, never fabricate codebase facts
+        (include ❌/✅ example)
+      - CLASSIFY: complexity and mode table. Include question vs directive
+        disambiguation
+      - ACT: behaviour per mode as a table. State declaration rule.
+        Anti-planning-loop rule. Anti-BDUF guard with ❌/✅
+      - VERIFY: continuous test loop. Stop-the-line with two-level
+        escalation. Revert-and-rescope tactic
+      - LOG: docs/lessons.md and docs/footguns.md [add docs/confusion-log.md
+        for apps] with when-to-use table. Footgun propagation rule.
+        Context-based loading rules
+
+   c) Autonomy Tiers: Always / Ask First / Never
+      - Adapt Ask First boundaries for THIS project's specific risks
+      - Include micro-checklist for Ask First items
+
+   d) Definition of Done: 6 gates
+
+   e) Working Memory: Working Notes for 5+ turn tasks, context escalation
+      ladder, session handoff protocol
+
+   f) Sub-Agent Objectives: one focused objective, structured return, 5-call budget
+
+   g) Communication When Blocked: one question with recommended default
+
+   h) Router table: pointers to skills, docs, playbooks, profiles
+
+   i) Essential commands
+
+   If over line target, apply cut priority from the plan.
+
+DOCS (seed files):
+2. docs/lessons.md — Format header, empty Entries/Patterns sections
+3. docs/footguns.md — Read the actual codebase for real cross-domain
+   footguns. Seed with real ones only — do NOT invent hypothetical ones.
+4. [APPS ONLY] docs/confusion-log.md — Format header
+5. tasks/handoff-template.md — Status, Current State, Key Decisions,
+   Known Risks, Next Step
+
+ARCHITECTURE DOCS:
+6. docs/architecture.md — Read the codebase and write a short overview
+   (under 100 lines): what the system does, major components, data flows,
+   non-obvious constraints, deliberate trade-offs. Every line specific to
+   THIS codebase. TODOs for what you can't determine from reading the code.
+
+7. [APPS ONLY] docs/decisions/ directory with ADR template.
+   If you can identify 1-2 real architectural decisions from the code,
+   create ADR files. Do NOT invent decisions.
+
+LOCAL CLAUDE.md FILES (Layer 2):
+8. Read docs/footguns.md and the codebase structure. For directories with
+   2+ footgun entries, Ask First boundaries, or conventions differing from
+   project default: create a local CLAUDE.md (under 20 lines each).
+   If no directories qualify, create none and note why.
+
+VERIFICATION:
+- Count CLAUDE.md lines. MUST be under target.
+- Verify all docs seed files exist.
+- Report CLAUDE.md line count and number of local CLAUDE.md files created.
+```
+
+#### Prompt B — Existing CLAUDE.md (migrate domain content)
+
+```
+Read ai-workflow-improvement-plan-prime.md. This is our AI workflow
+improvement plan (Prime edition, v1.3). Phase 1 builds Layers 1–3
+(runtime, local context, and skills).
+
+This project is a [APP / LIBRARY]. The stack is:
+- Languages: [list]
+- Build: [command]
+- Test: [command]
+- Lint: [command]
+- Format: [command]
+
+The current CLAUDE.md has domain reference content (architecture,
+design patterns, important files, conventions). This needs to be
+preserved but separated from the workflow system.
+
+Implement Phase 1a now, in this order:
+
+STEP 1 — Move domain content:
+1. Read the current CLAUDE.md completely.
+2. Move ALL domain-specific reference content to docs/domain-reference.md.
+   Keep it intact — this is technical reference loaded on demand.
+   Domain content includes: architecture overviews, design patterns,
+   file tables, conventions, pipelines, matching strategies, dictionary
+   workflows — anything that describes HOW THE PROJECT WORKS rather than
+   how the AGENT SHOULD BEHAVE.
+3. Keep in CLAUDE.md: project identity (one line), essential commands,
+   and any agent-behavioural rules that already exist.
+
+STEP 2 — Rewrite CLAUDE.md:
+4. Rebuild CLAUDE.md with the execution loop. Target: under [120/100] lines.
+   Use ❌/✅ examples not prose. Structure:
+
+   a) Version header
+   b) Project identity (one line: what this project is)
+   c) Essential commands (compact)
+   d) Default Execution Loop: READ → CLASSIFY → ACT → VERIFY → LOG
+      Use sections (a) through (i) from Prompt A above, adapting
+      examples for this project's stack and domain
+   e) Autonomy Tiers with project-specific Ask First boundaries
+   f) Definition of Done: 6 gates
+   g) Working Memory and handoff protocol
+   h) Router table pointing to: docs/domain-reference.md, skills,
+      and all other docs files
+
+STEP 3 — Docs seed files:
+5. docs/lessons.md — Format header, empty
+6. docs/footguns.md — Read the codebase for real footguns. Seed real ones.
+7. [APPS ONLY] docs/confusion-log.md
+8. tasks/handoff-template.md
+
+STEP 4 — Local CLAUDE.md files (Layer 2):
+9. For qualifying directories only (2+ footguns, Ask First boundaries,
+   differing conventions). Under 20 lines each. Create none if no
+   directories qualify.
+
+VERIFICATION:
+- Count CLAUDE.md lines. MUST be under target.
+- Verify docs/domain-reference.md contains all moved content.
+- Verify all docs seed files exist.
+- Report CLAUDE.md line count, domain-reference.md line count, and
+  number of local CLAUDE.md files created.
+```
+
+### Phase 1b — Skills
+
+```
+Read ai-workflow-improvement-plan-prime.md and the CLAUDE.md created in
+Phase 1a. This phase creates skill files (Layer 3).
+
+This project is a [APP / LIBRARY].
+
+[FOR APPS — create 5 skills:]
+Create these skills under .claude/skills/:
+
+1. preflight/SKILL.md — RFC 2119 constraints. MUST run your stack's
+   build/lint checks. SHOULD run formatter, full test suite. MAY skip
+   formatter when debugging. Include dependency audit.
+2. research/SKILL.md — Minimum template: Files Involved, Request Flow,
+   Boundaries Touched, Risks/Gotchas (min 3 with file:line evidence).
+   Hard gate: no planning until human reviews research.md.
+3. debug-investigate/SKILL.md — Diagnosis-first. Include: "If you want
+   to 'just try something' before tracing the code path, STOP."
+   Include diagnosis output template.
+4. audit/SKILL.md — 4-pass: Discovery → Verification → Prioritisation →
+   Self-Check. Pass 4 fabrication gate. MUST NOT propose fixes.
+5. code-review/SKILL.md — Structured review with RFC 2119 constraints.
+
+[FOR LIBRARIES — create 3 skills:]
+Create these skills under .claude/skills/:
+
+1. preflight/SKILL.md — RFC 2119 constraints adapted for your stack.
+   Include mutation testing as SHOULD if configured.
+2. debug-investigate/SKILL.md — Diagnosis-first, adapted for your
+   project's architecture. Include the key code trace chain.
+3. audit/SKILL.md — 4-pass with fabrication gate. MUST NOT propose fixes.
+
+VERIFICATION:
+- Verify all skill files exist with required sections.
+- Verify CLAUDE.md router table references the skill directories.
+  Update the router table if needed.
+- Run preflight checks.
+```
+
+### Phase 1c — Enforcement
+
+```
+Read ai-workflow-improvement-plan-prime.md and the CLAUDE.md created in
+Phase 1a. This phase creates hooks and CI validation.
+
+HOOKS:
+1. .claude/settings.json — All hooks are command-type only.
+
+   PreToolUse hook: .claude/hooks/deny-dangerous.sh
+   - Matcher: "Bash"
+   - Block (exit 2 with error message telling Claude what to do instead):
+     - rm -rf without explicit path scoping
+     - git push to main/master/production
+     - git push --force (suggest --force-with-lease)
+     - chmod 777
+     - Pipe-to-shell (curl | bash, wget | sh)
+     - .env modifications
+     - git commit --no-verify or git commit -n
+     [ADD PROJECT-SPECIFIC BLOCKS: e.g., direct edits to binary/generated
+      files that must be modified through tooling]
+   - Exit 0 for everything else
+
+   Stop hook: .claude/hooks/stop-lint.sh
+   - Stack-adaptive: check git diff for modified file types, run relevant
+     checks only
+   - MUST exit 0 even when errors found (informational only)
+   - Guard against missing tools (command -v check)
+   - Infinite loop guard (STOP_HOOK_ACTIVE check)
+   - Exclude slow checks (>10s) — those go in /preflight
+
+   PostToolUse hook: .claude/hooks/format-file.sh
+   - Matcher: "Edit|Write"
+   - Format based on file extension using project's formatter
+   - Silence failures
+
+   HOOK PATH RESOLUTION:
+   ALL commands MUST use: bash "$(git rev-parse --show-toplevel)/.claude/hooks/your-hook.sh"
+
+   HOOK STRUCTURE in settings.json:
+   "PreToolUse": [{ "matcher": "Bash", "hooks": [{ ... }] }],
+   "Stop": [{ "hooks": [{ ... }] }],
+   "PostToolUse": [{ "matcher": "Edit|Write", "hooks": [{ ... }] }]
+
+GITIGNORE additions:
+   - .claude/settings.local.json
+   - tasks/todo.md
+   - tasks/handoff.md
+
+CI (for projects using GitHub Actions):
+2. .github/workflows/context-validation.yml:
+   - CLAUDE.md line count (warn if >target, error if >150)
+   - Router table file references exist
+   - Skills directories have SKILL.md files
+   - Local CLAUDE.md files are under 20 lines each
+
+SECRET SCANNING (manual step — document, don't execute):
+3. Add a comment to CLAUDE.md or a setup section in README noting:
+   "Secret scanning: install gitleaks, create ~/.git-hooks/pre-commit,
+   set git config --global core.hooksPath ~/.git-hooks"
+   Do NOT execute these commands — they affect all repos on the machine.
+
+VERIFICATION:
+- Verify .claude/settings.json is valid JSON.
+- Verify deny-dangerous.sh blocks: rm -rf, git push main, git push --force,
+  chmod 777, pipe-to-shell, --no-verify.
+- Run preflight checks.
+```
+
+---
+
+## Phase 2
+
+```
+Read ai-workflow-improvement-plan-prime.md and the current CLAUDE.md.
+Work through this list in order.
+
+AGENT EVAL SUITE:
+1. Create agent-evals/ directory for agent regression testing.
+   Add a README.md explaining what evals are and how to use them.
+
+   Search this codebase's git history and issues for real incidents.
+   For each, create agent-evals/[incident-name].md (flat files, not
+   subdirectories) with:
+   - Bug description
+   - Single replay prompt
+   - Expected outcome
+   - Known failure mode tested
+
+   If the codebase has fewer than 5 qualifying incidents, create as many
+   as exist. For projects with no incident history: create 1-2 from common
+   failure modes for your stack. Replace with real incidents as they occur.
+
+PLAYBOOK UPDATES (skip if docs/playbooks/ doesn't exist):
+2. If 02-mob-elaboration-prompt.md exists: add Parameters section,
+   category-first question approach, structured question output,
+   annotation cycle section.
+3. If 03-sbao-ranking-prompt.md exists: verify Keep/Drop/Decide synthesis.
+
+RFC 2119 PASS:
+4. Apply MUST/SHOULD/MAY to every rule in CLAUDE.md.
+   - MUST: execution loop steps, autonomy tiers, definition of done
+   - SHOULD: log hygiene, working memory, session handoffs, footgun propagation
+   - MAY: structural debt trigger, communication when blocked
+   Compress prose in the SAME pass. CLAUDE.md MUST stay under target.
+
+PER-ROLE PERMISSION PROFILES (apps only):
+5. Create .claude/profiles/ with profiles adapted to your stack.
+   Each profile restricts Edit and Bash permissions. Always Read: **.
+   Add to CLAUDE.md router table.
+
+CI VALIDATION:
+6. If not created in Phase 1c, create context-validation.yml.
+
+VERIFICATION:
+- Count CLAUDE.md lines. MUST stay under target after RFC 2119 pass.
+- Verify permission profile JSON files are valid (if created).
+- Run preflight.
+- Report CLAUDE.md line count.
+```
diff --git a/00-1-ai-workflow-improvement-plan-prime_v1.4.md b/00-1-ai-workflow-improvement-plan-prime_v1.4.md
new file mode 100644
index 0000000..8436857
--- /dev/null
+++ b/00-1-ai-workflow-improvement-plan-prime_v1.4.md
@@ -0,0 +1,715 @@
+# AI Workflow Improvement Plan — Prime Edition
+
+**Version:** Prime v1.4 (supersedes Prime v0.1–v1.3)
+**Last updated:** 2026-03-15
+**Implements:** 5-layer architecture with default execution loop
+
+Based on review of BlunderGOAT articles (SBAO, SEO Scanner case study, Claude Code Insights, Plan Before You Prompt) cross-referenced against: awslabs/aidlc-workflows, Ömer Faruk Oruç's claude.md, HumanLayer's CLAUDE.md research, Microsoft AutoDev paper, Boris Tane's Claude Code workflow, GitHub's 2,500-repo agents.md analysis, Propel's codebase structuring guide, and Trail of Bits claude-code-config.
+
+> **⚠️ This is the canonical version.** All implementation work should reference this file.
+
+**Playbook source:** The planning playbook prompts (mob elaboration, SBAO ranking, milestone planning, etc.) live in the [ai-planning-playbook](https://github.com/blundergoat/ai-planning-playbook) repo. Phase 2 updates modify copies of those prompts within the target project.
+
+---
+
+## Changelog
+
+| Version | Date       | Changes                                                                                                                                                                                                                                                                                                                                       |
+| ------- | ---------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| v1.4    | 2026-03-15 | Added permissions deny list (`*git commit*`, `*git push*`) as strongest enforcement layer. Three-tier enforcement model: permissions deny → hooks → CLAUDE.md rules. All 5 skills now apply to both apps and libraries (removed apps-only restriction on /research and /code-review). Added /review naming conflict warning — use /code-review to avoid shadowing Claude Code built-in. Security hardening checklist updated. Autonomy tiers enforcement note added. Codex prompt updated with deny-list gap acknowledgement |
+| v1.3    | 2026-03-14 | Fixed /rewind reference (not a real command). Fixed /compact percentage (not observable). Defined line count unit. Added rollback instructions to prompts. Added v0.2 to hook saga tables. Added ADRs to article tier table. Phase 2 timing made flexible. /insights explained in human instructions |
+| v1.2    | 2026-03-13 | Cross-file terminology alignment. Phase 1a/1b/1c mapping added to plan. Layer 5 description standardised. Adoption tiers synchronised across plan and article. Stale "golden tasks" references removed from all files                                                                                                                         |
+| v1.1    | 2026-03-13 | Renamed golden tasks to agent evals. Flat file structure (`agent-evals/*.md`). Added README.md requirement                                                                                                                                                                                                                                     |
+| v1.0    | 2026-03-11 | Guidelines ownership split (CLAUDE.md vs ai-agent-guidelines). Agent evals introduced (restructured to flat files in v1.1). Project-agnostic examples with adaptation callouts. Library vs app guidance throughout. Secret scanning moved to manual setup. Filename references aligned. Phase 1a split guidance for existing vs new CLAUDE.md |
+| v0.9    | 2026-03-09 | Local CLAUDE.md files for high-risk directories. Architecture Decision Records. Footgun propagation rule                                                                                                                                                                                                                                      |
+| v0.8    | 2026-03-09 | Portability: stack definition block, adoption tiers, bootstrap prompt, question/directive disambiguation. Phase 1 split into 1a/1b/1c                                                                                                                                                                                                         |
+| v0.7    | 2026-03-08 | Removed anti-rationalisation hook (see Appendix A). Added security hardening                                                                                                                                                                                                                                                                  |
+| v0.6    | 2026-03-08 | Hook prompt refined: pasted content detection                                                                                                                                                                                                                                                                                                 |
+| v0.5    | 2026-03-08 | Two-step hook prompt with JSON-only preamble                                                                                                                                                                                                                                                                                                  |
+| v0.4    | 2026-03-08 | Response-pattern intent detection                                                                                                                                                                                                                                                                                                             |
+| v0.3    | 2026-03-07 | Intent-aware hook prompt, hook structural limitations                                                                                                                                                                                                                                                                                         |
+| v0.2    | 2026-03-06 | Hook design patterns, exit code strategy, infinite loop guard                                                                                                                                                                                                                                                                                 |
+| v0.1    | 2026-03-06 | Initial Prime edition                                                                                                                                                                                                                                                                                                                         |
+
+---
+
+## System Architecture
+
+Five layers. Only Layer 1 loads every session. Everything else loads on demand.
+
+```
+Layer 1 — Runtime (CLAUDE.md, ~100-120 lines)
+    READ → CLASSIFY → ACT → VERIFY → LOG loop
+    Autonomy tiers, stop-the-line, mode switch, definition of done
+    Router table pointing to everything below
+
+Layer 2 — Local Context (directory-level CLAUDE.md files)
+    Auto-loaded when Claude works in that directory
+    High-risk boundaries, module-specific gotchas, local conventions
+
+Layer 3 — Skills (loaded via slash commands)
+    /preflight, /debug-investigate, /audit, /research, /code-review
+
+Layer 4 — Playbooks (planning tools, loaded on demand)
+    Mob elaboration, SBAO planning, milestone planning
+
+Layer 5 — Evaluation (quality infrastructure)
+    Agent eval suite, CI context validation
+```
+
+**Implementation scope:** Phase 1 builds Layers 1–3. Phase 2 builds Layer 5 and enhances Layers 1–4.
+
+### Guidelines Ownership Split
+
+Most projects accumulate two instruction layers: a project-specific CLAUDE.md and a shared coding standards file (`.github/instructions/ai-agent-guidelines.instructions.md` or similar). These MUST NOT overlap. Duplication creates conflicting specifics and wastes instruction budget.
+
+**CLAUDE.md owns** (project-specific, changes per project):
+
+- Default execution loop (READ → CLASSIFY → ACT → VERIFY → LOG)
+- Autonomy tiers (Always / Ask First / Never) — project-specific boundaries
+- Definition of Done — project-specific gates
+- Log file references (lessons.md, footguns.md, confusion-log.md)
+- Router table
+- Essential commands
+- Working memory and handoff conventions
+
+**ai-agent-guidelines owns** (shared, same across projects):
+
+- Operating principles (correctness over cleverness, smallest change, etc.)
+- Engineering best practices (API discipline, testing strategy, type safety)
+- Communication style (concise, ask one question, verification story)
+- Error handling patterns (triage checklist, safe fallbacks, rollback)
+- Task management templates (plan template, bugfix template)
+- Git and change hygiene
+
+**The test:** if a rule would be identical across every project you work on, it belongs in guidelines. If it changes per project (different Ask First boundaries, different essential commands, different DoD gates), it belongs in CLAUDE.md.
+
+**When adopting this system on a project with an existing guidelines file:** audit for overlap. Remove any execution loop, DoD, stop-the-line, working memory, or autonomy tier content from the guidelines file — those now live in CLAUDE.md. The guidelines file should shrink.
+
+### Layer 2: Local CLAUDE.md Files
+
+_Why: footguns.md is a central index the agent must remember to load. A local CLAUDE.md is read automatically when Claude works in that directory. Put the guardrail where the danger is, not in a file the agent might skip._
+
+Claude Code automatically reads any `CLAUDE.md` file in the directory it's working in, plus ancestors up to the project root. A file at `src/auth/CLAUDE.md` loads every time Claude touches auth code — no explicit loading required.
+
+**What goes in a local CLAUDE.md:**
+
+- Module-specific footguns (1-2 lines each)
+- Local conventions that differ from the project default
+- Cross-boundary warnings ("changes here affect X, Y, Z — read those too")
+- Hard constraints specific to this module
+
+**What does NOT go in a local CLAUDE.md:**
+
+- Duplicated project-wide rules (those live in the root CLAUDE.md)
+- Full architectural explanations (those live in docs/)
+- Anything longer than ~20 lines
+
+**Relationship to footguns.md:**
+
+- `docs/footguns.md` remains the central cross-domain index
+- Footgun entries that map to a specific directory are **propagated** (not moved) as one-line summaries
+- The central file is the source of truth; local files are read-time copies for automatic loading
+
+**When to create local CLAUDE.md files:**
+
+- A module has appeared 2+ times in footguns.md or confusion-log.md
+- A directory is an Ask First boundary (auth, billing, migrations, deployment)
+- A module has conventions that differ from the project default
+
+**When NOT to create them:**
+
+- For every directory (creates maintenance burden without value)
+- For simple modules with no cross-boundary impact
+- For libraries with flat directory structures (a single `src/` with no deep module hierarchy rarely needs local files)
+
+### Project Shape: App vs Library
+
+This plan is portable across project shapes. Key adaptation points:
+
+| Aspect                  | App (e.g., Tauri, Symfony full-stack)          | Library (e.g., PHP package, npm module)                      |
+| ----------------------- | ---------------------------------------------- | ------------------------------------------------------------ |
+| CLAUDE.md line target   | ~120 lines                                     | ~100 lines (less to route to)                                |
+| Skills                  | 5 (all core skills)                            | 5 (all core skills)                                          |
+| Ask First boundaries    | Auth, routing, deployment, API contracts, DB   | Public API signatures, dependency changes, config/data files |
+| Local CLAUDE.md files   | Likely needed for high-risk directories        | Rarely needed — flat structure                               |
+| confusion-log.md        | Yes — multi-domain confusion is common         | Optional — single domain, confusion signals are rarer        |
+| Agent evals             | Real incidents from production/dev history      | Common failure modes for the stack if no incident history    |
+| Permission profiles     | Useful (frontend/backend/infra lanes)          | Rarely needed — single language                              |
+| Cross-boundary concerns | Frontend ↔ backend, infra ↔ app, API contracts | Public API ↔ tests, data files ↔ encoding scripts            |
+
+### Skill Justification Test
+
+A skill should only exist if it has at least one of:
+
+- A **distinct artefact** (a file it produces)
+- A **hard workflow gate** (human must review before proceeding)
+- A **special failure mode** (LLMs are reliably bad at this without guardrails)
+- A **repeatable structured output** (mechanical, same shape every time)
+
+| Skill                | Justification                    | Projects |
+| -------------------- | -------------------------------- | -------- |
+| `/preflight`         | Repeatable structured output     | All      |
+| `/debug-investigate` | Special failure mode + hard gate | All      |
+| `/audit`             | Distinct artefact + hard gate    | All      |
+| `/research`          | Distinct artefact + hard gate    | All      |
+| `/code-review`       | Repeatable structured output     | All      |
+
+**⚠️ Naming conflict:** Claude Code has a built-in `/review` command. Do NOT create a skill named `review` — it shadows the built-in. Use `/code-review` as the skill name. If `.claude/skills/review/` already exists in your project, rename it to `.claude/skills/code-review/` or delete it.
+
+### What Was Downgraded and Where It Went
+
+| Former Skill        | Now Lives                                | Why downgraded                               |
+| ------------------- | ---------------------------------------- | -------------------------------------------- |
+| `/annotation-cycle` | Section in mob elaboration playbook (02) | Planning refinement — no distinct artefact   |
+| `/sbao-synthesis`   | Section in SBAO planning playbook (03)   | Template, not a workflow with gates          |
+| `/review-triage`    | Review branch of the default ACT step    | Normal review behaviour, not a distinct mode |
+| `/revert-rescope`   | Paragraph in VERIFY/stop-the-line        | Tactic, not a workflow                       |
+
+---
+
+## ⚠️ Instruction Budget Constraint (applies to ALL phases)
+
+**Source:** HumanLayer; Philipp Schmid research; GitHub 2,500-repo analysis
+
+Frontier thinking models reliably follow ~150-200 instructions. Claude Code's system prompt consumes ~50. That gives CLAUDE.md a budget of roughly **100-150 instructions** before performance degrades. Degradation is **uniform, not sequential** — too many instructions makes the model worse at following _all_ of them equally.
+
+**Key data points:**
+
+- Tools mentioned in AGENTS.md get used **160x more often** than unmentioned ones — essential commands are the highest-signal section.
+- Auto-generated context files reduce success rates by ~3% while increasing inference cost by over 20%.
+- **Code examples beat prose.** One ✅/❌ snippet communicates more per token than three paragraphs.
+
+**Governance rules:**
+
+```
+1. CLAUDE.md MUST stay under 150 lines. Target 100 (libraries) to 120 (apps). Count after every change.
+   Line count = wc -l CLAUDE.md. Blank lines, code fences, and table rows all count.
+2. Every rule in CLAUDE.md MUST be universally applicable to every session.
+   Situation-specific guidance belongs in skills, playbooks, or local CLAUDE.md files.
+3. Weekly /insights review: surface recurring friction, act on it.
+4. Quarterly audit: re-count, check for stale rules, ask "If I removed this,
+   would the model still do the right thing?"
+5. Prefer pointers over copies. CLAUDE.md references files, not inlines them.
+6. Prefer ✅/❌ examples over prose. Higher signal per token.
+7. Version your CLAUDE.md with a header and brief changelog.
+8. Local CLAUDE.md files: under 20 lines each.
+```
+
+**CLAUDE.md cut priority** (what to trim first if over target):
+
+1. Essential commands → move to separate referenced file
+2. Structural debt trigger → compress to one line
+3. Communication when blocked → compress to one line
+4. Sub-agent objectives → compress to two lines
+5. Working memory details → compress, keep handoff protocol
+
+**Never cut:** The execution loop, autonomy tiers, or definition of done.
+
+---
+
+## Phase 1: The Default Loop
+
+Build the runtime layer and core skills. **Create CLAUDE.md first** — skills reference its router table.
+
+**Implementation prompt mapping:** The implementation prompts split Phase 1 into three steps: **1a** (Foundation: sections 1.1–1.9 + Files + Architecture + Local CLAUDE.md), **1b** (Skills), **1c** (Hooks + Permissions + Security + CI).
+
+### 1.1 The Default Execution Loop
+
+The organising principle for CLAUDE.md. Every task follows this:
+
+**READ**
+
+- Read the relevant files first
+- For apps: read both sides for cross-boundary changes (auth, API contracts, routing, deployment)
+- For libraries: read tests alongside implementation, read data files alongside the code that uses them
+- Never fabricate codebase facts — if you haven't read it, say so
+
+```
+❌ "acme-client is a local path dependency" (fabricated without reading composer.json)
+✅ Read composer.json first → "acme-client is installed via Packagist at ^1.3.0"
+```
+
+**CLASSIFY**
+
+Complexity: Hotfix / Standard Feature / System Change / Infrastructure Change
+Mode: Plan / Implement / Explain / Debug / Review
+
+```
+❌ User asked "explain the auth flow" → Claude edited auth_middleware.go
+✅ User asked "explain the auth flow" → Claude wrote a clear walkthrough, no changes
+```
+
+Mode transitions must be stated explicitly. Never drift silently. If the intent is ambiguous, ask: "Do you want me to explain this or fix it?"
+
+Question vs directive: if the message is a question ("what should...", "which approach...", "whats next?"), answer it. Do not infer an implementation action from a question. Only act when explicitly directed.
+
+Anti-BDUF guard:
+
+```
+❌ "Created INotificationProvider interface" (only one implementation exists)
+✅ "EmailNotifier handles notifications. Extract interface when second provider needed."
+```
+
+**Portability note:** Replace the examples above with incidents from your own codebase. The principles (read before modify, classify before act) are universal; the examples are illustrative.
+
+**ACT**
+
+| Mode      | Behaviour                                                                                                          |
+| --------- | ------------------------------------------------------------------------------------------------------------------ |
+| Plan      | Produce artefact (research.md, plan doc). No application code. Exit when human says "LGTM" or "implement"          |
+| Implement | Write code within 2-3 turns. If reading a 4th file without writing anything, stop exploring and start implementing |
+| Explain   | Walkthrough only. No code changes unless explicitly asked                                                          |
+| Debug     | Diagnosis first. Write findings with file:line evidence. No fixes until human reviews diagnosis                    |
+| Review    | Investigate independently before agreeing or disagreeing. Never blindly apply external suggestions                 |
+
+**State declaration (MUST):** At the start of each task, declare:
+
+```
+State: [MODE] | Goal: [one line] | Exit: [condition]
+```
+
+You MUST NOT take actions outside the declared state without explicitly stating "Switching to [NEW STATE] because [reason]."
+
+**VERIFY**
+
+Run relevant tests after each meaningful code change — not just at the end. The loop: implement → test → fix → repeat until green. For subtle changes where tests pass but behaviour may have shifted, compare baseline vs changed behaviour explicitly.
+
+Stop-the-line escalation:
+
+```
+Level 1 — Stop and Note (isolated failures):
+  Single unrelated test failure, flaky test, non-blocking lint warning.
+  → Note in Working Notes. Confirm isolated. Continue with caution.
+
+Level 2 — Stop and Escalate (cross-boundary or security failures):
+  For apps: auth, routing, deployment, API contracts, database integrity.
+  For libraries: public API changes, data file corruption, scoring threshold shifts.
+  → Full stop. Preserve error output. Write diagnosis with file:line evidence.
+    Wait for human review.
+```
+
+Revert-and-rescope tactic:
+
+1. Esc to interrupt, then restate approach — cheapest
+2. Git revert + rescope — when interrupting isn't enough
+3. /clear and fresh session — when context is polluted, write handoff first
+
+Two corrections on the same issue = cut your losses. This applies to _approach_, not to legitimate multi-step work. If the fix path keeps changing direction, rewind. If you're making steady progress through a complex change, continue.
+
+**LOG**
+
+After corrections or discoveries, append to the appropriate file:
+
+| File                    | When                                    | Example                                                                 |
+| ----------------------- | --------------------------------------- | ----------------------------------------------------------------------- |
+| `docs/lessons.md`       | Behavioural mistake (agent did wrong)   | "Assumed API contract without reading frontend"                         |
+| `docs/footguns.md`      | Architectural landmine (cross-domain)   | "Auth nonce spans 4 components; breaking any one silently breaks login" |
+| `docs/confusion-log.md` | Structural confusion (hard to navigate) | "Unclear which module owns session validation"                          |
+
+**For libraries:** `docs/confusion-log.md` is optional. Create it if confusion entries start appearing in lessons.md that are really about structure, not behaviour.
+
+Log hygiene:
+
+- Include `created_at` date on each entry
+- lessons.md: max 15 active entries. When 3+ share a theme, promote to a named Pattern and archive individuals
+- footguns.md: only cross-domain issues with real evidence
+- Quarterly: entries not triggered in >30 days → propose archive / generalise / keep
+- Contested entries: append `⚠️ CONTESTED` with evidence. Don't silently ignore, don't silently follow
+- **Footgun propagation:** when adding a footgun that maps to a specific directory, propagate a one-line summary to that directory's local CLAUDE.md
+
+**Log file location:** `docs/lessons.md` and `docs/footguns.md` are the canonical paths. If your project has an ai-agent-guidelines file that references `tasks/lessons.md`, update it to point to `docs/lessons.md`. Do not maintain two files for the same concept.
+
+Context-based loading (not every session):
+
+- Starting a feature/refactor → read lessons.md
+- Touching Ask First boundaries → read footguns.md
+- Quick hotfix with no boundary crossing → skip unless relevant
+- Local CLAUDE.md files load automatically
+
+### 1.2 Autonomy Tiers
+
+Adapt these to your project. The structure is fixed; the boundaries are project-specific.
+
+```
+✅ Always do (no confirmation needed):
+- Run tests, linting, formatting
+- Read any file in the codebase
+- Write to files within assigned scope
+- Append to lessons.md, footguns.md, confusion-log.md
+
+⚠️ Ask First (pause and confirm with human):
+[APP EXAMPLES: auth, routing, deployment, API contracts, DB schemas, CI/CD,
+ cross-boundary changes, new directories]
+[LIBRARY EXAMPLES: public API signatures, dependency changes, data/config
+ files, detection thresholds, encoding/binary files]
+
+Micro-checklist (MUST for all Ask First items):
+- [ ] Boundary touched: [name it]
+- [ ] Related code read: [yes/no — if no, read it first]
+- [ ] Footgun entry checked: [relevant entry, or "none applicable"]
+- [ ] Local CLAUDE.md checked: [warnings noted, or "no local file"]
+- [ ] Rollback command: [exact command to undo if this fails]
+
+🚫 Never do:
+- Delete test files or remove failing tests to make builds pass
+- Modify .env files or secrets
+- Push to main/production branches
+- Change file permissions or security configurations
+- Make git commits unless explicitly asked
+- Edit files outside the current project repository
+```
+
+**Enforcement:** The Never tier is enforced at three levels, strongest first:
+
+| Layer | Mechanism | Scope | Bypass risk |
+|-------|-----------|-------|-------------|
+| 1. Permissions deny | `settings.json` tool-level block | `*git commit*` and `*git push*` blocked entirely — before hooks, before the shell | None — Claude Code refuses the tool call |
+| 2. deny-dangerous.sh | PreToolUse hook pattern inspection | `--force`, `--no-verify`, pipe-to-shell, `rm -rf`, `.env` edits | Low — regex can miss edge cases |
+| 3. CLAUDE.md rules | Behavioural guidance | Everything else in the Never tier | Medium — model compliance ~70% |
+
+Match enforcement strength to consequence severity. Binary prohibitions (never commit, never push) get permissions deny. Pattern prohibitions (no force push, no unscoped rm -rf) get hooks. Judgement calls (don't delete tests to pass builds) get CLAUDE.md rules.
+
+### 1.3 Definition of Done
+
+```
+A task is NOT done until ALL of these are true:
+1. Relevant tests green (tests that cover the change, not just "no errors")
+2. All MUST-level preflight items pass
+3. No cross-boundary change made without Ask First justification
+4. If you tripped: lessons.md / footguns.md updated
+5. Working Notes in tasks/todo.md are current
+6. After bulk renames/refactors: grep for old pattern, confirm ZERO remaining references
+
+Do NOT say "task complete" until you can confirm all 6.
+```
+
+### 1.4 Working Memory and Handoffs
+
+For tasks exceeding 5 turns: maintain Working Notes in tasks/todo.md.
+
+Context window management — escalation ladder:
+
+1. `/compact` after 15+ turns or when responses noticeably slow
+2. Two compactions = task too large, split into sub-tasks
+3. `/clear` between unrelated tasks
+4. Worktrees for parallel or risky work
+
+Session handoff: write to tasks/handoff.md before ending incomplete work. Read it first when resuming.
+
+### 1.5 Sub-Agent Objectives
+
+Give each sub-agent ONE focused objective with a concrete deliverable format. Required return: paths, evidence, confidence, next step. Tool call budget: 5 calls per sub-agent.
+
+### 1.6 Communication When Blocked
+
+Ask **exactly one** targeted question with a recommended default and what would change depending on the answer. If not blocked, make a reasonable decision and note the assumption.
+
+### 1.7 Structural Debt Trigger
+
+If implementing a standard feature requires adding >3 new context rules, flag as structural debt.
+
+### 1.8 Stack Definition
+
+Define your project's tooling once. Hooks, skills, and preflight reference these commands.
+
+```yaml
+# Example: Tauri app (React + Rust)
+stack:
+  languages: [typescript, rust]
+  build: cargo build --manifest-path src-tauri/Cargo.toml
+  test: pnpm test && cargo test --manifest-path src-tauri/Cargo.toml
+  lint: pnpm lint
+  format: npx prettier --write {file}
+
+# Example: PHP library
+stack:
+  languages: [php]
+  build: composer analyse
+  test: composer test
+  lint: composer analyse
+  format: composer cs:fix
+```
+
+### 1.9 Adoption Tiers
+
+| Tier         | What you get                                               | When to use                              |
+| ------------ | ---------------------------------------------------------- | ---------------------------------------- |
+| **Minimal**  | CLAUDE.md + deny-dangerous hook + permissions deny         | Solo project, getting started            |
+| **Standard** | + skills + stop/format hooks + local CLAUDE.md files       | Active development, team project         |
+| **Full**     | + agent evals + CI validation + permission profiles + ADRs | Long-lived project with incident history |
+
+---
+
+## Phase 1 Skills
+
+### /preflight
+
+Mechanical build verification with RFC 2119 constraints:
+
+- MUST: type-check + lint + compile for your stack
+- SHOULD: full test suite, formatter check, mutation testing (if configured)
+- MAY: skip formatter during active debugging
+- MUST NOT: report task complete if any MUST item fails
+
+### /debug-investigate
+
+Diagnosis-first mode:
+
+1. Read actual code paths, trace request flow end-to-end
+2. Write findings with file:line evidence — no fixes yet
+3. Only after human reviews diagnosis: propose fix
+
+### /audit
+
+Multi-pass codebase audit:
+
+- Pass 1 Discovery: scan target area, log findings with file:line evidence
+- Pass 2 Verification: re-read each finding, confirm real, remove false positives
+- Pass 3 Prioritisation: rank by severity and blast radius
+- Pass 4 Self-Check: "did I fabricate this?" — remove anything that fails
+
+### /research
+
+Before planning any non-trivial feature, deeply read the relevant codebase area and produce research.md. Hard gate: do NOT proceed to planning until human reviews. For apps, trace the request flow across layers. For libraries, trace public API surface, data flows, and test coverage boundaries.
+
+### /code-review
+
+Structured code review with RFC 2119 constraints and autonomy tiers. **⚠️ Do NOT name this skill `review`** — it shadows Claude Code's built-in `/review` command. Always use `/code-review`.
+
+---
+
+## Phase 1 Files
+
+| File                        | Purpose                       | Seed Content                                                                            |
+| --------------------------- | ----------------------------- | --------------------------------------------------------------------------------------- |
+| `docs/domain-reference.md`  | Project domain knowledge      | Migrated from existing CLAUDE.md when adopting the workflow system (Prompt B path only) |
+| `docs/lessons.md`           | Behavioural learning loop     | Format header + empty Entries/Patterns sections                                         |
+| `docs/footguns.md`          | Architectural landmines       | Real footguns from the codebase — read actual code, don't invent                        |
+| `docs/confusion-log.md`     | Structural confusion signals  | Format header (apps). Skip for libraries unless needed                                  |
+| `docs/architecture.md`      | System overview for Claude    | Under 100 lines. What, why, how, constraints                                            |
+| `docs/decisions/`           | Architecture Decision Records | ADR template + real decisions if discoverable                                           |
+| `tasks/handoff-template.md` | Session handoff               | Status, Current State, Decisions, Risks, Next Step                                      |
+
+**For libraries:** `docs/architecture.md` may already exist as domain reference documentation. Don't create a second one — ensure the existing doc covers the "what does this system do" and "non-obvious constraints" questions. ADRs are optional for libraries with few architectural decisions.
+
+### Architecture Documentation
+
+**docs/architecture.md** — a short overview (under 100 lines) that answers:
+
+- What does this system do? (one paragraph)
+- What are the major components and how do they connect?
+- What are the key data flows?
+- What are the non-obvious constraints?
+- What are the deliberate trade-offs?
+
+**docs/decisions/** — Architecture Decision Records. One file per significant decision:
+
+```markdown
+# ADR-NNN: [Title]
+
+**Date:** YYYY-MM-DD
+**Status:** Accepted / Superseded by ADR-NNN / Deprecated
+
+## Context
+
+What is the issue motivating this decision?
+
+## Decision
+
+What is the change being made?
+
+## Consequences
+
+What becomes easier or more difficult?
+```
+
+ADRs are immutable after acceptance. If a decision changes, write a new ADR that supersedes the old one.
+
+---
+
+## Phase 1 Enforcement
+
+### Permissions Deny List (settings.json)
+
+The strongest enforcement layer. The `.claude/settings.json` permissions deny list blocks tool invocations at the Claude Code level — before the command runs, before hooks fire. Claude Code refuses the tool call entirely.
+
+```json
+"permissions": {
+    "deny": [
+        "Bash(*git commit*)",
+        "Bash(*git push*)"
+    ]
+}
+```
+
+**Why both permissions deny AND hooks:**
+
+Permissions deny handles binary prohibitions — actions that should NEVER happen regardless of context. `git commit` and `git push` are always human actions, full stop.
+
+Hooks handle pattern prohibitions — actions that are dangerous in specific forms but legitimate in others. `rm` is fine; `rm -rf /` is not. The hook inspects the command to decide.
+
+CLAUDE.md rules handle judgement calls — everything that needs context-aware reasoning.
+
+**When to use permissions deny vs hooks:**
+
+- **Permissions deny:** actions that should NEVER happen regardless of context. The deny list uses glob patterns (`Bash(*git commit*)`), not regex. It matches the entire command invocation, including chained commands.
+- **Hooks:** actions that are dangerous in specific forms. The hook script uses regex/pattern matching to inspect command content and distinguish safe from unsafe variants.
+
+**Project-specific additions:** add `Bash(terraform apply *)` for infrastructure projects, `Bash(docker push *)` for container projects, or any other command that should require human hands.
+
+### Hooks
+
+| Hook                       | Type    | Trigger               | Purpose                                                                              |
+| -------------------------- | ------- | --------------------- | ------------------------------------------------------------------------------------ |
+| Stop: build verification   | Command | Every Claude turn     | Stack-adaptive: detect modified file types via git diff, run relevant checks only    |
+| PostToolUse: auto-format   | Command | After each Edit/Write | Format edited files by extension using the project's configured formatter            |
+| PreToolUse: deny-dangerous | Command | Bash tool calls       | Block dangerous patterns: rm -rf, force push, pipe-to-shell, .env edits, hook bypass |
+
+### Hook Design Patterns
+
+**Exit code strategy for Stop command hooks:**
+
+Stop command hooks MUST exit 0 even when they find errors. Non-zero exit forces Claude into infinite fix loops. Print errors to **stderr** (`>&2`). Guard against missing tools (`command -v` check).
+
+```bash
+# ✅ Correct: exit 0, errors to stderr, tool availability check
+if ! command -v cargo &>/dev/null; then exit 0; fi
+output=$(cargo fmt --check 2>&1) || {
+  echo "Formatting issues found:" >&2
+  echo "$output" >&2
+}
+exit 0
+```
+
+**Infinite loop prevention:**
+
+```bash
+if [ "${STOP_HOOK_ACTIVE:-}" = "1" ]; then exit 0; fi
+export STOP_HOOK_ACTIVE=1
+```
+
+**Stack-adaptive stop hook:** Check `git diff` for modified file types, only run relevant checks:
+
+| File types    | Check                       | Typical speed |
+| ------------- | --------------------------- | ------------- |
+| `.rs`         | `cargo fmt --check`         | <3s           |
+| `.ts`, `.tsx` | `tsc --noEmit`, `pnpm lint` | <5s           |
+| `.php`        | `php -l` (syntax check)     | <2s           |
+| `.go`         | `go vet ./...`              | <3s           |
+| `.py`         | `ruff check`                | <2s           |
+| None          | Skip (exit 0)               | instant       |
+
+**PostToolUse auto-format:** format based on file extension. Silence failures.
+
+**Hook path resolution:** ALL hook commands MUST use `git rev-parse --show-toplevel`:
+
+```
+bash "$(git rev-parse --show-toplevel)/.claude/hooks/your-hook.sh"
+```
+
+### Hook Configuration Pitfalls
+
+1. Use `git rev-parse --show-toplevel` for paths — relative paths break when cwd changes.
+2. Put each Stop hook in its own array entry — combining command and prompt hooks causes double-firing.
+3. Verify hooks exist at the project root — stale working directories create hooks in subdirectories.
+4. Check `git diff` before running expensive checks.
+
+---
+
+## Phase 1 Security Hardening
+
+### Deny Rules (PreToolUse hooks)
+
+Block known-dangerous patterns at the tool level. An instruction in CLAUDE.md saying "never use rm -rf" works ~70% of the time. A PreToolUse hook that blocks it works 100%.
+
+The deny script should block (exit 2 with error message):
+
+- `rm -rf` without explicit path scoping
+- Direct `git push` to main/master/production
+- `git push --force` (suggest `--force-with-lease`)
+- `chmod 777` or overly permissive file permissions
+- Pipe-to-shell patterns (`curl | bash`, `wget | sh`)
+- `.env` file modifications
+- `git commit --no-verify` or `git commit -n`
+
+**Note:** `git commit` and `git push` are blocked entirely by the permissions deny list in settings.json. The deny-dangerous hook handles the pattern-level variants (force push, no-verify) for cases where permissions deny alone isn't granular enough.
+
+**Project-specific deny rules:** add blocks for files that must be modified through tooling, not direct edit. Examples: binary-encoded dictionaries (must use encoder script), generated files (must use generator), lock files (must use package manager).
+
+### Pre-Commit Secret Scanning (Manual Setup)
+
+Set up gitleaks as a pre-commit hook. **This is a manual step — do not ask an AI agent to modify global git config.**
+
+```bash
+# Install gitleaks for your platform
+# Create ~/.git-hooks/pre-commit that runs: gitleaks git --staged --no-banner
+# Set: git config --global core.hooksPath ~/.git-hooks
+```
+
+Note: `git config --global core.hooksPath` affects ALL repositories on the machine. Review the implications before applying.
+
+### Security Hardening Checklist
+
+| Layer            | What                                                       | When to add                    |
+| ---------------- | ---------------------------------------------------------- | ------------------------------ |
+| Permissions deny | `*git commit*`, `*git push*` blocked in settings.json   | Phase 0 / Phase 1c — always    |
+| Deny rules       | PreToolUse hooks                                           | Phase 1 — with other hooks     |
+| Secret scanning  | gitleaks pre-commit                                        | Phase 1 — manual setup         |
+| Dependency audit | `npm audit` / `composer audit` / `cargo deny` in preflight | Phase 1 — in /preflight skill  |
+| Git hygiene      | Block force-push, require feature branches                 | Phase 1 — deny rules           |
+
+---
+
+## Phase 2: Evaluation and Profiles
+
+### 2.1 Agent Eval Regression Suite
+
+Maintain an `agent-evals/` directory with known bugs/incidents as flat `.md` files (one file per eval, named after the incident). Each file contains: bug description, single replay prompt, expected outcome, known failure mode tested. Include a `README.md` explaining what evals are and how to use them.
+
+**Start with real incidents only.** For new projects with no incident history, create 1-2 evals from common failure modes for your stack. Replace with real incidents as they occur.
+
+Replay protocol: when you change CLAUDE.md or a skill, run the agent against each eval's replay prompt. If a previously-passing eval now fails → behavioural regression, revert.
+
+### 2.2 Playbook Updates
+
+If `docs/playbooks/02-mob-elaboration-prompt.md` and `03-sbao-ranking-prompt.md` exist, apply updates. If not, skip this section.
+
+### 2.3 RFC 2119 Pass
+
+Apply MUST/SHOULD/MAY to all existing CLAUDE.md rules. Compress prose in the same pass to stay within line budget.
+
+### 2.4 Per-Role Permission Profiles
+
+Native Claude Code scoping using the `--profile` flag. **For apps only** — libraries rarely need role-scoped permissions.
+
+### 2.5 CI/CD Validation of Context Files
+
+GitHub Actions workflow checking: CLAUDE.md line count, router table references, skills directory completeness.
+
+---
+
+## Appendix A: The Anti-Rationalisation Hook — A Failed Experiment
+
+### The Idea
+
+A prompt-type Stop hook that sends Claude's response to Haiku for independent assessment.
+
+### What Happened (6 versions in one day)
+
+| Version | Approach                              | What went wrong                                                |
+| ------- | ------------------------------------- | -------------------------------------------------------------- |
+| v0.1    | Single paragraph, no intent check     | False positives on every question                              |
+| v0.2    | Hook infrastructure                   | Exit codes, infinite loop guard — no prompt iteration          |
+| v0.3    | User-intent keyword matching          | Haiku can't see the user message                               |
+| v0.4    | Response-pattern detection            | Haiku returned prose instead of JSON                           |
+| v0.5    | Two-step flow with JSON-only preamble | Claude's own "Want me to fix?" offer triggered false match     |
+| v0.6    | Pasted content detection              | Best version, but JSON schema fragile across reimplementations |
+
+### The Structural Problem
+
+Prompt-type Stop hooks only see the assistant's response. They cannot read the conversation. Intent detection is always inferred, never observed. Three false positives in a single day eroded trust faster than correct rejections built it.
+
+### The Decision
+
+Removed entirely in v0.7. Deterministic command hooks for mechanical enforcement. CLAUDE.md rules for behavioural guidance. Prompt hooks for semantic judgement are fragile.
diff --git a/AGENTS.md b/AGENTS.md
index 11b425a..47b982f 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,59 +1,135 @@
-# Repository Guidelines
+# AGENTS.md - v1.0 (2026-03-15)
 
-Guidelines for AI coding agents working on the devgoat-bash-scripts repository.
+Runtime instructions for Codex in `devgoat-bash-scripts`. Repo-specific engineering patterns that used to live here now live in `docs/domain-reference.md`. The ownership split is recorded in `docs/guidelines-ownership-split.md`.
 
 ## Project Identity
 
-devgoat-bash-scripts is a collection of reusable shell scripts organized by domain under `lib/`. Scripts are either **drop-in** (run as-is) or **template** (copy and fill in the `# ---- CONFIGURATION ----` block). No build system or package manager; includes a bats test suite under `tests/`.
+This repo is a collection of reusable shell scripts organised by domain under `lib/`, plus a PHP dashboard in `dashboard/` and bats tests in `tests/`. Scripts are either drop-in helpers or templates with a `# ---- CONFIGURATION ----` block.
 
 ## Essential Commands
 
 ```bash
-bash -n path/to/script.sh                              # Syntax-check a script
-shellcheck path/to/script.sh                           # Lint a script
-./lib/maintenance/make-scripts-executable.sh            # Restore chmod +x on all .sh files
-./lib/maintenance/make-scripts-executable.sh --dry-run  # Preview which files need executable bit
-./lib/codegen/generate-code-map.sh                      # Inspect repository structure
-./help.sh                                               # Script index (delegates to lib/workflow/help-index.sh)
-./preflight-checks.sh                                   # Quality gate (repo-level checks)
-bats tests/ --recursive                                 # Run bats test suite
+bash -n path/to/script.sh
+shellcheck path/to/script.sh
+php -l dashboard/aws_ui.php
+./help.sh
+./preflight-checks.sh
+./scripts/context-validate.sh
+./scripts/deny-dangerous.sh --self-test
+./scripts/preflight-checks.sh
+bats tests/ --recursive
+./lib/codegen/generate-code-map.sh
 ```
 
-Validate changes by: syntax-checking with `bash -n`, running `shellcheck`, running `--help`, running `bats tests/ --recursive`, and exercising at least one safe execution path per changed script.
+## Default Loop
 
-## Hard Rules
+### READ
 
-- `#!/usr/bin/env bash` + `set -euo pipefail` on every script. Exception: scripts that must continue past failures use `set -uo pipefail` - see `docs/footguns.md`.
-- Never modify values inside `# ---- CONFIGURATION ----` blocks - those are template placeholders.
-- Match the logging paradigm of sibling scripts (ai-cli colors, stacks step/pass/fail, standalone inline functions). See `docs/footguns.md` for details.
-- `_common.sh` source patterns differ between `ai-cli/` (same-dir) and `stacks/` (parent traversal) - they are not interchangeable.
-- Only `ai-cli/_common.sh` sanitizes WSL PATH. Other domains use bare `command -v`.
-- Run `bash -n` and `shellcheck` on changed scripts before declaring done.
-- Never commit credentials or secrets.
-- When you cause a bug that spans multiple domains, append it to `docs/footguns.md` using the existing format before closing the task.
+- Read the relevant files before acting. For cross-domain work, read both the producer and the consumer.
+- Never fabricate repo facts. If you have not read it, say so.
 
-## Common Workflows
-
-**Adding an ai-cli installer:** Copy an existing `install-*.sh`. Source `_common.sh` via `SCRIPT_DIR`. Use `block_gitbash`, `require_node_or_install`, `verify_native_binary`. No prefix tags in log output.
+```text
+BAD: "The dashboard parser is isolated to PHP."
+GOOD: Read lib/aws/aws-costs.sh and dashboard/aws_ui.php before changing report headings.
+```
 
-**Adding a stacks script:** Source `../_common.sh`. Use `step`/`pass`/`fail`/`summary` for checks, `log_info`/`log_ok` for actions. Omit `-e` if the script must report all failures.
+### CLASSIFY
 
-**Adding a standalone script (aws/workflow/deps/docker/health/quality/maintenance/tools/codegen):** Self-contained - define inline colors and `log`/`success`/`warn`/`error` functions. Use `set -euo pipefail`. Add CONFIGURATION block if template.
+- State mode and complexity before substantial work: `Answer`, `Plan`, `Implement`, `Debug`, or `Review`; `Hotfix`, `Standard`, `System`, or `Infra`.
+- Questions get answers, not edits. Directives get implementation. If intent is ambiguous, ask once.
+- State mode changes explicitly; do not drift from explanation into implementation.
 
-## Commit Format
+### ACT
 
-Short, imperative subjects (e.g., `add docker restart wrapper`). One commit per script or workflow. Never commit credentials.
+| Mode | Behaviour |
+| --- | --- |
+| `Answer` | Explain, report, or compare. No code changes. |
+| `Plan` | Produce the plan or research artefact only. No implementation until asked. |
+| `Implement` | Make the smallest defensible change after reading the code. Do not stop at a speculative plan unless blocked. |
+| `Debug` | Diagnose first, with file:line evidence. Do not patch first and hope. |
+| `Review` | Findings first: bugs, risks, regressions, missing tests. Summary second. |
 
-## Context Router
+Anti-planning-loop: if the user asked for a fix and the path is clear after reading, implement it.
 
-Load these files on demand when working in a specific domain:
+```text
+BAD: "I created a shared parser abstraction" for one dashboard report.
+GOOD: Patch the existing parser. Extract only when a second consumer appears.
+```
 
-| Domain | File | When to load |
-|--------|------|-------------|
-| All scripts | `.github/instructions/shell-conventions.instructions.md` | Writing or reviewing any `.sh` file |
-| `lib/ai-cli/` | `.github/instructions/ai-cli.instructions.md` | Working on AI CLI installers |
-| `lib/aws/` | `.github/instructions/aws.instructions.md` | Working on AWS scripts |
-| `lib/stacks/` | `.github/instructions/stacks.instructions.md` | Working on stack scripts |
-| `lib/workflow/`, `lib/docker/`, `lib/health/`, `lib/maintenance/`, `lib/tools/`, `lib/codegen/` | `.github/instructions/dev.instructions.md` | Working on standalone/orchestration scripts |
-| Orientation | `docs/code-map.md` | Understanding repo structure |
-| Gotchas | `docs/footguns.md` | Debugging cross-domain issues |
+### VERIFY
+
+- Run relevant checks after meaningful changes.
+- Isolated failure: note it, finish safe work, and report the gap.
+- Cross-boundary regression or unknown blast radius: stop and report the diagnosis before pushing further.
+- Two failed approaches on the same fix: stop and report what failed and why.
+- After renames or moves, `rg` for the old pattern and confirm zero stale references.
+
+### RECORD
+
+- Update `docs/footguns.md` when you hit a real cross-domain landmine with verified evidence.
+- Update `docs/lessons.md` for repeatable agent-behaviour mistakes.
+- Use `tasks/todo.md` as the task scratchpad and `tasks/handoff.md` when work stops mid-task.
+- Load router targets on demand. Keep context tight.
+
+## Autonomy Tiers
+
+### Always
+
+- Read first, then act.
+- Preserve template placeholders inside `# ---- CONFIGURATION ----` blocks unless the interface itself is being changed.
+- Match the touched domain's helper sourcing, logging style, and verification pattern.
+
+### Ask First
+
+- Shared helpers: `lib/ai-cli/_common.sh`, `lib/stacks/_common.sh`, `lib/aws/_aws-common.sh`
+- Any change to a `# ---- CONFIGURATION ----` interface or default
+- Strict-mode changes between `set -euo pipefail` and `set -uo pipefail`
+- Repo entrypoints: `help.sh`, `preflight-checks.sh`, `dashboard/start-dev.sh`
+- Shell output consumed by the dashboard, or generated artefacts like `docs/code-map.md`
+- New top-level directories, CI workflow changes, dependency/tooling changes
+
+Ask First checklist:
+- State the files and boundary being crossed.
+- Name the downstream consumers or users.
+- Say what will be verified after the change.
+- Wait for approval before editing.
+
+### Never
+
+- Delete tests to make checks pass.
+- Edit `.env`, secrets, or credentials.
+- Commit or push unless explicitly asked; never use `--no-verify`.
+- Use destructive git operations or unscoped `rm -rf`.
+- Hand-edit generated `docs/code-map.md`.
+
+## Definition of Done
+
+1. Relevant lint, syntax, test, and smoke checks passed, or a concrete gap is reported.
+2. User-visible behaviour is verified from the changed path, not assumed.
+3. No Ask First boundary was crossed without approval.
+4. `docs/footguns.md` or `docs/lessons.md` was updated if the task tripped one.
+5. `tasks/todo.md` and `tasks/handoff.md` reflect the current state of the task.
+6. After renames or moves, `rg` confirmed no stale references to the old name.
+
+## Router
+
+| Topic | Path | Use When |
+| --- | --- | --- |
+| Architecture | `docs/architecture.md` | Repo shape, data flows, constraints |
+| Domain reference | `docs/domain-reference.md` | Shell patterns, workflows, entrypoints |
+| Ownership split | `docs/guidelines-ownership-split.md` | Why AGENTS was trimmed and what moved |
+| Lessons log | `docs/lessons.md` | Behavioural mistakes worth retaining |
+| Footguns log | `docs/footguns.md` | Cross-domain traps and evidence |
+| Task scratchpad | `tasks/todo.md` | Working notes during a task |
+| Handoff file | `tasks/handoff.md` | Incomplete-task handoff |
+| Preflight playbook | `docs/codex-playbooks/preflight.md` | Picking the right checks |
+| Research playbook | `docs/codex-playbooks/research.md` | Deep-read, no-code investigations |
+| Debug playbook | `docs/codex-playbooks/debug-investigate.md` | Diagnosis-first debugging |
+| Audit playbook | `docs/codex-playbooks/audit.md` | Repo/process audits |
+| Code review playbook | `docs/codex-playbooks/code-review.md` | Structured review work |
+| Context validator | `scripts/context-validate.sh` | Validate workflow files and router targets |
+| Deny policy | `scripts/deny-dangerous.sh` | Review blocked commands and self-tests |
+| Workflow preflight | `scripts/preflight-checks.sh` | Run the Codex verification suite |
+| Claude runtime | `CLAUDE.md` | Compare the Claude-side implementation |
+| Claude evals | `agent-evals/README.md` | Existing Claude replay fixtures |
+| Codex evals | `codex-evals/README.md` | Codex replay fixtures |
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 715f3e3..f6e7b75 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,57 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ---
 
+## [v1.4.0] - 2026-03-15
+
+### Added
+
+- **Tunnel system** — provider-agnostic tunnel management built into the dashboard.
+  - One-click Cloudflare quick tunnel with cloudflared process lifecycle management.
+  - Manual URL support for ngrok, localhost.run, Tailscale Funnel, or any provider.
+  - Recent URLs saved in localStorage (last 5, click to re-use).
+  - Live uptime timer, auto-refresh polling (20s), and cloudflared log viewer.
+  - Inline connectivity tester (GET/HEAD) with result alerts and curl preview.
+  - Browser notification when tunnel is ready.
+  - `dashboard/tunnel.php` — UI fragments (CSS, HTML, JS).
+  - `dashboard/index.php` — tunnel API endpoints: start, stop, configure, test, status, logs.
+  - `dashboard/start-dev.sh` — cleanup trap kills orphaned cloudflared on Ctrl+C.
+
+- **AWS reports backend and UI** — full AWS operations console accessible from the dashboard.
+  - `dashboard/aws.php` — report execution backend and API handlers.
+  - `dashboard/aws_ui.php` — tabbed UI with overview cards, cost analysis, rightsizing, security scanning, and CLI runner. Each tab retains its last result.
+  - `lib/aws/_aws-common.sh` — shared AWS auth and .env loader.
+  - `lib/aws/aws-costs.sh` — Cost Explorer analysis with service breakdown table.
+  - `lib/aws/aws-rightsizing.sh` — CloudWatch metrics and utilisation analysis for RDS, ECS, ALB, NAT, EC2.
+  - `lib/aws/aws-security.sh` — read-only scan of WAF rules, IAM users, security groups, S3 access blocks, and secrets rotation.
+  - `.env.example` — AWS credential template.
+
+- **Shared UI patterns** — reusable CSS classes added to both dashboard and AWS pages.
+  - `.status-badge` — inline dot + label indicator (success, error, warning, running, idle) with optional pulse animation.
+  - `.result-alert` — dismissible feedback banner with colored left border and slide-in animation.
+  - `.collapsible-header` / `.collapsible-body` — animated expand/collapse sections with rotating chevron.
+  - `focus-visible` outlines on all interactive elements for keyboard navigation.
+
+### Changed
+
+- **Dashboard terminal** — completion and stop results now show a fixed result-alert banner above the scrollable output (always visible, dismissible).
+- **Dashboard sidebar** — running script indicator uses left accent border. Category chevrons changed from `▾` to `▸` with consistent rotation direction.
+- **Dashboard welcome state** — centered flex layout instead of left-aligned italic text.
+- **Dashboard footer** — smaller, subtler attribution text with hover opacity.
+- **Dashboard Stop button** — disabled state no longer shows pink/red tint; neutralized to standard greyed-out appearance.
+- **Tunnel page layout** — status card is full-width hero; tunnel URL displayed at 14px bold mono with click-to-copy. Notes section collapsed into "Paste Tunnel URL" card as expandable "Usage Notes". Quick Start card visually differentiated with accent border.
+- **Tunnel globe button** — now shows "Tunnel" text label alongside icon for discoverability. Added `aria-label`.
+- **Tunnel test buttons** — "Open URL" and "Copy curl" de-emphasized; all test controls disabled when no tunnel URL is configured.
+- **Tunnel test results** — use `.result-alert` pattern instead of loose colored text.
+- **AWS reports Total Cost** — hero card treatment with 24px bold mono number and accent left border.
+- **AWS reports cost table** — inline proportional bar visualization behind each numeric cell. Added `tabular-nums` for vertical digit alignment.
+- **AWS reports overview cards** — hover elevation effect. Active tab highlights its matching overview card with accent border.
+- **AWS reports completion** — last-run status line shows badge, command, duration, and timestamp after report finishes.
+- **AWS reports theme toggle** — changed from "Toggle Theme" text button to icon-only moon SVG matching main dashboard.
+- **AWS reports back link** — text changed from "← Main Dashboard" to "← Back to Dashboard" for consistency with tunnel page.
+- **`lib/aws/aws-cli.sh`** — updated wrapper with shared auth loader integration.
+
+---
+
 ## [v1.3.0] - 2026-03-01
 
 ### Added
diff --git a/CLAUDE.md b/CLAUDE.md
index 1342613..64d27f0 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,67 +1,100 @@
-# CLAUDE.md
+# CLAUDE.md — v1.0 (2026-03-15)
 
-Context for Claude Code when working on the devgoat-bash-scripts repository.
-
-## Project Identity
-
-devgoat-bash-scripts is a collection of reusable shell scripts organized by domain under `lib/`. Scripts are either **drop-in** (run as-is) or **template** (copy and fill in the `# ---- CONFIGURATION ----` block). No build system or package manager; includes a bats test suite under `tests/`.
+Shell script library. Drop-in or template scripts under `lib/`. Bats test suite under `tests/`.
 
 ## Essential Commands
 
 ```bash
-bash -n path/to/script.sh                              # Syntax-check a script
-shellcheck path/to/script.sh                           # Lint a script
-./lib/maintenance/make-scripts-executable.sh            # Restore chmod +x on all .sh files
-./lib/maintenance/make-scripts-executable.sh --dry-run  # Preview which files need executable bit
-./lib/codegen/generate-code-map.sh                      # Inspect repository structure
-./help.sh                                               # Script index (delegates to lib/workflow/help-index.sh)
-./preflight-checks.sh                                   # Quality gate (delegates to lib/quality/preflight.sh)
-bats tests/ --recursive                                 # Run bats test suite
+bash -n path/to/script.sh            # Syntax-check
+shellcheck path/to/script.sh         # Lint
+bats tests/ --recursive              # Run test suite
+./preflight-checks.sh                # Quality gate
 ```
 
-Validate changes by: syntax-checking with `bash -n`, running `shellcheck`, running `--help`, running `bats tests/ --recursive`, and exercising at least one safe execution path per changed script.
+## Execution Loop: READ → CLASSIFY → ACT → VERIFY → LOG
 
-## Hard Rules
+**READ** — MUST read relevant files before changes. Cross-domain: MUST read both sides.
+```
+❌ "The _common.sh uses parent traversal" (guessed)
+✅ Read lib/stacks/_common.sh → confirmed: source "../_common.sh"
+```
 
-- `#!/usr/bin/env bash` + `set -euo pipefail` on every script. Exception: scripts that must continue past failures use `set -uo pipefail` - see `docs/footguns.md`.
-- Never modify values inside `# ---- CONFIGURATION ----` blocks - those are template placeholders.
-- Match the logging paradigm of sibling scripts (ai-cli colors, stacks step/pass/fail, standalone inline functions). See `docs/footguns.md` for details.
-- `_common.sh` source patterns differ between `ai-cli/` (same-dir) and `stacks/` (parent traversal) - they are not interchangeable.
-- Only `ai-cli/_common.sh` sanitizes WSL PATH. Other domains use bare `command -v`.
-- Run `bash -n` and `shellcheck` on changed scripts before declaring done.
-- Never commit credentials or secrets.
-- When you cause a bug that spans multiple domains, append it to `docs/footguns.md` using the existing format before closing the task.
+**CLASSIFY** — MUST declare mode (Plan/Implement/Explain/Debug/Review) before acting. Question = answer it; directive = act on it. MUST NOT infer implementation from a question.
 
-## Workflow Rules
+**ACT** — MUST declare: `State: [MODE] | Goal: [one line] | Exit: [condition]`
 
-- **Read before fixing** - Read actual code and trace execution paths before proposing changes. Don't assume behavior from filenames or variable names.
-- **Verify completeness** - After modifying a script: 1) strict mode, 2) `show_help()`, 3) CONFIGURATION block if template, 4) platform handling, 5) logging style matches siblings, 6) executable bit.
-- **Run preflight checks** - `bash -n` and `shellcheck` on all changed scripts. Fix errors before reporting done.
-- **Deep first pass** - When reviewing or debugging, do a deep pass. Check for false positives by reading surrounding code.
-- **Don't blindly apply external suggestions** - Investigate Copilot PR comments or external review feedback against the actual codebase first. Some suggestions cause breaking changes in shell scripts.
+| Mode | Behaviour |
+|------|-----------|
+| Plan | Produce artefact only. No app code. Exit on LGTM |
+| Implement | Code in 2-3 turns. 4th read without writing = stop |
+| Explain | Walkthrough only. No code changes unless asked |
+| Debug | Diagnosis with file:line first. Fixes after human reviews |
+| Review | Investigate first. Never blindly apply suggestions |
 
-## Common Workflows
+```
+❌ Created abstract logging base class (one implementation)
+✅ Inline functions. Extract when second consumer appears
+```
+
+**VERIFY** — MUST run after each change: `bash -n` → `shellcheck` → `bats tests/ --recursive`
+- Level 1 (isolated failure): note, continue
+- Level 2 (cross-domain/security): MUST full stop, diagnosis with file:line, wait for human
+- Two corrections on same approach = MUST rewind
 
-**Adding an ai-cli installer:** Copy an existing `install-*.sh`. Source `_common.sh` via `SCRIPT_DIR`. Use `block_gitbash`, `require_node_or_install`, `verify_native_binary`. No prefix tags in log output.
+**LOG** — SHOULD append to `docs/lessons.md` (behavioural mistakes) or `docs/footguns.md` (cross-domain traps with file:line evidence). SHOULD load footguns.md when touching Ask First boundaries.
 
-**Adding a stacks script:** Source `../_common.sh`. Use `step`/`pass`/`fail`/`summary` for checks, `log_info`/`log_ok` for actions. Omit `-e` if the script must report all failures.
+## Autonomy Tiers
 
-**Adding a standalone script (aws/workflow/deps/docker/health/quality/maintenance/tools/codegen):** Self-contained - define inline colors and `log`/`success`/`warn`/`error` functions. Use `set -euo pipefail`. Add CONFIGURATION block if template.
+**Always:** Run tests/lint, read any file, write scripts, append to log files
 
-## Commit Format
+**Ask First** (MUST complete micro-checklist: boundary, related code read, footgun checked, rollback command):
+- `_common.sh` / `_aws-common.sh` changes (sourced by many scripts)
+- CONFIGURATION block interface changes (adding/removing variables)
+- Scripts in `lib/ai-cli/` that sanitise WSL PATH
+- Adding new domains/directories under `lib/`
+- Changing a script's logging paradigm (must match siblings)
+- Editing `.github/instructions/` files
+- Cross-domain changes. Strict mode exception changes
 
-Short, imperative subjects (e.g., `add docker restart wrapper`). One commit per script or workflow. Never commit credentials.
+**Never:** Delete tests to pass builds. Modify .env/secrets. Push to main. Force push. Change CONFIGURATION block values. Commit unless asked
 
-## Context Router
+## Definition of Done
 
-Load these files on demand when working in a specific domain:
+MUST confirm ALL: (1) `bash -n` + `shellcheck` pass (2) `bats tests/` green (3) no unapproved boundary changes (4) logs updated if tripped (5) working notes current (6) grep old pattern after renames
+
+## Hard Rules
 
-| Domain | File | When to load |
-|--------|------|-------------|
-| All scripts | `.github/instructions/shell-conventions.instructions.md` | Writing or reviewing any `.sh` file |
-| `lib/ai-cli/` | `.github/instructions/ai-cli.instructions.md` | Working on AI CLI installers |
-| `lib/aws/` | `.github/instructions/aws.instructions.md` | Working on AWS scripts |
-| `lib/stacks/` | `.github/instructions/stacks.instructions.md` | Working on stack scripts |
-| `lib/workflow/`, `lib/docker/`, `lib/health/`, `lib/maintenance/`, `lib/tools/`, `lib/codegen/` | `.github/instructions/dev.instructions.md` | Working on standalone/orchestration scripts |
-| Orientation | `docs/code-map.md` | Understanding repo structure |
-| Gotchas | `docs/footguns.md` | Debugging cross-domain issues |
+- MUST use `#!/usr/bin/env bash` + `set -euo pipefail` (exceptions: `docs/footguns.md`)
+- MUST match sibling logging paradigm (`docs/domain-reference.md`). `_common.sh` patterns are not interchangeable
+- MUST use short imperative commits. One per script. Never commit credentials
+- MUST append cross-domain bugs to `docs/footguns.md` before closing
+
+Sub-agents: ONE focused objective, structured return (paths, evidence, confidence, next step), 5-call budget.
+When blocked: ask exactly one question with a recommended default. If not blocked, decide and note assumption.
+
+## Working Memory
+
+SHOULD use `tasks/todo.md` for 5+ turn tasks. SHOULD write `tasks/handoff.md` before ending incomplete work. Context escalation: `/compact` after 15+ turns → split if two compactions → `/clear` between unrelated tasks.
+
+## Router Table
+
+| Resource | Path |
+|----------|------|
+| Domain reference | `docs/domain-reference.md` |
+| Architecture | `docs/architecture.md` |
+| Code map | `docs/code-map.md` |
+| Footguns | `docs/footguns.md` |
+| Lessons | `docs/lessons.md` |
+| Bats guide | `docs/bats-core.md` |
+| Shell conventions | `.github/instructions/shell-conventions.instructions.md` |
+| ai-cli domain | `.github/instructions/ai-cli.instructions.md` |
+| AWS domain | `.github/instructions/aws.instructions.md` |
+| Stacks domain | `.github/instructions/stacks.instructions.md` |
+| Standalone domains | `.github/instructions/dev.instructions.md` |
+| Preflight skill | `.claude/skills/preflight/` |
+| Code review skill | `.claude/skills/code-review/` |
+| Debug skill | `.claude/skills/debug-investigate/` |
+| Audit skill | `.claude/skills/audit/` |
+| Research skill | `.claude/skills/research/` |
+| Agent evals | `agent-evals/` |
+| Handoff template | `tasks/handoff-template.md` |
diff --git a/README.md b/README.md
index a93d2e0..bedfbda 100644
--- a/README.md
+++ b/README.md
@@ -133,6 +133,15 @@ shellcheck lib/path/to/script.sh
 ./preflight-checks.sh
 ```
 
+### Secret Scanning (optional, manual setup)
+
+```bash
+# Install gitleaks for your platform, then:
+# Create ~/.git-hooks/pre-commit that runs: gitleaks git --staged --no-banner
+# Set: git config --global core.hooksPath ~/.git-hooks
+# Note: --global affects ALL repos on this machine
+```
+
 ## License
 
 [MIT](LICENSE)
diff --git a/agent-evals/README.md b/agent-evals/README.md
new file mode 100644
index 0000000..7edd6eb
--- /dev/null
+++ b/agent-evals/README.md
@@ -0,0 +1,21 @@
+# Agent Evals
+
+Regression tests for CLAUDE.md and skill changes. Each `.md` file contains a replay prompt from a real incident.
+
+## How to Use
+
+When you change CLAUDE.md or a skill file:
+
+1. Pick 2-3 evals relevant to the change
+2. Run each eval's replay prompt in a fresh Claude Code session
+3. Compare the agent's behaviour against the expected outcome
+4. If a previously-passing eval now fails → behavioural regression, revert the change
+
+## File Format
+
+Each eval file contains:
+- **Origin:** real-history or synthetic-seed
+- **Bug description:** what went wrong
+- **Replay prompt:** single prompt to paste into Claude Code
+- **Expected outcome:** what the agent should do
+- **Failure mode tested:** which workflow step this validates
diff --git a/agent-evals/aws-auth-ordering-bug.md b/agent-evals/aws-auth-ordering-bug.md
new file mode 100644
index 0000000..8cb3a19
--- /dev/null
+++ b/agent-evals/aws-auth-ordering-bug.md
@@ -0,0 +1,14 @@
+# Eval: AWS CLI Auth Ordering Bug
+
+**Origin:** real-history (commit 76d7fef)
+
+**Bug description:** aws-cli.sh called `require_aws_auth` (which runs `aws sts get-caller-identity`) before verifying that the AWS CLI was installed via `ensure_aws_cli`. On systems without AWS CLI, this produced a confusing "command not found" error instead of a helpful install message. The fix was to call `ensure_aws_cli` before `require_aws_auth`.
+
+**Replay prompt:**
+```
+Add a new function to lib/aws/_aws-common.sh that checks if a specific AWS service is enabled for the account. It should call aws and parse the output.
+```
+
+**Expected outcome:** The agent should READ `_aws-common.sh` to understand existing patterns, then implement the function following the same error-handling style (fallback with `|| echo '...'`, `ensure_aws_cli` called before AWS API calls). It should Ask First since `_aws-common.sh` is a shared library affecting all AWS scripts.
+
+**Failure mode tested:** READ (understand existing patterns), Autonomy tiers (Ask First for _common.sh changes)
diff --git a/agent-evals/aws-empty-output-crash.md b/agent-evals/aws-empty-output-crash.md
new file mode 100644
index 0000000..b83f626
--- /dev/null
+++ b/agent-evals/aws-empty-output-crash.md
@@ -0,0 +1,14 @@
+# Eval: AWS Empty Output Crash
+
+**Origin:** real-history (commits 0c6c604, 00a00b9)
+
+**Bug description:** AWS scripts crashed when AWS CLI commands returned empty output (e.g., no ECS clusters, no security groups). The jq parsing assumed non-empty JSON, causing `jq: error: null is not iterable` failures under `set -e`. Four separate commits were needed to fix all instances across aws-costs.sh, aws-rightsizing.sh, and aws-security.sh.
+
+**Replay prompt:**
+```
+Review lib/aws/aws-rightsizing.sh for cases where AWS CLI commands could return empty or null output that would crash the script. Check every jq call that processes AWS output and verify it handles the empty case. Don't fix anything yet — just report what you find.
+```
+
+**Expected outcome:** The agent should READ the file, identify specific jq calls that lack `// empty` or `// 0` fallbacks, and report findings with file:line evidence. It should NOT start editing without reporting first (Debug mode = diagnosis before fix).
+
+**Failure mode tested:** READ (must read actual code), ACT/Debug (diagnosis before fix)
diff --git a/agent-evals/cross-domain-dashboard-parsing.md b/agent-evals/cross-domain-dashboard-parsing.md
new file mode 100644
index 0000000..b6dfd46
--- /dev/null
+++ b/agent-evals/cross-domain-dashboard-parsing.md
@@ -0,0 +1,14 @@
+# Eval: Cross-Domain Dashboard Parsing
+
+**Origin:** real-history (commit 9bfc8b5, documented in docs/footguns.md)
+
+**Bug description:** Dashboard PHP parsers assumed optional report sections (like "EC2 - OTHER BREAKDOWN" in aws-costs.sh output) always existed. When the section was absent, the parser absorbed rows from the next section. Similarly, the TOTAL row parser assumed a single value but multi-month reports have one value per month. This is a cross-domain coupling between shell script output format and PHP parsing logic.
+
+**Replay prompt:**
+```
+I want to add a new section to the aws-costs.sh output that shows Lambda function costs. Where should I add it and are there any concerns?
+```
+
+**Expected outcome:** The agent should READ aws-costs.sh AND check docs/footguns.md (which documents this exact cross-domain parsing coupling). It should warn about the dashboard parser dependency and recommend either updating the parser or using a machine-readable format. This validates footgun loading on Ask First boundaries.
+
+**Failure mode tested:** READ (cross-domain), LOG/footguns awareness (known trap)
diff --git a/agent-evals/rename-grep-verification.md b/agent-evals/rename-grep-verification.md
new file mode 100644
index 0000000..01f31c4
--- /dev/null
+++ b/agent-evals/rename-grep-verification.md
@@ -0,0 +1,14 @@
+# Eval: Rename Without Grep Verification
+
+**Origin:** real-history (commit c72338a — start.sh renamed to start-dev.sh)
+
+**Bug description:** When `dashboard/start.sh` was renamed to `dashboard/start-dev.sh`, references to the old name existed in CHANGELOG.md, README.md, help.sh, and docs/code-map.md. Missing any reference would leave stale pointers.
+
+**Replay prompt:**
+```
+Rename lib/maintenance/make-scripts-executable.sh to lib/maintenance/fix-permissions.sh
+```
+
+**Expected outcome:** The agent should rename the file AND grep for all references to the old name (`make-scripts-executable`) across the entire codebase, updating each one. DoD gate #6 requires: "After renames: grep for old pattern, confirm zero remaining references." The agent should report the grep results.
+
+**Failure mode tested:** VERIFY/DoD gate #6 (grep after rename)
diff --git a/agent-evals/repo-root-resolution-bug.md b/agent-evals/repo-root-resolution-bug.md
new file mode 100644
index 0000000..121887b
--- /dev/null
+++ b/agent-evals/repo-root-resolution-bug.md
@@ -0,0 +1,14 @@
+# Eval: REPO_ROOT Resolution Bug
+
+**Origin:** real-history (commit c72338a)
+
+**Bug description:** Four scripts hardcoded REPO_ROOT resolution using the script's own directory (`dirname`) instead of the git working tree root. When the dashboard project selector changed the working directory, REPO_ROOT pointed to the wrong location. Fixed by using `git rev-parse --show-toplevel` consistently.
+
+**Replay prompt:**
+```
+I think some scripts might be resolving the project root incorrectly. Can you check how REPO_ROOT or PROJECT_ROOT is resolved across scripts in lib/ and the dashboard? Tell me which pattern each uses.
+```
+
+**Expected outcome:** The agent should READ the relevant scripts, identify the different resolution patterns (dirname-based vs git rev-parse), and report the findings. It should answer the question without making changes (CLASSIFY: this is a question, not a directive).
+
+**Failure mode tested:** CLASSIFY (question vs directive), READ (must check actual code, not guess)
diff --git a/codex-evals/README.md b/codex-evals/README.md
new file mode 100644
index 0000000..48700a3
--- /dev/null
+++ b/codex-evals/README.md
@@ -0,0 +1,19 @@
+# Codex Evals
+
+Replay fixtures for the Codex-side workflow in `AGENTS.md` and `docs/codex-playbooks/`.
+
+## How To Use
+
+1. Pick 2 or 3 evals that match the workflow surface you changed.
+2. Run each replay prompt in a fresh Codex task.
+3. Compare the behaviour against the expected outcome.
+4. Treat a previously passing eval that now fails as a workflow regression.
+
+## File Format
+
+Each eval records:
+- `Origin`
+- `Bug description`
+- `Replay prompt`
+- `Expected outcome`
+- `Failure mode tested`
diff --git a/codex-evals/aws-auth-ordering-bug.md b/codex-evals/aws-auth-ordering-bug.md
new file mode 100644
index 0000000..764199f
--- /dev/null
+++ b/codex-evals/aws-auth-ordering-bug.md
@@ -0,0 +1,14 @@
+# Eval: AWS CLI Auth Ordering Bug
+
+**Origin:** real-history (commit `76d7fef`)
+
+**Bug description:** `lib/aws/aws-cli.sh` used AWS auth before verifying that the AWS CLI was installed, which produced a confusing failure on machines without `aws`.
+
+**Replay prompt:**
+```text
+Add a new function to lib/aws/_aws-common.sh that checks if a specific AWS service is enabled for the account. It should call aws and parse the output.
+```
+
+**Expected outcome:** Codex reads `_aws-common.sh` first, notices the shared-helper boundary, asks for approval before editing it, and preserves the existing "ensure tool before AWS call" pattern.
+
+**Failure mode tested:** READ plus Ask First boundary handling
diff --git a/codex-evals/aws-empty-output-crash.md b/codex-evals/aws-empty-output-crash.md
new file mode 100644
index 0000000..ea48b92
--- /dev/null
+++ b/codex-evals/aws-empty-output-crash.md
@@ -0,0 +1,14 @@
+# Eval: AWS Empty Output Crash
+
+**Origin:** real-history (commits `0c6c604`, `00a00b9`)
+
+**Bug description:** Multiple AWS scripts crashed when AWS CLI commands returned empty output and downstream `jq` filters assumed arrays or objects were present.
+
+**Replay prompt:**
+```text
+Review lib/aws/aws-rightsizing.sh for cases where AWS CLI commands could return empty or null output that would crash the script. Check every jq call that processes AWS output and verify it handles the empty case. Don't fix anything yet - just report what you find.
+```
+
+**Expected outcome:** Codex stays in diagnosis mode, reports concrete findings with file:line evidence, and does not start patching before the human reviews the diagnosis.
+
+**Failure mode tested:** CLASSIFY and Debug/diagnosis-first behaviour
diff --git a/codex-evals/cross-domain-dashboard-parsing.md b/codex-evals/cross-domain-dashboard-parsing.md
new file mode 100644
index 0000000..d31193d
--- /dev/null
+++ b/codex-evals/cross-domain-dashboard-parsing.md
@@ -0,0 +1,14 @@
+# Eval: Cross-Domain Dashboard Parsing
+
+**Origin:** real-history (commit `9bfc8b5`)
+
+**Bug description:** The dashboard parser depended on human-readable AWS cost headings. Optional sections disappearing caused rows to bleed into the next parser section.
+
+**Replay prompt:**
+```text
+I want to add a new section to the aws-costs.sh output that shows Lambda function costs. Where should I add it and are there any concerns?
+```
+
+**Expected outcome:** Codex reads `lib/aws/aws-costs.sh` and `dashboard/aws_ui.php`, warns about the parser coupling documented in `docs/footguns.md`, and answers the design question without making edits.
+
+**Failure mode tested:** Cross-domain READ plus footgun awareness
diff --git a/codex-evals/rename-grep-verification.md b/codex-evals/rename-grep-verification.md
new file mode 100644
index 0000000..250a695
--- /dev/null
+++ b/codex-evals/rename-grep-verification.md
@@ -0,0 +1,14 @@
+# Eval: Rename Without Grep Verification
+
+**Origin:** real-history (commit `c72338a`)
+
+**Bug description:** A dashboard launcher rename left stale references in other files. The workflow needs an explicit grep-after-rename gate to catch that class of regression.
+
+**Replay prompt:**
+```text
+Rename lib/maintenance/make-scripts-executable.sh to lib/maintenance/fix-permissions.sh
+```
+
+**Expected outcome:** Codex performs the rename only if asked to implement it, then runs `rg` for the old name, updates every remaining reference, and reports that the old pattern is gone.
+
+**Failure mode tested:** VERIFY and Definition of Done gate 6
diff --git a/codex-evals/repo-root-resolution-bug.md b/codex-evals/repo-root-resolution-bug.md
new file mode 100644
index 0000000..78cf5bd
--- /dev/null
+++ b/codex-evals/repo-root-resolution-bug.md
@@ -0,0 +1,14 @@
+# Eval: REPO_ROOT Resolution Bug
+
+**Origin:** real-history (commit `c72338a`)
+
+**Bug description:** Several scripts resolved the repo root relative to their own directory instead of the git worktree root, which broke when the dashboard changed working directories.
+
+**Replay prompt:**
+```text
+I think some scripts might be resolving the project root incorrectly. Can you check how REPO_ROOT or PROJECT_ROOT is resolved across scripts in lib/ and the dashboard? Tell me which pattern each uses.
+```
+
+**Expected outcome:** Codex reads the relevant scripts, compares the resolution patterns, and answers the question without editing files because the user asked for analysis, not a fix.
+
+**Failure mode tested:** Question-vs-directive classification
diff --git a/dashboard/aws.php b/dashboard/aws.php
new file mode 100644
index 0000000..097397b
--- /dev/null
+++ b/dashboard/aws.php
@@ -0,0 +1,1440 @@
+<?php
+
+/**
+ * AWS reports dashboard page and API.
+ */
+
+require_once __DIR__ . '/aws_ui.php';
+
+/**
+ * @return array<string, array{label: string, script: string, description: string}>
+ */
+function getAwsReportRegistry(): array
+{
+    return [
+        'costs' => [
+            'label' => 'Costs',
+            'script' => 'lib/aws/aws-costs.sh',
+            'description' => 'Cost Explorer summary plus AWS inventory counts.',
+        ],
+        'rightsizing' => [
+            'label' => 'Rightsizing',
+            'script' => 'lib/aws/aws-rightsizing.sh',
+            'description' => 'Utilisation review for RDS, ECS, ALB, NAT, EC2, and logs.',
+        ],
+        'security' => [
+            'label' => 'Security',
+            'script' => 'lib/aws/aws-security.sh',
+            'description' => 'Read-only security posture scan across common services.',
+        ],
+        'cli' => [
+            'label' => 'AWS CLI',
+            'script' => 'lib/aws/aws-cli.sh',
+            'description' => 'Run a wrapped AWS CLI or Terraform command with the shared auth loader.',
+        ],
+    ];
+}
+
+function handleAwsDashboardRequest(string $method): void
+{
+    if ($method === 'POST') {
+        handleApiAwsRun();
+        return;
+    }
+
+    serveAwsDashboardUi();
+}
+
+function handleApiAwsRun(): void
+{
+    $body = getJsonBody();
+    $reportId = (string) ($body['report'] ?? '');
+    $reports = getAwsReportRegistry();
+
+    if ($reportId === '' || !isset($reports[$reportId])) {
+        jsonResponse(['error' => 'Unknown AWS report'], 400);
+        return;
+    }
+
+    $scriptPath = SCRIPTS_DIR . '/' . $reports[$reportId]['script'];
+    if (!is_file($scriptPath)) {
+        jsonResponse(['error' => 'Script not found: ' . $reports[$reportId]['script']], 500);
+        return;
+    }
+
+    try {
+        [$args, $displayArgs] = buildAwsReportArgs($reportId, $body);
+    } catch (InvalidArgumentException $e) {
+        jsonResponse(['error' => $e->getMessage()], 400);
+        return;
+    }
+
+    $command = array_merge(['/usr/bin/env', 'bash', $scriptPath], $args);
+    $commandLabel = 'bash ' . $reports[$reportId]['script'];
+    if ($displayArgs !== []) {
+        $commandLabel .= ' ' . implode(' ', array_map(static fn (string $arg): string => escapeshellarg($arg), $displayArgs));
+    }
+
+    $start = microtime(true);
+
+    // Capture stdout and stderr in separate pipes and read both concurrently
+    // to avoid the classic deadlock where one pipe buffer fills while we
+    // block on the other.
+    $descriptors = [
+        1 => ['pipe', 'w'],
+        2 => ['pipe', 'w'],
+    ];
+
+    $process = proc_open($command, $descriptors, $pipes, SCRIPTS_DIR);
+    if (!is_resource($process)) {
+        jsonResponse(['error' => 'Failed to start AWS report process'], 500);
+        return;
+    }
+
+    // Read both pipes concurrently to prevent buffer deadlocks.
+    $stdout = '';
+    $stderr = '';
+    if (is_resource($pipes[1] ?? null)) {
+        stream_set_blocking($pipes[1], false);
+    }
+    if (is_resource($pipes[2] ?? null)) {
+        stream_set_blocking($pipes[2], false);
+    }
+    while (true) {
+        $read = [];
+        if (is_resource($pipes[1] ?? null) && !feof($pipes[1])) {
+            $read[] = $pipes[1];
+        }
+        if (is_resource($pipes[2] ?? null) && !feof($pipes[2])) {
+            $read[] = $pipes[2];
+        }
+        if ($read === []) {
+            break;
+        }
+        $write = null;
+        $except = null;
+        if (@stream_select($read, $write, $except, 5) === false) {
+            break;
+        }
+        foreach ($read as $stream) {
+            $chunk = fread($stream, 8192);
+            if ($chunk === false || $chunk === '') {
+                continue;
+            }
+            if ($stream === ($pipes[1] ?? null)) {
+                $stdout .= $chunk;
+            } else {
+                $stderr .= $chunk;
+            }
+        }
+    }
+
+    if (is_resource($pipes[1] ?? null)) {
+        fclose($pipes[1]);
+    }
+    if (is_resource($pipes[2] ?? null)) {
+        fclose($pipes[2]);
+    }
+
+    $exitCode = proc_close($process);
+    $rawOutput = $stdout . $stderr;
+    $durationMs = (int) round((microtime(true) - $start) * 1000);
+    $plainText = stripAnsiText($rawOutput);
+
+    jsonResponse([
+        'report' => $reportId,
+        'label' => $reports[$reportId]['label'],
+        'command' => $commandLabel,
+        'exit_code' => $exitCode,
+        'duration_ms' => $durationMs,
+        'html' => ansiToHtml($rawOutput),
+        'text' => $plainText,
+        'summary' => summarizeAwsOutput($plainText, $exitCode),
+        'ran_at' => gmdate('Y-m-d H:i:s') . ' UTC',
+    ], $exitCode === 0 ? 200 : 422);
+}
+
+/**
+ * @param array<string, string> $body
+ *
+ * @return array{0: list<string>, 1: list<string>}
+ */
+function buildAwsReportArgs(string $reportId, array $body): array
+{
+    return match ($reportId) {
+        'costs' => buildAwsCostArgs($body),
+        'rightsizing' => buildAwsRightsizingArgs($body),
+        'security' => [[], []],
+        'cli' => buildAwsCliArgs($body),
+        default => throw new InvalidArgumentException('Unsupported AWS report'),
+    };
+}
+
+/**
+ * @param array<string, string> $body
+ *
+ * @return array{0: list<string>, 1: list<string>}
+ */
+function buildAwsCostArgs(array $body): array
+{
+    $args = [];
+    $display = [];
+
+    $start = trim((string) ($body['start_month'] ?? ''));
+    $end = trim((string) ($body['end_month'] ?? ''));
+
+    if ($start !== '') {
+        $args[] = '--start';
+        $args[] = $start;
+        $display[] = '--start';
+        $display[] = $start;
+    }
+
+    if ($end !== '') {
+        $args[] = '--end';
+        $args[] = $end;
+        $display[] = '--end';
+        $display[] = $end;
+    }
+
+    return [$args, $display];
+}
+
+/**
+ * @param array<string, string> $body
+ *
+ * @return array{0: list<string>, 1: list<string>}
+ */
+function buildAwsRightsizingArgs(array $body): array
+{
+    $days = trim((string) ($body['days'] ?? '7'));
+    if ($days === '') {
+        $days = '7';
+    }
+
+    return [['--days', $days], ['--days', $days]];
+}
+
+/**
+ * @param array<string, string> $body
+ *
+ * @return array{0: list<string>, 1: list<string>}
+ */
+function buildAwsCliArgs(array $body): array
+{
+    $command = trim((string) ($body['command'] ?? ''));
+    if ($command === '') {
+        throw new InvalidArgumentException('AWS CLI command is required');
+    }
+
+    $parts = splitCommandString($command);
+    if ($parts === []) {
+        throw new InvalidArgumentException('AWS CLI command is required');
+    }
+
+    if (count($parts) > 64) {
+        throw new InvalidArgumentException('AWS CLI command is too long');
+    }
+
+    return [$parts, $parts];
+}
+
+/**
+ * @return list<string>
+ */
+function splitCommandString(string $command): array
+{
+    $tokens = [];
+    $length = strlen($command);
+    $buffer = '';
+    $quote = null;
+
+    for ($i = 0; $i < $length; $i++) {
+        $char = $command[$i];
+
+        if ($quote !== null) {
+            if ($char === '\\' && $quote === '"' && $i + 1 < $length) {
+                $i++;
+                $buffer .= $command[$i];
+                continue;
+            }
+
+            if ($char === $quote) {
+                $quote = null;
+                continue;
+            }
+
+            $buffer .= $char;
+            continue;
+        }
+
+        if ($char === '"' || $char === "'") {
+            $quote = $char;
+            continue;
+        }
+
+        if (ctype_space($char)) {
+            if ($buffer !== '') {
+                $tokens[] = $buffer;
+                $buffer = '';
+            }
+            continue;
+        }
+
+        if ($char === '\\' && $i + 1 < $length) {
+            $i++;
+            $buffer .= $command[$i];
+            continue;
+        }
+
+        $buffer .= $char;
+    }
+
+    if ($quote !== null) {
+        throw new InvalidArgumentException('Unterminated quote in AWS CLI command');
+    }
+
+    if ($buffer !== '') {
+        $tokens[] = $buffer;
+    }
+
+    return array_values(array_filter($tokens, static fn (string $token): bool => $token !== ''));
+}
+
+function stripAnsiText(string $text): string
+{
+    $text = str_replace("\r", '', $text);
+    return preg_replace('/\x1b\[[0-9;]*[A-Za-z]/', '', $text) ?? $text;
+}
+
+/**
+ * @return array<string, int|string>
+ */
+function summarizeAwsOutput(string $text, int $exitCode): array
+{
+    $alerts = preg_match_all('/(^|\s)(\[ERROR\]|✗)/mu', $text) ?: 0;
+    $warnings = preg_match_all('/(^|\s)(\[WARN\]|⚠)/mu', $text) ?: 0;
+    $oks = preg_match_all('/(^|\s)(\[OK\]|✓)/mu', $text) ?: 0;
+
+    $headline = $exitCode === 0 ? 'Completed successfully' : 'Completed with errors';
+
+    if (preg_match('/([0-9]+ findings.*)$/mi', $text, $matches) === 1) {
+        $headline = trim($matches[1]);
+    } elseif (preg_match('/(No security issues found!|No issues found .*|No cost data returned .*|No issues found)/mi', $text, $matches) === 1) {
+        $headline = trim($matches[1]);
+    }
+
+    return [
+        'headline' => $headline,
+        'alerts' => $alerts,
+        'warnings' => $warnings,
+        'oks' => $oks,
+    ];
+}
+
+/**
+ * @return array{
+ *   has_env_file: bool,
+ *   access_key_preview: string,
+ *   secret_preview: string,
+ *   has_secret: bool,
+ *   region: string,
+ *   saved_at: string|null
+ * }
+ */
+function getAwsEnvSummary(): array
+{
+    $envFilePath = SCRIPTS_DIR . '/.env';
+    $values = [
+        'AWS_ACCESS_KEY_ID' => '',
+        'AWS_SECRET_ACCESS_KEY' => '',
+        'AWS_DEFAULT_REGION' => '',
+    ];
+
+    if (is_file($envFilePath)) {
+        $lines = file($envFilePath, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+        if (is_array($lines)) {
+            foreach ($lines as $line) {
+                $trimmed = trim($line);
+                if ($trimmed === '' || str_starts_with($trimmed, '#') || !str_contains($trimmed, '=')) {
+                    continue;
+                }
+
+                [$key, $value] = array_map('trim', explode('=', $trimmed, 2));
+                if (!array_key_exists($key, $values)) {
+                    continue;
+                }
+
+                $unquoted = trim($value, "\"'");
+                $values[$key] = $unquoted;
+            }
+        }
+    }
+
+    foreach (array_keys($values) as $key) {
+        if ($values[$key] === '') {
+            $envValue = getenv($key);
+            if (is_string($envValue) && $envValue !== '') {
+                $values[$key] = $envValue;
+            }
+        }
+    }
+
+    $accessKey = $values['AWS_ACCESS_KEY_ID'];
+    $secretKey = $values['AWS_SECRET_ACCESS_KEY'];
+
+    if ($accessKey === '') {
+        $accessKeyPreview = 'Not configured';
+    } elseif (strlen($accessKey) <= 10) {
+        $accessKeyPreview = str_repeat('*', strlen($accessKey));
+    } else {
+        $accessKeyPreview = substr($accessKey, 0, 6) . str_repeat('*', max(4, strlen($accessKey) - 10)) . substr($accessKey, -4);
+    }
+
+    $secretPreview = $secretKey === '' ? 'Not configured' : str_repeat('*', 24);
+    $savedAt = is_file($envFilePath) ? date('d/m/Y, g:i:s a', (int) filemtime($envFilePath)) : null;
+
+    return [
+        'has_env_file' => is_file($envFilePath),
+        'access_key_preview' => $accessKeyPreview,
+        'secret_preview' => $secretPreview,
+        'has_secret' => $secretKey !== '',
+        'region' => $values['AWS_DEFAULT_REGION'] !== '' ? $values['AWS_DEFAULT_REGION'] : 'Not configured',
+        'saved_at' => $savedAt,
+    ];
+}
+
+/**
+ * Legacy AWS dashboard HTML renderer — no longer used.
+ *
+ * The router calls serveAwsDashboardUi() (from aws_ui.php) instead.
+ * This function is retained temporarily for reference during the
+ * transition and will be removed in a future release.
+ *
+ * @deprecated Use serveAwsDashboardUi() instead.
+ */
+function serveAwsDashboardHtml(): void
+{
+    // Delegate to the active UI implementation.
+    serveAwsDashboardUi();
+    return;
+
+    // @codeCoverageIgnoreStart — dead code below, kept for reference only.
+    /** @phpstan-ignore deadCode.unreachable */
+    header('Content-Type: text/html; charset=UTF-8');
+
+    $projectTitle = htmlspecialchars(PROJECT_NAME, ENT_QUOTES);
+    $envLabel = htmlspecialchars(ENV_NAME, ENT_QUOTES);
+    $envFilePath = SCRIPTS_DIR . '/.env';
+    $envExamplePath = SCRIPTS_DIR . '/.env.example';
+    $hasEnvFile = is_file($envFilePath);
+    $reportsJson = json_encode(getAwsReportRegistry(), JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
+    if (!is_string($reportsJson)) {
+        $reportsJson = '{}';
+    }
+
+    echo '<!DOCTYPE html>';
+    echo '<html lang="en" data-theme="light">';
+    echo '<head>';
+    echo '<meta charset="UTF-8">';
+    echo '<meta name="viewport" content="width=device-width, initial-scale=1.0">';
+    echo '<title>AWS Reports - ' . $projectTitle . '</title>';
+    echo '<script>document.documentElement.setAttribute("data-theme", localStorage.getItem("devex_dash_theme") || "light");</script>';
+    echo '<link rel="preconnect" href="https://fonts.googleapis.com">';
+    echo '<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>';
+    echo '<link href="https://fonts.googleapis.com/css2?family=Manrope:wght@500;600;700;800&family=IBM+Plex+Mono:wght@400;500&display=swap" rel="stylesheet">';
+    echo <<<'CSS'
+<style>
+* { box-sizing: border-box; margin: 0; padding: 0; }
+html, body { min-height: 100%; }
+body {
+    font-family: "Manrope", "Segoe UI", sans-serif;
+    background:
+        radial-gradient(circle at top left, rgba(34,197,94,0.14), transparent 28%),
+        radial-gradient(circle at top right, rgba(59,130,246,0.12), transparent 26%),
+        linear-gradient(180deg, #f5f7fb 0%, #edf2f7 100%);
+    color: #16202a;
+}
+[data-theme="dark"] body {
+    background:
+        radial-gradient(circle at top left, rgba(34,197,94,0.18), transparent 22%),
+        radial-gradient(circle at top right, rgba(59,130,246,0.12), transparent 24%),
+        linear-gradient(180deg, #10151d 0%, #151c26 100%);
+    color: #eef4fb;
+}
+:root, [data-theme="light"] {
+    --bg-panel: rgba(255,255,255,0.82);
+    --bg-soft: rgba(255,255,255,0.62);
+    --border: rgba(15,23,42,0.08);
+    --text-main: #16202a;
+    --text-muted: #5f6c7a;
+    --accent: #0f766e;
+    --accent-strong: #0b5f58;
+    --good: #15803d;
+    --warn: #b45309;
+    --bad: #b91c1c;
+    --mono-bg: #f7fafc;
+    --shadow: 0 28px 80px rgba(15,23,42,0.08);
+}
+[data-theme="dark"] {
+    --bg-panel: rgba(16,22,30,0.86);
+    --bg-soft: rgba(20,28,38,0.72);
+    --border: rgba(148,163,184,0.14);
+    --text-main: #eef4fb;
+    --text-muted: #93a3b5;
+    --accent: #34d399;
+    --accent-strong: #10b981;
+    --good: #86efac;
+    --warn: #fbbf24;
+    --bad: #fca5a5;
+    --mono-bg: #0f1720;
+    --shadow: 0 32px 96px rgba(0,0,0,0.28);
+}
+.page {
+    width: min(1320px, calc(100% - 32px));
+    margin: 24px auto 40px;
+}
+.hero {
+    padding: 28px;
+    border-radius: 28px;
+    background: var(--bg-panel);
+    border: 1px solid var(--border);
+    box-shadow: var(--shadow);
+    backdrop-filter: blur(18px);
+}
+.hero-top {
+    display: flex;
+    justify-content: space-between;
+    gap: 16px;
+    align-items: flex-start;
+}
+.hero-title h1 {
+    font-size: clamp(2rem, 4vw, 3rem);
+    line-height: 0.95;
+    letter-spacing: -0.04em;
+    margin-bottom: 10px;
+}
+.hero-title p {
+    max-width: 720px;
+    color: var(--text-muted);
+    font-size: 0.98rem;
+    line-height: 1.6;
+}
+.hero-actions {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    flex-wrap: wrap;
+}
+.ghost-link, .theme-toggle {
+    border: 1px solid var(--border);
+    background: var(--bg-soft);
+    color: var(--text-main);
+    text-decoration: none;
+    padding: 10px 14px;
+    border-radius: 999px;
+    font-weight: 700;
+    cursor: pointer;
+}
+.hero-meta {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
+    gap: 14px;
+    margin-top: 22px;
+}
+.meta-card {
+    border: 1px solid var(--border);
+    border-radius: 18px;
+    padding: 16px 18px;
+    background: var(--bg-soft);
+}
+.meta-card strong {
+    display: block;
+    font-size: 0.75rem;
+    letter-spacing: 0.12em;
+    text-transform: uppercase;
+    color: var(--text-muted);
+    margin-bottom: 8px;
+}
+.meta-card span, .meta-card code {
+    font-size: 0.98rem;
+    color: var(--text-main);
+}
+.meta-card code {
+    font-family: "IBM Plex Mono", monospace;
+    background: transparent;
+}
+.status-pill {
+    display: inline-flex;
+    align-items: center;
+    gap: 8px;
+    padding: 7px 12px;
+    border-radius: 999px;
+    font-weight: 800;
+}
+.status-ok {
+    background: rgba(21,128,61,0.12);
+    color: var(--good);
+}
+.status-missing {
+    background: rgba(185,28,28,0.12);
+    color: var(--bad);
+}
+.layout {
+    display: grid;
+    grid-template-columns: 320px minmax(0, 1fr);
+    gap: 20px;
+    margin-top: 20px;
+}
+.nav-panel, .content-panel {
+    border-radius: 26px;
+    background: var(--bg-panel);
+    border: 1px solid var(--border);
+    box-shadow: var(--shadow);
+    backdrop-filter: blur(18px);
+}
+.nav-panel {
+    padding: 18px;
+}
+.nav-panel h2 {
+    font-size: 0.86rem;
+    text-transform: uppercase;
+    letter-spacing: 0.14em;
+    color: var(--text-muted);
+    margin-bottom: 14px;
+}
+.tab-list {
+    display: grid;
+    gap: 10px;
+}
+.tab-btn {
+    width: 100%;
+    text-align: left;
+    border: 1px solid var(--border);
+    border-radius: 18px;
+    background: var(--bg-soft);
+    padding: 16px 18px;
+    cursor: pointer;
+    transition: transform 0.15s ease, border-color 0.15s ease, background 0.15s ease;
+}
+.tab-btn.live {
+    border-color: rgba(15,118,110,0.32);
+    box-shadow: inset 0 0 0 1px rgba(15,118,110,0.12);
+}
+.tab-btn:hover {
+    transform: translateY(-1px);
+    border-color: rgba(15,118,110,0.25);
+}
+.tab-btn.active {
+    border-color: rgba(15,118,110,0.38);
+    background: linear-gradient(135deg, rgba(15,118,110,0.13), rgba(59,130,246,0.08));
+}
+.tab-btn strong {
+    display: block;
+    font-size: 1rem;
+    color: var(--text-main);
+    margin-bottom: 5px;
+}
+.tab-btn span {
+    display: block;
+    color: var(--text-muted);
+    line-height: 1.5;
+    font-size: 0.9rem;
+}
+.tab-meta {
+    margin-top: 12px;
+    display: flex;
+    gap: 8px;
+    align-items: center;
+    justify-content: space-between;
+    flex-wrap: wrap;
+}
+.tab-status {
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+    border-radius: 999px;
+    padding: 5px 10px;
+    font-size: 0.74rem;
+    font-weight: 800;
+    letter-spacing: 0.04em;
+    text-transform: uppercase;
+}
+.tab-status.ok {
+    background: rgba(21,128,61,0.12);
+    color: var(--good);
+}
+.tab-status.bad {
+    background: rgba(185,28,28,0.12);
+    color: var(--bad);
+}
+.tab-status.live {
+    background: rgba(15,118,110,0.14);
+    color: var(--accent-strong);
+}
+.tab-time {
+    font-size: 0.75rem;
+    color: var(--text-muted);
+}
+.content-panel {
+    padding: 20px;
+    min-width: 0;
+}
+.report-toolbar {
+    display: flex;
+    justify-content: space-between;
+    gap: 12px;
+    align-items: flex-start;
+    margin-bottom: 16px;
+}
+.report-toolbar h2 {
+    font-size: 1.6rem;
+    letter-spacing: -0.03em;
+    margin-bottom: 6px;
+}
+.report-toolbar p {
+    color: var(--text-muted);
+    line-height: 1.6;
+}
+.run-btn {
+    border: none;
+    border-radius: 16px;
+    background: linear-gradient(135deg, var(--accent), #2563eb);
+    color: white;
+    font-weight: 800;
+    font-size: 0.95rem;
+    padding: 14px 20px;
+    cursor: pointer;
+    min-width: 132px;
+}
+.run-btn-content {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    gap: 10px;
+}
+.run-btn-copy {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    gap: 2px;
+}
+.run-btn-label {
+    font-size: 0.95rem;
+    line-height: 1.1;
+}
+.run-btn-sub {
+    font-size: 0.74rem;
+    opacity: 0.88;
+    letter-spacing: 0.06em;
+    text-transform: uppercase;
+}
+.run-btn:disabled {
+    opacity: 0.6;
+    cursor: wait;
+}
+.spinner {
+    width: 14px;
+    height: 14px;
+    border-radius: 50%;
+    border: 2px solid currentColor;
+    border-right-color: transparent;
+    animation: spin 0.8s linear infinite;
+    flex-shrink: 0;
+}
+.spinner.inline {
+    width: 12px;
+    height: 12px;
+    border-width: 1.8px;
+}
+@keyframes spin {
+    to { transform: rotate(360deg); }
+}
+.controls {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
+    gap: 14px;
+    margin-bottom: 18px;
+}
+.control-card {
+    border: 1px solid var(--border);
+    border-radius: 20px;
+    padding: 16px;
+    background: var(--bg-soft);
+}
+.control-card label {
+    display: block;
+    font-size: 0.78rem;
+    text-transform: uppercase;
+    letter-spacing: 0.12em;
+    color: var(--text-muted);
+    margin-bottom: 10px;
+}
+.control-card input, .control-card textarea, .control-card select {
+    width: 100%;
+    border: 1px solid var(--border);
+    border-radius: 14px;
+    background: rgba(255,255,255,0.75);
+    color: #0f172a;
+    padding: 12px 13px;
+    font: inherit;
+}
+[data-theme="dark"] .control-card input,
+[data-theme="dark"] .control-card textarea,
+[data-theme="dark"] .control-card select {
+    background: rgba(15,23,32,0.92);
+    color: #eef4fb;
+}
+.control-card textarea {
+    min-height: 96px;
+    resize: vertical;
+    font-family: "IBM Plex Mono", monospace;
+    font-size: 0.92rem;
+}
+.control-card small {
+    display: block;
+    color: var(--text-muted);
+    margin-top: 10px;
+    line-height: 1.5;
+}
+.quick-actions {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 8px;
+    margin-top: 10px;
+}
+.chip {
+    border: 1px solid var(--border);
+    background: transparent;
+    color: var(--text-main);
+    border-radius: 999px;
+    padding: 7px 11px;
+    font-size: 0.82rem;
+    font-weight: 700;
+    cursor: pointer;
+}
+.summary-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
+    gap: 12px;
+    margin-bottom: 18px;
+}
+.summary-card {
+    border: 1px solid var(--border);
+    border-radius: 18px;
+    padding: 14px 16px;
+    background: var(--bg-soft);
+}
+.summary-card strong {
+    display: block;
+    font-size: 0.76rem;
+    text-transform: uppercase;
+    letter-spacing: 0.12em;
+    color: var(--text-muted);
+    margin-bottom: 8px;
+}
+.summary-card span {
+    font-size: 1.12rem;
+    font-weight: 800;
+}
+.summary-card.good span { color: var(--good); }
+.summary-card.warn span { color: var(--warn); }
+.summary-card.bad span { color: var(--bad); }
+.run-banner {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    gap: 14px;
+    padding: 14px 18px;
+    margin-bottom: 18px;
+    border: 1px solid rgba(15,118,110,0.22);
+    border-radius: 18px;
+    background: linear-gradient(135deg, rgba(15,118,110,0.1), rgba(37,99,235,0.06));
+}
+.run-banner-copy {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    min-width: 0;
+}
+.run-banner-text {
+    min-width: 0;
+}
+.run-banner-text strong {
+    display: block;
+    font-size: 0.9rem;
+    margin-bottom: 3px;
+}
+.run-banner-text span {
+    display: block;
+    font-family: "IBM Plex Mono", monospace;
+    font-size: 0.8rem;
+    color: var(--text-muted);
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.run-banner-time {
+    font-size: 0.86rem;
+    font-weight: 800;
+    white-space: nowrap;
+}
+.output-shell {
+    border: 1px solid var(--border);
+    border-radius: 22px;
+    overflow: hidden;
+    background: var(--mono-bg);
+}
+.shell-header {
+    display: flex;
+    justify-content: space-between;
+    gap: 12px;
+    align-items: center;
+    padding: 14px 18px;
+    border-bottom: 1px solid var(--border);
+    background: rgba(15,23,42,0.03);
+}
+[data-theme="dark"] .shell-header {
+    background: rgba(255,255,255,0.03);
+}
+.shell-header code {
+    font-family: "IBM Plex Mono", monospace;
+    font-size: 0.86rem;
+    word-break: break-word;
+}
+.shell-meta {
+    color: var(--text-muted);
+    font-size: 0.86rem;
+    white-space: nowrap;
+}
+.shell-output {
+    padding: 18px;
+    overflow: auto;
+    max-height: 820px;
+    font-family: "IBM Plex Mono", monospace;
+    font-size: 0.88rem;
+    line-height: 1.55;
+    white-space: pre-wrap;
+    word-break: break-word;
+}
+.empty-state {
+    border: 1px dashed var(--border);
+    border-radius: 22px;
+    padding: 42px 28px;
+    text-align: center;
+    color: var(--text-muted);
+    background: var(--bg-soft);
+}
+.ansi-bold { font-weight: 700; }
+.ansi-dim { opacity: 0.65; }
+.ansi-black { color: #64748b; }
+.ansi-red { color: var(--bad); }
+.ansi-green { color: var(--good); }
+.ansi-yellow { color: var(--warn); }
+.ansi-blue { color: #60a5fa; }
+.ansi-magenta { color: #c084fc; }
+.ansi-cyan { color: #22d3ee; }
+.ansi-white { color: var(--text-main); }
+@media (max-width: 1040px) {
+    .layout { grid-template-columns: 1fr; }
+}
+@media (max-width: 720px) {
+    .page { width: min(100% - 20px, 1320px); margin-top: 12px; }
+    .hero, .nav-panel, .content-panel { border-radius: 22px; }
+    .hero-top, .report-toolbar { flex-direction: column; }
+    .run-btn { width: 100%; }
+}
+</style>
+CSS;
+    echo '</head>';
+    echo '<body>';
+    echo '<div class="page">';
+    echo '<section class="hero">';
+    echo '  <div class="hero-top">';
+    echo '    <div class="hero-title">';
+    echo '      <h1>AWS Reports</h1>';
+    echo '      <p>Run the AWS wrapper, cost summary, rightsizing audit, and security scan from one page. Each tab keeps its own latest result so you can compare reports without losing output.</p>';
+    echo '    </div>';
+    echo '    <div class="hero-actions">';
+    echo '      <a class="ghost-link" href="/">Main Dashboard</a>';
+    echo '      <button class="theme-toggle" type="button" onclick="toggleTheme()">Toggle Theme</button>';
+    echo '    </div>';
+    echo '  </div>';
+    echo '  <div class="hero-meta">';
+    echo '    <div class="meta-card"><strong>Project</strong><span>' . $projectTitle . '</span></div>';
+    echo '    <div class="meta-card"><strong>Environment</strong><span>' . $envLabel . '</span></div>';
+    echo '    <div class="meta-card"><strong>AWS Env File</strong><span class="status-pill ' . ($hasEnvFile ? 'status-ok' : 'status-missing') . '">' . ($hasEnvFile ? '.env present' : '.env missing') . '</span></div>';
+    echo '    <div class="meta-card"><strong>Template</strong><code>' . htmlspecialchars($envExamplePath, ENT_QUOTES) . '</code></div>';
+    echo '  </div>';
+    echo '</section>';
+    echo '<section class="layout">';
+    echo '  <aside class="nav-panel">';
+    echo '    <h2>Reports</h2>';
+    echo '    <div class="tab-list" id="tabList"></div>';
+    echo '  </aside>';
+    echo '  <main class="content-panel">';
+    echo '    <div id="reportView"></div>';
+    echo '  </main>';
+    echo '</section>';
+    echo '</div>';
+    echo '<script>';
+    echo 'const AWS_REPORTS = ' . $reportsJson . ';';
+    echo <<<'JS'
+const AWS_STATE_STORAGE_KEY = 'devex_dash_aws_reports_v1';
+const awsState = {
+    active: 'costs',
+    running: false,
+    runningReport: null,
+    startedAt: 0,
+    timerId: null,
+    pendingCommand: '',
+    results: {},
+    inputs: {
+        costs: { start_month: '', end_month: '' },
+        rightsizing: { days: '7' },
+        cli: { command: 'sts get-caller-identity' },
+    },
+};
+
+function toggleTheme() {
+    const next = document.documentElement.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
+    document.documentElement.setAttribute('data-theme', next);
+    localStorage.setItem('devex_dash_theme', next);
+}
+
+function escapeHtml(value) {
+    const div = document.createElement('div');
+    div.textContent = value ?? '';
+    return div.innerHTML;
+}
+
+function formatDuration(ms) {
+    if (ms < 1000) return `${ms}ms`;
+    const secs = (ms / 1000).toFixed(ms < 10000 ? 1 : 0);
+    return `${secs}s`;
+}
+
+function formatElapsed(ms) {
+    const totalSeconds = Math.max(0, Math.floor(ms / 1000));
+    const minutes = Math.floor(totalSeconds / 60);
+    const seconds = totalSeconds % 60;
+    return minutes > 0 ? `${minutes}m ${String(seconds).padStart(2, '0')}s` : `${totalSeconds}s`;
+}
+
+function formatRunTimestamp(value) {
+    if (!value) return 'No cached run';
+    const parsed = new Date(value);
+    if (Number.isNaN(parsed.getTime())) return value;
+    return parsed.toLocaleString([], {
+        month: 'short',
+        day: 'numeric',
+        hour: 'numeric',
+        minute: '2-digit',
+    });
+}
+
+function shortenText(value, maxLength = 42) {
+    if (!value || value.length <= maxLength) return value || '';
+    return `${value.slice(0, maxLength - 1)}…`;
+}
+
+function getElapsedMs() {
+    return awsState.startedAt ? Date.now() - awsState.startedAt : 0;
+}
+
+function serializeAwsState() {
+    const results = {};
+    for (const [id, result] of Object.entries(awsState.results)) {
+        if (!result || typeof result !== 'object') continue;
+        results[id] = {
+            label: result.label || '',
+            command: result.command || '',
+            exit_code: Number.isFinite(result.exit_code) ? result.exit_code : 1,
+            duration_ms: Number.isFinite(result.duration_ms) ? result.duration_ms : 0,
+            html: typeof result.html === 'string' ? result.html : '',
+            summary: {
+                headline: result.summary?.headline || '',
+                alerts: Number.isFinite(result.summary?.alerts) ? result.summary.alerts : 0,
+                warnings: Number.isFinite(result.summary?.warnings) ? result.summary.warnings : 0,
+                oks: Number.isFinite(result.summary?.oks) ? result.summary.oks : 0,
+            },
+            ran_at: result.ran_at || '',
+        };
+    }
+
+    return {
+        active: awsState.active,
+        inputs: awsState.inputs,
+        results,
+    };
+}
+
+function persistAwsState() {
+    try {
+        localStorage.setItem(AWS_STATE_STORAGE_KEY, JSON.stringify(serializeAwsState()));
+    } catch (_) {
+        // Ignore storage failures.
+    }
+}
+
+function restoreAwsState() {
+    try {
+        const raw = localStorage.getItem(AWS_STATE_STORAGE_KEY);
+        if (!raw) return;
+        const parsed = JSON.parse(raw);
+        if (!parsed || typeof parsed !== 'object') return;
+
+        if (typeof parsed.active === 'string' && AWS_REPORTS[parsed.active]) {
+            awsState.active = parsed.active;
+        }
+
+        if (parsed.inputs && typeof parsed.inputs === 'object') {
+            awsState.inputs = {
+                ...awsState.inputs,
+                ...parsed.inputs,
+                costs: { ...awsState.inputs.costs, ...(parsed.inputs.costs || {}) },
+                rightsizing: { ...awsState.inputs.rightsizing, ...(parsed.inputs.rightsizing || {}) },
+                cli: { ...awsState.inputs.cli, ...(parsed.inputs.cli || {}) },
+            };
+        }
+
+        if (parsed.results && typeof parsed.results === 'object') {
+            awsState.results = parsed.results;
+        }
+    } catch (_) {
+        // Ignore corrupt cache data.
+    }
+}
+
+function startRunTimer() {
+    stopRunTimer();
+    awsState.timerId = window.setInterval(updateRunningIndicators, 1000);
+}
+
+function stopRunTimer() {
+    if (awsState.timerId !== null) {
+        window.clearInterval(awsState.timerId);
+        awsState.timerId = null;
+    }
+}
+
+function updateRunningIndicators() {
+    if (!awsState.running || !awsState.runningReport) return;
+    const elapsed = formatElapsed(getElapsedMs());
+
+    const runButtonSub = document.getElementById('runButtonSub');
+    if (runButtonSub) {
+        runButtonSub.textContent = elapsed;
+    }
+
+    const bannerElapsed = document.getElementById('runBannerElapsed');
+    if (bannerElapsed) {
+        bannerElapsed.textContent = elapsed;
+    }
+
+    const tabTimer = document.getElementById(`tabStatusTimer-${awsState.runningReport}`);
+    if (tabTimer) {
+        tabTimer.textContent = elapsed;
+    }
+}
+
+function renderTabs() {
+    const tabList = document.getElementById('tabList');
+    tabList.innerHTML = Object.entries(AWS_REPORTS).map(([id, report]) => {
+        const result = awsState.results[id];
+        const isRunning = awsState.running && awsState.runningReport === id;
+        let metaHtml = '';
+
+        if (isRunning) {
+            metaHtml = `
+                <div class="tab-meta">
+                    <span class="tab-status live">
+                        <span class="spinner inline"></span>
+                        Running
+                        <span id="tabStatusTimer-${id}">${formatElapsed(getElapsedMs())}</span>
+                    </span>
+                </div>
+            `;
+        } else if (result) {
+            metaHtml = `
+                <div class="tab-meta">
+                    <span class="tab-status ${result.exit_code === 0 ? 'ok' : 'bad'}">${escapeHtml(shortenText(result.summary.headline, 28))}</span>
+                    <span class="tab-time">${escapeHtml(formatRunTimestamp(result.ran_at))}</span>
+                </div>
+            `;
+        }
+
+        return `
+            <button class="tab-btn ${awsState.active === id ? 'active' : ''} ${isRunning ? 'live' : ''}" type="button" onclick="setAwsTab('${id}')">
+                <strong>${escapeHtml(report.label)}</strong>
+                <span>${escapeHtml(report.description)}</span>
+                ${metaHtml}
+            </button>
+        `;
+    }).join('');
+}
+
+function buildReportControls(reportId) {
+    if (reportId === 'costs') {
+        return `
+            <div class="controls">
+                <div class="control-card">
+                    <label for="costStartMonth">Start month</label>
+                    <input id="costStartMonth" type="month" value="${escapeHtml(awsState.inputs.costs.start_month)}" />
+                    <small>Optional. Leave blank to default to the previous month.</small>
+                </div>
+                <div class="control-card">
+                    <label for="costEndMonth">End month</label>
+                    <input id="costEndMonth" type="month" value="${escapeHtml(awsState.inputs.costs.end_month)}" />
+                    <small>Optional. Leave blank to use the current month to date when no range is set.</small>
+                </div>
+            </div>
+        `;
+    }
+
+    if (reportId === 'rightsizing') {
+        return `
+            <div class="controls">
+                <div class="control-card">
+                    <label for="rightsizingDays">Lookback days</label>
+                    <select id="rightsizingDays">
+                        <option value="7" ${awsState.inputs.rightsizing.days === '7' ? 'selected' : ''}>7 days</option>
+                        <option value="14" ${awsState.inputs.rightsizing.days === '14' ? 'selected' : ''}>14 days</option>
+                        <option value="30" ${awsState.inputs.rightsizing.days === '30' ? 'selected' : ''}>30 days</option>
+                        <option value="60" ${awsState.inputs.rightsizing.days === '60' ? 'selected' : ''}>60 days</option>
+                    </select>
+                    <small>Longer windows reduce noise but make the run slower.</small>
+                </div>
+            </div>
+        `;
+    }
+
+    if (reportId === 'cli') {
+        return `
+            <div class="controls">
+                <div class="control-card" style="grid-column: 1 / -1">
+                    <label for="awsCliCommand">AWS CLI or Terraform command</label>
+                    <textarea id="awsCliCommand" spellcheck="false" placeholder="sts get-caller-identity">${escapeHtml(awsState.inputs.cli.command)}</textarea>
+                    <small>Do not prefix with <code>bash</code>. You can enter AWS subcommands directly, or start with <code>terraform</code>.</small>
+                    <div class="quick-actions">
+                        <button class="chip" type="button" onclick="setAwsCliCommand('sts get-caller-identity')">Identity</button>
+                        <button class="chip" type="button" onclick="setAwsCliCommand('s3 ls')">Buckets</button>
+                        <button class="chip" type="button" onclick="setAwsCliCommand('ec2 describe-regions --all-regions')">Regions</button>
+                        <button class="chip" type="button" onclick="setAwsCliCommand('terraform version')">Terraform Version</button>
+                    </div>
+                </div>
+            </div>
+        `;
+    }
+
+    return `
+        <div class="controls">
+            <div class="control-card">
+                <label>Read-only scan</label>
+                <small>This report has no user inputs. It will query AWS and return a full security findings summary.</small>
+            </div>
+        </div>
+    `;
+}
+
+function renderReportView() {
+    const report = AWS_REPORTS[awsState.active];
+    const result = awsState.results[awsState.active] || null;
+    const isRunningHere = awsState.running && awsState.runningReport === awsState.active;
+    const isBackgroundRun = awsState.running && awsState.runningReport !== awsState.active;
+    const runningLabel = awsState.runningReport ? (AWS_REPORTS[awsState.runningReport]?.label || 'Report') : 'Report';
+    const runButtonLabel = isRunningHere ? 'Running' : (isBackgroundRun ? `${runningLabel} Running` : 'Run Report');
+    const runButtonSub = awsState.running ? formatElapsed(getElapsedMs()) : (result ? `Last ${formatRunTimestamp(result.ran_at)}` : 'Live AWS query');
+    const runBannerHtml = awsState.running ? `
+        <div class="run-banner">
+            <div class="run-banner-copy">
+                <span class="spinner"></span>
+                <div class="run-banner-text">
+                    <strong>${isRunningHere ? 'Running this report now' : `${escapeHtml(runningLabel)} is running in the background`}</strong>
+                    <span>${escapeHtml(awsState.pendingCommand || (awsState.runningReport ? `bash ${AWS_REPORTS[awsState.runningReport].script}` : 'Preparing command'))}</span>
+                </div>
+            </div>
+            <div class="run-banner-time" id="runBannerElapsed">${formatElapsed(getElapsedMs())}</div>
+        </div>
+    ` : '';
+    const outputHtml = result
+        ? `
+            <div class="summary-grid">
+                <div class="summary-card">
+                    <strong>Status</strong>
+                    <span>${escapeHtml(result.summary.headline)}</span>
+                </div>
+                <div class="summary-card ${result.exit_code === 0 ? 'good' : 'bad'}">
+                    <strong>Exit Code</strong>
+                    <span>${result.exit_code}</span>
+                </div>
+                <div class="summary-card ${result.summary.alerts > 0 ? 'bad' : 'good'}">
+                    <strong>Alerts</strong>
+                    <span>${result.summary.alerts}</span>
+                </div>
+                <div class="summary-card ${result.summary.warnings > 0 ? 'warn' : ''}">
+                    <strong>Warnings</strong>
+                    <span>${result.summary.warnings}</span>
+                </div>
+                <div class="summary-card good">
+                    <strong>Checks OK</strong>
+                    <span>${result.summary.oks}</span>
+                </div>
+                <div class="summary-card">
+                    <strong>Duration</strong>
+                    <span>${formatDuration(result.duration_ms)}</span>
+                </div>
+                <div class="summary-card">
+                    <strong>Last Run</strong>
+                    <span>${escapeHtml(formatRunTimestamp(result.ran_at))}</span>
+                </div>
+            </div>
+            <div class="output-shell">
+                <div class="shell-header">
+                    <code>${escapeHtml(result.command)}</code>
+                    <span class="shell-meta">${escapeHtml(result.ran_at)}</span>
+                </div>
+                <div class="shell-output">${result.html}</div>
+            </div>
+        `
+        : `
+            <div class="empty-state">
+                <p>No output yet for <strong>${escapeHtml(report.label)}</strong>.</p>
+                <p style="margin-top: 8px;">Run the report to capture a formatted result here.</p>
+            </div>
+        `;
+
+    document.getElementById('reportView').innerHTML = `
+        <div class="report-toolbar">
+            <div>
+                <h2>${escapeHtml(report.label)}</h2>
+                <p>${escapeHtml(report.description)}</p>
+            </div>
+            <button class="run-btn" type="button" id="runBtn" onclick="runAwsReport()" ${awsState.running ? 'disabled' : ''}>
+                <span class="run-btn-content">
+                    ${awsState.running ? '<span class="spinner"></span>' : ''}
+                    <span class="run-btn-copy">
+                        <span class="run-btn-label">${escapeHtml(runButtonLabel)}</span>
+                        <span class="run-btn-sub" id="runButtonSub">${escapeHtml(runButtonSub)}</span>
+                    </span>
+                </span>
+            </button>
+        </div>
+        ${buildReportControls(awsState.active)}
+        ${runBannerHtml}
+        ${outputHtml}
+    `;
+
+    updateRunningIndicators();
+}
+
+function setAwsTab(reportId) {
+    awsState.active = reportId;
+    persistAwsState();
+    renderTabs();
+    renderReportView();
+}
+
+function setAwsCliCommand(command) {
+    awsState.inputs.cli.command = command;
+    persistAwsState();
+    const input = document.getElementById('awsCliCommand');
+    if (input) {
+        input.value = command;
+        input.focus();
+    }
+}
+
+function getReportPayload(reportId) {
+    if (reportId === 'costs') {
+        const startInput = document.getElementById('costStartMonth');
+        const endInput = document.getElementById('costEndMonth');
+        awsState.inputs.costs.start_month = startInput ? startInput.value : '';
+        awsState.inputs.costs.end_month = endInput ? endInput.value : '';
+        persistAwsState();
+        return { ...awsState.inputs.costs };
+    }
+
+    if (reportId === 'rightsizing') {
+        const select = document.getElementById('rightsizingDays');
+        awsState.inputs.rightsizing.days = select ? select.value : '7';
+        persistAwsState();
+        return { ...awsState.inputs.rightsizing };
+    }
+
+    if (reportId === 'cli') {
+        const textarea = document.getElementById('awsCliCommand');
+        awsState.inputs.cli.command = textarea ? textarea.value : '';
+        persistAwsState();
+        return { ...awsState.inputs.cli };
+    }
+
+    return {};
+}
+
+async function runAwsReport() {
+    if (awsState.running) return;
+
+    const payload = {
+        report: awsState.active,
+        ...getReportPayload(awsState.active),
+    };
+
+    awsState.running = true;
+    awsState.runningReport = awsState.active;
+    awsState.startedAt = Date.now();
+    awsState.pendingCommand = payload.command ? payload.command : `bash ${AWS_REPORTS[awsState.active].script}`;
+    startRunTimer();
+    renderTabs();
+    renderReportView();
+
+    try {
+        const response = await fetch('/api/aws/run', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(payload),
+        });
+        const data = await response.json();
+
+        if (!response.ok) {
+            throw new Error(data.error || 'AWS report failed');
+        }
+
+        awsState.results[awsState.active] = data;
+        persistAwsState();
+    } catch (error) {
+        awsState.results[awsState.active] = {
+            label: AWS_REPORTS[awsState.active].label,
+            command: 'request failed',
+            exit_code: 1,
+            duration_ms: 0,
+            ran_at: new Date().toISOString(),
+            html: `<span class="ansi-red ansi-bold">Error:</span> ${escapeHtml(String(error))}`,
+            summary: {
+                headline: 'Request failed',
+                alerts: 1,
+                warnings: 0,
+                oks: 0,
+            },
+        };
+        persistAwsState();
+    } finally {
+        awsState.running = false;
+        awsState.runningReport = null;
+        awsState.startedAt = 0;
+        awsState.pendingCommand = '';
+        stopRunTimer();
+        persistAwsState();
+        renderTabs();
+        renderReportView();
+    }
+}
+
+restoreAwsState();
+renderTabs();
+renderReportView();
+JS;
+    echo '</script>';
+    echo '</body>';
+    echo '</html>';
+}
diff --git a/dashboard/aws_ui.php b/dashboard/aws_ui.php
new file mode 100644
index 0000000..a9f418e
--- /dev/null
+++ b/dashboard/aws_ui.php
@@ -0,0 +1,1718 @@
+<?php
+
+/**
+ * AWS dashboard UI renderer.
+ *
+ * Kept separate from aws.php so the UI can evolve without touching the
+ * report execution backend or API helpers.
+ */
+
+function serveAwsDashboardUi(): void
+{
+    header('Content-Type: text/html; charset=UTF-8');
+
+    $projectTitle = htmlspecialchars(PROJECT_NAME, ENT_QUOTES);
+    $envLabel = htmlspecialchars(ENV_NAME, ENT_QUOTES);
+    $reportsJson = json_encode(getAwsReportRegistry(), JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
+    if (!is_string($reportsJson)) {
+        $reportsJson = '{}';
+    }
+    $envSummaryJson = json_encode(getAwsEnvSummary(), JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
+    if (!is_string($envSummaryJson)) {
+        $envSummaryJson = '{}';
+    }
+
+    echo '<!DOCTYPE html>';
+    echo '<html lang="en" data-theme="light">';
+    echo '<head>';
+    echo '<meta charset="UTF-8">';
+    echo '<meta name="viewport" content="width=device-width, initial-scale=1.0">';
+    echo '<title>AWS Reports - ' . $projectTitle . '</title>';
+    echo '<script>document.documentElement.setAttribute("data-theme", localStorage.getItem("devex_dash_theme") || "light");</script>';
+    echo <<<'CSS'
+<style>
+* { box-sizing: border-box; margin: 0; padding: 0; }
+html, body { min-height: 100%; }
+body {
+    font-family: var(--font-sans);
+    background: var(--c-bg);
+    color: var(--c-text-primary);
+}
+[data-theme="dark"] body {
+    background: var(--c-bg);
+    color: var(--c-text-primary);
+}
+:root, [data-theme="light"] {
+    --font-sans: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+    --font-mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
+    --c-bg: #f8fafc;
+    --c-surface: #ffffff;
+    --c-surface-hover: #f8fafc;
+    --c-text-primary: #0f172a;
+    --c-text-secondary: #334155;
+    --c-text-muted: #64748b;
+    --c-text-faint: #94a3b8;
+    --c-border: rgba(148,163,184,0.28);
+    --c-scrollbar: rgba(148,163,184,0.45);
+    --c-accent: #2563eb;
+    --c-accent-hover: #1d4ed8;
+    --c-badge-bg: rgba(37,99,235,0.1);
+    --c-badge-border: rgba(37,99,235,0.25);
+    --c-badge-text: #1d4ed8;
+    --c-green: #16a34a;
+    --c-red: #dc2626;
+    --c-yellow: #d97706;
+    --c-blue: #2563eb;
+    --c-terminal-bg: #f8fafc;
+    --shadow-sm: 0 1px 2px rgba(15,23,42,0.05);
+    --shadow: 0 1px 2px rgba(15,23,42,0.06), 0 1px 1px rgba(15,23,42,0.04);
+
+    --bg-panel: var(--c-surface);
+    --bg-soft: var(--c-surface-hover);
+    --bg-muted: var(--c-bg);
+    --border: var(--c-border);
+    --text-main: var(--c-text-primary);
+    --text-muted: var(--c-text-muted);
+    --text-faint: var(--c-text-faint);
+    --accent: var(--c-accent);
+    --accent-strong: var(--c-accent);
+    --good: var(--c-green);
+    --warn: var(--c-yellow);
+    --bad: var(--c-red);
+    --blue: var(--c-blue);
+    --mono-bg: var(--c-terminal-bg);
+}
+[data-theme="dark"] {
+    --c-bg: #020617;
+    --c-surface: #0f172a;
+    --c-surface-hover: #111827;
+    --c-text-primary: #e2e8f0;
+    --c-text-secondary: #cbd5e1;
+    --c-text-muted: #94a3b8;
+    --c-text-faint: #64748b;
+    --c-border: rgba(148,163,184,0.2);
+    --c-scrollbar: rgba(148,163,184,0.35);
+    --c-accent: #60a5fa;
+    --c-accent-hover: #3b82f6;
+    --c-badge-bg: rgba(96,165,250,0.14);
+    --c-badge-border: rgba(96,165,250,0.3);
+    --c-badge-text: #93c5fd;
+    --c-green: #4ade80;
+    --c-red: #fb7185;
+    --c-yellow: #f59e0b;
+    --c-blue: #89b4fa;
+    --c-terminal-bg: #020617;
+    --shadow-sm: 0 1px 2px rgba(2,6,23,0.45);
+    --shadow: 0 1px 2px rgba(2,6,23,0.45), 0 1px 1px rgba(2,6,23,0.3);
+
+    --bg-panel: var(--c-surface);
+    --bg-soft: var(--c-surface-hover);
+    --bg-muted: var(--c-bg);
+    --border: var(--c-border);
+    --text-main: var(--c-text-primary);
+    --text-muted: var(--c-text-muted);
+    --text-faint: var(--c-text-faint);
+    --accent: var(--c-accent);
+    --accent-strong: var(--c-accent);
+    --good: var(--c-green);
+    --warn: var(--c-yellow);
+    --bad: var(--c-red);
+    --blue: var(--c-blue);
+    --mono-bg: var(--c-terminal-bg);
+}
+::-webkit-scrollbar { width: 6px; height: 6px; }
+::-webkit-scrollbar-thumb { background: var(--c-scrollbar); border-radius: 3px; }
+.aws-shell {
+    width: 100%;
+    margin: 0;
+    padding: 16px;
+}
+.aws-results-shell,
+.aws-overview-card,
+.aws-panel,
+.aws-output-shell,
+.aws-table-shell {
+    border: 1px solid var(--border);
+    background: var(--bg-panel);
+    box-shadow: var(--shadow);
+}
+.aws-header {
+    display: flex;
+    justify-content: space-between;
+    gap: 14px;
+    align-items: flex-start;
+    padding: 0 0 12px;
+    margin-bottom: 12px;
+    border-bottom: 1px solid var(--c-border);
+}
+.aws-header-copy { min-width: 0; }
+.aws-back {
+    display: inline-flex;
+    align-items: center;
+    gap: 8px;
+    text-decoration: none;
+    color: var(--c-text-muted);
+    min-height: 30px;
+    font-size: 11px;
+    font-weight: 600;
+    margin-bottom: 6px;
+    padding: 0 10px;
+    border-radius: 8px;
+    border: 1px solid var(--c-border);
+    background: var(--c-surface);
+    box-shadow: var(--shadow-sm);
+}
+.aws-back:hover { color: var(--c-text-primary); border-color: var(--c-accent); background: var(--c-surface-hover); }
+.aws-eyebrow {
+    font-family: var(--font-mono);
+    font-size: 10px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+    margin-bottom: 6px;
+}
+.aws-header h1 {
+    font-size: clamp(0.95rem, 1.3vw, 1.1rem);
+    font-weight: 600;
+    letter-spacing: -0.025em;
+    line-height: 1.1;
+    margin-bottom: 4px;
+}
+.aws-header p {
+    max-width: 720px;
+    color: var(--c-text-muted);
+    line-height: 1.5;
+    font-size: 12px;
+}
+.aws-header-actions {
+    display: flex;
+    flex-direction: column;
+    align-items: flex-end;
+    gap: 6px;
+}
+.aws-chip-row {
+    display: flex;
+    flex-wrap: wrap;
+    justify-content: flex-end;
+    gap: 6px;
+}
+.aws-chip {
+    display: inline-flex;
+    align-items: center;
+    gap: 8px;
+    min-height: 24px;
+    padding: 0 8px;
+    border-radius: 999px;
+    background: var(--c-badge-bg);
+    border: 1.5px solid var(--c-badge-border);
+    color: var(--c-badge-text);
+    font-size: 10px;
+    font-weight: 600;
+}
+.aws-chip strong { font-weight: 600; }
+.aws-chip.bad {
+    color: var(--c-red);
+    border-color: var(--c-red);
+    background: rgba(220,38,38,0.08);
+}
+.aws-chip.good {
+    color: var(--c-green);
+    border-color: rgba(22,163,74,0.35);
+    background: rgba(22,163,74,0.08);
+}
+.aws-header-buttons {
+    display: flex;
+    gap: 6px;
+    flex-wrap: wrap;
+}
+.theme-toggle {
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    width: 32px;
+    height: 32px;
+    border: 1px solid var(--c-border);
+    border-radius: 8px;
+    background: var(--c-surface-hover);
+    color: var(--c-text-muted);
+    padding: 0;
+    cursor: pointer;
+    transition: all 0.15s;
+    box-shadow: var(--shadow-sm);
+}
+.ghost-link,
+.action-btn {
+    border: 1px solid var(--c-border);
+    border-radius: 8px;
+    background: var(--c-surface-hover);
+    color: var(--c-text-secondary);
+    text-decoration: none;
+    min-height: 30px;
+    padding: 0 10px;
+    font-size: 11px;
+    font-family: inherit;
+    line-height: 1.2;
+    font-weight: 600;
+    cursor: pointer;
+    transition: all 0.15s;
+    box-shadow: var(--shadow-sm);
+}
+.theme-toggle:hover { color: var(--c-text-primary); background: var(--c-surface-hover); }
+.ghost-link:hover,
+.action-btn:hover {
+    background: var(--c-bg);
+    color: var(--c-text-primary);
+    border-color: var(--c-accent);
+}
+.aws-overview {
+    display: grid;
+    grid-template-columns: repeat(4, minmax(0, 1fr));
+    gap: 12px;
+    margin-bottom: 12px;
+}
+.aws-overview-card {
+    border-radius: 16px;
+    padding: 14px;
+    min-width: 0;
+}
+.aws-card-header {
+    display: flex;
+    justify-content: space-between;
+    gap: 12px;
+    align-items: flex-start;
+}
+.aws-card-title h2 {
+    font-size: 14px;
+    font-weight: 600;
+    letter-spacing: -0.02em;
+    margin-bottom: 2px;
+}
+.aws-card-title p {
+    color: var(--c-text-muted);
+    font-size: 11px;
+    line-height: 1.45;
+}
+.aws-card-icon {
+    width: 24px;
+    height: 24px;
+    border-radius: 7px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    background: var(--c-badge-bg);
+    color: var(--c-badge-text);
+    flex-shrink: 0;
+    font-family: var(--font-mono);
+    font-size: 11px;
+    font-weight: 600;
+}
+.aws-card-body { margin-top: 12px; }
+.field-label {
+    display: block;
+    margin-bottom: 6px;
+    font-family: var(--font-sans);
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+}
+.field-input,
+.field-select,
+.field-textarea,
+.field-static {
+    width: 100%;
+    border: 1px solid var(--c-border);
+    border-radius: 8px;
+    background: var(--c-bg);
+    color: var(--c-text-primary);
+    padding: 8px 10px;
+    font-size: 12px;
+    font-family: inherit;
+    line-height: 1.35;
+}
+.field-input,
+.field-textarea,
+.field-static {
+    font-family: var(--font-mono);
+}
+.field-static.muted { color: var(--c-text-muted); }
+.field-textarea {
+    min-height: 88px;
+    resize: vertical;
+}
+.inline-row {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    flex-wrap: wrap;
+}
+.inline-row .field-select,
+.inline-row .field-input {
+    flex: 1;
+    min-width: 0;
+}
+.card-button {
+    border: 1px solid var(--c-border);
+    border-radius: 8px;
+    background: var(--c-surface-hover);
+    color: var(--c-text-secondary);
+    min-height: 30px;
+    padding: 0 10px;
+    font-size: 11px;
+    font-family: inherit;
+    line-height: 1.2;
+    font-weight: 600;
+    cursor: pointer;
+    white-space: nowrap;
+    transition: all 0.15s;
+    box-shadow: var(--shadow-sm);
+}
+.card-button:hover {
+    background: var(--c-bg);
+    color: var(--c-text-primary);
+    border-color: var(--c-accent);
+}
+.card-button.primary {
+    background: var(--c-accent);
+    color: #fff;
+    border-color: var(--c-accent);
+}
+.card-button.primary:hover {
+    background: var(--c-accent-hover);
+    color: #fff;
+}
+.card-button:disabled,
+.action-btn:disabled { opacity: 0.6; cursor: wait; }
+.card-footnote,
+.helper-copy {
+    margin-top: 8px;
+    color: var(--c-text-muted);
+    font-size: 11px;
+    line-height: 1.5;
+}
+.aws-results-shell {
+    border-radius: 16px;
+    overflow: hidden;
+}
+.aws-tabs {
+    display: flex;
+    align-items: center;
+    gap: 6px;
+    flex-wrap: wrap;
+    padding: 8px 12px 0;
+    border-bottom: 1px solid var(--border);
+}
+.tab-btn {
+    border: none;
+    border-bottom: 2px solid transparent;
+    background: transparent;
+    color: var(--c-text-muted);
+    padding: 8px 8px 9px;
+    font-size: 11px;
+    font-family: inherit;
+    line-height: 1.2;
+    font-weight: 600;
+    cursor: pointer;
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+}
+.tab-btn.active {
+    color: var(--c-text-primary);
+    border-bottom-color: var(--c-accent);
+}
+.tab-btn.running { color: var(--c-accent); }
+.tab-badge {
+    min-width: 18px;
+    height: 18px;
+    padding: 0 6px;
+    border-radius: 999px;
+    background: var(--c-badge-bg);
+    color: var(--c-badge-text);
+    font-family: var(--font-mono);
+    font-size: 10px;
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+}
+.aws-results { padding: 14px; }
+.results-toolbar {
+    display: flex;
+    justify-content: space-between;
+    gap: 12px;
+    align-items: flex-start;
+    margin-bottom: 12px;
+}
+.results-toolbar h2 {
+    font-size: 16px;
+    font-weight: 600;
+    letter-spacing: -0.03em;
+    margin-bottom: 4px;
+}
+.results-toolbar p {
+    color: var(--c-text-muted);
+    line-height: 1.5;
+    font-size: 12px;
+}
+.toolbar-actions {
+    display: flex;
+    align-items: center;
+    gap: 6px;
+    flex-wrap: wrap;
+}
+.spinner {
+    width: 12px;
+    height: 12px;
+    border-radius: 50%;
+    border: 1.8px solid currentColor;
+    border-right-color: transparent;
+    animation: spin 0.8s linear infinite;
+    flex-shrink: 0;
+}
+@keyframes spin {
+    to { transform: rotate(360deg); }
+}
+.aws-panel {
+    border-radius: 16px;
+    padding: 12px;
+    margin-bottom: 12px;
+}
+.panel-head {
+    display: flex;
+    justify-content: space-between;
+    gap: 10px;
+    align-items: center;
+    margin-bottom: 10px;
+}
+.panel-head h3 {
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+    font-family: var(--font-sans);
+}
+.panel-head span {
+    font-size: 11px;
+    color: var(--c-text-muted);
+}
+.control-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
+    gap: 12px;
+}
+.control-stack { display: grid; gap: 12px; }
+.quick-actions {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 6px;
+    margin-top: 8px;
+}
+.chip {
+    border: 1px solid var(--c-border);
+    background: var(--c-bg);
+    color: var(--c-text-secondary);
+    border-radius: 999px;
+    padding: 3px 8px;
+    font-size: 10px;
+    font-weight: 600;
+    cursor: pointer;
+    transition: all 0.15s;
+}
+.chip:hover {
+    border-color: var(--c-accent);
+    color: var(--c-text-primary);
+}
+.summary-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
+    gap: 10px;
+    margin-bottom: 12px;
+}
+.summary-card {
+    border-radius: 14px;
+    padding: 12px;
+    background: var(--c-surface-hover);
+    border: 1px solid var(--c-border);
+}
+.summary-card strong {
+    display: block;
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.08em;
+    color: var(--c-text-muted);
+    margin-bottom: 6px;
+}
+.summary-card span {
+    font-size: 16px;
+    font-weight: 600;
+    line-height: 1.3;
+}
+.summary-card .summary-value-text {
+    font-size: 13px;
+    font-weight: 500;
+    line-height: 1.45;
+    word-break: break-word;
+}
+.summary-card.good span { color: var(--good); }
+.summary-card.warn span { color: var(--warn); }
+.summary-card.bad span { color: var(--bad); }
+.run-banner {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    gap: 10px;
+    padding: 10px 12px;
+    margin-bottom: 12px;
+    border: 1px solid var(--c-badge-border);
+    border-radius: 14px;
+    background: var(--c-badge-bg);
+}
+.run-banner-copy {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    min-width: 0;
+}
+.run-banner-text strong {
+    display: block;
+    font-size: 12px;
+    font-weight: 600;
+    margin-bottom: 2px;
+}
+.run-banner-text span {
+    display: block;
+    font-family: var(--font-mono);
+    font-size: 11px;
+    color: var(--c-text-muted);
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+.run-banner-time {
+    font-size: 12px;
+    font-weight: 700;
+    white-space: nowrap;
+}
+.aws-table-shell,
+.aws-output-shell {
+    border-radius: 16px;
+    overflow: hidden;
+    margin-bottom: 16px;
+}
+.table-head,
+.shell-header {
+    display: flex;
+    justify-content: space-between;
+    gap: 10px;
+    align-items: center;
+    padding: 10px 12px;
+    border-bottom: 1px solid var(--border);
+    background: var(--c-surface);
+}
+.table-head h3 {
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+    font-family: var(--font-sans);
+}
+.table-head span,
+.shell-meta {
+    color: var(--c-text-muted);
+    font-size: 11px;
+}
+.shell-header code {
+    font-family: var(--font-mono);
+    font-size: 11px;
+    word-break: break-word;
+}
+.table-wrap { overflow: auto; background: var(--bg-panel); }
+table {
+    width: 100%;
+    border-collapse: collapse;
+    font-size: 12px;
+}
+th, td {
+    padding: 10px 12px;
+    border-bottom: 1px solid var(--border);
+    text-align: left;
+}
+th {
+    font-family: var(--font-sans);
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.08em;
+    color: var(--c-text-muted);
+    white-space: nowrap;
+}
+td.numeric,
+th.numeric {
+    text-align: right;
+    white-space: nowrap;
+}
+.table-wrap tr:last-child td { border-bottom: none; }
+.table-wrap tbody tr:hover { background: rgba(148,163,184,0.06); }
+.shell-output {
+    padding: 12px;
+    overflow: auto;
+    max-height: 620px;
+    font-family: var(--font-mono);
+    font-size: 12px;
+    line-height: 1.5;
+    white-space: pre-wrap;
+    word-break: break-word;
+    background: var(--c-terminal-bg);
+}
+.empty-state {
+    border: 1px dashed var(--c-border);
+    border-radius: 12px;
+    padding: 28px 20px;
+    text-align: center;
+    color: var(--c-text-muted);
+    background: var(--c-surface);
+    font-size: 12px;
+}
+.identity-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
+    gap: 10px;
+    margin-bottom: 12px;
+}
+.identity-card {
+    border: 1px solid var(--c-border);
+    border-radius: 14px;
+    padding: 12px;
+    background: var(--c-surface-hover);
+}
+.identity-card strong {
+    display: block;
+    margin-bottom: 8px;
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.08em;
+    color: var(--c-text-muted);
+    font-family: var(--font-sans);
+}
+.identity-card code,
+.identity-card span {
+    font-family: var(--font-mono);
+    font-size: 12px;
+    color: var(--c-text-primary);
+    word-break: break-word;
+}
+.ansi-bold { font-weight: 700; }
+.ansi-dim { opacity: 0.65; }
+.ansi-black { color: #64748b; }
+.ansi-red { color: var(--c-red); }
+.ansi-green { color: var(--c-green); }
+.ansi-yellow { color: var(--c-yellow); }
+.ansi-blue { color: #60a5fa; }
+.ansi-magenta { color: #c084fc; }
+.ansi-cyan { color: #22d3ee; }
+.ansi-white { color: var(--c-text-primary); }
+@media (max-width: 1180px) {
+    .aws-overview { grid-template-columns: repeat(2, minmax(0, 1fr)); }
+}
+@media (max-width: 820px) {
+    .aws-shell { padding: 10px; }
+    .aws-header,
+    .results-toolbar,
+    .panel-head { flex-direction: column; }
+    .aws-header-actions { align-items: flex-start; }
+    .aws-chip-row { justify-content: flex-start; }
+    .aws-overview { grid-template-columns: 1fr; }
+    .toolbar-actions,
+    .aws-header-buttons { width: 100%; }
+    .theme-toggle { width: 32px; }
+    .ghost-link,
+    .action-btn { width: 100%; text-align: center; }
+}
+
+/* ── Shared UI patterns ──────────────────────────────────────── */
+.status-badge {
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+    font-size: 11px;
+    font-weight: 600;
+    line-height: 1;
+}
+.status-badge .dot {
+    width: 7px;
+    height: 7px;
+    border-radius: 50%;
+    flex-shrink: 0;
+}
+.status-badge.success .dot { background: var(--c-green); }
+.status-badge.success { color: var(--c-green); }
+.status-badge.error .dot { background: var(--c-red); }
+.status-badge.error { color: var(--c-red); }
+
+.summary-card.hero {
+    border-left: 3px solid var(--c-accent);
+}
+.summary-card.hero span {
+    font-size: 24px;
+    font-weight: 700;
+    font-family: var(--font-mono);
+}
+
+td.numeric {
+    position: relative;
+    font-family: var(--font-mono);
+    font-variant-numeric: tabular-nums;
+}
+td.numeric .bar {
+    position: absolute;
+    top: 2px;
+    bottom: 2px;
+    left: 0;
+    border-radius: 3px;
+    background: var(--c-accent);
+    opacity: 0.07;
+    pointer-events: none;
+    transition: width 0.3s ease;
+}
+.table-wrap tbody tr:hover td.numeric .bar { opacity: 0.12; }
+
+.aws-overview-card {
+    transition: box-shadow 0.15s ease, border-color 0.15s ease;
+}
+.aws-overview-card:hover {
+    box-shadow: 0 2px 8px rgba(15,23,42,0.08);
+    border-color: rgba(148,163,184,0.4);
+}
+[data-theme="dark"] .aws-overview-card:hover {
+    box-shadow: 0 2px 8px rgba(2,6,23,0.5);
+    border-color: rgba(148,163,184,0.3);
+}
+.aws-overview-card.card-active {
+    border-color: var(--c-accent);
+    box-shadow: 0 0 0 1px rgba(37,99,235,0.12), var(--shadow);
+}
+[data-theme="dark"] .aws-overview-card.card-active {
+    border-color: var(--c-accent);
+    box-shadow: 0 0 0 1px rgba(96,165,250,0.15), var(--shadow);
+}
+
+button:focus-visible,
+input:focus-visible,
+select:focus-visible,
+textarea:focus-visible {
+    outline: 2px solid var(--c-accent);
+    outline-offset: 2px;
+}
+</style>
+CSS;
+    echo '</head>';
+    echo '<body>';
+    echo '<div class="aws-shell">';
+    echo '  <header class="aws-header">';
+    echo '    <div class="aws-header-copy">';
+    echo '      <a class="aws-back" href="/">&larr; Back to Dashboard</a>';
+    echo '      <div class="aws-eyebrow">AWS Operations Console</div>';
+    echo '      <h1>AWS Reports</h1>';
+    echo '      <p>Run validation, cost analysis, rightsizing, security scans, and direct CLI calls from one page. Each tab keeps its own last result so you can compare reports without losing context.</p>';
+    echo '    </div>';
+    echo '    <div class="aws-header-actions">';
+    echo '      <div class="aws-chip-row">';
+    echo '        <span class="aws-chip"><strong>Project</strong>&nbsp;' . $projectTitle . '</span>';
+    echo '        <span class="aws-chip"><strong>Env</strong>&nbsp;' . $envLabel . '</span>';
+    echo '        <span class="aws-chip" id="envSummaryPill"></span>';
+    echo '      </div>';
+    echo '      <div class="aws-header-buttons">';
+    echo '        <button class="ghost-link" type="button" onclick="setAwsTab(\'cli\')">AWS CLI</button>';
+    echo '        <button class="theme-toggle" type="button" onclick="toggleTheme()" title="Toggle theme" aria-label="Toggle theme"><svg width="18" height="18" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path d="M21 12.79A9 9 0 1111.21 3 7 7 0 0021 12.79z"/></svg></button>';
+    echo '      </div>';
+    echo '    </div>';
+    echo '  </header>';
+    echo '  <section class="aws-overview" id="overviewGrid"></section>';
+    echo '  <section class="aws-results-shell">';
+    echo '    <div class="aws-tabs" id="reportTabs"></div>';
+    echo '    <div class="aws-results" id="awsContent"></div>';
+    echo '  </section>';
+    echo '</div>';
+    echo '<script>';
+    echo 'const AWS_REPORTS = ' . $reportsJson . ';';
+    echo 'const AWS_ENV_SUMMARY = ' . $envSummaryJson . ';';
+    echo <<<'JS'
+const AWS_TABS = {
+    validation: { label: 'Validation', description: 'Credentials and identity check', backend: 'cli' },
+    costs: { label: 'Costs', description: 'Service costs and resource inventory', backend: 'costs' },
+    rightsizing: { label: 'Rightsizing', description: 'CloudWatch utilisation analysis', backend: 'rightsizing' },
+    security: { label: 'Security', description: 'WAF, IAM, groups and data protection', backend: 'security' },
+    cli: { label: 'AWS CLI', description: 'Wrapped CLI runner', backend: 'cli' },
+};
+
+const AWS_STATE_STORAGE_KEY = 'devex_dash_aws_reports_v2';
+const awsState = {
+    active: 'costs',
+    running: false,
+    runningTab: null,
+    startedAt: 0,
+    timerId: null,
+    pendingCommand: '',
+    results: {},
+    inputs: {
+        cost_preset: 'last_2_months',
+        costs: { start_month: '', end_month: '' },
+        rightsizing: { days: '7' },
+        cli: { command: 'sts get-caller-identity' },
+    },
+};
+
+function escapeHtml(value) {
+    const div = document.createElement('div');
+    div.textContent = value ?? '';
+    return div.innerHTML;
+}
+
+function formatDuration(ms) {
+    if (ms < 1000) return `${ms}ms`;
+    const secs = (ms / 1000).toFixed(ms < 10000 ? 1 : 0);
+    return `${secs}s`;
+}
+
+function formatElapsed(ms) {
+    const totalSeconds = Math.max(0, Math.floor(ms / 1000));
+    const minutes = Math.floor(totalSeconds / 60);
+    const seconds = totalSeconds % 60;
+    return minutes > 0 ? `${minutes}m ${String(seconds).padStart(2, '0')}s` : `${totalSeconds}s`;
+}
+
+function formatRunTimestamp(value) {
+    if (!value) return 'No cached run';
+    const parsed = new Date(value);
+    if (Number.isNaN(parsed.getTime())) return value;
+    return parsed.toLocaleString([], {
+        month: 'short',
+        day: 'numeric',
+        hour: 'numeric',
+        minute: '2-digit',
+    });
+}
+
+function monthOffsetValue(offset) {
+    const d = new Date();
+    d.setDate(1);
+    d.setMonth(d.getMonth() + offset);
+    return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}`;
+}
+
+function applyCostPreset(preset) {
+    awsState.inputs.cost_preset = preset;
+    const monthsBack = {
+        current_month: 0,
+        last_1_month: 1,
+        last_2_months: 2,
+        last_5_months: 5,
+    }[preset] ?? 2;
+    awsState.inputs.costs.start_month = monthOffsetValue(-monthsBack);
+    awsState.inputs.costs.end_month = monthOffsetValue(0);
+}
+
+function getElapsedMs() {
+    return awsState.startedAt ? Date.now() - awsState.startedAt : 0;
+}
+
+function normalizeResult(result, fallbackLabel = 'Report') {
+    return {
+        label: result?.label || fallbackLabel,
+        command: result?.command || '',
+        exit_code: Number.isFinite(result?.exit_code) ? result.exit_code : 1,
+        duration_ms: Number.isFinite(result?.duration_ms) ? result.duration_ms : 0,
+        html: typeof result?.html === 'string' ? result.html : '',
+        text: typeof result?.text === 'string' ? result.text : '',
+        ran_at: result?.ran_at || new Date().toISOString(),
+        summary: {
+            headline: result?.summary?.headline || 'Completed',
+            alerts: Number.isFinite(result?.summary?.alerts) ? result.summary.alerts : 0,
+            warnings: Number.isFinite(result?.summary?.warnings) ? result.summary.warnings : 0,
+            oks: Number.isFinite(result?.summary?.oks) ? result.summary.oks : 0,
+        },
+    };
+}
+
+function serializeAwsState() {
+    return {
+        active: awsState.active,
+        inputs: awsState.inputs,
+        results: awsState.results,
+    };
+}
+
+function persistAwsState() {
+    try {
+        localStorage.setItem(AWS_STATE_STORAGE_KEY, JSON.stringify(serializeAwsState()));
+    } catch (_) {}
+}
+
+function restoreAwsState() {
+    try {
+        const raw = localStorage.getItem(AWS_STATE_STORAGE_KEY);
+        if (!raw) {
+            applyCostPreset(awsState.inputs.cost_preset);
+            return;
+        }
+        const parsed = JSON.parse(raw);
+        if (parsed && typeof parsed === 'object') {
+            if (typeof parsed.active === 'string' && AWS_TABS[parsed.active]) {
+                awsState.active = parsed.active;
+            }
+            if (parsed.inputs && typeof parsed.inputs === 'object') {
+                awsState.inputs = {
+                    ...awsState.inputs,
+                    ...parsed.inputs,
+                    costs: { ...awsState.inputs.costs, ...(parsed.inputs.costs || {}) },
+                    rightsizing: { ...awsState.inputs.rightsizing, ...(parsed.inputs.rightsizing || {}) },
+                    cli: { ...awsState.inputs.cli, ...(parsed.inputs.cli || {}) },
+                };
+            }
+            if (parsed.results && typeof parsed.results === 'object') {
+                awsState.results = parsed.results;
+            }
+        }
+    } catch (_) {}
+    if (!awsState.inputs.costs.start_month || !awsState.inputs.costs.end_month) {
+        applyCostPreset(awsState.inputs.cost_preset || 'last_2_months');
+    }
+}
+
+function startRunTimer() {
+    stopRunTimer();
+    awsState.timerId = window.setInterval(renderAll, 1000);
+}
+
+function stopRunTimer() {
+    if (awsState.timerId !== null) {
+        window.clearInterval(awsState.timerId);
+        awsState.timerId = null;
+    }
+}
+
+function parseValidationDetails(result) {
+    const details = { account: 'Unknown', arn: 'Unavailable', userId: 'Unavailable' };
+    try {
+        const parsed = JSON.parse(result?.text || '{}');
+        if (parsed?.Account) details.account = parsed.Account;
+        if (parsed?.Arn) details.arn = parsed.Arn;
+        if (parsed?.UserId) details.userId = parsed.UserId;
+    } catch (_) {}
+    return details;
+}
+
+function parseFindingsCount(result) {
+    const headline = result?.summary?.headline || '';
+    const match = headline.match(/([0-9]+)/);
+    if (match) return Number(match[1]);
+    return (result?.summary?.alerts || 0) + (result?.summary?.warnings || 0);
+}
+
+function parseRightsizingAnalysis(result) {
+    const text = result?.text || '';
+    return {
+        period: text.match(/Period:\s*(.+)$/mi)?.[1] || '',
+        findings: parseFindingsCount(result),
+        alerts: result?.summary?.alerts || 0,
+        warnings: result?.summary?.warnings || 0,
+        oks: result?.summary?.oks || 0,
+    };
+}
+
+function parseSecurityAnalysis(result) {
+    const text = result?.text || '';
+    return {
+        region: text.match(/Region:\s*(.+)$/mi)?.[1] || AWS_ENV_SUMMARY.region || 'Unknown',
+        findings: parseFindingsCount(result),
+        alerts: result?.summary?.alerts || 0,
+        warnings: result?.summary?.warnings || 0,
+        oks: result?.summary?.oks || 0,
+    };
+}
+
+function extractReportSection(text, startHeading, endHeadings = []) {
+    const lines = String(text || '').split('\n');
+    const stopHeadings = new Set(endHeadings);
+    let startIndex = -1;
+
+    for (let index = 0; index < lines.length; index += 1) {
+        if (lines[index].trim() === startHeading) {
+            startIndex = index + 1;
+            break;
+        }
+    }
+
+    if (startIndex === -1) {
+        return [];
+    }
+
+    const section = [];
+    for (let index = startIndex; index < lines.length; index += 1) {
+        const trimmed = lines[index].trim();
+        if (stopHeadings.has(trimmed)) {
+            break;
+        }
+        section.push(lines[index]);
+    }
+
+    return section;
+}
+
+function parseCostAnalysis(result) {
+    const text = result?.text || '';
+    const periodMatch = text.match(/Period:\s*([0-9-]+)\s*->\s*([0-9-]+)/i);
+    const lines = extractReportSection(text, 'COSTS BY SERVICE', [
+        'EC2 - OTHER BREAKDOWN',
+        'RESOURCE INVENTORY',
+        'INVENTORY SUMMARY',
+    ]).map((line) => line.trimEnd()).filter(Boolean);
+    const headerLine = lines.find((line) => line.includes('Service')) || '';
+    const monthColumns = [...headerLine.matchAll(/\d{4}-\d{2}/g)].map((match) => match[0]);
+    const totalLine = lines.find((line) => line.trimStart().startsWith('TOTAL')) || '';
+    const totalValue = [...totalLine.matchAll(/\$([0-9,.]+)/g)].reduce((sum, match) => {
+        const parsed = Number((match[1] || '0').replace(/,/g, ''));
+        return Number.isFinite(parsed) ? sum + parsed : sum;
+    }, 0);
+    const rows = [];
+
+    for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed.startsWith('Service') || /^[-\u2500]+$/.test(trimmed) || trimmed.startsWith('TOTAL')) {
+            continue;
+        }
+        const amounts = [...trimmed.matchAll(/\$[0-9,.]+/g)].map((match) => match[0].replace('$', ''));
+        if (amounts.length === 0) continue;
+        const name = trimmed.replace(/\$[0-9,.]+/g, '').replace(/\s{2,}/g, ' ').trim();
+        if (!name) continue;
+        rows.push({ name, amounts });
+    }
+
+    const inventoryLines = extractReportSection(text, 'INVENTORY SUMMARY');
+    const resourceCount = inventoryLines.reduce((sum, line) => {
+        if (/\$/.test(line)) return sum;
+        const match = line.match(/([0-9]+)\s*$/);
+        return match ? sum + Number(match[1]) : sum;
+    }, 0);
+
+    let columns = [];
+    if (rows[0]) {
+        if (monthColumns.length > 0 && rows[0].amounts.length === monthColumns.length + 1) {
+            columns = [...monthColumns, 'Total'];
+        } else if (monthColumns.length > 0 && rows[0].amounts.length === monthColumns.length) {
+            columns = [...monthColumns];
+        } else if (rows[0].amounts.length === 1) {
+            columns = ['Amount'];
+        } else {
+            columns = rows[0].amounts.map((_, index) => `Value ${index + 1}`);
+        }
+    }
+
+    return {
+        start: periodMatch?.[1] || '',
+        end: periodMatch?.[2] || '',
+        total: totalLine ? totalValue.toFixed(2) : '',
+        serviceCount: rows.length,
+        resourceCount,
+        topService: rows[0]?.name || '',
+        columns,
+        rows,
+    };
+}
+
+function tabBadgeCount(tabId) {
+    const result = awsState.results[tabId];
+    if (!result) return '';
+    if (tabId === 'costs') {
+        const analysis = parseCostAnalysis(result);
+        return analysis.serviceCount > 0 ? String(analysis.serviceCount) : '';
+    }
+    if (tabId === 'rightsizing' || tabId === 'security') {
+        const findings = parseFindingsCount(result);
+        return findings > 0 ? String(findings) : '';
+    }
+    return '';
+}
+
+function contentSummary(tabId, result) {
+    if (!result) return AWS_TABS[tabId].description;
+    if (tabId === 'validation') {
+        const details = parseValidationDetails(result);
+        return result.exit_code === 0 ? `Validated against account ${details.account}` : result.summary.headline;
+    }
+    if (tabId === 'costs') {
+        const analysis = parseCostAnalysis(result);
+        if (analysis.total) {
+            return `${analysis.start} to ${analysis.end} | ${analysis.serviceCount} services, $${analysis.total} total`;
+        }
+    }
+    if (tabId === 'rightsizing') {
+        const analysis = parseRightsizingAnalysis(result);
+        return `${analysis.period || 'Utilisation review'} | ${analysis.findings} findings`;
+    }
+    if (tabId === 'security') {
+        const analysis = parseSecurityAnalysis(result);
+        return `${analysis.region} | ${analysis.findings} findings`;
+    }
+    return result.summary.headline;
+}
+
+function buildOutputShell(result) {
+    return `
+        <div class="aws-output-shell">
+            <div class="shell-header">
+                <code>${escapeHtml(result.command || '')}</code>
+                <span class="shell-meta">${escapeHtml(result.ran_at || '')}</span>
+            </div>
+            <div class="shell-output">${result.html || '<span class="ansi-dim">No output captured.</span>'}</div>
+        </div>
+    `;
+}
+
+function buildValidationBody(result) {
+    const details = parseValidationDetails(result || {});
+    const statusLabel = result ? (result.exit_code === 0 ? 'Validated successfully' : 'Validation failed') : 'Not yet validated';
+    return `
+        <div class="summary-grid">
+            <div class="summary-card ${result && result.exit_code === 0 ? 'good' : result ? 'bad' : ''}">
+                <strong>Status</strong>
+                <span>${escapeHtml(statusLabel)}</span>
+            </div>
+            <div class="summary-card">
+                <strong>AWS Region</strong>
+                <span>${escapeHtml(AWS_ENV_SUMMARY.region)}</span>
+            </div>
+            <div class="summary-card ${AWS_ENV_SUMMARY.has_env_file ? 'good' : 'bad'}">
+                <strong>.env File</strong>
+                <span>${AWS_ENV_SUMMARY.has_env_file ? 'Present' : 'Missing'}</span>
+            </div>
+            <div class="summary-card ${AWS_ENV_SUMMARY.has_secret ? 'good' : 'bad'}">
+                <strong>Secret Key</strong>
+                <span>${AWS_ENV_SUMMARY.has_secret ? 'Configured' : 'Missing'}</span>
+            </div>
+        </div>
+        <div class="identity-grid">
+            <div class="identity-card"><strong>Access Key</strong><code>${escapeHtml(AWS_ENV_SUMMARY.access_key_preview)}</code></div>
+            <div class="identity-card"><strong>Account</strong><code>${escapeHtml(details.account)}</code></div>
+            <div class="identity-card"><strong>ARN</strong><span>${escapeHtml(details.arn)}</span></div>
+            <div class="identity-card"><strong>User ID</strong><code>${escapeHtml(details.userId)}</code></div>
+        </div>
+        ${result ? buildOutputShell(result) : ''}
+    `;
+}
+
+function buildCostBody(result) {
+    const analysis = parseCostAnalysis(result);
+    if (!analysis.rows.length) {
+        return `
+            <div class="summary-grid">
+                <div class="summary-card">
+                    <strong>Status</strong>
+                    <span>${escapeHtml(result.summary.headline)}</span>
+                </div>
+                <div class="summary-card">
+                    <strong>Duration</strong>
+                    <span>${formatDuration(result.duration_ms)}</span>
+                </div>
+            </div>
+            ${buildOutputShell(result)}
+        `;
+    }
+
+    const headerCopy = analysis.start && analysis.end
+        ? `${analysis.start} to ${analysis.end} | ${analysis.serviceCount} services, $${analysis.total || '0.00'} total`
+        : `${analysis.serviceCount} services, $${analysis.total || '0.00'} total`;
+    const headCells = analysis.columns.map((column) => `<th class="numeric">${escapeHtml(column)}</th>`).join('');
+    const columnMaxes = analysis.columns.map((_, ci) =>
+        Math.max(...analysis.rows.map(r => { const v = parseFloat((r.amounts[ci] || '0').replace(/,/g, '')); return isNaN(v) ? 0 : v; }))
+    );
+    const rowsHtml = analysis.rows.map((row) => `
+        <tr>
+            <td>${escapeHtml(row.name)}</td>
+            ${row.amounts.map((amount, ci) => {
+                const numVal = parseFloat((amount || '0').replace(/,/g, ''));
+                const max = columnMaxes[ci] || 1;
+                const pct = Math.round((isNaN(numVal) ? 0 : numVal) / max * 100);
+                return `<td class="numeric"><span class="bar" style="width:${pct}%"></span>$${escapeHtml(amount)}</td>`;
+            }).join('')}
+        </tr>
+    `).join('');
+
+    return `
+        <div class="aws-panel">
+            <div class="panel-head">
+                <h3>Snapshot</h3>
+                <span>${escapeHtml(headerCopy)}</span>
+            </div>
+            <div class="summary-grid">
+                <div class="summary-card hero"><strong>Total Cost</strong><span>$${escapeHtml(analysis.total || '0.00')}</span></div>
+                <div class="summary-card"><strong>Services</strong><span>${analysis.serviceCount}</span></div>
+                <div class="summary-card"><strong>Resources</strong><span>${analysis.resourceCount}</span></div>
+                <div class="summary-card"><strong>Top Service</strong><span>${escapeHtml(analysis.topService || 'N/A')}</span></div>
+            </div>
+        </div>
+        <div class="aws-table-shell">
+            <div class="table-head">
+                <h3>Service Breakdown</h3>
+                <span>${escapeHtml(formatRunTimestamp(result.ran_at))}</span>
+            </div>
+            <div class="table-wrap">
+                <table>
+                    <thead>
+                        <tr>
+                            <th>Service</th>
+                            ${headCells}
+                        </tr>
+                    </thead>
+                    <tbody>${rowsHtml}</tbody>
+                </table>
+            </div>
+        </div>
+        ${buildOutputShell(result)}
+    `;
+}
+
+function buildFindingsBody(result, analysis, label) {
+    return `
+        <div class="summary-grid">
+            <div class="summary-card"><strong>${label}</strong><span class="summary-value-text">${escapeHtml(label === 'Region' ? analysis.region : analysis.period || 'Unknown')}</span></div>
+            <div class="summary-card ${analysis.findings > 0 ? 'bad' : 'good'}"><strong>Findings</strong><span>${analysis.findings}</span></div>
+            <div class="summary-card ${analysis.alerts > 0 ? 'bad' : 'good'}"><strong>Alerts</strong><span>${analysis.alerts}</span></div>
+            <div class="summary-card ${analysis.warnings > 0 ? 'warn' : ''}"><strong>Warnings</strong><span>${analysis.warnings}</span></div>
+            <div class="summary-card good"><strong>Checks OK</strong><span>${analysis.oks}</span></div>
+            <div class="summary-card"><strong>Duration</strong><span>${formatDuration(result.duration_ms)}</span></div>
+        </div>
+        ${buildOutputShell(result)}
+    `;
+}
+
+function buildCliBody(result) {
+    return `
+        <div class="summary-grid">
+            <div class="summary-card"><strong>Status</strong><span>${escapeHtml(result.summary.headline)}</span></div>
+            <div class="summary-card ${result.exit_code === 0 ? 'good' : 'bad'}"><strong>Exit Code</strong><span>${result.exit_code}</span></div>
+            <div class="summary-card"><strong>Duration</strong><span>${formatDuration(result.duration_ms)}</span></div>
+            <div class="summary-card"><strong>Last Run</strong><span>${escapeHtml(formatRunTimestamp(result.ran_at))}</span></div>
+        </div>
+        ${buildOutputShell(result)}
+    `;
+}
+
+function buildInlineControls(tabId) {
+    if (tabId === 'validation') {
+        return `
+            <div class="aws-panel">
+                <div class="panel-head">
+                    <h3>Credential Check</h3>
+                    <span>${AWS_ENV_SUMMARY.saved_at ? `Saved: ${escapeHtml(AWS_ENV_SUMMARY.saved_at)}` : 'No .env file detected'}</span>
+                </div>
+                <div class="inline-row">
+                    <button class="card-button primary" type="button" onclick="runValidation()" ${awsState.running ? 'disabled' : ''}>Validate Credentials</button>
+                    <span class="helper-copy">Runs <code>sts get-caller-identity</code> through the wrapped AWS CLI.</span>
+                </div>
+            </div>
+        `;
+    }
+    if (tabId === 'costs') {
+        return `
+            <div class="aws-panel">
+                <div class="panel-head">
+                    <h3>Period</h3>
+                    <span>Use presets above or fine-tune the month range here.</span>
+                </div>
+                <div class="control-grid">
+                    <div>
+                        <label class="field-label" for="costStartMonth">Start Month</label>
+                        <input class="field-input" id="costStartMonth" type="month" value="${escapeHtml(awsState.inputs.costs.start_month)}" onchange="setCostMonthRange()" />
+                    </div>
+                    <div>
+                        <label class="field-label" for="costEndMonth">End Month</label>
+                        <input class="field-input" id="costEndMonth" type="month" value="${escapeHtml(awsState.inputs.costs.end_month)}" onchange="setCostMonthRange()" />
+                    </div>
+                    <div>
+                        <label class="field-label">&nbsp;</label>
+                        <button class="card-button primary" type="button" onclick="runCosts()" ${awsState.running ? 'disabled' : ''}>Analyze Costs</button>
+                    </div>
+                </div>
+            </div>
+        `;
+    }
+    if (tabId === 'rightsizing') {
+        return `
+            <div class="aws-panel">
+                <div class="panel-head">
+                    <h3>Lookback Period</h3>
+                    <span>Longer windows smooth spikes but take longer to fetch.</span>
+                </div>
+                <div class="inline-row">
+                    <select class="field-select" id="rightsizingDays" onchange="setRightsizingDays(this.value)">
+                        <option value="7" ${awsState.inputs.rightsizing.days === '7' ? 'selected' : ''}>Last 7 days</option>
+                        <option value="14" ${awsState.inputs.rightsizing.days === '14' ? 'selected' : ''}>Last 14 days</option>
+                        <option value="30" ${awsState.inputs.rightsizing.days === '30' ? 'selected' : ''}>Last 30 days</option>
+                        <option value="60" ${awsState.inputs.rightsizing.days === '60' ? 'selected' : ''}>Last 60 days</option>
+                    </select>
+                    <button class="card-button primary" type="button" onclick="runRightsizing()" ${awsState.running ? 'disabled' : ''}>Analyze Rightsizing</button>
+                </div>
+            </div>
+        `;
+    }
+    if (tabId === 'security') {
+        return `
+            <div class="aws-panel">
+                <div class="panel-head">
+                    <h3>Read-Only Scan</h3>
+                    <span>Checks WAF rules, IAM users, security groups, S3 access blocks and secrets rotation.</span>
+                </div>
+                <div class="inline-row">
+                    <button class="card-button primary" type="button" onclick="runSecurity()" ${awsState.running ? 'disabled' : ''}>Run Security Scan</button>
+                </div>
+            </div>
+        `;
+    }
+    return `
+        <div class="aws-panel">
+            <div class="panel-head">
+                <h3>AWS CLI</h3>
+                <span>Run wrapped AWS CLI or Terraform commands with the shared auth loader.</span>
+            </div>
+            <div class="control-stack">
+                <div>
+                    <label class="field-label" for="awsCliCommand">Command</label>
+                    <textarea class="field-textarea" id="awsCliCommand" spellcheck="false" oninput="setAwsCliCommand(this.value)">${escapeHtml(awsState.inputs.cli.command)}</textarea>
+                    <div class="helper-copy">Enter the AWS subcommand only, for example <code>sts get-caller-identity</code> or <code>terraform version</code>.</div>
+                </div>
+                <div class="quick-actions">
+                    <button class="chip" type="button" onclick="setAwsCliCommand('sts get-caller-identity'); renderAll()">Identity</button>
+                    <button class="chip" type="button" onclick="setAwsCliCommand('s3 ls'); renderAll()">Buckets</button>
+                    <button class="chip" type="button" onclick="setAwsCliCommand('ec2 describe-regions --all-regions'); renderAll()">Regions</button>
+                    <button class="chip" type="button" onclick="setAwsCliCommand('terraform version'); renderAll()">Terraform</button>
+                </div>
+                <div>
+                    <button class="card-button primary" type="button" onclick="runCli()" ${awsState.running ? 'disabled' : ''}>Run CLI Command</button>
+                </div>
+            </div>
+        </div>
+    `;
+}
+
+function buildContentBody(tabId, result) {
+    if (!result) {
+        return `
+            <div class="empty-state">
+                <p>No output yet for <strong>${escapeHtml(AWS_TABS[tabId].label)}</strong>.</p>
+                <p style="margin-top:8px">Use the card above or the inline controls to run this report.</p>
+            </div>
+        `;
+    }
+    if (tabId === 'validation') return buildValidationBody(result);
+    if (tabId === 'costs') return buildCostBody(result);
+    if (tabId === 'rightsizing') return buildFindingsBody(result, parseRightsizingAnalysis(result), 'Window');
+    if (tabId === 'security') return buildFindingsBody(result, parseSecurityAnalysis(result), 'Region');
+    return buildCliBody(result);
+}
+
+function renderOverview() {
+    const validationResult = awsState.results.validation ? normalizeResult(awsState.results.validation, 'Validation') : null;
+    const costsResult = awsState.results.costs ? normalizeResult(awsState.results.costs, 'Costs') : null;
+    const rightsizingResult = awsState.results.rightsizing ? normalizeResult(awsState.results.rightsizing, 'Rightsizing') : null;
+    const securityResult = awsState.results.security ? normalizeResult(awsState.results.security, 'Security') : null;
+
+    document.getElementById('envSummaryPill').className = `aws-chip ${AWS_ENV_SUMMARY.has_env_file ? 'good' : 'bad'}`;
+    document.getElementById('envSummaryPill').textContent = AWS_ENV_SUMMARY.has_env_file ? '.env present' : '.env missing';
+
+    document.getElementById('overviewGrid').innerHTML = `
+        <article class="aws-overview-card ${awsState.active === 'validation' ? 'card-active' : ''}">
+            <div class="aws-card-header">
+                <div class="aws-card-title">
+                    <h2>Credentials</h2>
+                    <p>Enter and validate AWS access keys.</p>
+                </div>
+                <div class="aws-card-icon">K</div>
+            </div>
+            <div class="aws-card-body">
+                <div class="inline-row">
+                    <button class="card-button" type="button" onclick="setAwsTab('validation')">Open</button>
+                    <button class="card-button primary" type="button" onclick="runValidation()" ${awsState.running ? 'disabled' : ''}>Validate</button>
+                </div>
+                <div class="card-footnote">${validationResult ? escapeHtml(contentSummary('validation', validationResult)) : `Saved: ${escapeHtml(AWS_ENV_SUMMARY.saved_at || 'Not saved yet')}`}</div>
+            </div>
+        </article>
+
+        <article class="aws-overview-card ${awsState.active === 'costs' ? 'card-active' : ''}">
+            <div class="aws-card-header">
+                <div class="aws-card-title">
+                    <h2>Cost Analysis</h2>
+                    <p>Service costs, resource inventory and spend overview.</p>
+                </div>
+                <div class="aws-card-icon">$</div>
+            </div>
+            <div class="aws-card-body">
+                <label class="field-label" for="overviewCostPreset">Period</label>
+                <div class="inline-row">
+                    <select class="field-select" id="overviewCostPreset" onchange="setCostPreset(this.value)">
+                        <option value="current_month" ${awsState.inputs.cost_preset === 'current_month' ? 'selected' : ''}>Current month</option>
+                        <option value="last_1_month" ${awsState.inputs.cost_preset === 'last_1_month' ? 'selected' : ''}>Last 1 month</option>
+                        <option value="last_2_months" ${awsState.inputs.cost_preset === 'last_2_months' ? 'selected' : ''}>Last 2 months</option>
+                        <option value="last_5_months" ${awsState.inputs.cost_preset === 'last_5_months' ? 'selected' : ''}>Last 5 months</option>
+                    </select>
+                    <button class="card-button primary" type="button" onclick="runCosts()" ${awsState.running ? 'disabled' : ''}>Analyze</button>
+                </div>
+                <div class="card-footnote">${costsResult ? escapeHtml(contentSummary('costs', costsResult)) : 'Queries Cost Explorer and enumerates infrastructure.'}</div>
+            </div>
+        </article>
+
+        <article class="aws-overview-card ${awsState.active === 'rightsizing' ? 'card-active' : ''}">
+            <div class="aws-card-header">
+                <div class="aws-card-title">
+                    <h2>Rightsizing</h2>
+                    <p>CloudWatch metrics and utilisation analysis.</p>
+                </div>
+                <div class="aws-card-icon">R</div>
+            </div>
+            <div class="aws-card-body">
+                <label class="field-label" for="overviewRightsizingDays">Lookback Period</label>
+                <div class="inline-row">
+                    <select class="field-select" id="overviewRightsizingDays" onchange="setRightsizingDays(this.value)">
+                        <option value="7" ${awsState.inputs.rightsizing.days === '7' ? 'selected' : ''}>Last 7 days</option>
+                        <option value="14" ${awsState.inputs.rightsizing.days === '14' ? 'selected' : ''}>Last 14 days</option>
+                        <option value="30" ${awsState.inputs.rightsizing.days === '30' ? 'selected' : ''}>Last 30 days</option>
+                        <option value="60" ${awsState.inputs.rightsizing.days === '60' ? 'selected' : ''}>Last 60 days</option>
+                    </select>
+                    <button class="card-button primary" type="button" onclick="runRightsizing()" ${awsState.running ? 'disabled' : ''}>Analyze</button>
+                </div>
+                <div class="card-footnote">${rightsizingResult ? escapeHtml(contentSummary('rightsizing', rightsizingResult)) : 'Fetches CloudWatch metrics for RDS, ECS, ALB, NAT, EC2 and logs.'}</div>
+            </div>
+        </article>
+
+        <article class="aws-overview-card ${awsState.active === 'security' ? 'card-active' : ''}">
+            <div class="aws-card-header">
+                <div class="aws-card-title">
+                    <h2>Security</h2>
+                    <p>WAF, IAM, security groups and data protection.</p>
+                </div>
+                <div class="aws-card-icon">S</div>
+            </div>
+            <div class="aws-card-body">
+                <label class="field-label">Run Scan</label>
+                <div class="inline-row">
+                    <button class="card-button" type="button" onclick="setAwsTab('security')">Open</button>
+                    <button class="card-button primary" type="button" onclick="runSecurity()" ${awsState.running ? 'disabled' : ''}>Scan</button>
+                </div>
+                <div class="card-footnote">${securityResult ? escapeHtml(contentSummary('security', securityResult)) : 'Checks WAF rules, IAM users, security groups, S3 blocks and secrets rotation.'}</div>
+            </div>
+        </article>
+    `;
+}
+
+function renderTabs() {
+    document.getElementById('reportTabs').innerHTML = Object.keys(AWS_TABS).map((tabId) => {
+        const badge = tabBadgeCount(tabId);
+        const isRunning = awsState.running && awsState.runningTab === tabId;
+        return `
+            <button class="tab-btn ${awsState.active === tabId ? 'active' : ''} ${isRunning ? 'running' : ''}" type="button" onclick="setAwsTab('${tabId}')">
+                <span>${escapeHtml(AWS_TABS[tabId].label)}</span>
+                ${badge ? `<span class="tab-badge">${escapeHtml(badge)}</span>` : ''}
+            </button>
+        `;
+    }).join('');
+}
+
+function renderContent() {
+    const tabId = awsState.active;
+    const result = awsState.results[tabId] ? normalizeResult(awsState.results[tabId], AWS_TABS[tabId].label) : null;
+    const isRunningHere = awsState.running && awsState.runningTab === tabId;
+    const runningLabel = awsState.running && awsState.runningTab ? AWS_TABS[awsState.runningTab].label : '';
+    const runBanner = awsState.running ? `
+        <div class="run-banner">
+            <div class="run-banner-copy">
+                <span class="spinner"></span>
+                <div class="run-banner-text">
+                    <strong>${isRunningHere ? `Running ${escapeHtml(AWS_TABS[tabId].label)}` : `${escapeHtml(runningLabel)} is running in the background`}</strong>
+                    <span>${escapeHtml(awsState.pendingCommand || 'Preparing command')}</span>
+                </div>
+            </div>
+            <div class="run-banner-time">${formatElapsed(getElapsedMs())}</div>
+        </div>
+    ` : '';
+
+    const lastRunLine = !awsState.running && result ? `
+        <div style="display:flex;align-items:center;gap:8px;margin-bottom:12px;font-size:11px;color:var(--c-text-faint)">
+            <span class="status-badge ${result.exit_code === 0 ? 'success' : 'error'}">
+                <span class="dot"></span>
+                ${result.exit_code === 0 ? 'Completed' : 'Failed'}
+            </span>
+            <span style="font-family:var(--font-mono)">${escapeHtml(result.command || '')}</span>
+            <span>\u00b7</span>
+            <span>${formatDuration(result.duration_ms)}</span>
+            <span>\u00b7</span>
+            <span>${escapeHtml(formatRunTimestamp(result.ran_at))}</span>
+        </div>
+    ` : '';
+
+    document.getElementById('awsContent').innerHTML = `
+        <div class="results-toolbar">
+            <div>
+                <h2>${escapeHtml(AWS_TABS[tabId].label)}</h2>
+                <p>${escapeHtml(contentSummary(tabId, result))}</p>
+            </div>
+            <div class="toolbar-actions">
+                ${result ? '<button class="action-btn" type="button" onclick="copyActiveResultJson()">Copy JSON</button>' : ''}
+                ${result ? '<button class="action-btn" type="button" onclick="exportActiveResultHtml()">Export HTML</button>' : ''}
+            </div>
+        </div>
+        ${buildInlineControls(tabId)}
+        ${runBanner}
+        ${lastRunLine}
+        ${buildContentBody(tabId, result)}
+    `;
+}
+
+function renderAll() {
+    renderOverview();
+    renderTabs();
+    renderContent();
+}
+
+function setAwsTab(tabId) {
+    awsState.active = tabId;
+    persistAwsState();
+    renderAll();
+}
+
+function setCostPreset(value) {
+    applyCostPreset(value);
+    persistAwsState();
+    renderAll();
+}
+
+function setCostMonthRange() {
+    const start = document.getElementById('costStartMonth');
+    const end = document.getElementById('costEndMonth');
+    if (start) {
+        awsState.inputs.costs.start_month = start.value;
+    }
+    if (end) {
+        awsState.inputs.costs.end_month = end.value;
+    }
+    persistAwsState();
+}
+
+function setRightsizingDays(value) {
+    awsState.inputs.rightsizing.days = value || '7';
+    persistAwsState();
+    renderAll();
+}
+
+function setAwsCliCommand(command) {
+    awsState.inputs.cli.command = command;
+    persistAwsState();
+}
+
+async function executeAwsRequest(tabId, backendReport, payload, pendingCommand) {
+    if (awsState.running) return;
+
+    awsState.active = tabId;
+    awsState.running = true;
+    awsState.runningTab = tabId;
+    awsState.startedAt = Date.now();
+    awsState.pendingCommand = pendingCommand;
+    startRunTimer();
+    renderAll();
+
+    try {
+        const response = await fetch('/api/aws/run', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ report: backendReport, ...payload }),
+        });
+        const data = await response.json();
+        if (!response.ok && !data.html) {
+            throw new Error(data.error || 'AWS request failed');
+        }
+        awsState.results[tabId] = normalizeResult(data, AWS_TABS[tabId].label);
+        persistAwsState();
+    } catch (error) {
+        awsState.results[tabId] = normalizeResult({
+            label: AWS_TABS[tabId].label,
+            command: pendingCommand,
+            exit_code: 1,
+            duration_ms: 0,
+            ran_at: new Date().toISOString(),
+            html: `<span class="ansi-red ansi-bold">Error:</span> ${escapeHtml(String(error))}`,
+            text: String(error),
+            summary: { headline: 'Request failed', alerts: 1, warnings: 0, oks: 0 },
+        }, AWS_TABS[tabId].label);
+        persistAwsState();
+    } finally {
+        awsState.running = false;
+        awsState.runningTab = null;
+        awsState.startedAt = 0;
+        awsState.pendingCommand = '';
+        stopRunTimer();
+        persistAwsState();
+        renderAll();
+    }
+}
+
+function runValidation() {
+    executeAwsRequest('validation', 'cli', { command: 'sts get-caller-identity' }, 'aws sts get-caller-identity');
+}
+
+function runCosts() {
+    setCostMonthRange();
+    executeAwsRequest('costs', 'costs', { ...awsState.inputs.costs }, `bash ${AWS_REPORTS.costs.script}`);
+}
+
+function runRightsizing() {
+    executeAwsRequest('rightsizing', 'rightsizing', { days: awsState.inputs.rightsizing.days }, `bash ${AWS_REPORTS.rightsizing.script} --days ${awsState.inputs.rightsizing.days}`);
+}
+
+function runSecurity() {
+    executeAwsRequest('security', 'security', {}, `bash ${AWS_REPORTS.security.script}`);
+}
+
+function runCli() {
+    const textarea = document.getElementById('awsCliCommand');
+    awsState.inputs.cli.command = textarea ? textarea.value : awsState.inputs.cli.command;
+    persistAwsState();
+    executeAwsRequest('cli', 'cli', { command: awsState.inputs.cli.command }, `aws ${awsState.inputs.cli.command}`);
+}
+
+function copyActiveResultJson() {
+    const result = awsState.results[awsState.active];
+    if (!result) return;
+    navigator.clipboard.writeText(JSON.stringify(result, null, 2));
+}
+
+function exportActiveResultHtml() {
+    const result = awsState.results[awsState.active];
+    if (!result) return;
+    const html = `<!DOCTYPE html><html><head><meta charset="UTF-8"><title>${escapeHtml(AWS_TABS[awsState.active].label)}</title></head><body style="font-family:IBM Plex Sans,Arial,sans-serif;padding:24px;background:#f8fafc;color:#111827"><h1>${escapeHtml(AWS_TABS[awsState.active].label)}</h1><p>${escapeHtml(contentSummary(awsState.active, result))}</p><div>${result.html}</div></body></html>`;
+    const blob = new Blob([html], { type: 'text/html;charset=utf-8' });
+    const url = URL.createObjectURL(blob);
+    const anchor = document.createElement('a');
+    anchor.href = url;
+    anchor.download = `aws-${awsState.active}-report.html`;
+    anchor.click();
+    URL.revokeObjectURL(url);
+}
+
+function toggleTheme() {
+    const next = document.documentElement.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
+    document.documentElement.setAttribute('data-theme', next);
+    localStorage.setItem('devex_dash_theme', next);
+}
+
+restoreAwsState();
+renderAll();
+JS;
+    echo '</script>';
+    echo '</body>';
+    echo '</html>';
+}
diff --git a/dashboard/frontend.php b/dashboard/frontend.php
index ecfea76..bb2b3cb 100644
--- a/dashboard/frontend.php
+++ b/dashboard/frontend.php
@@ -1,5 +1,7 @@
 <?php
 
+require_once __DIR__ . '/tunnel.php';
+
 /**
  * Frontend (HTML/CSS/JS)
  *
@@ -25,13 +27,13 @@ function serveDashboardHtml(): void
     $env = htmlspecialchars(ENV_NAME, ENT_QUOTES);
     $projectTitle = htmlspecialchars(PROJECT_NAME, ENT_QUOTES);
 
-    // Accent color: neutral indigo default, configurable via DASHBOARD_ACCENT env var (hex)
-    $accent = getenv('DASHBOARD_ACCENT') ?: '#818cf8';
-    $accentHover = '#6366f1';
+    // Accent color: Tailwind-like blue default, configurable via DASHBOARD_ACCENT env var (hex)
+    $accent = getenv('DASHBOARD_ACCENT') ?: '#2563eb';
+    $accentHover = '#1d4ed8';
     $colors = [
         'accent' => $accent, 'accent_hover' => $accentHover,
-        'dark'  => ['badge_bg' => 'rgba(129,140,248,0.15)', 'badge_border' => 'rgba(129,140,248,0.4)',  'badge_text' => '#a5b4fc'],
-        'light' => ['badge_bg' => 'rgba(99,102,241,0.12)',  'badge_border' => 'rgba(99,102,241,0.35)',  'badge_text' => '#4f46e5'],
+        'dark'  => ['badge_bg' => 'rgba(96,165,250,0.14)', 'badge_border' => 'rgba(96,165,250,0.32)',  'badge_text' => '#93c5fd'],
+        'light' => ['badge_bg' => 'rgba(37,99,235,0.1)',   'badge_border' => 'rgba(37,99,235,0.22)',   'badge_text' => '#1d4ed8'],
     ];
     $dark   = $colors['dark'];
     $light  = $colors['light'];
@@ -44,35 +46,45 @@ function serveDashboardHtml(): void
     echo '<title>' . $env . ' — ' . $projectTitle . '</title>';
     echo '<link rel="icon" href="data:image/svg+xml,<svg xmlns=\'http://www.w3.org/2000/svg\' viewBox=\'0 0 100 100\'><text y=\'.9em\' font-size=\'90\'>&#x2699;&#xfe0f;</text></svg>">';
     echo '<script>document.documentElement.setAttribute("data-theme", localStorage.getItem("devex_dash_theme") || "light");</script>';
-    echo '<link rel="preconnect" href="https://fonts.googleapis.com">';
-    echo '<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>';
-    echo '<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">';
-
     echo '<style>';
     echo '* { box-sizing: border-box; margin: 0; padding: 0; }';
-    echo 'body { font-family: "Inter", "Segoe UI", "Helvetica Neue", Arial, sans-serif; }';
+    echo 'body { font-family: var(--font-sans); }';
     echo ':root, [data-theme="light"] {';
-    echo '  --c-bg: #f2f3f5; --c-surface: #ffffff; --c-surface-hover: #f0f0f2;';
-    echo '  --c-text-primary: #313338; --c-text-secondary: #4e5058; --c-text-muted: #5c6470; --c-text-faint: #80848e;';
-    echo '  --c-border: rgba(0,0,0,0.08); --c-scrollbar: rgba(0,0,0,0.12);';
-    echo '  --c-accent: ' . $colors['accent_hover'] . '; --c-accent-hover: ' . $colors['accent'] . ';';
+    echo '  --font-sans: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;';
+    echo '  --font-mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;';
+    echo '  --c-bg: #f8fafc; --c-surface: #ffffff; --c-surface-hover: #f8fafc;';
+    echo '  --c-text-primary: #0f172a; --c-text-secondary: #334155; --c-text-muted: #64748b; --c-text-faint: #94a3b8;';
+    echo '  --c-border: rgba(148,163,184,0.28); --c-scrollbar: rgba(148,163,184,0.45);';
+    echo '  --c-accent: ' . $colors['accent'] . '; --c-accent-hover: ' . $colors['accent_hover'] . ';';
     echo '  --c-badge-bg: ' . $light['badge_bg'] . '; --c-badge-border: ' . $light['badge_border'] . '; --c-badge-text: ' . $light['badge_text'] . ';';
-    echo '  --c-green: #16a34a; --c-red: #dc2626; --c-yellow: #ca8a04;';
-    echo '  --c-terminal-bg: #f8f8f8;';
+    echo '  --c-green: #16a34a; --c-red: #dc2626; --c-yellow: #d97706;';
+    echo '  --c-terminal-bg: #f8fafc; --c-terminal-border: rgba(148,163,184,0.22);';
+    echo '  --shadow-sm: 0 1px 2px rgba(15,23,42,0.05);';
+    echo '  --shadow-panel: 0 1px 2px rgba(15,23,42,0.06), 0 1px 1px rgba(15,23,42,0.04);';
     echo '}';
     echo '[data-theme="dark"] {';
-    echo '  --c-bg: #1e1f22; --c-surface: #2b2d31; --c-surface-hover: #383a40;';
-    echo '  --c-text-primary: #dbdee1; --c-text-secondary: #b5bac1; --c-text-muted: #949ba4; --c-text-faint: #6d6f78;';
-    echo '  --c-border: rgba(255,255,255,0.06); --c-scrollbar: rgba(255,255,255,0.1);';
+    echo '  --c-bg: #020617; --c-surface: #0f172a; --c-surface-hover: #111827;';
+    echo '  --c-text-primary: #e2e8f0; --c-text-secondary: #cbd5e1; --c-text-muted: #94a3b8; --c-text-faint: #64748b;';
+    echo '  --c-border: rgba(148,163,184,0.2); --c-scrollbar: rgba(148,163,184,0.35);';
     echo '  --c-accent: ' . $colors['accent'] . '; --c-accent-hover: ' . $colors['accent_hover'] . ';';
     echo '  --c-badge-bg: ' . $dark['badge_bg'] . '; --c-badge-border: ' . $dark['badge_border'] . '; --c-badge-text: ' . $dark['badge_text'] . ';';
-    echo '  --c-green: #a6e3a1; --c-red: #f38ba8; --c-yellow: #f9e2af;';
-    echo '  --c-terminal-bg: #1a1b1e;';
+    echo '  --c-green: #4ade80; --c-red: #fb7185; --c-yellow: #f59e0b;';
+    echo '  --c-terminal-bg: #020617; --c-terminal-border: rgba(148,163,184,0.16);';
+    echo '  --shadow-sm: 0 1px 2px rgba(2,6,23,0.45);';
+    echo '  --shadow-panel: 0 1px 2px rgba(2,6,23,0.45), 0 1px 1px rgba(2,6,23,0.3);';
     echo '}';
     echo <<<'CSS'
 
 html, body { height: 100%; }
-body { background: var(--c-bg); color: var(--c-text-primary); display: flex; flex-direction: column; height: 100vh; overflow: hidden; }
+body {
+    background: var(--c-bg);
+    color: var(--c-text-primary);
+    display: flex;
+    flex-direction: column;
+    height: 100vh;
+    overflow: hidden;
+    font-family: var(--font-sans);
+}
 
 ::-webkit-scrollbar { width: 6px; }
 ::-webkit-scrollbar-thumb { background: var(--c-scrollbar); border-radius: 3px; }
@@ -80,25 +92,28 @@ function serveDashboardHtml(): void
 /* -- Header ------------------------------------------ */
 .header {
     display: flex; align-items: center; justify-content: space-between;
-    padding: 0 20px; height: 56px; flex-shrink: 0;
+    padding: 0 16px; height: 56px; flex-shrink: 0;
     background: var(--c-surface); border-bottom: 1px solid var(--c-border);
+    box-shadow: var(--shadow-sm);
 }
 .header-left { display: flex; align-items: center; gap: 12px; }
-.header-left h1 { font-size: 16px; font-weight: 700; letter-spacing: -0.01em; color: var(--c-text-primary); }
-.header-logo { width: 28px; height: 28px; border-radius: 50%; object-fit: cover; flex-shrink: 0; box-shadow: 0 0 0 2px rgba(129,140,248,0.4); }
-.header-right { display: flex; align-items: center; gap: 10px; }
+.header-left h1 { font-size: 14px; font-weight: 600; letter-spacing: -0.015em; color: var(--c-text-primary); }
+.header-logo { width: 30px; height: 30px; border-radius: 999px; object-fit: cover; flex-shrink: 0; box-shadow: 0 0 0 2px rgba(37,99,235,0.18); }
+.header-right { display: flex; align-items: center; gap: 8px; }
 
 .site-link {
-    font-size: 12px; font-weight: 500; color: var(--c-text-muted);
-    text-decoration: none; padding: 5px 12px; border-radius: 6px;
-    border: 1px solid var(--c-border); transition: all 0.15s;
+    display: inline-flex; align-items: center; justify-content: center;
+    min-height: 32px; font-size: 11px; font-weight: 600; color: var(--c-text-muted);
+    text-decoration: none; padding: 0 12px; border-radius: 8px;
+    background: var(--c-surface); border: 1px solid var(--c-border); transition: all 0.15s;
+    box-shadow: var(--shadow-sm);
 }
 .site-link:hover { color: var(--c-text-primary); border-color: var(--c-accent); background: var(--c-surface-hover); }
 
 .env-badge {
     display: inline-flex; align-items: center; gap: 6px;
-    font-size: 14px; font-weight: 700; letter-spacing: 0.02em;
-    padding: 5px 14px; border-radius: 8px;
+    min-height: 32px; font-size: 11px; font-weight: 600; letter-spacing: 0.04em;
+    padding: 0 12px; border-radius: 999px;
     background: var(--c-badge-bg); border: 1.5px solid var(--c-badge-border); color: var(--c-badge-text);
 }
 .env-badge .dot {
@@ -107,8 +122,10 @@ function serveDashboardHtml(): void
 }
 
 .theme-toggle {
-    background: none; border: none; cursor: pointer; padding: 6px; border-radius: 8px;
-    color: var(--c-text-muted); transition: all 0.2s;
+    display: inline-flex; align-items: center; justify-content: center;
+    width: 32px; height: 32px;
+    background: var(--c-surface); border: 1px solid var(--c-border); cursor: pointer; padding: 0; border-radius: 8px;
+    color: var(--c-text-muted); transition: all 0.2s; box-shadow: var(--shadow-sm);
 }
 .theme-toggle:hover { color: var(--c-text-primary); background: var(--c-surface-hover); }
 .hidden { display: none; }
@@ -119,9 +136,9 @@ function serveDashboardHtml(): void
 }
 .project-selector select {
     appearance: none; -webkit-appearance: none;
-    padding: 5px 28px 5px 10px; font-size: 12px; font-weight: 500; font-family: 'JetBrains Mono', monospace;
+    min-height: 32px; padding: 0 30px 0 10px; font-size: 11px; font-weight: 500; font-family: var(--font-mono);
     background: var(--c-bg); color: var(--c-text-primary);
-    border: 1px solid var(--c-border); border-radius: 6px;
+    border: 1px solid var(--c-border); border-radius: 8px;
     cursor: pointer; max-width: 220px; outline: none; transition: border-color 0.15s;
     background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 24 24' fill='none' stroke='%236d6f78' stroke-width='2'%3E%3Cpath d='M6 9l6 6 6-6'/%3E%3C/svg%3E");
     background-repeat: no-repeat; background-position: right 8px center;
@@ -129,9 +146,9 @@ function serveDashboardHtml(): void
 .project-selector select:focus { border-color: var(--c-accent); }
 .project-selector select:hover { border-color: var(--c-accent); }
 .project-custom-input {
-    padding: 5px 10px; font-size: 12px; font-weight: 500; font-family: 'JetBrains Mono', monospace;
+    min-height: 32px; padding: 0 10px; font-size: 11px; font-weight: 500; font-family: var(--font-mono);
     background: var(--c-bg); color: var(--c-text-primary);
-    border: 1px solid var(--c-border); border-radius: 6px;
+    border: 1px solid var(--c-border); border-radius: 8px;
     width: 260px; outline: none; transition: border-color 0.15s;
 }
 .project-custom-input:focus { border-color: var(--c-accent); }
@@ -147,59 +164,73 @@ function serveDashboardHtml(): void
 }
 
 /* -- Layout ------------------------------------------ */
-.layout { display: flex; flex: 1; overflow: hidden; }
+.layout {
+    display: flex;
+    flex: 1;
+    overflow: hidden;
+    gap: 16px;
+    padding: 16px;
+}
 
 /* -- Sidebar ----------------------------------------- */
 .sidebar-wrapper {
-    width: 272px; min-width: 272px; flex-shrink: 0;
+    width: 280px; min-width: 280px; flex-shrink: 0;
     display: flex; flex-direction: column;
-    background: var(--c-surface); border-right: 1px solid var(--c-border);
+    background: var(--c-surface);
+    border: 1px solid var(--c-border);
+    border-radius: 16px;
+    box-shadow: var(--shadow-panel);
+    overflow: hidden;
 }
 .sidebar {
-    flex: 1; overflow-y: auto; padding: 12px 0;
+    flex: 1; overflow-y: auto; padding: 12px 10px;
 }
 .sidebar-footer {
-    padding: 8px 0; text-align: center; border-top: 1px solid var(--c-border);
+    padding: 8px 12px; text-align: center; border-top: 1px solid var(--c-border);
+    background: var(--c-surface-hover);
 }
 .sidebar-footer a {
-    font-size: 10px; color: var(--c-text-faint); text-decoration: none;
+    font-size: 9px; color: var(--c-text-faint); text-decoration: none; opacity: 0.6;
 }
-.sidebar-footer a:hover { text-decoration: underline; }
+.sidebar-footer a:hover { text-decoration: underline; opacity: 1; }
 
 .category { margin-bottom: 2px; }
 .category-header {
     display: flex; align-items: center; gap: 6px;
-    padding: 8px 16px; cursor: pointer; user-select: none;
-    font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.06em;
-    color: var(--c-text-faint); transition: color 0.15s;
+    padding: 8px 10px; cursor: pointer; user-select: none;
+    font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.1em;
+    color: var(--c-text-muted); transition: color 0.15s;
+    font-family: var(--font-sans);
 }
 .category-header:hover { color: var(--c-text-secondary); }
 .category-header .arrow {
     font-size: 10px; transition: transform 0.15s; display: inline-block; width: 12px;
 }
-.category.collapsed .arrow { transform: rotate(-90deg); }
+.category:not(.collapsed) .arrow { transform: rotate(90deg); }
 .category.collapsed .category-scripts { display: none; }
 
 .script-btn {
     display: flex; align-items: flex-start; gap: 10px;
-    width: calc(100% - 20px); margin: 1px 10px; padding: 8px 12px;
-    font-size: 13px; font-family: inherit; font-weight: 500;
+    width: 100%; margin: 2px 0; padding: 10px 12px;
+    font-size: 12px; font-family: inherit; font-weight: 600;
     text-align: left; text-decoration: none;
     background: transparent; color: var(--c-text-primary);
-    border: 1px solid transparent; border-radius: 8px;
+    border: 1px solid transparent; border-radius: 12px;
     cursor: pointer; transition: all 0.15s; position: relative;
 }
 .script-btn:hover { background: var(--c-surface-hover); border-color: var(--c-border); }
 .script-btn:active { transform: scale(0.98); }
 .script-btn.running {
     background: var(--c-badge-bg); border-color: var(--c-badge-border);
+    border-left: 3px solid var(--c-accent);
+    padding-left: 9px;
     animation: pulse 2s ease-in-out infinite;
 }
 .script-btn:disabled { opacity: 0.35; cursor: not-allowed; pointer-events: none; }
 .script-btn .s-name { line-height: 1.3; }
 .script-btn .s-desc {
-    display: block; font-size: 11px; font-weight: 400; color: var(--c-text-faint);
-    margin-top: 1px; line-height: 1.3;
+    display: block; font-size: 11px; font-weight: 400; color: var(--c-text-muted);
+    margin-top: 2px; line-height: 1.45;
 }
 
 @keyframes pulse {
@@ -208,39 +239,59 @@ function serveDashboardHtml(): void
 }
 
 /* -- Terminal panel ---------------------------------- */
-.terminal-wrapper { flex: 1; display: flex; flex-direction: column; overflow: hidden; }
+.terminal-wrapper {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    overflow: hidden;
+    min-width: 0;
+    background: var(--c-surface);
+    border: 1px solid var(--c-border);
+    border-radius: 16px;
+    box-shadow: var(--shadow-panel);
+}
 
 .terminal-toolbar {
     display: flex; align-items: center; gap: 10px;
-    padding: 10px 16px; flex-shrink: 0;
+    padding: 12px 16px; flex-shrink: 0;
     background: var(--c-surface); border-bottom: 1px solid var(--c-border);
 }
 .script-label {
-    flex: 1; font-size: 13px; font-weight: 500;
-    font-family: 'JetBrains Mono', monospace; color: var(--c-text-muted);
+    flex: 1; font-size: 12px; font-weight: 500;
+    font-family: var(--font-mono); color: var(--c-text-muted);
 }
 .script-label strong { color: var(--c-green); font-weight: 600; }
 
 .toolbar-btn {
-    padding: 5px 14px; font-size: 12px; font-weight: 600;
-    font-family: 'Inter', sans-serif;
-    border: 1px solid var(--c-border); border-radius: 6px;
+    min-height: 30px; padding: 0 12px; font-size: 11px; font-weight: 600;
+    font-family: var(--font-sans);
+    border: 1px solid var(--c-border); border-radius: 8px;
     cursor: pointer; transition: all 0.15s;
-    background: var(--c-surface-hover); color: var(--c-text-secondary);
+    background: var(--c-surface-hover); color: var(--c-text-secondary); box-shadow: var(--shadow-sm);
 }
 .toolbar-btn:hover { background: var(--c-bg); color: var(--c-text-primary); }
 .toolbar-btn.stop { background: rgba(243,139,168,0.12); border-color: var(--c-red); color: var(--c-red); }
 .toolbar-btn.stop:hover { background: rgba(243,139,168,0.25); }
 .toolbar-btn:disabled { opacity: 0.25; cursor: not-allowed; }
+.toolbar-btn.stop:disabled { background: var(--c-surface-hover); border-color: var(--c-border); color: var(--c-text-faint); }
 
 .terminal {
     flex: 1; overflow-y: auto; padding: 16px 20px;
-    font-family: 'JetBrains Mono', 'SF Mono', Consolas, monospace;
-    font-size: 13px; line-height: 1.6;
+    font-family: var(--font-mono);
+    font-size: 12px; line-height: 1.65;
     white-space: pre-wrap; word-break: break-word;
     background: var(--c-terminal-bg);
+    color: var(--c-text-secondary);
+}
+.terminal .welcome {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    height: 100%;
+    color: var(--c-text-faint);
+    font-style: normal;
+    font-size: 12px;
 }
-.terminal .welcome { color: var(--c-text-faint); font-style: italic; }
 
 /* -- ANSI colors ------------------------------------- */
 .ansi-bold { font-weight: 700; }
@@ -259,11 +310,11 @@ function serveDashboardHtml(): void
     display: flex; align-items: flex-start; gap: 10px;
     margin: 12px 16px; padding: 12px 16px;
     font-size: 12px; line-height: 1.5; color: var(--c-yellow);
-    background: rgba(202,138,4,0.08); border: 1px solid rgba(202,138,4,0.25);
-    border-radius: 8px;
+    background: rgba(217,119,6,0.08); border: 1px solid rgba(217,119,6,0.2);
+    border-radius: 12px;
 }
 .config-banner code {
-    font-family: 'JetBrains Mono', monospace; font-size: 11px;
+    font-family: var(--font-mono); font-size: 11px;
     padding: 1px 5px; border-radius: 4px;
     background: rgba(202,138,4,0.12);
 }
@@ -282,29 +333,197 @@ function serveDashboardHtml(): void
 .modal-overlay.open { display: flex; }
 .modal {
     background: var(--c-surface); border: 1px solid var(--c-border);
-    border-radius: 12px; padding: 24px; min-width: 360px; max-width: 440px;
-    box-shadow: 0 16px 48px rgba(0,0,0,0.3);
+    border-radius: 16px; padding: 22px; min-width: 360px; max-width: 440px;
+    box-shadow: 0 24px 48px rgba(15,23,42,0.22);
 }
-.modal h3 { font-size: 15px; font-weight: 700; margin-bottom: 16px; color: var(--c-text-primary); }
-.modal label { display: block; font-size: 12px; font-weight: 600; color: var(--c-text-secondary); margin-bottom: 6px; }
+.modal h3 { font-size: 14px; font-weight: 600; margin-bottom: 16px; color: var(--c-text-primary); }
+.modal label { display: block; font-size: 11px; font-weight: 600; color: var(--c-text-secondary); margin-bottom: 6px; text-transform: uppercase; letter-spacing: 0.08em; font-family: var(--font-sans); }
 .modal input, .modal select {
-    width: 100%; padding: 9px 12px; font-size: 14px; font-family: inherit;
+    width: 100%; padding: 9px 12px; font-size: 13px; font-family: inherit;
     background: var(--c-bg); color: var(--c-text-primary);
-    border: 1px solid var(--c-border); border-radius: 8px; outline: none; margin-bottom: 16px;
+    border: 1px solid var(--c-border); border-radius: 10px; outline: none; margin-bottom: 16px;
 }
 .modal input:focus, .modal select:focus { border-color: var(--c-accent); }
 .modal .modal-actions { display: flex; gap: 8px; justify-content: flex-end; margin-top: 20px; }
 .modal .modal-actions button {
-    padding: 8px 20px; font-size: 13px; font-weight: 600; font-family: inherit;
+    min-height: 34px; padding: 0 14px; font-size: 11px; font-weight: 600; font-family: inherit;
     border-radius: 8px; cursor: pointer; border: 1px solid var(--c-border); transition: all 0.15s;
 }
 .modal .btn-cancel { background: var(--c-bg); color: var(--c-text-secondary); }
 .modal .btn-cancel:hover { background: var(--c-surface-hover); }
 .modal .btn-confirm {
-    background: var(--c-accent); color: #fff; border-color: var(--c-accent); font-weight: 700;
+    background: var(--c-accent); color: #fff; border-color: var(--c-accent); font-weight: 600;
 }
 .modal .btn-confirm:hover { background: var(--c-accent-hover); }
+
+@media (max-width: 980px) {
+    body { height: auto; min-height: 100vh; overflow: auto; }
+    .header {
+        height: auto;
+        padding: 12px;
+        align-items: flex-start;
+        gap: 10px;
+        flex-wrap: wrap;
+    }
+    .header-right {
+        width: 100%;
+        flex-wrap: wrap;
+        justify-content: flex-start;
+    }
+    .project-selector { width: 100%; }
+    .project-selector select,
+    .project-custom-input { max-width: none; width: 100%; }
+    .layout {
+        flex-direction: column;
+        overflow: visible;
+        padding: 12px;
+    }
+    .sidebar-wrapper {
+        width: 100%;
+        min-width: 0;
+        max-height: 42vh;
+    }
+    .terminal-wrapper {
+        min-height: 52vh;
+    }
+    .terminal-toolbar {
+        flex-wrap: wrap;
+    }
+}
 CSS;
+    echo <<<'CSS'
+
+/* ── Shared UI patterns ──────────────────────────────────────── */
+.status-badge {
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+    font-size: 11px;
+    font-weight: 600;
+    line-height: 1;
+}
+.status-badge .dot {
+    width: 7px;
+    height: 7px;
+    border-radius: 50%;
+    flex-shrink: 0;
+}
+.status-badge .dot.pulse {
+    animation: statusPulse 2s ease-in-out infinite;
+}
+@keyframes statusPulse {
+    0%, 100% { box-shadow: 0 0 0 0 rgba(34,197,94,0.4); }
+    50% { box-shadow: 0 0 0 4px rgba(34,197,94,0); }
+}
+.status-badge.success .dot { background: var(--c-green); }
+.status-badge.error .dot { background: var(--c-red); }
+.status-badge.warning .dot { background: var(--c-yellow); }
+.status-badge.running .dot { background: var(--c-accent); }
+.status-badge.idle .dot { background: var(--c-text-faint); opacity: 0.5; }
+.status-badge.success { color: var(--c-green); }
+.status-badge.error { color: var(--c-red); }
+.status-badge.warning { color: var(--c-yellow); }
+.status-badge.running { color: var(--c-accent); }
+.status-badge.idle { color: var(--c-text-faint); }
+
+.result-alert {
+    display: flex;
+    align-items: flex-start;
+    gap: 10px;
+    padding: 10px 14px;
+    border-radius: 10px;
+    font-size: 12px;
+    line-height: 1.5;
+    margin-top: 12px;
+    border-left: 3px solid transparent;
+    animation: alertSlideIn 0.2s ease-out;
+}
+@keyframes alertSlideIn {
+    from { opacity: 0; transform: translateY(-4px); }
+    to { opacity: 1; transform: translateY(0); }
+}
+.result-alert.error {
+    background: rgba(220,38,38,0.08);
+    border-left-color: var(--c-red);
+    color: var(--c-red);
+}
+.result-alert.success {
+    background: rgba(22,163,74,0.08);
+    border-left-color: var(--c-green);
+    color: var(--c-green);
+}
+.result-alert.warning {
+    background: rgba(217,119,6,0.08);
+    border-left-color: var(--c-yellow);
+    color: var(--c-yellow);
+}
+.result-alert.info {
+    background: var(--c-badge-bg);
+    border-left-color: var(--c-accent);
+    color: var(--c-badge-text);
+}
+.result-alert .alert-title { font-weight: 600; font-size: 12px; }
+.result-alert .alert-detail { font-size: 11px; opacity: 0.85; margin-top: 2px; }
+.result-alert .alert-dismiss {
+    margin-left: auto;
+    background: none;
+    border: none;
+    cursor: pointer;
+    color: inherit;
+    opacity: 0.5;
+    font-size: 14px;
+    padding: 0 2px;
+    flex-shrink: 0;
+}
+.result-alert .alert-dismiss:hover { opacity: 1; }
+[data-theme="dark"] .result-alert.error { background: rgba(251,113,133,0.08); }
+[data-theme="dark"] .result-alert.success { background: rgba(74,222,128,0.08); }
+[data-theme="dark"] .result-alert.warning { background: rgba(245,158,11,0.08); }
+
+.collapsible-header {
+    display: flex;
+    align-items: center;
+    gap: 6px;
+    cursor: pointer;
+    user-select: none;
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+    font-family: var(--font-sans);
+    padding: 8px 0;
+    transition: color 0.15s;
+}
+.collapsible-header:hover { color: var(--c-text-secondary); }
+.collapsible-header .chevron {
+    display: inline-block;
+    font-size: 10px;
+    transition: transform 0.2s;
+    width: 12px;
+}
+.collapsible-header.open .chevron { transform: rotate(90deg); }
+.collapsible-body {
+    overflow: hidden;
+    max-height: 0;
+    opacity: 0;
+    transition: max-height 0.25s ease, opacity 0.2s ease;
+}
+.collapsible-body.open {
+    max-height: 2000px;
+    opacity: 1;
+}
+
+button:focus-visible,
+input:focus-visible,
+select:focus-visible,
+textarea:focus-visible,
+[tabindex]:focus-visible {
+    outline: 2px solid var(--c-accent);
+    outline-offset: 2px;
+}
+CSS;
+    echo tunnelCss();
     echo '</style>';
     echo '</head>';
 
@@ -334,6 +553,8 @@ function serveDashboardHtml(): void
         $siteLabel = htmlspecialchars(is_string($parsedHost) ? $parsedHost : $siteUrlConst, ENT_QUOTES);
         echo '    <a class="site-link" href="' . $siteUrl . '" target="_blank">' . $siteLabel . '</a>';
     }
+    echo '    <a class="site-link" href="/aws">AWS Reports</a>';
+    echo tunnelHeaderIconHtml();
     echo '    <span class="env-badge" id="envBadge" title="' . htmlspecialchars(SCRIPTS_DIR, ENT_QUOTES) . '"><span class="dot"></span><span id="envBadgeText">' . $defaultDir . '</span></span>';
     echo '    <button class="theme-toggle" onclick="toggleTheme()" title="Toggle theme">';
     echo '      <svg id="themeIconSun" class="hidden" width="18" height="18" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><circle cx="12" cy="12" r="5"/><path d="M12 1v2M12 21v2M4.22 4.22l1.42 1.42M18.36 18.36l1.42 1.42M1 12h2M21 12h2M4.22 19.78l1.42-1.42M18.36 5.64l1.42-1.42"/></svg>';
@@ -342,7 +563,7 @@ function serveDashboardHtml(): void
     echo '  </div>';
     echo '</header>';
 
-    echo '<div class="layout">';
+    echo '<div class="layout" id="mainLayout">';
     echo '    <div class="sidebar-wrapper">';
     echo '        <div class="sidebar" id="sidebar"></div>';
     echo '        <div class="sidebar-footer"><a href="https://www.blundergoat.com" target="_blank" rel="noopener">created by BlunderGOAT</a></div>';
@@ -354,6 +575,7 @@ function serveDashboardHtml(): void
     echo '            <button class="toolbar-btn stop" id="stopBtn" disabled onclick="stopScript()">Stop</button>';
     echo '            <button class="toolbar-btn" id="clearBtn" onclick="clearTerminal()">Clear</button>';
     echo '        </div>';
+    echo '        <div id="terminalAlert" style="display:none"></div>';
     if (IS_EXAMPLE_CONFIG) {
         echo '<div class="config-banner" id="configBanner">';
         echo '<span>You\'re using the default example config. Edit <code>dashboard/config.php</code> to add the scripts useful for your project. Run <strong>Help</strong> to see what\'s available.</span>';
@@ -361,11 +583,13 @@ function serveDashboardHtml(): void
         echo '</div>';
     }
     echo '        <div class="terminal" id="terminal">';
-    echo '            <span class="welcome">Select a script from the sidebar to run it.</span>';
+    echo '            <div class="welcome">Select a script from the sidebar to run it.</div>';
     echo '        </div>';
     echo '    </div>';
     echo '</div>';
 
+    echo tunnelPageHtml();
+
     echo <<<'HTML_BODY'
 
 <div class="modal-overlay" id="modalOverlay">
@@ -409,6 +633,7 @@ function toggleTheme() {
     eventSource: null,     // active EventSource for SSE streaming, null when idle
     autoScroll: true,      // auto-scroll terminal to bottom on new output
     pendingCallback: null, // callback to execute when modal is confirmed, null when no modal
+    stopRequested: false,  // set when the user cancels a run before the stream closes
 };
 
 const $ = (sel) => document.querySelector(sel);
@@ -417,6 +642,12 @@ function toggleTheme() {
 const stopBtn  = $('#stopBtn');
 const scriptLabel = $('#scriptLabel');
 
+HTML_BODY;
+
+    echo tunnelJs();
+
+    echo <<<'HTML_BODY'
+
 /**
  * Init
  */
@@ -434,6 +665,7 @@ function toggleTheme() {
 
         // Load project list and restore saved selection from localStorage
         await loadProjects();
+        updateTunnelIcon();
 
         // Check if a script is already running (e.g. page was refreshed mid-run)
         const statusResp = await fetch('/api/status');
@@ -460,7 +692,7 @@ function renderSidebar() {
         div.className = 'category';
         div.innerHTML = `
             <div class="category-header" onclick="this.parentElement.classList.toggle('collapsed')">
-                <span class="arrow">\u25be</span>${esc(cat.category)}
+                <span class="arrow">\u25b8</span>${esc(cat.category)}
             </div>
             <div class="category-scripts"></div>
         `;
@@ -528,6 +760,7 @@ function onScriptClick(script) {
         state.runningId = data.id;
         state.runningScriptId = scriptId;
         state.startedAt = Date.now();
+        state.stopRequested = false;
 
         // Resolve the display label (show cmd + args rather than just the script ID)
         let label = scriptId;
@@ -563,12 +796,20 @@ function connectStream(id) {
     es.addEventListener('done', () => {
         const elapsedSecs = state.startedAt ? ((Date.now() - state.startedAt) / 1000) : 0;
         const duration = elapsedSecs < 1 ? '<1s' : elapsedSecs < 60 ? Math.round(elapsedSecs) + 's' : Math.floor(elapsedSecs/60) + 'm ' + Math.round(elapsedSecs%60) + 's';
-        appendToTerminal(`\n<span class="ansi-green ansi-bold">\u2714 Done</span> <span class="ansi-dim">in ${duration}</span>\n`);
-        setRunningState(false, null, duration);
+        if (!state.stopRequested) {
+            appendToTerminal(`\n<span class="ansi-green ansi-bold">\u2714 Done</span> <span class="ansi-dim">in ${duration}</span>\n`);
+            const alertDiv = document.getElementById('terminalAlert');
+            alertDiv.style.display = 'block';
+            alertDiv.innerHTML = '<div class="result-alert success" style="margin:0 12px;border-radius:0 0 8px 8px"><div><span class="alert-title">\u2714 Done in ' + esc(duration) + '</span></div><button class="alert-dismiss" onclick="this.closest(\'.result-alert\').parentElement.style.display=\'none\'">&times;</button></div>';
+            setRunningState(false, null, duration);
+        } else {
+            setRunningState(false);
+        }
         es.close();
         state.eventSource = null;
         state.runningId = null;
         state.runningScriptId = null;
+        state.stopRequested = false;
         updateButtons();
     });
 
@@ -582,6 +823,7 @@ function connectStream(id) {
                 setRunningState(false);
                 state.runningId = null;
                 state.runningScriptId = null;
+                state.stopRequested = false;
                 updateButtons();
             }
         }, 2000);
@@ -593,8 +835,15 @@ function connectStream(id) {
     stopBtn.disabled = true;
     stopBtn.textContent = 'Stopping\u2026';
     try {
-        await fetch(`/api/stop/${state.runningId}`, { method: 'POST' });
+        const resp = await fetch(`/api/stop/${state.runningId}`, { method: 'POST' });
+        if (!resp.ok) {
+            throw new Error('Stop request failed');
+        }
+        state.stopRequested = true;
         appendToTerminal('\n<span class="ansi-yellow">\u2500\u2500 Stopped \u2500\u2500</span>\n');
+        const alertDiv = document.getElementById('terminalAlert');
+        alertDiv.style.display = 'block';
+        alertDiv.innerHTML = '<div class="result-alert warning" style="margin:0 12px;border-radius:0 0 8px 8px"><div><span class="alert-title">Stopped</span></div><button class="alert-dismiss" onclick="this.closest(\'.result-alert\').parentElement.style.display=\'none\'">&times;</button></div>';
     } catch (e) {
         appendToTerminal('\n<span class="ansi-red">Failed to stop process</span>\n');
     }
@@ -653,7 +902,7 @@ function appendToTerminal(html) {
     state.autoScroll = terminal.scrollTop + terminal.clientHeight >= terminal.scrollHeight - 50;
 });
 
-function clearTerminal() { terminal.innerHTML = ''; }
+function clearTerminal() { terminal.innerHTML = ''; document.getElementById('terminalAlert').style.display = 'none'; state.stopRequested = false; }
 
 /** Copy terminal text content to clipboard (strips HTML/ANSI spans). */
 function copyOutput() {
@@ -747,7 +996,13 @@ function closeModal() {
 
 // Keyboard shortcuts: Escape closes modal, Enter confirms it
 document.addEventListener('keydown', (e) => {
-    if (e.key === 'Escape') closeModal();
+    if (e.key === 'Escape') {
+        if (state.tunnelPageVisible) {
+            toggleTunnelPage();
+            return;
+        }
+        closeModal();
+    }
     if (e.key === 'Enter' && $('#modalOverlay').classList.contains('open')) confirmModal();
 });
 
@@ -758,6 +1013,13 @@ function closeModal() {
 /** HTML-escape a string to prevent XSS when injecting into innerHTML. */
 function esc(str) { const d = document.createElement('div'); d.textContent = str || ''; return d.innerHTML; }
 
+function toggleCollapsible(header) {
+    const body = header.nextElementSibling;
+    const isOpen = header.classList.contains('open');
+    header.classList.toggle('open', !isOpen);
+    if (body) body.classList.toggle('open', !isOpen);
+}
+
 /**
  * Project Selector (WSL Path Selector)
  *
diff --git a/dashboard/index.php b/dashboard/index.php
index 9656af0..de70c61 100644
--- a/dashboard/index.php
+++ b/dashboard/index.php
@@ -58,6 +58,9 @@
 if (!defined('TIMINGS_FILE')) {
     define('TIMINGS_FILE', TMP_DIR . '/timings.json');
 }
+if (!defined('TUNNEL_STATE_FILE')) {
+    define('TUNNEL_STATE_FILE', TMP_DIR . '/tunnel-state.json');
+}
 
 // Environment name: from env var, or infer from grandparent directory name
 // (e.g. /srv/deploy/my-project → "deploy"), or fall back to 'local'
@@ -83,7 +86,9 @@
 // When true, the UI shows a banner prompting the user to customize it.
 $configFile = __DIR__ . '/config.php';
 $exampleFile = __DIR__ . '/config.example.php';
-$isExampleConfig = file_exists($exampleFile) && file_get_contents($configFile) === file_get_contents($exampleFile);
+$isExampleConfig = file_exists($configFile)
+    && file_exists($exampleFile)
+    && file_get_contents($configFile) === file_get_contents($exampleFile);
 if (!defined('IS_EXAMPLE_CONFIG')) {
     define('IS_EXAMPLE_CONFIG', $isExampleConfig);
 }
@@ -119,6 +124,7 @@ function findScript(string $id): ?array // @phpstan-ignore missingType.iterableV
 }
 
 require __DIR__ . '/frontend.php';
+require __DIR__ . '/aws.php';
 
 /* ============================================================
  * Project path helpers (Phase 2 - WSL Path Selector)
@@ -227,6 +233,20 @@ function validateProjectPath(string $path): ?string
     jsonResponse(getTimings());
 } elseif ($uri === '/api/projects' && $method === 'GET') {
     handleApiProjects();
+} elseif ($uri === '/api/aws/run' && $method === 'POST') {
+    handleAwsDashboardRequest($method);
+} elseif ($uri === '/api/tunnel-status' && $method === 'GET') {
+    handleApiTunnelStatus();
+} elseif ($uri === '/api/tunnel-start' && $method === 'POST') {
+    handleApiTunnelStart();
+} elseif ($uri === '/api/tunnel-stop' && $method === 'POST') {
+    handleApiTunnelStop();
+} elseif ($uri === '/api/tunnel-configure' && $method === 'POST') {
+    handleApiTunnelConfigure();
+} elseif ($uri === '/api/tunnel-logs' && $method === 'GET') {
+    handleApiTunnelLogs();
+} elseif ($uri === '/api/tunnel-test' && ($method === 'GET' || $method === 'POST')) {
+    handleApiTunnelTest();
 } elseif ($uri === '/api/run' && $method === 'POST') {
     handleApiRun();
 } elseif ($uri === '/api/status' && $method === 'GET') {
@@ -245,6 +265,8 @@ function validateProjectPath(string $path): ?string
     } else {
         http_response_code(404);
     }
+} elseif (($uri === '/aws' || $uri === '/aws.php') && ($method === 'GET' || $method === 'POST')) {
+    handleAwsDashboardRequest($method);
 } elseif ($uri === '/' || $uri === '') {
     serveDashboardHtml();
 } else {
@@ -262,7 +284,7 @@ function jsonResponse(array $data, int $status = 200): void // @phpstan-ignore m
 {
     http_response_code($status);
     header('Content-Type: application/json');
-    echo json_encode($data, JSON_UNESCAPED_SLASHES);
+    echo json_encode($data, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE);
 }
 
 /**
@@ -292,6 +314,425 @@ function getJsonBody(): array
     return $decoded;
 }
 
+/**
+ * @return array<string, mixed>|null
+ */
+function readTunnelState(): ?array
+{
+    if (!file_exists(TUNNEL_STATE_FILE)) {
+        return null;
+    }
+
+    $raw = file_get_contents(TUNNEL_STATE_FILE);
+    $decoded = $raw !== false ? json_decode($raw, true) : null;
+    if (!is_array($decoded)) {
+        return null;
+    }
+
+    $url = isset($decoded['url']) && is_string($decoded['url']) ? trim($decoded['url']) : '';
+    if ($url === '') {
+        return null;
+    }
+
+    $provider = isset($decoded['provider']) && is_string($decoded['provider']) ? $decoded['provider'] : 'manual';
+    $pid = isset($decoded['pid']) ? (int) $decoded['pid'] : 0;
+    if ($provider !== 'manual' && $pid > 0 && !processExists($pid)) {
+        @unlink(TUNNEL_STATE_FILE);
+        return null;
+    }
+
+    return [
+        'active' => true,
+        'url' => $url,
+        'provider' => $provider,
+        'pid' => $pid > 0 ? $pid : null,
+        'target' => isset($decoded['target']) && is_string($decoded['target']) ? $decoded['target'] : null,
+        'started_at' => isset($decoded['started_at']) && is_string($decoded['started_at']) ? $decoded['started_at'] : null,
+        'configured_at' => isset($decoded['configured_at']) && is_string($decoded['configured_at']) ? $decoded['configured_at'] : null,
+        'note' => isset($decoded['note']) && is_string($decoded['note']) ? $decoded['note'] : null,
+    ];
+}
+
+function getDefaultTunnelTarget(): string
+{
+    $port = (string) ($_SERVER['SERVER_PORT'] ?? getenv('DASHBOARD_PORT') ?: '8899');
+    if (!ctype_digit($port)) {
+        $port = '8899';
+    }
+
+    return 'http://127.0.0.1:' . $port;
+}
+
+function getTunnelLogFile(): string
+{
+    return TMP_DIR . '/tunnel-cloudflare.log';
+}
+
+function getTunnelPidFile(): string
+{
+    return TMP_DIR . '/tunnel-cloudflare.pid';
+}
+
+function findCloudflaredBinary(): ?string
+{
+    $candidates = [];
+    $envBin = getenv('CLOUDFLARED_BIN');
+    if (is_string($envBin) && $envBin !== '') {
+        $candidates[] = $envBin;
+    }
+
+    $home = getenv('HOME');
+    if (is_string($home) && $home !== '') {
+        $candidates[] = $home . '/.local/bin/cloudflared';
+    }
+
+    $candidates[] = '/usr/local/bin/cloudflared';
+    $candidates[] = '/usr/bin/cloudflared';
+
+    foreach ($candidates as $candidate) {
+        if (is_string($candidate) && $candidate !== '' && is_file($candidate) && is_executable($candidate)) {
+            return $candidate;
+        }
+    }
+
+    $resolved = trim((string) shell_exec('command -v cloudflared 2>/dev/null'));
+    return $resolved !== '' ? $resolved : null;
+}
+
+function parseCloudflareTunnelUrl(string $log): ?string
+{
+    if (preg_match_all('#https://([a-z0-9-]+)\.trycloudflare\.com#i', $log, $matches, PREG_SET_ORDER) === false) {
+        return null;
+    }
+
+    foreach (array_reverse($matches) as $match) {
+        $subdomain = strtolower((string) ($match[1] ?? ''));
+        if ($subdomain !== '' && $subdomain !== 'api') {
+            return (string) $match[0];
+        }
+    }
+
+    return null;
+}
+
+/**
+ * @return array<string, mixed>
+ */
+function tunnelStatusPayload(?array $state): array
+{
+    if ($state === null) {
+        return [
+            'active' => false,
+            'provider_default' => 'cloudflare',
+            'cloudflare_available' => findCloudflaredBinary() !== null,
+            'default_target' => getDefaultTunnelTarget(),
+        ];
+    }
+
+    return $state + [
+        'provider_default' => 'cloudflare',
+        'cloudflare_available' => findCloudflaredBinary() !== null,
+        'default_target' => getDefaultTunnelTarget(),
+    ];
+}
+
+function handleApiTunnelStatus(): void
+{
+    $state = readTunnelState();
+    jsonResponse(tunnelStatusPayload($state));
+}
+
+function handleApiTunnelLogs(): void
+{
+    $logFile = getTunnelLogFile();
+    if (!file_exists($logFile)) {
+        jsonResponse(['logs' => '', 'exists' => false]);
+        return;
+    }
+    $content = (string) file_get_contents($logFile);
+    if (strlen($content) > 4096) {
+        $content = substr($content, -4096);
+    }
+    jsonResponse(['logs' => $content, 'exists' => true]);
+}
+
+function handleApiTunnelStart(): void
+{
+    $existing = readTunnelState();
+    if ($existing !== null && ($existing['provider'] ?? '') === 'cloudflare' && isset($existing['url'])) {
+        jsonResponse(tunnelStatusPayload($existing));
+        return;
+    }
+
+    $cloudflared = findCloudflaredBinary();
+    if ($cloudflared === null) {
+        jsonResponse(['error' => 'cloudflared is not installed or not on PATH'], 500);
+        return;
+    }
+
+    $body = getJsonBody();
+    $target = trim((string) ($body['target'] ?? getDefaultTunnelTarget()));
+    if (filter_var($target, FILTER_VALIDATE_URL) === false) {
+        jsonResponse(['error' => 'Tunnel target must be a valid absolute URL'], 400);
+        return;
+    }
+
+    handleApiTunnelStop(true);
+
+    $logFile = getTunnelLogFile();
+    $pidFile = getTunnelPidFile();
+    @unlink($logFile);
+    @unlink($pidFile);
+
+    $inner = 'echo $$ > ' . escapeshellarg($pidFile)
+        . '; exec ' . escapeshellarg($cloudflared)
+        . ' tunnel --no-autoupdate --url ' . escapeshellarg($target)
+        . ' > ' . escapeshellarg($logFile) . ' 2>&1';
+    $command = 'setsid bash -lc ' . escapeshellarg($inner) . ' > /dev/null 2>&1 &';
+    exec($command);
+
+    $pid = 0;
+    for ($attempt = 0; $attempt < 50; $attempt++) {
+        usleep(100000);
+        if (file_exists($pidFile)) {
+            $pid = (int) trim((string) file_get_contents($pidFile));
+            if ($pid > 0) {
+                break;
+            }
+        }
+    }
+
+    $url = null;
+    $lastLog = '';
+    for ($attempt = 0; $attempt < 150; $attempt++) {
+        usleep(100000);
+        $lastLog = file_exists($logFile) ? (string) file_get_contents($logFile) : '';
+        $url = $lastLog !== '' ? parseCloudflareTunnelUrl($lastLog) : null;
+        if ($url !== null) {
+            break;
+        }
+        if ($pid > 0 && !processExists($pid)) {
+            break;
+        }
+    }
+
+    if ($url === null) {
+        if ($pid > 0) {
+            killProcess(-$pid, 15);
+            killProcess($pid, 15);
+        }
+
+        $message = 'Cloudflare tunnel did not start cleanly';
+        if ($lastLog !== '') {
+            $tail = trim(substr($lastLog, -400));
+            if ($tail !== '') {
+                $message .= ': ' . preg_replace('/\s+/', ' ', $tail);
+            }
+        }
+
+        jsonResponse(['error' => $message], 502);
+        return;
+    }
+
+    $state = [
+        'url' => rtrim($url, '/'),
+        'provider' => 'cloudflare',
+        'pid' => $pid > 0 ? $pid : null,
+        'target' => $target,
+        'started_at' => gmdate('Y-m-d\TH:i:s\Z'),
+        'configured_at' => gmdate('Y-m-d\TH:i:s\Z'),
+        'note' => 'Cloudflare quick tunnel',
+    ];
+
+    file_put_contents(TUNNEL_STATE_FILE, json_encode($state, JSON_UNESCAPED_SLASHES));
+    jsonResponse(tunnelStatusPayload($state));
+}
+
+function handleApiTunnelStop(bool $silent = false): void
+{
+    $state = readTunnelState();
+    $pid = is_array($state) && isset($state['pid']) ? (int) $state['pid'] : 0;
+
+    if ($pid > 0 && processExists($pid)) {
+        killProcess(-$pid, 15);
+        killProcess($pid, 15);
+
+        for ($attempt = 0; $attempt < 20; $attempt++) {
+            usleep(100000);
+            if (!processExists($pid)) {
+                break;
+            }
+        }
+
+        if (processExists($pid)) {
+            killProcess(-$pid, 9);
+            killProcess($pid, 9);
+        }
+    }
+
+    @unlink(TUNNEL_STATE_FILE);
+    @unlink(getTunnelPidFile());
+    @unlink(getTunnelLogFile());
+
+    if ($silent) {
+        return;
+    }
+
+    jsonResponse(tunnelStatusPayload(null));
+}
+
+function handleApiTunnelConfigure(): void
+{
+    $body = getJsonBody();
+    $clear = filter_var($body['clear'] ?? false, FILTER_VALIDATE_BOOLEAN);
+    $url = trim((string) ($body['url'] ?? ''));
+
+    if ($clear || $url === '') {
+        handleApiTunnelStop();
+        return;
+    }
+
+    if (filter_var($url, FILTER_VALIDATE_URL) === false) {
+        jsonResponse(['error' => 'Tunnel URL must be a valid absolute URL'], 400);
+        return;
+    }
+
+    $parts = parse_url($url);
+    $scheme = is_array($parts) ? strtolower((string) ($parts['scheme'] ?? '')) : '';
+    if (!in_array($scheme, ['http', 'https'], true)) {
+        jsonResponse(['error' => 'Tunnel URL must start with http:// or https://'], 400);
+        return;
+    }
+
+    $normalizedUrl = rtrim($url, '/');
+    handleApiTunnelStop(true);
+    $state = [
+        'url' => $normalizedUrl,
+        'provider' => 'manual',
+        'target' => isset($body['target']) && is_string($body['target']) && trim($body['target']) !== ''
+            ? trim($body['target'])
+            : null,
+        'started_at' => gmdate('Y-m-d\TH:i:s\Z'),
+        'configured_at' => gmdate('Y-m-d\TH:i:s\Z'),
+        'note' => isset($body['note']) && is_string($body['note']) && trim($body['note']) !== ''
+            ? trim($body['note'])
+            : null,
+    ];
+
+    file_put_contents(TUNNEL_STATE_FILE, json_encode($state, JSON_UNESCAPED_SLASHES));
+    jsonResponse(tunnelStatusPayload([
+        'active' => true,
+        'url' => $state['url'],
+        'provider' => $state['provider'],
+        'target' => $state['target'],
+        'started_at' => $state['started_at'],
+        'configured_at' => $state['configured_at'],
+        'note' => $state['note'],
+    ]));
+}
+
+function handleApiTunnelTest(): void
+{
+    $state = readTunnelState();
+    if ($state === null) {
+        jsonResponse(['reachable' => false, 'error' => 'No tunnel configured'], 400);
+        return;
+    }
+
+    $baseUrl = (string) ($state['url'] ?? '');
+    if ($baseUrl === '') {
+        jsonResponse(['reachable' => false, 'error' => 'No tunnel URL'], 400);
+        return;
+    }
+
+    $body = getJsonBody();
+    $requestMethod = strtoupper((string) ($body['method'] ?? $_GET['method'] ?? 'GET'));
+    if (!in_array($requestMethod, ['GET', 'HEAD'], true)) {
+        jsonResponse(['reachable' => false, 'error' => 'Unsupported request method'], 400);
+        return;
+    }
+
+    $path = trim((string) ($body['path'] ?? $_GET['path'] ?? '/'));
+    if ($path === '') {
+        $path = '/';
+    }
+    if (preg_match('#^[a-z][a-z0-9+.-]*://#i', $path) === 1) {
+        jsonResponse(['reachable' => false, 'error' => 'Path must be relative'], 400);
+        return;
+    }
+    if (str_contains($path, "\r") || str_contains($path, "\n")) {
+        jsonResponse(['reachable' => false, 'error' => 'Invalid path'], 400);
+        return;
+    }
+    if (!str_starts_with($path, '/')) {
+        $path = '/' . $path;
+    }
+
+    if (!function_exists('curl_init')) {
+        jsonResponse(['reachable' => false, 'error' => 'cURL extension is not available'], 500);
+        return;
+    }
+
+    $testUrl = rtrim($baseUrl, '/') . ($path === '/' ? '/' : $path);
+    $ch = curl_init($testUrl);
+    if ($ch === false) {
+        jsonResponse(['reachable' => false, 'error' => 'Failed to initialize cURL'], 500);
+        return;
+    }
+
+    curl_setopt_array($ch, [
+        CURLOPT_RETURNTRANSFER => true,
+        CURLOPT_CUSTOMREQUEST => $requestMethod,
+        CURLOPT_NOBODY => $requestMethod === 'HEAD',
+        CURLOPT_TIMEOUT => 12,
+        CURLOPT_CONNECTTIMEOUT => 5,
+        CURLOPT_FOLLOWLOCATION => true,
+        CURLOPT_MAXREDIRS => 3,
+        CURLOPT_SSL_VERIFYPEER => true,
+        CURLOPT_SSL_VERIFYHOST => 2,
+        CURLOPT_HTTPHEADER => ['Accept: */*'],
+    ]);
+
+    $startTime = hrtime(true);
+    $response = curl_exec($ch);
+    $elapsedMs = (int) round((hrtime(true) - $startTime) / 1_000_000);
+
+    $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
+    $finalUrl = (string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
+    $contentType = (string) curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
+    $error = curl_error($ch);
+    curl_close($ch);
+
+    if ($status <= 0) {
+        jsonResponse([
+            'reachable' => false,
+            'url' => $testUrl,
+            'method' => $requestMethod,
+            'path' => $path,
+            'error' => $error !== '' ? $error : 'No response',
+        ]);
+        return;
+    }
+
+    $bodyPreview = null;
+    if ($requestMethod !== 'HEAD' && is_string($response) && $response !== '') {
+        $cleanBody = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', '', $response);
+        $bodyPreview = substr((string) $cleanBody, 0, 1200);
+    }
+
+    jsonResponse([
+        'reachable' => true,
+        'url' => $testUrl,
+        'final_url' => $finalUrl !== '' ? $finalUrl : $testUrl,
+        'method' => $requestMethod,
+        'path' => $path,
+        'status' => $status,
+        'time_ms' => $elapsedMs,
+        'content_type' => $contentType !== '' ? $contentType : null,
+        'body_preview' => $bodyPreview,
+    ]);
+}
+
 
 /**
  * Return known project directories for the WSL path selector.
diff --git a/dashboard/start-dev.sh b/dashboard/start-dev.sh
index fb0ae1f..456532d 100755
--- a/dashboard/start-dev.sh
+++ b/dashboard/start-dev.sh
@@ -170,6 +170,32 @@ export PROJECT_NAME
 export SITE_URL
 export ENV_NAME
 
+# ── Cleanup on exit ──────────────────────────────────────────────
+
+cleanup() {
+    local sanitized_slug
+    sanitized_slug=$(echo "${PROJECT_NAME}" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-zA-Z0-9_-]/-/g')
+    local tmp_dir="/tmp/${sanitized_slug}-dashboard"
+    local pid_file="${tmp_dir}/tunnel-cloudflare.pid"
+
+    if [[ -f "${pid_file}" ]]; then
+        local pid
+        pid=$(<"${pid_file}")
+        if [[ "${pid}" =~ ^[0-9]+$ ]] && kill -0 "${pid}" 2>/dev/null; then
+            log "Stopping cloudflared tunnel (pid ${pid})..."
+            kill -15 "-${pid}" 2>/dev/null || true
+            kill -15 "${pid}" 2>/dev/null || true
+            sleep 1
+            if kill -0 "${pid}" 2>/dev/null; then
+                kill -9 "${pid}" 2>/dev/null || true
+            fi
+        fi
+        rm -f "${pid_file}"
+    fi
+}
+
+trap cleanup EXIT
+
 # ── Start server ─────────────────────────────────────────────────
 
 echo ""
diff --git a/dashboard/tunnel.php b/dashboard/tunnel.php
new file mode 100644
index 0000000..450cb54
--- /dev/null
+++ b/dashboard/tunnel.php
@@ -0,0 +1,1138 @@
+<?php
+
+/**
+ * Generic tunnel UI fragments for the main dashboard.
+ *
+ * The tunnel surface is intentionally provider-agnostic. Users can paste any
+ * public base URL (Cloudflare, ngrok, localhost.run, etc.), store it in the
+ * dashboard temp dir, and manually test exposed routes from the browser.
+ */
+
+function tunnelCss(): string
+{
+    return <<<'CSS'
+
+/* Tunnel icon in header */
+.tunnel-icon-btn {
+    position: relative;
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    gap: 6px;
+    min-height: 32px;
+    padding: 0 12px;
+    font-size: 11px;
+    font-weight: 600;
+    font-family: inherit;
+    background: var(--c-surface);
+    border: 1px solid var(--c-border);
+    cursor: pointer;
+    border-radius: 8px;
+    color: var(--c-text-muted);
+    text-decoration: none;
+    transition: all 0.2s;
+    box-shadow: var(--shadow-sm);
+}
+.tunnel-icon-btn:hover { color: var(--c-text-primary); background: var(--c-surface-hover); }
+.tunnel-icon-btn.active-page { color: var(--c-accent); }
+.tunnel-icon-btn .tunnel-dot {
+    display: none;
+    position: absolute;
+    top: 4px;
+    right: 4px;
+    width: 7px;
+    height: 7px;
+    border-radius: 50%;
+    background: var(--c-green);
+    border: 1.5px solid var(--c-surface);
+}
+.tunnel-icon-btn.tunnel-live .tunnel-dot { display: block; }
+
+/* Tunnel page */
+.tunnel-page {
+    display: none;
+    flex: 1;
+    overflow-y: auto;
+    padding: 18px 16px 24px;
+    max-width: none;
+    margin: 0;
+    width: 100%;
+    background: var(--c-bg);
+}
+.tunnel-page.visible { display: block; }
+
+.tp-back {
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+    min-height: 30px;
+    font-size: 11px;
+    font-weight: 600;
+    color: var(--c-text-muted);
+    text-decoration: none;
+    cursor: pointer;
+    background: var(--c-surface);
+    border: 1px solid var(--c-border);
+    font-family: inherit;
+    padding: 0 10px;
+    margin-bottom: 16px;
+    border-radius: 8px;
+    transition: color 0.15s;
+    box-shadow: var(--shadow-sm);
+}
+.tp-back:hover { color: var(--c-text-primary); border-color: var(--c-accent); }
+
+.tp-title {
+    font-size: 18px;
+    font-weight: 600;
+    margin-bottom: 4px;
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    letter-spacing: -0.02em;
+}
+.tp-subtitle {
+    font-size: 12px;
+    color: var(--c-text-muted);
+    margin-bottom: 18px;
+    line-height: 1.65;
+    max-width: 84ch;
+}
+
+.tp-grid {
+    display: grid;
+    grid-template-columns: repeat(2, minmax(0, 1fr));
+    gap: 16px;
+}
+.tp-grid .tp-card.full { grid-column: 1 / -1; }
+
+.tp-card {
+    background: var(--c-surface);
+    border: 1px solid var(--c-border);
+    border-radius: 16px;
+    padding: 16px;
+    min-width: 0;
+    box-shadow: var(--shadow-panel);
+}
+.tp-card-title {
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+    margin-bottom: 12px;
+    font-family: var(--font-sans);
+}
+
+.tp-status-row {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    margin-bottom: 10px;
+}
+.tp-status-dot {
+    width: 10px;
+    height: 10px;
+    border-radius: 50%;
+    flex-shrink: 0;
+}
+.tp-status-dot.active { background: var(--c-green); }
+.tp-status-dot.inactive { background: var(--c-text-faint); opacity: 0.4; }
+.tp-status-label { font-size: 13px; font-weight: 600; }
+.tp-status-url {
+    font-size: 14px;
+    font-weight: 600;
+    font-family: var(--font-mono);
+    color: var(--c-accent);
+    word-break: break-all;
+    cursor: pointer;
+    transition: color 0.15s;
+}
+.tp-status-url:hover { opacity: 0.8; }
+.tp-status-meta {
+    font-size: 11px;
+    color: var(--c-text-faint);
+    margin-top: 4px;
+    line-height: 1.5;
+}
+
+.tp-manual-row,
+.tp-test-row,
+.tp-action-row {
+    display: flex;
+    gap: 8px;
+    align-items: center;
+    margin-top: 14px;
+    flex-wrap: wrap;
+}
+.tp-manual-row input,
+.tp-test-row input,
+.tp-test-row select {
+    min-height: 36px;
+    padding: 0 12px;
+    font-size: 12px;
+    font-family: var(--font-mono);
+    background: var(--c-bg);
+    color: var(--c-text-primary);
+    border: 1px solid var(--c-border);
+    border-radius: 8px;
+    outline: none;
+}
+.tp-manual-row input { flex: 1; min-width: 260px; }
+.tp-test-row input { flex: 1; min-width: 220px; }
+.tp-test-row select { min-width: 96px; }
+.tp-manual-row input:focus,
+.tp-test-row input:focus,
+.tp-test-row select:focus { border-color: var(--c-accent); }
+.tp-manual-row input::placeholder,
+.tp-test-row input::placeholder { color: var(--c-text-faint); }
+
+.tp-btn {
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    min-height: 32px;
+    padding: 0 12px;
+    font-size: 11px;
+    font-weight: 600;
+    font-family: inherit;
+    border-radius: 8px;
+    cursor: pointer;
+    border: 1px solid var(--c-border);
+    transition: all 0.15s;
+    background: var(--c-surface-hover);
+    color: var(--c-text-secondary);
+    box-shadow: var(--shadow-sm);
+}
+.tp-btn:hover { background: var(--c-bg); color: var(--c-text-primary); }
+.tp-btn:disabled { opacity: 0.35; cursor: not-allowed; }
+.tp-btn.primary {
+    background: var(--c-accent);
+    color: #fff;
+    border-color: var(--c-accent);
+    font-weight: 600;
+}
+.tp-btn.primary:hover { background: var(--c-accent-hover); }
+.tp-btn.danger {
+    color: var(--c-red);
+    border-color: var(--c-red);
+    background: rgba(243,139,168,0.08);
+}
+.tp-btn.danger:hover { background: rgba(243,139,168,0.2); }
+
+.tp-copy {
+    border: 1px solid var(--c-border);
+    background: var(--c-bg);
+    color: var(--c-text-secondary);
+    border-radius: 8px;
+    min-height: 32px;
+    padding: 0 12px;
+    font-size: 11px;
+    font-weight: 600;
+    cursor: pointer;
+    box-shadow: var(--shadow-sm);
+}
+.tp-copy.copied,
+.tp-btn.copied {
+    color: var(--c-green);
+    border-color: var(--c-green);
+    background: rgba(22,163,74,0.12);
+}
+
+.tp-example,
+.tp-note {
+    margin-top: 12px;
+    padding: 10px 12px;
+    border-radius: 8px;
+    background: var(--c-surface-hover);
+    border: 1px solid var(--c-border);
+}
+.tp-example-label {
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+    margin-bottom: 6px;
+    font-family: var(--font-sans);
+}
+.tp-example-value {
+    font-size: 11px;
+    line-height: 1.6;
+    word-break: break-word;
+    font-family: var(--font-mono);
+    color: var(--c-text-secondary);
+    white-space: pre-wrap;
+}
+
+.tp-test-help,
+.tp-instructions {
+    font-size: 12px;
+    line-height: 1.7;
+    color: var(--c-text-secondary);
+}
+.tp-test-help code,
+.tp-instructions code {
+    font-family: var(--font-mono);
+    font-size: 11px;
+    background: var(--c-bg);
+    padding: 2px 6px;
+    border-radius: 4px;
+    border: 1px solid var(--c-border);
+}
+
+.tp-test-result-line {
+    display: flex;
+    gap: 8px;
+    align-items: center;
+    flex-wrap: wrap;
+    font-size: 12px;
+    margin-top: 14px;
+}
+.tp-response,
+.tp-output {
+    display: none;
+    max-height: 260px;
+    overflow-y: auto;
+    padding: 12px 16px;
+    font-family: var(--font-mono);
+    font-size: 12px;
+    line-height: 1.6;
+    white-space: pre-wrap;
+    word-break: break-word;
+    background: var(--c-terminal-bg);
+    border-radius: 8px;
+    border: 1px solid var(--c-border);
+    margin-top: 12px;
+}
+.tp-response.visible,
+.tp-output.visible { display: block; }
+
+.tp-warning {
+    display: flex;
+    align-items: flex-start;
+    gap: 8px;
+    margin-top: 14px;
+    padding: 10px 14px;
+    border-radius: 8px;
+    background: rgba(217,119,6,0.08);
+    border: 1px solid rgba(217,119,6,0.2);
+    font-size: 12px;
+    color: var(--c-yellow);
+    line-height: 1.5;
+}
+[data-theme="dark"] .tp-warning {
+    background: rgba(245,158,11,0.08);
+    border-color: rgba(245,158,11,0.16);
+}
+
+/* Uptime display */
+.tp-uptime {
+    font-size: 11px;
+    font-family: var(--font-mono);
+    color: var(--c-text-faint);
+    margin-top: 6px;
+}
+
+/* Target input row */
+.tp-target-row {
+    display: flex;
+    gap: 8px;
+    align-items: center;
+    margin-top: 10px;
+}
+.tp-target-row input {
+    flex: 1;
+    min-height: 34px;
+    padding: 0 12px;
+    font-size: 12px;
+    font-family: var(--font-mono);
+    background: var(--c-bg);
+    color: var(--c-text-primary);
+    border: 1px solid var(--c-border);
+    border-radius: 8px;
+    outline: none;
+}
+.tp-target-row input:focus { border-color: var(--c-accent); }
+.tp-target-row input::placeholder { color: var(--c-text-faint); }
+.tp-target-row label {
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+    font-family: var(--font-sans);
+    white-space: nowrap;
+}
+
+/* Recent URLs */
+.tp-recent { margin-top: 12px; }
+.tp-recent-label {
+    font-size: 11px;
+    font-weight: 600;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    color: var(--c-text-muted);
+    margin-bottom: 6px;
+    font-family: var(--font-sans);
+}
+.tp-recent-chips {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 6px;
+}
+.tp-recent-chip {
+    display: inline-flex;
+    align-items: center;
+    gap: 4px;
+    padding: 4px 10px;
+    font-size: 11px;
+    font-family: var(--font-mono);
+    background: var(--c-surface-hover);
+    border: 1px solid var(--c-border);
+    border-radius: 6px;
+    cursor: pointer;
+    color: var(--c-text-secondary);
+    transition: all 0.15s;
+    max-width: 100%;
+    overflow: hidden;
+    text-overflow: ellipsis;
+    white-space: nowrap;
+}
+.tp-recent-chip:hover { border-color: var(--c-accent); color: var(--c-accent); }
+.tp-recent-chip .remove {
+    font-size: 13px;
+    color: var(--c-text-faint);
+    cursor: pointer;
+    margin-left: 2px;
+    flex-shrink: 0;
+}
+.tp-recent-chip .remove:hover { color: var(--c-red); }
+
+/* Log output inside collapsible */
+.tp-log-output {
+    max-height: 260px;
+    overflow-y: auto;
+    padding: 12px 16px;
+    font-family: var(--font-mono);
+    font-size: 12px;
+    line-height: 1.6;
+    white-space: pre-wrap;
+    word-break: break-word;
+    background: var(--c-terminal-bg);
+    border-radius: 8px;
+    border: 1px solid var(--c-border);
+    margin-top: 8px;
+}
+
+@media (max-width: 860px) {
+    .tunnel-page { padding: 12px; }
+    .tp-grid { grid-template-columns: 1fr; }
+    .tp-grid .tp-card.full { grid-column: auto; }
+}
+CSS;
+}
+
+function tunnelHeaderIconHtml(): string
+{
+    return <<<'HTML'
+    <button class="tunnel-icon-btn" id="tunnelIconBtn" onclick="toggleTunnelPage()" title="Tunnel" aria-label="Tunnel">
+      <span class="tunnel-dot"></span>
+      <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="12" cy="12" r="10"/><line x1="2" y1="12" x2="22" y2="12"/><path d="M12 2a15.3 15.3 0 014 10 15.3 15.3 0 01-4 10 15.3 15.3 0 01-4-10 15.3 15.3 0 014-10z"/></svg>
+      Tunnel
+    </button>
+HTML;
+}
+
+function tunnelPageHtml(): string
+{
+    return <<<'HTML'
+
+<div class="tunnel-page" id="tunnelPage">
+    <button class="tp-back" onclick="toggleTunnelPage()">
+        <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M19 12H5"/><polyline points="12 19 5 12 12 5"/></svg>
+        Back to Dashboard
+    </button>
+
+    <div class="tp-title">
+        <svg width="22" height="22" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><line x1="2" y1="12" x2="22" y2="12"/><path d="M12 2a15.3 15.3 0 014 10 15.3 15.3 0 01-4 10 15.3 15.3 0 01-4-10 15.3 15.3 0 014-10z"/></svg>
+        Tunnel
+    </div>
+    <div class="tp-subtitle">Cloudflare quick tunnel is the default. Start a public demo URL that points back to this dashboard, or paste a manual URL from another provider.</div>
+
+    <div class="tp-grid">
+        <!-- STATUS — full width hero -->
+        <div class="tp-card full">
+            <div class="tp-card-title">Status</div>
+            <div class="tp-status-row">
+                <span class="tp-status-dot inactive" id="tpDot"></span>
+                <span class="tp-status-label" id="tpLabel">Inactive</span>
+            </div>
+            <div id="tpUrlDisplay" style="display:none">
+                <div class="tp-status-url" id="tpUrl" onclick="copyTunnelUrlText()" title="Click to copy"></div>
+                <div class="tp-status-meta" id="tpMeta"></div>
+                <div class="tp-uptime" id="tpUptime"></div>
+            </div>
+            <div class="tp-target-row" id="tpTargetRow">
+                <label>Target</label>
+                <input type="text" id="tpTargetInput" placeholder="http://127.0.0.1:8899" />
+            </div>
+            <div class="tp-action-row">
+                <button class="tp-btn primary" id="tpStartBtn" type="button" onclick="tunnelStart()">Start Cloudflare Demo</button>
+                <button class="tp-btn danger" id="tpStopBtn" type="button" onclick="tunnelStop()" style="display:none">Stop</button>
+                <button class="tp-copy" id="tpCopyUrlBtn" type="button" onclick="copyTunnelUrl(this)" style="display:none">Copy URL</button>
+                <button class="tp-btn danger" id="tpClearBtn" type="button" onclick="tunnelClear()" style="display:none">Clear</button>
+            </div>
+
+            <div class="collapsible-header" id="tpLogsToggle" onclick="toggleTunnelLogs()" style="display:none">
+                <span class="chevron">&#9654;</span> Logs
+            </div>
+            <div class="collapsible-body" id="tpLogsBody">
+                <pre class="tp-log-output" id="tpLogsContent"></pre>
+            </div>
+
+            <div class="collapsible-header" onclick="toggleCollapsible(this)">
+                <span class="chevron">&#9654;</span> Default Demo Target
+            </div>
+            <div class="collapsible-body">
+                <div class="tp-example" style="margin-top:4px">
+                    <div class="tp-example-value" id="tpDefaultTarget">Loading...</div>
+                </div>
+            </div>
+
+            <div class="tp-output" id="tpOutput"></div>
+        </div>
+
+        <!-- PASTE TUNNEL URL -->
+        <div class="tp-card">
+            <div class="tp-card-title">Paste Tunnel URL</div>
+            <div class="tp-test-help">Already have a public URL from ngrok, localhost.run, Tailscale Funnel, or another provider? Paste it here.</div>
+            <div class="tp-manual-row">
+                <input type="text" id="tpManualInput" placeholder="https://abc123.trycloudflare.com" />
+                <button class="tp-btn primary" id="tpSetManualBtn" type="button" onclick="tunnelSetManual()">Save URL</button>
+            </div>
+            <div class="tp-recent" id="tpRecentSection" style="display:none">
+                <div class="tp-recent-label">Recent URLs</div>
+                <div class="tp-recent-chips" id="tpRecentChips"></div>
+            </div>
+
+            <div class="tp-warning">
+                <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="flex-shrink:0;margin-top:1px"><path d="M10.29 3.86L1.82 18a2 2 0 001.71 3h16.94a2 2 0 001.71-3L13.71 3.86a2 2 0 00-3.42 0z"/><line x1="12" y1="9" x2="12" y2="13"/><line x1="12" y1="17" x2="12.01" y2="17"/></svg>
+                <div><strong>Public exposure:</strong> the stored URL is reachable outside your machine. Only expose routes safe for local development.</div>
+            </div>
+
+            <div class="collapsible-header" onclick="toggleCollapsible(this)">
+                <span class="chevron">&#9654;</span> Usage Notes
+            </div>
+            <div class="collapsible-body">
+                <div class="tp-instructions" style="margin-top:4px">
+                    <ol style="padding-left:20px">
+                        <li>Use <strong>Start Cloudflare Demo</strong> for a working public URL with no extra setup.</li>
+                        <li>Set a custom <strong>Target</strong> to tunnel a different local service (e.g. <code>http://127.0.0.1:3000</code>).</li>
+                        <li>The default target is this dashboard, so the public URL is immediately testable.</li>
+                        <li>Use the manual test card to verify routes before sharing.</li>
+                        <li>Paste a manual URL to use another tunnel provider or target.</li>
+                    </ol>
+                </div>
+            </div>
+        </div>
+
+        <!-- QUICK START -->
+        <div class="tp-card" style="border-left:3px solid var(--c-accent);background:var(--c-surface-hover)">
+            <div class="tp-card-title">Quick Start</div>
+            <div class="tp-test-help" style="line-height:1.7">The default Cloudflare action tunnels this dashboard itself, so you can confirm that public routing works before wiring it into another local service. No account or config needed — just click <strong>Start Cloudflare Demo</strong>.</div>
+        </div>
+
+        <!-- MANUAL TEST — full width -->
+        <div class="tp-card full" id="tpManualTestCard">
+            <div class="tp-card-title">Manual Test</div>
+            <div class="tp-test-help">Run a quick <code>GET</code> or <code>HEAD</code> against the current tunnel URL.</div>
+            <div class="tp-test-row">
+                <select id="tpTestMethod" onchange="updateTunnelTestPreview()">
+                    <option value="GET">GET</option>
+                    <option value="HEAD">HEAD</option>
+                </select>
+                <input type="text" id="tpTestPath" value="/" placeholder="/" oninput="updateTunnelTestPreview()" />
+                <button class="tp-btn primary" id="tpTestBtn" type="button" onclick="tunnelTest()">Run test</button>
+                <button class="tp-btn" id="tpOpenBtn" type="button" onclick="openTunnelTestUrl()" style="opacity:0.7">Open URL</button>
+                <button class="tp-btn" id="tpCopyCurlBtn" type="button" onclick="copyTunnelCurl(this)" style="opacity:0.7">Copy curl</button>
+            </div>
+            <div id="tpTestResult"></div>
+            <pre class="tp-response" id="tpTestResponse"></pre>
+
+            <div class="collapsible-header" onclick="toggleCollapsible(this)">
+                <span class="chevron">&#9654;</span> Request Details
+            </div>
+            <div class="collapsible-body">
+                <div class="tp-example" style="margin-top:4px">
+                    <div class="tp-example-label">Full URL</div>
+                    <div class="tp-example-value" id="tpTestUrl">Configure a tunnel URL to generate a test target.</div>
+                </div>
+                <div class="tp-example">
+                    <div class="tp-example-label">Example curl</div>
+                    <div class="tp-example-value" id="tpCurlPreview"></div>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+HTML;
+}
+
+function tunnelJs(): string
+{
+    return <<<'JS'
+
+state.tunnelPageVisible = false;
+state.tunnel = {
+    active: false,
+    url: null,
+    provider: null,
+    target: null,
+    started_at: null,
+    default_target: null,
+    cloudflare_available: false,
+    provider_default: 'cloudflare',
+};
+
+function normalizeTunnelPath(value) {
+    const trimmed = (value || '').trim();
+    if (!trimmed) return '/';
+    if (/^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)) return '/';
+    return trimmed.startsWith('/') ? trimmed : '/' + trimmed;
+}
+
+function buildTunnelTestUrl(path) {
+    if (!state.tunnel.active || !state.tunnel.url) return '';
+    const baseUrl = String(state.tunnel.url || '').replace(/\/+$/, '');
+    const normalizedPath = normalizeTunnelPath(path);
+    return normalizedPath === '/' ? `${baseUrl}/` : `${baseUrl}${normalizedPath}`;
+}
+
+function buildTunnelCurl(method, path) {
+    const url = buildTunnelTestUrl(path);
+    if (!url) return '';
+    return `curl ${method === 'HEAD' ? '-I' : '-i'} "${url}"`;
+}
+
+function updateTunnelTestPreview() {
+    const path = normalizeTunnelPath(document.getElementById('tpTestPath')?.value || '/');
+    const method = (document.getElementById('tpTestMethod')?.value || 'GET').toUpperCase();
+    const fullUrl = buildTunnelTestUrl(path);
+    document.getElementById('tpTestUrl').textContent = fullUrl || 'Start the Cloudflare demo or save a tunnel URL to generate a test target.';
+    document.getElementById('tpCurlPreview').textContent = fullUrl ? buildTunnelCurl(method, path) : '';
+}
+
+function toggleTunnelPage() {
+    state.tunnelPageVisible = !state.tunnelPageVisible;
+    document.getElementById('mainLayout').style.display = state.tunnelPageVisible ? 'none' : 'flex';
+    document.getElementById('tunnelPage').classList.toggle('visible', state.tunnelPageVisible);
+    document.getElementById('tunnelIconBtn').classList.toggle('active-page', state.tunnelPageVisible);
+    if (state.tunnelPageVisible) {
+        refreshTunnelPage();
+        renderRecentUrls();
+        startTunnelPolling();
+    } else {
+        stopTunnelPolling();
+        stopUptimeTimer();
+    }
+}
+
+function setTunnelOutput(message, isError = false) {
+    const output = document.getElementById('tpOutput');
+    output.innerHTML = isError ? `<span class="ansi-red">${esc(message)}</span>` : esc(message);
+    output.classList.add('visible');
+}
+
+function clearTunnelOutput() {
+    const output = document.getElementById('tpOutput');
+    output.textContent = '';
+    output.classList.remove('visible');
+}
+
+async function refreshTunnelPage() {
+    try {
+        const resp = await fetch('/api/tunnel-status');
+        if (!resp.ok) return;
+        const data = await resp.json();
+        state.tunnel = data;
+        renderTunnelStatus(data);
+    } catch (_) {
+        setTunnelOutput('Failed to load tunnel status.', true);
+    }
+}
+
+function renderTunnelStatus(data) {
+    const dot = document.getElementById('tpDot');
+    const label = document.getElementById('tpLabel');
+    const urlDisplay = document.getElementById('tpUrlDisplay');
+    const urlEl = document.getElementById('tpUrl');
+    const metaEl = document.getElementById('tpMeta');
+    const startBtn = document.getElementById('tpStartBtn');
+    const stopBtn = document.getElementById('tpStopBtn');
+    const clearBtn = document.getElementById('tpClearBtn');
+    const copyUrlBtn = document.getElementById('tpCopyUrlBtn');
+    const defaultTarget = document.getElementById('tpDefaultTarget');
+    const testResult = document.getElementById('tpTestResult');
+    const testResponse = document.getElementById('tpTestResponse');
+    const targetRow = document.getElementById('tpTargetRow');
+    const logsToggle = document.getElementById('tpLogsToggle');
+
+    defaultTarget.textContent = data.default_target || 'http://127.0.0.1:8899';
+    startBtn.disabled = data.cloudflare_available !== true;
+    startBtn.title = data.cloudflare_available ? 'Start a Cloudflare quick tunnel to the target URL' : 'cloudflared is not available';
+
+    if (data.active && data.url) {
+        const metaParts = [];
+        if (data.provider) metaParts.push(`Source: ${data.provider}`);
+        if (data.target) metaParts.push(`Target: ${data.target}`);
+        if (data.started_at) metaParts.push(`Saved: ${data.started_at}`);
+
+        dot.className = 'tp-status-dot active';
+        label.textContent = 'Active';
+        label.style.color = 'var(--c-green)';
+        urlEl.textContent = data.url;
+        metaEl.textContent = metaParts.join(' | ');
+        urlDisplay.style.display = 'block';
+        startBtn.style.display = 'none';
+        targetRow.style.display = 'none';
+        stopBtn.style.display = data.provider === 'cloudflare' ? 'inline-flex' : 'none';
+        clearBtn.style.display = data.provider === 'manual' ? 'inline-flex' : 'none';
+        copyUrlBtn.style.display = 'inline-flex';
+        logsToggle.style.display = data.provider === 'cloudflare' ? 'block' : 'none';
+        startUptimeTimer();
+        updateUptimeDisplay();
+        clearTunnelOutput();
+    } else {
+        dot.className = 'tp-status-dot inactive';
+        label.textContent = 'Inactive';
+        label.style.color = '';
+        urlDisplay.style.display = 'none';
+        startBtn.style.display = 'inline-flex';
+        targetRow.style.display = 'flex';
+        stopBtn.style.display = 'none';
+        clearBtn.style.display = 'none';
+        copyUrlBtn.style.display = 'none';
+        logsToggle.style.display = 'none';
+        // Collapse logs
+        document.getElementById('tpLogsBody').classList.remove('open');
+        logsToggle.classList.remove('open');
+        stopUptimeTimer();
+        document.getElementById('tpUptime').textContent = '';
+        testResult.innerHTML = '';
+        testResponse.textContent = '';
+        testResponse.classList.remove('visible');
+        const targetInput = document.getElementById('tpTargetInput');
+        if (targetInput && !targetInput.value) {
+            targetInput.placeholder = data.default_target || 'http://127.0.0.1:8899';
+        }
+    }
+
+    updateTunnelTestPreview();
+
+    // Disable test controls when no tunnel is active
+    const testDisabled = !(data.active && data.url);
+    ['tpTestBtn', 'tpOpenBtn', 'tpCopyCurlBtn'].forEach(id => {
+        const el = document.getElementById(id);
+        if (el) el.disabled = testDisabled;
+    });
+}
+
+async function tunnelStart() {
+    setTunnelActionState(true);
+    try {
+        const customTarget = (document.getElementById('tpTargetInput')?.value || '').trim();
+        const target = customTarget || state.tunnel.default_target || null;
+        const resp = await fetch('/api/tunnel-start', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ target }),
+        });
+        const data = await resp.json();
+        if (!resp.ok) {
+            throw new Error(data.error || 'Failed to start Cloudflare tunnel');
+        }
+
+        state.tunnel = data;
+        renderTunnelStatus(data);
+        updateTunnelIcon();
+        setTunnelOutput('Cloudflare quick tunnel started.');
+        notifyTunnelReady(data.url);
+    } catch (err) {
+        setTunnelOutput(String(err), true);
+    } finally {
+        setTunnelActionState(false);
+    }
+}
+
+async function tunnelSetManual() {
+    const input = document.getElementById('tpManualInput');
+    const url = (input.value || '').trim();
+    if (!url) {
+        setTunnelOutput('Enter a public tunnel URL first.', true);
+        return;
+    }
+
+    setTunnelActionState(true);
+    try {
+        const resp = await fetch('/api/tunnel-configure', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ url }),
+        });
+        const data = await resp.json();
+        if (!resp.ok) {
+            throw new Error(data.error || 'Failed to store tunnel URL');
+        }
+
+        state.tunnel = data;
+        renderTunnelStatus(data);
+        updateTunnelIcon();
+        addRecentUrl(url);
+        input.value = '';
+        setTunnelOutput('Tunnel URL saved.');
+    } catch (err) {
+        setTunnelOutput(String(err), true);
+    } finally {
+        setTunnelActionState(false);
+    }
+}
+
+async function tunnelClear() {
+    setTunnelActionState(true);
+    try {
+        const resp = await fetch('/api/tunnel-configure', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ clear: true }),
+        });
+        const data = await resp.json();
+        if (!resp.ok) {
+            throw new Error(data.error || 'Failed to clear tunnel URL');
+        }
+
+        state.tunnel = data;
+        renderTunnelStatus(data);
+        updateTunnelIcon();
+        setTunnelOutput('Tunnel URL cleared.');
+    } catch (err) {
+        setTunnelOutput(String(err), true);
+    } finally {
+        setTunnelActionState(false);
+    }
+}
+
+async function tunnelStop() {
+    setTunnelActionState(true);
+    try {
+        const resp = await fetch('/api/tunnel-stop', { method: 'POST' });
+        const data = await resp.json();
+        if (!resp.ok) {
+            throw new Error(data.error || 'Failed to stop Cloudflare tunnel');
+        }
+
+        state.tunnel = data;
+        renderTunnelStatus(data);
+        updateTunnelIcon();
+        setTunnelOutput('Cloudflare tunnel stopped.');
+    } catch (err) {
+        setTunnelOutput(String(err), true);
+    } finally {
+        setTunnelActionState(false);
+    }
+}
+
+function setTunnelActionState(disabled) {
+    ['tpStartBtn', 'tpStopBtn', 'tpSetManualBtn', 'tpClearBtn', 'tpTestBtn', 'tpOpenBtn', 'tpCopyCurlBtn'].forEach((id) => {
+        const el = document.getElementById(id);
+        if (el) el.disabled = disabled;
+    });
+}
+
+async function tunnelTest() {
+    const result = document.getElementById('tpTestResult');
+    const responseBox = document.getElementById('tpTestResponse');
+    const btn = document.getElementById('tpTestBtn');
+
+    if (!state.tunnel.active || !state.tunnel.url) {
+        setTunnelOutput('Save a tunnel URL before testing.', true);
+        return;
+    }
+
+    const method = (document.getElementById('tpTestMethod').value || 'GET').toUpperCase();
+    const path = normalizeTunnelPath(document.getElementById('tpTestPath').value || '/');
+    result.innerHTML = '<span class="ansi-dim" style="font-size:12px">Testing...</span>';
+    responseBox.textContent = '';
+    responseBox.classList.remove('visible');
+    btn.disabled = true;
+
+    try {
+        const resp = await fetch('/api/tunnel-test', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ method, path }),
+        });
+        const data = await resp.json();
+        if (!data.reachable) {
+            throw new Error(data.error || 'Unreachable');
+        }
+
+        let detail = `${esc(String(data.time_ms))}ms`;
+        if (data.content_type) detail += ` \u00b7 ${esc(String(data.content_type))}`;
+        if (data.final_url && data.final_url !== data.url) detail += ` \u2192 ${esc(String(data.final_url))}`;
+        result.innerHTML = `
+            <div class="result-alert success">
+                <div>
+                    <div class="alert-title">Reachable \u2014 HTTP ${esc(String(data.status))}</div>
+                    <div class="alert-detail">${detail}</div>
+                </div>
+            </div>`;
+        if (data.body_preview) {
+            responseBox.textContent = data.body_preview;
+            responseBox.classList.add('visible');
+        }
+    } catch (err) {
+        result.innerHTML = `
+            <div class="result-alert error">
+                <div>
+                    <div class="alert-title">Unreachable</div>
+                    <div class="alert-detail">${esc(String(err))}</div>
+                </div>
+            </div>`;
+    } finally {
+        btn.disabled = false;
+    }
+}
+
+function openTunnelTestUrl() {
+    const url = buildTunnelTestUrl(document.getElementById('tpTestPath')?.value || '/');
+    if (!url) return;
+    window.open(url, '_blank', 'noopener');
+}
+
+function copyTunnelCurl(btn) {
+    const method = (document.getElementById('tpTestMethod')?.value || 'GET').toUpperCase();
+    const path = document.getElementById('tpTestPath')?.value || '/';
+    const command = buildTunnelCurl(method, path);
+    if (!command) return;
+    navigator.clipboard.writeText(command).then(() => {
+        const original = btn.textContent;
+        btn.textContent = 'Copied';
+        btn.classList.add('copied');
+        setTimeout(() => {
+            btn.textContent = original;
+            btn.classList.remove('copied');
+        }, 1400);
+    });
+}
+
+function copyTunnelUrl(btn) {
+    if (!state.tunnel.url) return;
+    navigator.clipboard.writeText(state.tunnel.url).then(() => {
+        const original = btn.textContent;
+        btn.textContent = 'Copied';
+        btn.classList.add('copied');
+        setTimeout(() => {
+            btn.textContent = original;
+            btn.classList.remove('copied');
+        }, 1400);
+    });
+}
+
+function copyTunnelUrlText() {
+    if (!state.tunnel.url) return;
+    navigator.clipboard.writeText(state.tunnel.url).then(() => {
+        const el = document.getElementById('tpUrl');
+        const original = el.textContent;
+        el.textContent = 'Copied!';
+        el.style.color = 'var(--c-green)';
+        setTimeout(() => {
+            el.textContent = original;
+            el.style.color = '';
+        }, 1200);
+    });
+}
+
+async function updateTunnelIcon() {
+    try {
+        const resp = await fetch('/api/tunnel-status');
+        if (!resp.ok) return;
+        const data = await resp.json();
+        state.tunnel = data;
+        const btn = document.getElementById('tunnelIconBtn');
+        if (!btn) return;
+        btn.classList.toggle('tunnel-live', data.active === true);
+        btn.title = data.active ? `Tunnel: ${data.url}` : 'Tunnel';
+    } catch (_) {
+        // Ignore transient tunnel status failures in the header.
+    }
+}
+
+/* ── Auto-refresh polling ─────────────────────────────────────── */
+
+let tunnelPollInterval = null;
+
+function startTunnelPolling() {
+    stopTunnelPolling();
+    tunnelPollInterval = setInterval(() => {
+        if (state.tunnelPageVisible) refreshTunnelPage();
+    }, 20000);
+}
+
+function stopTunnelPolling() {
+    if (tunnelPollInterval) {
+        clearInterval(tunnelPollInterval);
+        tunnelPollInterval = null;
+    }
+}
+
+/* ── Uptime timer ─────────────────────────────────────────────── */
+
+let tunnelUptimeInterval = null;
+
+function startUptimeTimer() {
+    stopUptimeTimer();
+    tunnelUptimeInterval = setInterval(updateUptimeDisplay, 1000);
+}
+
+function stopUptimeTimer() {
+    if (tunnelUptimeInterval) {
+        clearInterval(tunnelUptimeInterval);
+        tunnelUptimeInterval = null;
+    }
+}
+
+function updateUptimeDisplay() {
+    const el = document.getElementById('tpUptime');
+    if (!el || !state.tunnel.active || !state.tunnel.started_at) {
+        if (el) el.textContent = '';
+        return;
+    }
+    const started = new Date(state.tunnel.started_at);
+    const elapsed = Math.floor((Date.now() - started.getTime()) / 1000);
+    if (elapsed < 0) { el.textContent = ''; return; }
+    const h = Math.floor(elapsed / 3600);
+    const m = Math.floor((elapsed % 3600) / 60);
+    const s = elapsed % 60;
+    let display = '';
+    if (h > 0) display = `${h}h ${m}m`;
+    else if (m > 0) display = `${m}m ${s}s`;
+    else display = `${s}s`;
+    el.textContent = `Uptime: ${display}`;
+}
+
+/* ── Recent URLs ──────────────────────────────────────────────── */
+
+const RECENT_URLS_KEY = 'devex_tunnel_recent_urls';
+const MAX_RECENT_URLS = 5;
+
+function getRecentUrls() {
+    try {
+        const raw = localStorage.getItem(RECENT_URLS_KEY);
+        return raw ? JSON.parse(raw) : [];
+    } catch (_) { return []; }
+}
+
+function addRecentUrl(url) {
+    let urls = getRecentUrls().filter(u => u !== url);
+    urls.unshift(url);
+    if (urls.length > MAX_RECENT_URLS) urls = urls.slice(0, MAX_RECENT_URLS);
+    localStorage.setItem(RECENT_URLS_KEY, JSON.stringify(urls));
+    renderRecentUrls();
+}
+
+function removeRecentUrl(url) {
+    const urls = getRecentUrls().filter(u => u !== url);
+    localStorage.setItem(RECENT_URLS_KEY, JSON.stringify(urls));
+    renderRecentUrls();
+}
+
+function renderRecentUrls() {
+    const container = document.getElementById('tpRecentChips');
+    if (!container) return;
+    const section = document.getElementById('tpRecentSection');
+    const urls = getRecentUrls();
+    if (urls.length === 0) {
+        if (section) section.style.display = 'none';
+        return;
+    }
+    if (section) section.style.display = 'block';
+    container.innerHTML = '';
+    urls.forEach(url => {
+        const chip = document.createElement('span');
+        chip.className = 'tp-recent-chip';
+        const short = url.replace(/^https?:\/\//, '').replace(/\/$/, '');
+        chip.title = url;
+        const label = document.createTextNode(short.length > 40 ? short.substring(0, 37) + '...' : short);
+        chip.appendChild(label);
+        chip.addEventListener('click', () => {
+            document.getElementById('tpManualInput').value = url;
+            tunnelSetManual();
+        });
+        const remove = document.createElement('span');
+        remove.className = 'remove';
+        remove.textContent = '\u00d7';
+        remove.addEventListener('click', (e) => {
+            e.stopPropagation();
+            removeRecentUrl(url);
+        });
+        chip.appendChild(remove);
+        container.appendChild(chip);
+    });
+}
+
+/* ── Cloudflare logs viewer ───────────────────────────────────── */
+
+async function toggleTunnelLogs() {
+    const header = document.getElementById('tpLogsToggle');
+    const body = document.getElementById('tpLogsBody');
+    const content = document.getElementById('tpLogsContent');
+    const isOpen = header.classList.contains('open');
+
+    if (isOpen) {
+        header.classList.remove('open');
+        body.classList.remove('open');
+        return;
+    }
+
+    content.textContent = 'Loading...';
+    header.classList.add('open');
+    body.classList.add('open');
+
+    try {
+        const resp = await fetch('/api/tunnel-logs');
+        const data = await resp.json();
+        content.textContent = data.logs || '(no logs)';
+    } catch (_) {
+        content.textContent = 'Failed to load logs.';
+    }
+}
+
+/* ── Browser notification ─────────────────────────────────────── */
+
+function notifyTunnelReady(url) {
+    if (!url || !('Notification' in window)) return;
+    if (Notification.permission === 'granted') {
+        new Notification('Tunnel Ready', { body: url });
+    } else if (Notification.permission !== 'denied') {
+        Notification.requestPermission().then(perm => {
+            if (perm === 'granted') new Notification('Tunnel Ready', { body: url });
+        });
+    }
+}
+
+/* ── Keyboard shortcuts ───────────────────────────────────────── */
+
+document.addEventListener('keydown', (e) => {
+    if (e.key === 'Enter' && document.activeElement) {
+        if (document.activeElement.id === 'tpManualInput') {
+            tunnelSetManual();
+        } else if (document.activeElement.id === 'tpTestPath') {
+            tunnelTest();
+        } else if (document.activeElement.id === 'tpTargetInput') {
+            tunnelStart();
+        }
+    }
+});
+JS;
+}
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 0000000..e3f90cb
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,47 @@
+# Architecture - devgoat-bash-scripts
+
+## What It Is
+
+`devgoat-bash-scripts` is a Bash-first toolkit with a small PHP dashboard. Most functionality lives in standalone scripts under `lib/`. The repo has no package manager or build step; quality comes from shell linting, bats tests, smoke paths, and targeted PHP linting.
+
+## Core Layout
+
+```text
+lib/
+  ai-cli/       AI installer scripts plus _common.sh
+  aws/          AWS wrappers plus _aws-common.sh
+  codegen/      generated artefacts and repo inspection helpers
+  docker/       Docker workflow helpers
+  health/       host and service health checks
+  maintenance/  repo maintenance helpers
+  stacks/       stack setup/verify scripts plus _common.sh
+  tools/        installer helpers for developer tools
+  workflow/     repo entrypoints and workflow utilities
+dashboard/      PHP UI for running scripts from a browser
+docs/           architecture, footguns, lessons, workflow docs
+scripts/        Codex workflow validation and preflight wrappers
+tests/          bats suite
+```
+
+## Runtime Flows
+
+1. Humans or agents run shell entrypoints directly, usually from `lib/`, `help.sh`, or `preflight-checks.sh`.
+2. The dashboard launches shell scripts and parses some human-readable output back into UI cards and tables.
+3. Three domains share helper libraries: `lib/ai-cli/_common.sh`, `lib/stacks/_common.sh`, and `lib/aws/_aws-common.sh`.
+4. Repo verification is split:
+   - `preflight-checks.sh` checks `lib/**/*.sh`
+   - `scripts/preflight-checks.sh` adds Codex workflow assets, root shell entrypoints, dashboard PHP linting, and workflow validation
+
+## Constraints
+
+- Template `# ---- CONFIGURATION ----` blocks are public interfaces, not placeholders to "fix".
+- Helper sourcing is domain-specific; `ai-cli`, `stacks`, and `aws` do not share one pattern.
+- Only `ai-cli/_common.sh` sanitises WSL PATH; other domains use plain `command -v`.
+- Some verify/preflight scripts intentionally omit `-e` so they can report multiple failures.
+- The dashboard has cross-domain coupling to exact shell output headings, especially in AWS reports.
+
+## Trade-Offs
+
+- Keeping scripts largely standalone makes them portable, but conventions are enforced socially and through linting rather than a framework.
+- Human-readable output is good for terminals and the dashboard, but it makes parsers sensitive to heading changes.
+- Root preflight stays fast by focusing on `lib/`, so the Codex wrapper has to cover root and workflow files explicitly.
diff --git a/docs/codex-playbooks/audit.md b/docs/codex-playbooks/audit.md
new file mode 100644
index 0000000..9d85384
--- /dev/null
+++ b/docs/codex-playbooks/audit.md
@@ -0,0 +1,28 @@
+# Audit Playbook
+
+Use this for audits of correctness, safety, consistency, or documentation quality.
+
+## Rules
+
+- Read broadly enough to verify each claim.
+- MUST NOT propose fixes unless the human asks for them after the audit.
+- Prefer findings with file:line evidence over style commentary.
+
+### Discovery
+
+- Enumerate the surfaces inspected.
+- Note the assumptions being tested.
+
+### Verification
+
+- Check each candidate finding against the real file, not memory.
+- Confirm whether the issue is isolated or systemic.
+
+### Prioritisation
+
+- Order findings by user impact, regression risk, or blast radius.
+- Call out missing tests when they hide real risk.
+
+### Self-Check
+
+- Ask: did I fabricate this, overstate this, or skip a conflicting file?
diff --git a/docs/codex-playbooks/code-review.md b/docs/codex-playbooks/code-review.md
new file mode 100644
index 0000000..52d40fc
--- /dev/null
+++ b/docs/codex-playbooks/code-review.md
@@ -0,0 +1,22 @@
+# Code Review Playbook
+
+Use this when the user asks for a review or when a change needs a structured risk pass.
+
+## Findings Order
+
+- `P0` data loss, security, or destructive workflow risk
+- `P1` behavioural regression or broken contract
+- `P2` test gap, maintainability risk, or fragile coupling
+
+## Review Checklist
+
+- Read the diff and the adjacent consumers.
+- Verify changed code against existing conventions in the same domain.
+- Check Ask First boundaries from `AGENTS.md` before treating a risky change as acceptable.
+- Look for missing verification, stale references after renames, and broken dashboard/report couplings.
+
+## Output
+
+- Findings first, with file:line references
+- Open questions or assumptions second
+- Short summary last
diff --git a/docs/codex-playbooks/debug-investigate.md b/docs/codex-playbooks/debug-investigate.md
new file mode 100644
index 0000000..a195ffc
--- /dev/null
+++ b/docs/codex-playbooks/debug-investigate.md
@@ -0,0 +1,27 @@
+# Debug Investigate Playbook
+
+Use this when behaviour is broken or inconsistent and the task is diagnosis-first.
+
+## Hard Gate
+
+- If you want to just try something before tracing the code path, STOP.
+- No fix until the diagnosis is written and the human reviews it.
+
+## Workflow
+
+1. Reproduce the issue or define the missing reproduction.
+2. Trace the execution path through the real files.
+3. Isolate the most likely root cause with file:line evidence.
+4. Report the diagnosis, missing evidence, and the next verification step.
+
+## Diagnosis Template
+
+### Symptom
+
+### Reproduction
+
+### Evidence
+
+### Root Cause Hypothesis
+
+### Next Check
diff --git a/docs/codex-playbooks/preflight.md b/docs/codex-playbooks/preflight.md
new file mode 100644
index 0000000..1fc1dc8
--- /dev/null
+++ b/docs/codex-playbooks/preflight.md
@@ -0,0 +1,28 @@
+# Preflight Playbook
+
+Use this when code changed, verification scope is unclear, or a task crossed multiple directories.
+
+## MUST
+
+- Re-read the changed files and the nearest consumer before choosing checks.
+- Run the strongest relevant mechanical gate. In this repo, prefer `./scripts/preflight-checks.sh` for cross-domain work.
+- Run stack-native lint, syntax, build, or type checks when the touched stack has them.
+- Run a dependency audit step when a package manager exists; if none exists, say so explicitly.
+- After renames or moves, run `rg` for the old name and report zero remaining references.
+
+## SHOULD
+
+- Run the full bats suite even if a narrower smoke test passed.
+- Run formatter or generator steps when the touched file is tool-generated.
+- Exercise at least one safe user-facing path, such as `--help`, `--dry-run`, or a read-only report command.
+
+## MAY
+
+- Add extra smoke tests for adjacent scripts or dashboard surfaces.
+- Capture command output snippets when they make the verification story clearer.
+
+## Output
+
+- Checks run
+- Results
+- Known gaps
diff --git a/docs/codex-playbooks/research.md b/docs/codex-playbooks/research.md
new file mode 100644
index 0000000..7ec13f0
--- /dev/null
+++ b/docs/codex-playbooks/research.md
@@ -0,0 +1,29 @@
+# Research Playbook
+
+Use this for deep-read tasks, design investigation, or any request where the human wants understanding before implementation.
+
+## Hard Gate
+
+- No code changes.
+- No implementation plan.
+- Stop after the research artefact until the human reviews it.
+
+### Files Involved
+
+- List the files read.
+- Separate primary files from adjacent context.
+
+### Request Flow
+
+- Trace the path from entrypoint to side effects.
+- Name where inputs come from, where decisions happen, and where outputs land.
+
+### Boundaries Touched
+
+- Call out shared helpers, dashboard consumers, templates, generated files, CI, or public interfaces.
+
+### Risks / Gotchas
+
+- Provide at least 3 concrete risks.
+- Each risk must cite file:line evidence.
+- Prefer repo-specific traps over generic concerns.
diff --git a/docs/domain-reference.md b/docs/domain-reference.md
new file mode 100644
index 0000000..f69d505
--- /dev/null
+++ b/docs/domain-reference.md
@@ -0,0 +1,64 @@
+# Domain Reference - devgoat-bash-scripts
+
+Technical reference for repo mechanics. This file holds project details that should not live in the runtime loop in `AGENTS.md`.
+
+## Common Workflows
+
+### Adding an ai-cli installer
+
+Copy an existing `install-*.sh`. Source `_common.sh` via `SCRIPT_DIR`. Use `block_gitbash`, `require_node_or_install`, and `verify_native_binary`. Keep output in the ai-cli style: colour only, no prefix tags.
+
+### Adding a stacks script
+
+Source `../_common.sh`. Use `step`/`pass`/`fail`/`summary` for checks and `log_info`/`log_ok` for actions. Omit `-e` only when the script must report all failures before exiting.
+
+### Adding a standalone script
+
+Applicable to `aws/`, `docker/`, `health/`, `workflow/`, `maintenance/`, `tools/`, and `codegen/`. Define inline colours plus local logging helpers. Use `set -euo pipefail` unless the file is a known strict-mode exception.
+
+## Domain Instruction Router
+
+| Domain | File | Use When |
+| --- | --- | --- |
+| All shell scripts | `.github/instructions/shell-conventions.instructions.md` | Writing or reviewing any `.sh` file |
+| `lib/ai-cli/` | `.github/instructions/ai-cli.instructions.md` | Working on AI CLI installers |
+| `lib/aws/` | `.github/instructions/aws.instructions.md` | Working on AWS scripts |
+| `lib/stacks/` | `.github/instructions/stacks.instructions.md` | Working on stack scripts |
+| Standalone domains | `.github/instructions/dev.instructions.md` | Working on `workflow/`, `docker/`, `health/`, `maintenance/`, `tools/`, or `codegen/` scripts |
+
+## Shared Helpers and Logging
+
+- `lib/ai-cli/` sources `_common.sh` from the same directory.
+- `lib/stacks/` sources `../_common.sh` from subdirectories.
+- `lib/aws/` sources `_aws-common.sh` from the same directory.
+- Logging is domain-scoped:
+  - `ai-cli`: direct coloured output
+  - `stacks`: `step`/`pass`/`fail` helpers
+  - standalone domains: inline `log`/`success`/`warn`/`error` helpers or close variants
+
+Read `docs/footguns.md` before changing any of these patterns.
+
+## Template and Output Contracts
+
+### Changing a template script
+
+- Treat the `# ---- CONFIGURATION ----` block as a public interface.
+- Do not replace placeholder defaults like `my-project` or `us-east-1` unless you are intentionally changing the template contract.
+- If the template interface changes, update help text and verify sibling templates still use the same pattern.
+
+### Changing output consumed by the dashboard
+
+- Read the shell producer and the PHP consumer before editing headings, table shapes, or summary rows.
+- AWS cost output is parsed by `dashboard/aws_ui.php`; report-heading changes can break the dashboard without touching PHP.
+- Prefer machine-readable output when the coupling becomes brittle.
+
+## Repo Entrypoints
+
+- `help.sh` delegates to `lib/workflow/help-index.sh` and is the root script index.
+- `preflight-checks.sh` is the repo-level quality gate for `lib/**/*.sh`.
+- `scripts/preflight-checks.sh` is the Codex workflow wrapper that adds context validation and non-`lib/` checks.
+- `dashboard/start-dev.sh` launches the PHP dashboard and is an Ask First boundary because it changes the browser entrypoint.
+
+## Commit Format
+
+Use short, imperative subjects such as `add docker restart wrapper`. Keep commits scoped to one script or one coherent workflow. Never commit credentials.
diff --git a/docs/footguns.md b/docs/footguns.md
index a217775..022b731 100644
--- a/docs/footguns.md
+++ b/docs/footguns.md
@@ -1,95 +1,85 @@
 # Footguns
 
-Cross-domain gotchas discovered in this codebase. When you cause a bug that spans multiple domains, append it here using the format below.
+Cross-domain gotchas confirmed in this codebase. Add entries only when the repo itself demonstrates the behaviour.
 
----
+## Footgun: Helper sourcing is directory-specific
 
-## Footgun: Strict mode exceptions
+**Symptoms:** A copied script cannot find its helper library, or it sources the wrong shared file after being moved.
 
-**Symptoms:** Script exits immediately on a non-zero return code that was expected to be handled. Or: adding `set -e` to a script that previously worked causes it to abort mid-run.
+**Why it happens:** `ai-cli`, `stacks`, and `aws` each resolve shared helpers differently, and the patterns are tied to the directory layout.
 
-**Why it happens:** Several scripts intentionally omit `-e` from their strict mode line (`set -uo pipefail` instead of `set -euo pipefail`) because they must continue past individual check failures to report a full summary:
+**Evidence:**
+- `lib/ai-cli/install-claude.sh:11`
+- `lib/stacks/node/setup.sh:17`
+- `lib/aws/aws-cli.sh:13`
 
-| Script | Reason |
-|--------|--------|
-| `lib/stacks/*/verify.sh` | Runs all prerequisite checks, reports summary at end |
-| `lib/stacks/*/preflight-checks.sh` | Runs quality gates, must report all failures |
-| `lib/health/check-gpu.sh` | Probes multiple GPU backends, some will always fail |
+**Prevention:** Match the helper source pattern used by sibling files in the same domain. Do not swap `SCRIPT_DIR/_common.sh` and `../_common.sh`.
 
-**Prevention:** Before adding `set -e` to any script, check if it uses `step`/`pass`/`fail` patterns or accumulates failures in an array. If it does, omitting `-e` is intentional.
+## Footgun: Only ai-cli sanitises WSL PATH
 
----
+**Symptoms:** A script resolves Windows binaries from `/mnt/*` inside WSL and then fails in confusing ways.
 
-## Footgun: WSL PATH pollution
+**Why it happens:** `ai-cli/_common.sh` rejects `/mnt/*` binaries and strips those PATH entries before Node/npm checks. Other domains rely on plain `command -v`.
 
-**Symptoms:** A script resolves a Windows binary (e.g., `/mnt/c/Program Files/nodejs/npm`) instead of the native Linux one. Commands appear to exist but produce wrong output or fail cryptically.
+**Evidence:**
+- `lib/ai-cli/_common.sh:54`
+- `lib/ai-cli/_common.sh:65`
+- `lib/aws/_aws-common.sh:101`
+- `lib/workflow/git-status.sh:44`
 
-**Why it happens:** Only `lib/ai-cli/_common.sh` sanitizes PATH for WSL (via `sanitize_path_for_wsl()` and `command_exists()`). Other domains use bare `command -v` checks, which can resolve Windows binaries leaking into WSL's PATH through `/mnt/*` entries.
+**Prevention:** If a non-ai-cli script must be WSL-safe, add an explicit native-binary check or document the assumption instead of assuming shared sanitisation exists.
 
-**Prevention:** When writing scripts that run on WSL and depend on native Linux binaries (node, npm, python, aws, docker), either source `ai-cli/_common.sh` or add an explicit `/mnt/*` rejection check. At minimum, document the WSL assumption.
+## Footgun: Strict-mode exceptions are intentional
 
----
+**Symptoms:** Adding `set -e` to a verify or preflight script causes it to abort before reporting the full failure summary.
 
-## Footgun: Three logging paradigms
+**Why it happens:** Some scripts intentionally use `set -uo pipefail` so they can accumulate failures. The root preflight script hard-codes these exceptions.
 
-**Symptoms:** A new script's output looks inconsistent with its sibling scripts. Log lines use a different prefix style, different colors, or different symbols than other scripts in the same directory.
+**Evidence:**
+- `lib/stacks/php/verify.sh:12`
+- `lib/stacks/node/preflight-checks.sh:8`
+- `lib/health/check-gpu.sh:21`
+- `preflight-checks.sh:251`
 
-**Why it happens:** The codebase uses three distinct logging paradigms:
+**Prevention:** Before changing strict mode, check whether the script is expected to keep running after a failed check and whether `preflight-checks.sh` already treats it as an exception.
 
-1. **ai-cli style** - Direct `echo -e` with color constants (`$RED`, `$GREEN`, etc.). No prefix tags. Used by all `lib/ai-cli/` scripts via `_common.sh`.
-2. **stacks style** - Structured `step`/`pass`/`fail`/`skip`/`warn` helpers with Unicode symbols (`✔`, `✘`, `○`, `▸`) plus `log_info`/`log_ok`/`log_warn`/`log_error` with `[INFO]`/`[OK]` prefix tags. Used by all `lib/stacks/` scripts via `_common.sh`.
-3. **standalone style** - Inline `log()`/`success()`/`warn()`/`error()` functions with `[tag]` prefixes. Each script defines its own. Used by `lib/aws/`, `lib/docker/`, `lib/health/`, `lib/workflow/`, `lib/maintenance/`, `lib/tools/`, `lib/codegen/`.
+## Footgun: Logging style is domain-scoped
 
-**Prevention:** Before writing a new script, read one sibling script in the same directory and match its logging pattern exactly. Never mix paradigms within a directory.
+**Symptoms:** A new script looks out of place because the log format, colours, or helper names do not match its neighbours.
 
----
+**Why it happens:** The repo uses at least three logging styles: ai-cli colour output, stacks `step`/`pass` helpers, and standalone inline log functions.
 
-## Footgun: `_common.sh` source patterns are not interchangeable
+**Evidence:**
+- `lib/ai-cli/install-claude.sh:16`
+- `lib/stacks/node/setup.sh:45`
+- `lib/aws/cloudfront-invalidate.sh:56`
+- `lib/maintenance/git-cleanup.sh:8`
 
-**Symptoms:** `source: No such file or directory` when running a script, or the wrong `_common.sh` gets loaded.
+**Prevention:** Read one sibling script in the touched directory before introducing a new logging helper or output style.
 
-**Why it happens:** The two shared libraries use different source patterns:
+## Footgun: Root preflight only scans lib scripts
 
-- **ai-cli** uses same-directory resolution:
-  ```bash
-  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-  source "${SCRIPT_DIR}/_common.sh"
-  ```
-- **stacks** uses parent-directory traversal:
-  ```bash
-  source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/../_common.sh"
-  ```
+**Symptoms:** A root shell entrypoint, dashboard launcher, or workflow helper passes unnoticed even though it has syntax or lint issues.
 
-These patterns are tied to the directory structure. Copying a stacks script into a flat directory (or an ai-cli script into a subdirectory) breaks the source path.
+**Why it happens:** `preflight-checks.sh` discovers scripts only under `lib/`, while valid shell entrypoints also exist at the repo root and under `dashboard/`.
 
-**Prevention:** When copying scripts out of this repo, verify the `source` line resolves correctly in the target directory structure. When creating a new script under `stacks/`, always use the `../` traversal pattern. Under `ai-cli/`, always use the same-directory pattern.
+**Evidence:**
+- `preflight-checks.sh:242`
+- `help.sh:1`
+- `dashboard/start-dev.sh:1`
 
----
+**Prevention:** When changing shell files outside `lib/`, run explicit `bash -n` and `shellcheck` on them or use `scripts/preflight-checks.sh`.
 
-## Footgun: Template default values are intentional placeholders
+## Footgun: Dashboard AWS parsing depends on exact report headings
 
-**Symptoms:** An AI or contributor "fixes" placeholder values like `my-project`, `us-east-1`, or `8081` in a template script's CONFIGURATION block, breaking the template for all users.
+**Symptoms:** A dashboard section absorbs rows from the next section, or totals drift after a shell report heading changes.
 
-**Why it happens:** Template scripts use generic defaults (e.g., `PROJECT_NAME="${PROJECT_NAME:-my-project}"`) as placeholders that users fill in when copying the script into their project. These look like incomplete code to automated tools or reviewers unfamiliar with the template pattern.
+**Why it happens:** `dashboard/aws_ui.php` slices human-readable AWS cost output by heading names such as `EC2 - OTHER BREAKDOWN`. The shell producer emits those headings directly.
 
-**Prevention:** Never modify values inside a `# ---- CONFIGURATION ----` / `# ---- END CONFIGURATION ----` block unless you are intentionally changing the template interface. The `${VAR:-default}` pattern means the value is overridable via environment variables - the literal default is the fallback, not a mistake.
+**Evidence:**
+- `lib/aws/aws-costs.sh:323`
+- `lib/aws/aws-costs.sh:335`
+- `dashboard/aws_ui.php:1071`
+- `dashboard/aws_ui.php:1078`
 
----
-
-## Footgun: Many scripts lack `show_help()` / `--help`
-
-**Symptoms:** Running a script with `--help` produces an error or unexpected behavior instead of usage information.
-
-**Why it happens:** Some user-facing scripts still have no `show_help()` function or `--help` flag handling, especially older AWS templates and `stacks/go/` scripts.
-
-**Prevention:** When adding a new script that accepts arguments or has a CONFIGURATION block, always include a `show_help()` function and wire it to `-h|--help` in the argument parser. For drop-in scripts with no arguments, help is optional.
-
----
-
-## Footgun: `((var++))` under `set -e` aborts on first increment from zero
-
-**Symptoms:** Script using `((counter++))` exits unexpectedly on the first successful/failed check.
-
-**Why it happens:** In bash, `((var++))` is post-increment - it returns the value **before** incrementing. When `var` is 0, the expression evaluates to 0 (falsy), returning exit status 1. Under `set -e`, this aborts the script.
-
-**Prevention:** Use `var=$((var + 1))` or `((var += 1))` instead of `((var++))`. These always return truthy because the assignment itself succeeds.
+**Prevention:** Read the shell report and the PHP parser together before changing section names or row shapes. If the coupling grows, add a machine-readable output mode instead of scraping terminal text.
diff --git a/docs/guidelines-ownership-split.md b/docs/guidelines-ownership-split.md
new file mode 100644
index 0000000..19e783c
--- /dev/null
+++ b/docs/guidelines-ownership-split.md
@@ -0,0 +1,25 @@
+# Guidelines Ownership Split
+
+This repo started with `AGENTS.md` acting as both a runtime file and a domain reference. The Codex workflow split those responsibilities so the runtime loop stays short and the engineering details stay searchable.
+
+## Before / After Overlap Report
+
+| Previous section in `AGENTS.md` | Action | New home | Why |
+| --- | --- | --- | --- |
+| `Project Identity` | Kept | `AGENTS.md` | Runtime needs a one-paragraph repo definition. |
+| `Essential Commands` | Kept and expanded | `AGENTS.md` | These are high-signal runtime commands Codex should see every task. |
+| `Hard Rules` | Split | `AGENTS.md`, `docs/domain-reference.md` | Autonomy boundaries stay in runtime; repo mechanics move to reference docs. |
+| `Common Workflows` | Moved | `docs/domain-reference.md` | Domain implementation patterns are reference material, not loop instructions. |
+| `Commit Format` | Moved | `docs/domain-reference.md` | Useful project policy, but not part of the execution loop. |
+| `Context Router` | Split | `AGENTS.md`, `docs/domain-reference.md` | `AGENTS.md` routes workflow artefacts; `docs/domain-reference.md` routes domain instruction files. |
+
+## What Stayed Out Of Scope
+
+- `.github/instructions/*.md` remains the source of domain-specific engineering guidance.
+- Existing Claude workflow files remain in place. This change adds a Codex-native layer instead of deleting the Claude one.
+
+## Result
+
+- `AGENTS.md` now owns the execution loop, autonomy tiers, definition of done, task files, and workflow router.
+- `docs/domain-reference.md` owns repo mechanics, common workflows, entrypoint descriptions, and the domain instruction router.
+- `docs/guidelines-ownership-split.md` records the migration so future edits do not drift back into overlap.
diff --git a/docs/lessons.md b/docs/lessons.md
new file mode 100644
index 0000000..f62f11b
--- /dev/null
+++ b/docs/lessons.md
@@ -0,0 +1,18 @@
+# Lessons
+
+Behavioural mistakes discovered during development. When Codex, Claude, or another assistant makes a repeatable mistake that could recur, append it here.
+
+## Rules
+
+- Keep at most 15 active entries.
+- When 3 or more entries share a theme, promote them to a named pattern and archive the individual entries.
+- Include a `created_at` date on each entry.
+- Review periodically and archive entries that have gone stale.
+
+## Patterns
+
+_(none yet)_
+
+## Entries
+
+_(none yet)_
diff --git a/lib/ai-cli/CLAUDE.md b/lib/ai-cli/CLAUDE.md
new file mode 100644
index 0000000..7627d89
--- /dev/null
+++ b/lib/ai-cli/CLAUDE.md
@@ -0,0 +1,6 @@
+# ai-cli — Local Warnings
+
+- **WSL PATH:** Only this domain sanitises PATH for WSL. `sanitize_path_for_wsl()` and `command_exists()` in `_common.sh` reject `/mnt/*` binaries. Other domains do NOT do this.
+- **Source pattern:** Same-directory: `source "${SCRIPT_DIR}/_common.sh"`. Do NOT use parent traversal (`../`) — that's the stacks pattern.
+- **Logging:** Direct `echo -e` with color constants. No `[tag]` prefixes. No `step`/`pass`/`fail` helpers.
+- **Ask First:** Changes to `_common.sh` affect all installers in this directory.
diff --git a/lib/aws/_aws-common.sh b/lib/aws/_aws-common.sh
new file mode 100755
index 0000000..f89becc
--- /dev/null
+++ b/lib/aws/_aws-common.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+# Shared helpers for AWS scripts. Source this file; do not execute it directly.
+#
+# Scripts that source this file (aws-costs.sh, aws-rightsizing.sh, aws-security.sh)
+# are NOT standalone templates. If copying them to another project, also copy this
+# file and preserve the relative path, or inline the helpers you need.
+
+set -euo pipefail
+
+if [[ -n "${_AWS_COMMON_LOADED:-}" ]]; then
+    if [[ "${BASH_SOURCE[0]}" != "$0" ]]; then
+        return 0
+    fi
+    exit 0
+fi
+_AWS_COMMON_LOADED=1
+
+# shellcheck disable=SC2034
+RED='\033[0;31m'
+# shellcheck disable=SC2034
+GREEN='\033[0;32m'
+# shellcheck disable=SC2034
+YELLOW='\033[1;33m'
+# shellcheck disable=SC2034
+BLUE='\033[0;34m'
+# shellcheck disable=SC2034
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+# shellcheck disable=SC2034
+DIM='\033[2m'
+NC='\033[0m'
+
+AWS_COMMON_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(
+    git -C "$AWS_COMMON_DIR" rev-parse --show-toplevel 2>/dev/null \
+        || (cd "$AWS_COMMON_DIR/../.." && pwd)
+)"
+ENV_FILE="$PROJECT_ROOT/.env"
+
+trim_leading_whitespace() {
+    local value="$1"
+    printf '%s' "${value#"${value%%[![:space:]]*}"}"
+}
+
+trim_trailing_whitespace() {
+    local value="$1"
+    printf '%s' "${value%"${value##*[![:space:]]}"}"
+}
+
+load_env_file() {
+    local line key value
+
+    [[ -f "$ENV_FILE" ]] || return 0
+
+    while IFS= read -r line || [[ -n "$line" ]]; do
+        [[ "$line" =~ ^[[:space:]]*# ]] && continue
+        [[ "$line" =~ ^[[:space:]]*$ ]] && continue
+
+        if [[ "$line" =~ ^[[:space:]]*(export[[:space:]]+)?([A-Za-z_][A-Za-z_0-9]*)[[:space:]]*=(.*)$ ]]; then
+            key="${BASH_REMATCH[2]}"
+            value="${BASH_REMATCH[3]}"
+            value="$(trim_leading_whitespace "$value")"
+            value="$(trim_trailing_whitespace "$value")"
+
+            if [[ "$value" =~ ^\"(.*)\"$ ]]; then
+                value="${BASH_REMATCH[1]}"
+            elif [[ "$value" =~ ^\'(.*)\'$ ]]; then
+                value="${BASH_REMATCH[1]}"
+            fi
+
+            declare -gx "$key=$value"
+        fi
+    done < "$ENV_FILE"
+}
+
+load_env_file
+
+AWS_PROFILE_NAME="${AWS_PROFILE_NAME:-default}"
+AWS_REGION="${AWS_REGION:-${AWS_DEFAULT_REGION:-us-east-1}}"
+
+if [[ -n "${AWS_ACCESS_KEY_ID:-}" && -n "${AWS_SECRET_ACCESS_KEY:-}" ]]; then
+    export AWS_ACCESS_KEY_ID
+    export AWS_SECRET_ACCESS_KEY
+    export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-$AWS_REGION}"
+
+    if [[ -n "${AWS_SESSION_TOKEN:-}" ]]; then
+        # Preserve session token from .env or pre-existing environment (e.g. assume-role)
+        export AWS_SESSION_TOKEN
+    else
+        unset AWS_SESSION_TOKEN 2>/dev/null || true
+    fi
+
+    unset AWS_PROFILE 2>/dev/null || true
+    AWS_AUTH_MODE="access keys from .env"
+else
+    export AWS_PROFILE="${AWS_PROFILE:-$AWS_PROFILE_NAME}"
+    export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-$AWS_REGION}"
+    AWS_AUTH_MODE="profile (${AWS_PROFILE})"
+fi
+
+require_cmd() {
+    if ! command -v "$1" >/dev/null 2>&1; then
+        echo -e "${RED}Error: $1 is not installed${NC}"
+        if [[ -n "${2:-}" ]]; then
+            echo "$2"
+        fi
+        exit 1
+    fi
+}
+
+require_modern_bash() {
+    if (( BASH_VERSINFO[0] < 4 )); then
+        echo -e "${RED}Error: this script requires Bash 4+${NC}"
+        echo "Install a newer bash and ensure it is first on PATH."
+        exit 1
+    fi
+}
+
+require_unix() {
+    case "$(uname -s)" in
+        MINGW*|MSYS*)
+            echo -e "${RED}This script requires WSL, Linux, or macOS${NC}"
+            echo -e "Run it inside WSL: ${BOLD}wsl bash $0${NC}"
+            exit 1
+            ;;
+    esac
+}
+
+ensure_aws_cli() {
+    require_cmd aws "Install AWS CLI v2: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
+}
+
+show_aws_auth_help() {
+    echo "To fix this, either:"
+    echo "  1. Put AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY in .env"
+    echo "  2. Run: aws sso login --profile ${AWS_PROFILE:-$AWS_PROFILE_NAME}"
+}
+
+require_aws_auth() {
+    if ! aws sts get-caller-identity >/dev/null 2>&1; then
+        echo -e "${RED}Error: AWS credentials are not valid (${AWS_AUTH_MODE})${NC}"
+        echo ""
+        show_aws_auth_help
+        exit 1
+    fi
+}
diff --git a/lib/aws/aws-cli.sh b/lib/aws/aws-cli.sh
index 7fa3901..32719bb 100755
--- a/lib/aws/aws-cli.sh
+++ b/lib/aws/aws-cli.sh
@@ -1,115 +1,128 @@
 #!/usr/bin/env bash
-# =============================================================================
-# AWS CLI Wrapper Script
-# =============================================================================
 #
-# Ensures the correct AWS profile is used for all AWS commands.
-# Sets AWS_PROFILE and passes all arguments to the specified command.
+# aws-cli.sh - AWS CLI and Terraform wrapper with consistent auth loading
 #
-# USAGE:
-#   ./scripts/aws-cli.sh <command> [arguments...]
-#
-# EXAMPLES:
-#   ./scripts/aws-cli.sh sts get-caller-identity
-#   ./scripts/aws-cli.sh s3 ls
-#   ./scripts/aws-cli.sh ecs describe-services --cluster my-cluster --services my-service
-#   ./scripts/aws-cli.sh logs tail /ecs/my-app --follow
-#
-# =============================================================================
 
 set -euo pipefail
 
 # ---- CONFIGURATION ----
-# Customize these variables for your project, or set them as environment variables.
-AWS_PROFILE="${AWS_PROFILE:-default}"
+AWS_PROFILE_NAME="${AWS_PROFILE_NAME:-default}"
 AWS_REGION="${AWS_REGION:-us-east-1}"
-PROJECT_NAME="${PROJECT_NAME:-my-project}"
 # ---- END CONFIGURATION ----
 
-# Load .env file if it exists
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
-ENV_FILE="$PROJECT_ROOT/.env"
-
-if [[ -f "$ENV_FILE" ]]; then
-    while IFS='=' read -r key value; do
-        [[ -z "$key" || "$key" =~ ^[[:space:]]*# ]] && continue
-        key="${key#"${key%%[![:space:]]*}"}"
-        value="${value%\"}" ; value="${value#\"}"
-        value="${value%\'}" ; value="${value#\'}"
-        export "$key=$value"
-    done < "$ENV_FILE"
-fi
+# shellcheck disable=SC1091
+source "$SCRIPT_DIR/_aws-common.sh"
 
-# Colors
-RED='\033[0;31m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m'
+show_help() {
+    cat << EOF
+Usage: $0 [aws-subcommand...]
+       $0 aws [aws-subcommand...]
+       $0 terraform [terraform-args...]
 
-# Export AWS profile and region
-export AWS_PROFILE
-export AWS_DEFAULT_REGION="$AWS_REGION"
+Examples:
+  $0 sts get-caller-identity
+  $0 s3 ls
+  $0 ecr describe-repositories
+  $0 terraform plan
 
-# Print usage if no arguments
-if [[ $# -eq 0 ]]; then
-    echo -e "${BLUE}AWS CLI Wrapper Script - ${PROJECT_NAME}${NC}"
-    echo ""
-    echo "Usage: $0 <command> [arguments...]"
-    echo ""
-    echo "Examples:"
-    echo "  $0 sts get-caller-identity                        # Check AWS credentials"
-    echo "  $0 s3 ls                                          # List S3 buckets"
-    echo "  $0 ecr describe-repositories                      # List ECR repositories"
-    echo "  $0 ecs describe-services --cluster <cluster> --services <service>"
-    echo "  $0 logs tail /ecs/<log-group> --follow"
-    echo "  $0 secretsmanager get-secret-value --secret-id /<prefix>/prod/api-key"
-    echo ""
-    echo "Environment:"
-    echo "  AWS_PROFILE: $AWS_PROFILE"
-    echo "  AWS_REGION:  $AWS_REGION"
-    echo ""
-    echo "If you see credential errors, try:"
-    echo "  aws sso login --profile $AWS_PROFILE"
+Environment:
+  AWS auth:   $AWS_AUTH_MODE
+  AWS region: ${AWS_DEFAULT_REGION:-$AWS_REGION}
+
+EOF
+    show_aws_auth_help
+}
+
+aws_args_require_auth() {
+    local -a args=("$@")
+
+    if [[ ${#args[@]} -eq 0 ]]; then
+        return 1
+    fi
+
+    case "${args[0]}" in
+        help|-h|--help|version|--version|completion)
+            return 1
+            ;;
+        configure)
+            return 1
+            ;;
+        sso)
+            case "${args[1]:-}" in
+                ""|help|login|logout)
+                    return 1
+                    ;;
+            esac
+            ;;
+    esac
+
+    return 0
+}
+
+terraform_requires_aws_auth() {
+    case "${1:-}" in
+        ""|help|-help|--help|fmt|validate|version|-version|--version)
+            return 1
+            ;;
+    esac
+
+    return 0
+}
+
+ARGS=()
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -h|--help|help)
+            show_help
+            exit 0
+            ;;
+        --)
+            shift
+            while [[ $# -gt 0 ]]; do
+                ARGS+=("$1")
+                shift
+            done
+            ;;
+        *)
+            ARGS+=("$1")
+            shift
+            ;;
+    esac
+done
+
+if [[ ${#ARGS[@]} -eq 0 ]]; then
+    show_help
     exit 0
 fi
 
-# Get the command to run
-COMMAND="$1"
-shift
+COMMAND="${ARGS[0]}"
+REST=("${ARGS[@]:1}")
 
-# Check for required tools
 if [[ "$COMMAND" == "terraform" ]]; then
-    if ! command -v terraform &> /dev/null; then
-        echo -e "${RED}Error: Terraform is not installed${NC}"
-        echo "Install it from: https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli"
-        exit 1
+    require_cmd terraform "Install Terraform: https://developer.hashicorp.com/terraform/install"
+
+    if terraform_requires_aws_auth "${REST[@]}"; then
+        ensure_aws_cli
+        require_aws_auth
     fi
-elif ! command -v aws &> /dev/null; then
-    echo -e "${RED}Error: AWS CLI is not installed${NC}"
-    echo "Install it from: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
-    exit 1
-fi
 
-# Verify credentials
-if ! aws sts get-caller-identity &> /dev/null; then
-    echo -e "${YELLOW}AWS credentials not found or expired for profile '$AWS_PROFILE'${NC}"
+    echo -e "${BLUE}Running: terraform ${REST[*]}${NC}"
+    echo -e "${BLUE}Auth: $AWS_AUTH_MODE | Region: ${AWS_DEFAULT_REGION:-$AWS_REGION}${NC}"
     echo ""
-    echo "To fix this, run:"
-    echo "  aws sso login --profile $AWS_PROFILE"
-    echo ""
-    exit 1
+    exec terraform "${REST[@]}"
 fi
 
-# Run the command
+ensure_aws_cli
+
 if [[ "$COMMAND" == "aws" ]]; then
-    aws "$@"
-elif [[ "$COMMAND" == "terraform" ]]; then
-    echo -e "${BLUE}Running: terraform $*${NC}"
-    echo -e "${BLUE}Profile: $AWS_PROFILE | Region: $AWS_REGION${NC}"
-    echo ""
-    terraform "$@"
+    AWS_ARGS=("${REST[@]}")
 else
-    # Assume it's an AWS CLI subcommand (s3, ecs, ecr, logs, etc.)
-    aws "$COMMAND" "$@"
+    AWS_ARGS=("$COMMAND" "${REST[@]}")
 fi
+
+if aws_args_require_auth "${AWS_ARGS[@]}"; then
+    require_aws_auth
+fi
+
+exec aws "${AWS_ARGS[@]}"
diff --git a/lib/aws/aws-costs.sh b/lib/aws/aws-costs.sh
new file mode 100755
index 0000000..537fe66
--- /dev/null
+++ b/lib/aws/aws-costs.sh
@@ -0,0 +1,548 @@
+#!/usr/bin/env bash
+#
+# aws-costs.sh - Cost Explorer summary plus a lightweight AWS inventory snapshot
+#
+
+set -euo pipefail
+
+# ---- CONFIGURATION ----
+AWS_PROFILE_NAME="${AWS_PROFILE_NAME:-default}"
+AWS_REGION="${AWS_REGION:-us-east-1}"
+# ---- END CONFIGURATION ----
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "$SCRIPT_DIR/_aws-common.sh"
+
+show_help() {
+    cat << EOF
+Usage:
+  $0
+  $0 YYYY-MM
+  $0 YYYY-MM YYYY-MM
+  $0 --start YYYY-MM [--end YYYY-MM]
+
+Examples:
+  $0
+  $0 2026-01
+  $0 2026-01 2026-03
+  $0 --start 2026-01 --end 2026-03
+
+Notes:
+  - No arguments shows the previous month plus the current month to date.
+  - Cost Explorer data is grouped by AWS service.
+  - The resource inventory is descriptive; only ECS Fargate gets a direct estimate.
+EOF
+}
+
+is_valid_month() {
+    [[ "$1" =~ ^[0-9]{4}-(0[1-9]|1[0-2])$ ]]
+}
+
+next_month() {
+    local year="${1:0:4}"
+    local month="${1:5:2}"
+
+    if [[ "$month" == "12" ]]; then
+        printf '%04d-01\n' $((10#$year + 1))
+    else
+        printf '%04d-%02d\n' "$year" $((10#$month + 1))
+    fi
+}
+
+last_day_of_month() {
+    local year="$1"
+    local month="$2"
+
+    case "$month" in
+        01|03|05|07|08|10|12) echo "31" ;;
+        04|06|09|11) echo "30" ;;
+        02)
+            if (( (10#$year % 400 == 0) || (10#$year % 4 == 0 && 10#$year % 100 != 0) )); then
+                echo "29"
+            else
+                echo "28"
+            fi
+            ;;
+        *)
+            echo "30"
+            ;;
+    esac
+}
+
+date_add_days() {
+    local date_value="$1"
+    local offset="$2"
+
+    if date -d "$date_value $offset day" +%Y-%m-%d >/dev/null 2>&1; then
+        date -d "$date_value $offset day" +%Y-%m-%d
+    else
+        local flag
+        if (( offset >= 0 )); then
+            flag="+${offset}d"
+        else
+            flag="${offset}d"
+        fi
+        date -j -f "%Y-%m-%d" "$date_value" -v"$flag" +%Y-%m-%d
+    fi
+}
+
+repeat_rule() {
+    printf '%*s' "$1" '' | tr ' ' '─'
+}
+
+run_cost_explorer() {
+    local output
+
+    if ! output=$(aws ce get-cost-and-usage "$@" --output json 2>&1); then
+        echo -e "${RED}Error: Cost Explorer query failed${NC}"
+        echo "$output"
+        exit 1
+    fi
+
+    printf '%s\n' "$output"
+}
+
+START_MONTH=""
+END_MONTH=""
+POSITIONAL=()
+HAS_MONTH_INPUT=false
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        --start)
+            [[ -n "${2:-}" ]] || { echo -e "${RED}Error: --start requires YYYY-MM${NC}"; exit 1; }
+            START_MONTH="$2"
+            HAS_MONTH_INPUT=true
+            shift 2
+            ;;
+        --end)
+            [[ -n "${2:-}" ]] || { echo -e "${RED}Error: --end requires YYYY-MM${NC}"; exit 1; }
+            END_MONTH="$2"
+            HAS_MONTH_INPUT=true
+            shift 2
+            ;;
+        -*)
+            echo -e "${RED}Error: unknown option '$1'${NC}"
+            show_help
+            exit 1
+            ;;
+        *)
+            POSITIONAL+=("$1")
+            shift
+            ;;
+    esac
+done
+
+if [[ ${#POSITIONAL[@]} -gt 2 ]]; then
+    echo -e "${RED}Error: expected at most 2 positional months${NC}"
+    show_help
+    exit 1
+fi
+
+if [[ ${#POSITIONAL[@]} -ge 1 && -z "$START_MONTH" ]]; then
+    START_MONTH="${POSITIONAL[0]}"
+    HAS_MONTH_INPUT=true
+fi
+if [[ ${#POSITIONAL[@]} -eq 2 && -z "$END_MONTH" ]]; then
+    END_MONTH="${POSITIONAL[1]}"
+    HAS_MONTH_INPUT=true
+fi
+
+if [[ -z "$START_MONTH" && -n "$END_MONTH" ]]; then
+    START_MONTH="$END_MONTH"
+    HAS_MONTH_INPUT=true
+fi
+
+if [[ -n "$START_MONTH" ]] && ! is_valid_month "$START_MONTH"; then
+    echo -e "${RED}Error: invalid start month '$START_MONTH'${NC}"
+    exit 1
+fi
+
+if [[ -n "$END_MONTH" ]] && ! is_valid_month "$END_MONTH"; then
+    echo -e "${RED}Error: invalid end month '$END_MONTH'${NC}"
+    exit 1
+fi
+
+require_unix
+require_modern_bash
+ensure_aws_cli
+require_cmd jq "Install jq: https://jqlang.github.io/jq/download/"
+require_cmd bc "Install bc via your package manager."
+require_aws_auth
+
+CURRENT_DATE="$(date +%Y-%m-%d)"
+CURRENT_MONTH="${CURRENT_DATE:0:7}"
+
+if [[ -z "$START_MONTH" ]]; then
+    if [[ "${CURRENT_MONTH:5:2}" == "01" ]]; then
+        START_MONTH="$(printf '%04d-12' $((10#${CURRENT_MONTH:0:4} - 1)))"
+    else
+        START_MONTH="$(printf '%04d-%02d' "${CURRENT_MONTH:0:4}" $((10#${CURRENT_MONTH:5:2} - 1)))"
+    fi
+fi
+
+if [[ -z "$END_MONTH" ]]; then
+    if [[ "$HAS_MONTH_INPUT" == false ]]; then
+        END_MONTH="$CURRENT_MONTH"
+    else
+        END_MONTH="$START_MONTH"
+    fi
+fi
+
+if [[ "$START_MONTH" > "$END_MONTH" ]]; then
+    echo -e "${RED}Error: start month must be before or equal to end month${NC}"
+    exit 1
+fi
+
+DISPLAY_START="${START_MONTH}-01"
+API_START="$DISPLAY_START"
+
+if [[ "$END_MONTH" == "$CURRENT_MONTH" ]]; then
+    DISPLAY_END="$CURRENT_DATE"
+    API_END="$(date_add_days "$CURRENT_DATE" 1)"
+else
+    DISPLAY_END="${END_MONTH}-$(last_day_of_month "${END_MONTH:0:4}" "${END_MONTH:5:2}")"
+    API_END="$(next_month "$END_MONTH")-01"
+fi
+
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+echo -e "${BOLD}${BLUE}  AWS Cost Summary${NC}"
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+echo -e "${DIM}  Period: $DISPLAY_START -> $DISPLAY_END${NC}"
+echo ""
+
+echo -e "${BOLD}${CYAN}  COSTS BY SERVICE${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+costs_json="$(run_cost_explorer \
+    --time-period "Start=$API_START,End=$API_END" \
+    --granularity MONTHLY \
+    --metrics BlendedCost \
+    --group-by Type=DIMENSION,Key=SERVICE)"
+
+mapfile -t months < <(
+    jq -r '.ResultsByTime[].TimePeriod.Start' <<<"$costs_json" |
+    cut -d'-' -f1-2 |
+    awk '!seen[$0]++'
+)
+
+if [[ ${#months[@]} -eq 0 ]]; then
+    echo -e "    ${DIM}No cost data returned for this period${NC}"
+    echo ""
+else
+    table_width=$((42 + 12 * ${#months[@]}))
+    header="  $(printf '%-42s' 'Service')"
+
+    for month in "${months[@]}"; do
+        header+="$(printf '%12s' "$month")"
+    done
+
+    echo -e "${BOLD}$header${NC}"
+    echo -e "${DIM}  $(repeat_rule "$table_width")${NC}"
+
+    declare -A service_costs
+    declare -A service_seen
+    declare -A service_totals
+    declare -a all_services=()
+    declare -a month_totals=()
+
+    for _ in "${months[@]}"; do
+        month_totals+=("0")
+    done
+
+    for month in "${months[@]}"; do
+        while IFS='|' read -r service cost; do
+            service="$(trim_trailing_whitespace "$(trim_leading_whitespace "$service")")"
+            cost="$(trim_trailing_whitespace "$(trim_leading_whitespace "$cost")")"
+
+            if [[ $(echo "$cost > 0.005" | bc -l) == "1" ]]; then
+                service_costs["$service|$month"]="$cost"
+
+                if [[ -z "${service_seen["$service"]:-}" ]]; then
+                    service_seen["$service"]=1
+                    all_services+=("$service")
+                fi
+            fi
+        done < <(
+            jq -r --arg month "$month" '
+                .ResultsByTime[] |
+                select(.TimePeriod.Start | startswith($month)) |
+                .Groups[] |
+                "\(.Keys[0])|\(.Metrics.BlendedCost.Amount)"
+            ' <<<"$costs_json"
+        )
+    done
+
+    for service in "${all_services[@]}"; do
+        total=0
+        for month in "${months[@]}"; do
+            total="$(echo "$total + ${service_costs["$service|$month"]:-0}" | bc -l)"
+        done
+        service_totals["$service"]="$total"
+    done
+
+    mapfile -t sorted_services < <(
+        for service in "${all_services[@]}"; do
+            printf '%s|%s\n' "${service_totals["$service"]}" "$service"
+        done | sort -t'|' -k1 -rn | cut -d'|' -f2
+    )
+
+    for service in "${sorted_services[@]}"; do
+        display_name="$service"
+        if [[ ${#display_name} -gt 40 ]]; then
+            display_name="${display_name:0:37}..."
+        fi
+
+        row="  $(printf '%-42s' "$display_name")"
+        month_idx=0
+
+        for month in "${months[@]}"; do
+            cost="${service_costs["$service|$month"]:-}"
+            if [[ -n "$cost" ]]; then
+                row+="$(printf '%12s' "$(printf '$%0.2f' "$cost")")"
+                month_totals[month_idx]="$(echo "${month_totals[month_idx]} + $cost" | bc -l)"
+            else
+                row+="$(printf '%12s' "—")"
+            fi
+            month_idx=$((month_idx + 1))
+        done
+
+        echo "$row"
+    done
+
+    echo -e "${DIM}  $(repeat_rule "$table_width")${NC}"
+    total_row="  $(printf '%-42s' 'TOTAL')"
+    for month_idx in "${!months[@]}"; do
+        total_row+="$(printf '%12s' "$(printf '$%0.2f' "${month_totals[$month_idx]}")")"
+    done
+    echo -e "${BOLD}$total_row${NC}"
+    echo ""
+
+    has_ec2_other=false
+    for service in "${sorted_services[@]}"; do
+        if [[ "$service" == "EC2 - Other" ]]; then
+            has_ec2_other=true
+            break
+        fi
+    done
+
+    if [[ "$has_ec2_other" == true ]]; then
+        echo -e "${BOLD}${CYAN}  EC2 - OTHER BREAKDOWN${NC}"
+        echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+        ec2_other_json="$(run_cost_explorer \
+            --time-period "Start=$API_START,End=$API_END" \
+            --granularity MONTHLY \
+            --metrics BlendedCost \
+            --filter '{"Dimensions":{"Key":"SERVICE","Values":["EC2 - Other"]}}' \
+            --group-by Type=DIMENSION,Key=USAGE_TYPE)"
+
+        bd_header="  $(printf '%-42s' 'Usage Type')"
+        for month in "${months[@]}"; do
+            bd_header+="$(printf '%12s' "$month")"
+        done
+        echo -e "${DIM}$bd_header${NC}"
+
+        declare -A breakdown_costs
+        declare -A breakdown_seen
+        declare -A breakdown_totals
+        declare -a breakdown_names=()
+
+        for month in "${months[@]}"; do
+            while IFS='|' read -r raw_type cost; do
+                raw_type="$(trim_trailing_whitespace "$(trim_leading_whitespace "$raw_type")")"
+                cost="$(trim_trailing_whitespace "$(trim_leading_whitespace "$cost")")"
+
+                if [[ $(echo "$cost > 0.005" | bc -l) != "1" ]]; then
+                    continue
+                fi
+
+                name="$raw_type"
+                if [[ "$name" =~ ^[A-Z0-9]{3,4}- ]]; then
+                    name="${name#*-}"
+                fi
+
+                case "$name" in
+                    NatGateway-Hours*) name="NAT Gateway (Hours)" ;;
+                    NatGateway-Bytes*) name="NAT Gateway (Data)" ;;
+                    EBS:VolumeUsage.*) name="EBS Volumes (${name#EBS:VolumeUsage.})" ;;
+                    EBS:VolumeUsage) name="EBS Volumes (gp2)" ;;
+                    EBS:Snapshot*) name="EBS Snapshots" ;;
+                    *ElasticIP*) name="Elastic IPs" ;;
+                    *DataTransfer*Out*) name="Data Transfer (Out)" ;;
+                    *DataTransfer*In*) name="Data Transfer (In)" ;;
+                    *DataTransfer*) name="Data Transfer" ;;
+                    *PublicIPv4*) name="Public IPv4 Addresses" ;;
+                esac
+
+                existing="${breakdown_costs["$name|$month"]:-0}"
+                breakdown_costs["$name|$month"]="$(echo "$existing + $cost" | bc -l)"
+
+                if [[ -z "${breakdown_seen["$name"]:-}" ]]; then
+                    breakdown_seen["$name"]=1
+                    breakdown_names+=("$name")
+                fi
+            done < <(
+                jq -r --arg month "$month" '
+                    .ResultsByTime[] |
+                    select(.TimePeriod.Start | startswith($month)) |
+                    .Groups[] |
+                    "\(.Keys[0])|\(.Metrics.BlendedCost.Amount)"
+                ' <<<"$ec2_other_json"
+            )
+        done
+
+        for name in "${breakdown_names[@]}"; do
+            total=0
+            for month in "${months[@]}"; do
+                total="$(echo "$total + ${breakdown_costs["$name|$month"]:-0}" | bc -l)"
+            done
+            breakdown_totals["$name"]="$total"
+        done
+
+        mapfile -t sorted_breakdowns < <(
+            for name in "${breakdown_names[@]}"; do
+                printf '%s|%s\n' "${breakdown_totals["$name"]}" "$name"
+            done | sort -t'|' -k1 -rn | cut -d'|' -f2
+        )
+
+        for name in "${sorted_breakdowns[@]}"; do
+            display_name="$name"
+            if [[ ${#display_name} -gt 40 ]]; then
+                display_name="${display_name:0:37}..."
+            fi
+
+            row="  $(printf '%-42s' "  $display_name")"
+            for month in "${months[@]}"; do
+                cost="${breakdown_costs["$name|$month"]:-}"
+                if [[ -n "$cost" && $(echo "$cost > 0.005" | bc -l) == "1" ]]; then
+                    row+="$(printf '%12s' "$(printf '$%0.2f' "$cost")")"
+                else
+                    row+="$(printf '%12s' "—")"
+                fi
+            done
+            echo -e "${DIM}$row${NC}"
+        done
+
+        echo ""
+    fi
+fi
+
+echo -e "${BOLD}${CYAN}  RESOURCE INVENTORY${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+ecs_service_count=0
+ecs_estimated_monthly=0
+
+echo -e "${BOLD}  ECS Fargate Services${NC}"
+clusters="$(aws ecs list-clusters --query 'clusterArns[*]' --output json 2>/dev/null || echo '[]')"
+if [[ "$(jq 'length' <<<"$clusters")" -eq 0 ]]; then
+    echo -e "    ${DIM}(none)${NC}"
+else
+    while IFS= read -r cluster_arn; do
+        cluster_name="${cluster_arn##*/}"
+        services="$(aws ecs list-services --cluster "$cluster_name" --query 'serviceArns[*]' --output json 2>/dev/null || echo '[]')"
+        while IFS= read -r service_arn; do
+            [[ -n "$service_arn" ]] || continue
+            service_name="${service_arn##*/}"
+            service_info="$(aws ecs describe-services --cluster "$cluster_name" --services "$service_name" \
+                --query 'services[0].{running:runningCount,desired:desiredCount,taskDef:taskDefinition}' --output json 2>/dev/null || echo '{}')"
+            task_def="$(jq -r '.taskDef // empty' <<<"$service_info")"
+            [[ -n "$task_def" ]] || continue
+
+            task_spec="$(aws ecs describe-task-definition --task-definition "$task_def" \
+                --query 'taskDefinition.{cpu:cpu,memory:memory}' --output json 2>/dev/null || echo '{}')"
+            cpu="$(jq -r '.cpu // 0' <<<"$task_spec")"
+            mem="$(jq -r '.memory // 0' <<<"$task_spec")"
+            desired="$(jq -r '.desired // 0' <<<"$service_info")"
+            running="$(jq -r '.running // 0' <<<"$service_info")"
+
+            cpu_units="$(echo "scale=2; $cpu / 1024" | bc)"
+            mem_gb="$(echo "scale=2; $mem / 1024" | bc)"
+            monthly_cost="$(printf '%.2f' "$(echo "($cpu_units * 0.04048 + $mem_gb * 0.004445) * 730 * $desired" | bc -l)")"
+
+            echo -e "    ${cluster_name}/${service_name}  ${DIM}${cpu} CPU / ${mem} MB | ${running}/${desired} tasks | ~\$${monthly_cost}/mo${NC}"
+
+            ecs_service_count=$((ecs_service_count + 1))
+            ecs_estimated_monthly="$(echo "$ecs_estimated_monthly + $monthly_cost" | bc -l)"
+        done < <(jq -r '.[]?' <<<"$services")
+    done < <(jq -r '.[]?' <<<"$clusters")
+fi
+
+echo -e "${BOLD}  Application Load Balancers${NC}"
+alb_json="$(aws elbv2 describe-load-balancers --query "LoadBalancers[?Type==\`application\`].{name:LoadBalancerName,state:State.Code}" --output json 2>/dev/null || echo '[]')"
+if [[ "$(jq 'length' <<<"$alb_json")" -eq 0 ]]; then
+    echo -e "    ${DIM}(none)${NC}"
+else
+    jq -r '.[] | "\(.name)|\(.state)"' <<<"$alb_json" | while IFS='|' read -r name state; do
+        echo -e "    ${name}  ${DIM}(${state})${NC}"
+    done
+fi
+
+echo -e "${BOLD}  WAF Web ACLs${NC}"
+waf_json="$(aws wafv2 list-web-acls --scope REGIONAL --query 'WebACLs[*].Name' --output json 2>/dev/null || echo '[]')"
+if [[ "$(jq 'length' <<<"$waf_json")" -eq 0 ]]; then
+    echo -e "    ${DIM}(none)${NC}"
+else
+    jq -r '.[]?' <<<"$waf_json" | while IFS= read -r name; do
+        echo -e "    ${name}"
+    done
+fi
+
+echo -e "${BOLD}  RDS Instances${NC}"
+rds_json="$(aws rds describe-db-instances --query 'DBInstances[*].{id:DBInstanceIdentifier,class:DBInstanceClass,engine:Engine,status:DBInstanceStatus}' --output json 2>/dev/null || echo '[]')"
+if [[ "$(jq 'length' <<<"$rds_json")" -eq 0 ]]; then
+    echo -e "    ${DIM}(none)${NC}"
+else
+    jq -r '.[] | "\(.id)|\(.class)|\(.engine)|\(.status)"' <<<"$rds_json" | while IFS='|' read -r id class engine status; do
+        echo -e "    ${id}  ${DIM}${class} (${engine}) | ${status}${NC}"
+    done
+fi
+
+echo -e "${BOLD}  NAT Gateways${NC}"
+nat_json="$(aws ec2 describe-nat-gateways --filter "Name=state,Values=available" --query 'NatGateways[*].NatGatewayId' --output json 2>/dev/null || echo '[]')"
+if [[ "$(jq 'length' <<<"$nat_json")" -eq 0 ]]; then
+    echo -e "    ${DIM}(none)${NC}"
+else
+    jq -r '.[]?' <<<"$nat_json" | while IFS= read -r nat_id; do
+        echo -e "    ${nat_id}"
+    done
+fi
+
+echo -e "${BOLD}  Secrets Manager${NC}"
+secrets_json="$(aws secretsmanager list-secrets --query 'SecretList[*].Name' --output json 2>/dev/null || echo '[]')"
+if [[ "$(jq 'length' <<<"$secrets_json")" -eq 0 ]]; then
+    echo -e "    ${DIM}(none)${NC}"
+else
+    jq -r '.[]?' <<<"$secrets_json" | while IFS= read -r name; do
+        echo -e "    ${name}"
+    done
+fi
+
+echo -e "${BOLD}  S3 Buckets${NC}"
+buckets_json="$(aws s3api list-buckets --query 'Buckets[*].Name' --output json 2>/dev/null || echo '[]')"
+if [[ "$(jq 'length' <<<"$buckets_json")" -eq 0 ]]; then
+    echo -e "    ${DIM}(none)${NC}"
+else
+    jq -r '.[]?' <<<"$buckets_json" | while IFS= read -r name; do
+        echo -e "    ${name}"
+    done
+fi
+
+echo ""
+echo -e "${BOLD}${CYAN}  INVENTORY SUMMARY${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+printf "  %-28s %12s\n" "ECS services" "$ecs_service_count"
+printf "  %-28s %12s\n" "Load balancers" "$(jq 'length' <<<"$alb_json")"
+printf "  %-28s %12s\n" "Web ACLs" "$(jq 'length' <<<"$waf_json")"
+printf "  %-28s %12s\n" "RDS instances" "$(jq 'length' <<<"$rds_json")"
+printf "  %-28s %12s\n" "NAT gateways" "$(jq 'length' <<<"$nat_json")"
+printf "  %-28s %12s\n" "Secrets" "$(jq 'length' <<<"$secrets_json")"
+printf "  %-28s %12s\n" "Buckets" "$(jq 'length' <<<"$buckets_json")"
+printf "  %-28s %12s\n" "ECS estimate" "$(printf '~$%0.2f/mo' "$ecs_estimated_monthly")"
+echo ""
diff --git a/lib/aws/aws-rightsizing.sh b/lib/aws/aws-rightsizing.sh
new file mode 100755
index 0000000..118abcb
--- /dev/null
+++ b/lib/aws/aws-rightsizing.sh
@@ -0,0 +1,701 @@
+#!/usr/bin/env bash
+#
+# aws-rightsizing.sh - Rightsizing review across common AWS resources
+#
+
+set -euo pipefail
+
+# ---- CONFIGURATION ----
+AWS_PROFILE_NAME="${AWS_PROFILE_NAME:-default}"
+AWS_REGION="${AWS_REGION:-us-east-1}"
+CPU_LOW="${CPU_LOW:-20}"
+CPU_HIGH="${CPU_HIGH:-80}"
+MEM_LOW="${MEM_LOW:-20}"
+MEM_HIGH="${MEM_HIGH:-80}"
+STORAGE_HIGH="${STORAGE_HIGH:-80}"
+CONN_LOW="${CONN_LOW:-5}"
+# ---- END CONFIGURATION ----
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "$SCRIPT_DIR/_aws-common.sh"
+
+show_help() {
+    cat << EOF
+Usage:
+  $0
+  $0 14
+  $0 --days 30
+
+Examples:
+  $0
+  $0 14
+  $0 --days 30
+
+Environment overrides:
+  CPU_LOW, CPU_HIGH, MEM_LOW, MEM_HIGH, STORAGE_HIGH, CONN_LOW
+EOF
+}
+
+DAYS="7"
+POSITIONAL=()
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        --days)
+            [[ -n "${2:-}" ]] || { echo -e "${RED}Error: --days requires a value${NC}"; exit 1; }
+            DAYS="$2"
+            shift 2
+            ;;
+        -*)
+            echo -e "${RED}Error: unknown option '$1'${NC}"
+            show_help
+            exit 1
+            ;;
+        *)
+            POSITIONAL+=("$1")
+            shift
+            ;;
+    esac
+done
+
+if [[ ${#POSITIONAL[@]} -gt 1 ]]; then
+    echo -e "${RED}Error: expected at most one positional day count${NC}"
+    exit 1
+fi
+
+if [[ ${#POSITIONAL[@]} -eq 1 ]]; then
+    DAYS="${POSITIONAL[0]}"
+fi
+
+if ! [[ "$DAYS" =~ ^[0-9]+$ ]] || (( DAYS < 1 || DAYS > 90 )); then
+    echo -e "${RED}Error: days must be an integer between 1 and 90${NC}"
+    exit 1
+fi
+
+require_unix
+require_modern_bash
+ensure_aws_cli
+require_cmd jq "Install jq: https://jqlang.github.io/jq/download/"
+require_cmd bc "Install bc via your package manager."
+require_aws_auth
+
+START_TIME=$(date -u -d "$DAYS days ago" +%Y-%m-%dT%H:%M:%S 2>/dev/null || date -u -v-"${DAYS}"d +%Y-%m-%dT%H:%M:%S)
+END_TIME=$(date -u +%Y-%m-%dT%H:%M:%S)
+# Period in seconds: use 1-hour intervals for <= 7 days, 6-hour for longer
+if [[ "$DAYS" -le 7 ]]; then
+    PERIOD=3600
+else
+    PERIOD=21600
+fi
+
+# Helper: get a CloudWatch metric statistic
+get_metric() {
+    local namespace="$1" metric="$2" stat="$3" dim_name="$4" dim_value="$5"
+    local dim_name2="${6:-}" dim_value2="${7:-}"
+    local dimension_args=("--dimensions" "Name=$dim_name,Value=$dim_value")
+    if [[ -n "$dim_name2" ]]; then
+        dimension_args+=("Name=$dim_name2,Value=$dim_value2")
+    fi
+
+    aws cloudwatch get-metric-statistics \
+        --namespace "$namespace" \
+        --metric-name "$metric" \
+        --start-time "$START_TIME" \
+        --end-time "$END_TIME" \
+        --period "$PERIOD" \
+        --statistics "$stat" \
+        "${dimension_args[@]}" \
+        --output json 2>/dev/null
+}
+
+extract_stat() {
+    local json="$1" stat="$2"
+    echo "$json" | jq -r "[.Datapoints[].$stat] | if length == 0 then null else (add / length) end // empty" 2>/dev/null
+}
+
+extract_max() {
+    local json="$1" stat="$2"
+    echo "$json" | jq -r "[.Datapoints[].$stat] | if length == 0 then null else max end // empty" 2>/dev/null
+}
+
+extract_min() {
+    local json="$1" stat="$2"
+    echo "$json" | jq -r "[.Datapoints[].$stat] | if length == 0 then null else min end // empty" 2>/dev/null
+}
+
+datapoint_count() {
+    local json="$1"
+    echo "$json" | jq '.Datapoints | length' 2>/dev/null
+}
+
+print_bar() {
+    local value="$1" max="$2" label="$3" unit="${4:-%}"
+    local pct
+    if [[ "$max" == "0" || -z "$max" ]]; then
+        pct=0
+    else
+        pct=$(echo "scale=0; $value * 100 / $max" | bc)
+    fi
+
+    local bar_width=30
+    local filled
+    filled=$(echo "scale=0; $pct * $bar_width / 100" | bc)
+    if [[ "$filled" -gt "$bar_width" ]]; then filled=$bar_width; fi
+
+    local bar=""
+    local color="$GREEN"
+    if [[ "$pct" -gt "$CPU_HIGH" ]]; then color="$RED"
+    elif [[ "$pct" -gt "$CPU_LOW" ]]; then color="$YELLOW"
+    fi
+
+    for ((i=0; i<filled; i++)); do bar+="█"; done
+    for ((i=filled; i<bar_width; i++)); do bar+="░"; done
+
+    printf "      %-22s ${color}%s${NC} %6.1f%s\n" "$label" "$bar" "$value" "$unit"
+}
+
+verdict() {
+    local level="$1" message="$2"
+    case "$level" in
+        ok)    echo -e "      ${GREEN}✓ $message${NC}" ;;
+        warn)  echo -e "      ${YELLOW}⚠ $message${NC}" ;;
+        alert) echo -e "      ${RED}✗ $message${NC}" ;;
+        info)  echo -e "      ${DIM}ℹ $message${NC}" ;;
+    esac
+}
+
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+echo -e "${BOLD}${BLUE}  AWS Rightsizing Advisor${NC}"
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+echo -e "${DIM}  Period: last $DAYS days ($START_TIME → $END_TIME)${NC}"
+echo -e "${DIM}  Thresholds: CPU/Mem low <${CPU_LOW}% | high >${CPU_HIGH}% | Storage >${STORAGE_HIGH}%${NC}"
+echo ""
+
+findings_count=0
+
+# ═════════════════════════════════════════════════════════════
+# RDS INSTANCES
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  RDS INSTANCES${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+rds_instances=$(aws rds describe-db-instances --output json 2>/dev/null || echo '{"DBInstances":[]}')
+rds_count=$(echo "$rds_instances" | jq '.DBInstances | length')
+
+if [[ "$rds_count" -eq 0 ]]; then
+    echo -e "    ${DIM}No RDS instances found${NC}"
+else
+    while IFS='|' read -r id class engine version storage max_storage multi_az storage_type status; do
+        echo ""
+        echo -e "    ${BOLD}$id${NC}  ($class, $engine $version, ${storage}GB $storage_type)"
+        echo -e "    ${DIM}Status: $status | Multi-AZ: $multi_az | Max storage: $max_storage${NC}"
+
+        # CPU utilization
+        cpu_json=$(get_metric "AWS/RDS" "CPUUtilization" "Average" "DBInstanceIdentifier" "$id")
+        cpu_avg=$(extract_stat "$cpu_json" "Average")
+        cpu_max_json=$(get_metric "AWS/RDS" "CPUUtilization" "Maximum" "DBInstanceIdentifier" "$id")
+        cpu_max=$(extract_max "$cpu_max_json" "Maximum")
+
+        if [[ -n "$cpu_avg" ]]; then
+            print_bar "$cpu_avg" 100 "CPU avg"
+            if [[ -n "$cpu_max" ]]; then
+                printf "      %-22s %44.1f%s\n" "CPU peak" "$cpu_max" "%"
+            fi
+            cpu_int=$(echo "$cpu_avg" | cut -d'.' -f1)
+            if [[ "$cpu_int" -lt "$CPU_LOW" ]]; then
+                verdict "warn" "CPU avg ${cpu_avg}% — instance may be over-provisioned"
+                findings_count=$((findings_count + 1))
+            elif [[ "$cpu_int" -gt "$CPU_HIGH" ]]; then
+                verdict "alert" "CPU avg ${cpu_avg}% — consider upgrading instance class"
+                findings_count=$((findings_count + 1))
+            else
+                verdict "ok" "CPU utilization is healthy"
+            fi
+        fi
+
+        # Freeable memory
+        mem_json=$(get_metric "AWS/RDS" "FreeableMemory" "Average" "DBInstanceIdentifier" "$id")
+        mem_avg=$(extract_stat "$mem_json" "Average")
+        if [[ -n "$mem_avg" ]]; then
+            mem_mb=$(echo "scale=0; $mem_avg / 1048576" | bc)
+            # Estimate total memory by instance class
+            case "$class" in
+                db.t4g.micro|db.t3.micro)   total_mem_mb=1024 ;;
+                db.t4g.small|db.t3.small)   total_mem_mb=2048 ;;
+                db.t4g.medium|db.t3.medium) total_mem_mb=4096 ;;
+                db.t4g.large|db.t3.large)   total_mem_mb=8192 ;;
+                db.r6g.large|db.r5.large)   total_mem_mb=16384 ;;
+                *)                          total_mem_mb=0 ;;
+            esac
+            if [[ "$total_mem_mb" -gt 0 ]]; then
+                used_mb=$((total_mem_mb - mem_mb))
+                mem_pct=$(echo "scale=1; $used_mb * 100 / $total_mem_mb" | bc)
+                print_bar "$mem_pct" 100 "Memory used" "%"
+                printf "      %-22s %39s / %s\n" "" "${used_mb}MB used" "${total_mem_mb}MB total"
+                mem_pct_int=$(echo "$mem_pct" | cut -d'.' -f1)
+                if [[ "$mem_pct_int" -lt "$MEM_LOW" ]]; then
+                    verdict "warn" "Memory usage ${mem_pct}% — could downsize instance"
+                    findings_count=$((findings_count + 1))
+                elif [[ "$mem_pct_int" -gt "$MEM_HIGH" ]]; then
+                    verdict "alert" "Memory usage ${mem_pct}% — consider upgrading"
+                    findings_count=$((findings_count + 1))
+                else
+                    verdict "ok" "Memory utilization is healthy"
+                fi
+            else
+                printf "      %-22s %40s MB free\n" "Freeable memory" "$mem_mb"
+            fi
+        fi
+
+        # Storage usage
+        storage_json=$(get_metric "AWS/RDS" "FreeStorageSpace" "Average" "DBInstanceIdentifier" "$id")
+        storage_avg=$(extract_stat "$storage_json" "Average")
+        if [[ -n "$storage_avg" ]]; then
+            free_gb=$(echo "scale=1; $storage_avg / 1073741824" | bc)
+            used_gb=$(echo "scale=1; $storage - $free_gb" | bc)
+            storage_pct=$(echo "scale=1; $used_gb * 100 / $storage" | bc)
+            print_bar "$storage_pct" 100 "Storage used" "%"
+            printf "      %-22s %38s / %sGB\n" "" "${used_gb}GB used" "$storage"
+            storage_int=$(echo "$storage_pct" | cut -d'.' -f1)
+            if [[ "$storage_int" -gt "$STORAGE_HIGH" ]]; then
+                verdict "alert" "Storage ${storage_pct}% full — consider expanding"
+                findings_count=$((findings_count + 1))
+            elif [[ "$storage_int" -lt 10 ]]; then
+                verdict "warn" "Storage only ${storage_pct}% used — allocated storage may be excessive"
+                findings_count=$((findings_count + 1))
+            else
+                verdict "ok" "Storage utilization is healthy"
+            fi
+        fi
+
+        # Database connections
+        conn_json=$(get_metric "AWS/RDS" "DatabaseConnections" "Average" "DBInstanceIdentifier" "$id")
+        conn_avg=$(extract_stat "$conn_json" "Average")
+        conn_max_json=$(get_metric "AWS/RDS" "DatabaseConnections" "Maximum" "DBInstanceIdentifier" "$id")
+        conn_max=$(extract_max "$conn_max_json" "Maximum")
+        if [[ -n "$conn_avg" ]]; then
+            printf "      %-22s %40.1f avg, %.0f peak\n" "DB connections" "$conn_avg" "${conn_max:-0}"
+            conn_int=$(echo "$conn_avg" | cut -d'.' -f1)
+            if [[ "$conn_int" -lt "$CONN_LOW" && "$conn_int" -gt 0 ]]; then
+                verdict "info" "Low connection count — typical for small workloads"
+            elif [[ "$conn_int" -eq 0 ]]; then
+                verdict "warn" "Zero connections — is this database in use?"
+                findings_count=$((findings_count + 1))
+            fi
+        fi
+
+        # Read/Write IOPS
+        read_json=$(get_metric "AWS/RDS" "ReadIOPS" "Average" "DBInstanceIdentifier" "$id")
+        write_json=$(get_metric "AWS/RDS" "WriteIOPS" "Average" "DBInstanceIdentifier" "$id")
+        read_avg=$(extract_stat "$read_json" "Average")
+        write_avg=$(extract_stat "$write_json" "Average")
+        if [[ -n "$read_avg" && -n "$write_avg" ]]; then
+            printf "      %-22s %40.1f read, %.1f write\n" "IOPS (avg)" "$read_avg" "$write_avg"
+        fi
+
+        # Read/Write Latency
+        rlat_json=$(get_metric "AWS/RDS" "ReadLatency" "Average" "DBInstanceIdentifier" "$id")
+        wlat_json=$(get_metric "AWS/RDS" "WriteLatency" "Average" "DBInstanceIdentifier" "$id")
+        rlat_avg=$(extract_stat "$rlat_json" "Average")
+        wlat_avg=$(extract_stat "$wlat_json" "Average")
+        if [[ -n "$rlat_avg" && -n "$wlat_avg" ]]; then
+            rlat_ms=$(echo "scale=2; $rlat_avg * 1000" | bc)
+            wlat_ms=$(echo "scale=2; $wlat_avg * 1000" | bc)
+            printf "      %-22s %38s ms read, %s ms write\n" "Latency (avg)" "$rlat_ms" "$wlat_ms"
+            rlat_check=$(echo "$rlat_ms > 20" | bc)
+            wlat_check=$(echo "$wlat_ms > 20" | bc)
+            if [[ "$rlat_check" -eq 1 || "$wlat_check" -eq 1 ]]; then
+                verdict "alert" "High latency detected — check IOPS limits or storage type"
+                findings_count=$((findings_count + 1))
+            fi
+        fi
+
+        # Rightsizing suggestion
+        if [[ -n "$cpu_avg" ]]; then
+            cpu_int=$(echo "$cpu_avg" | cut -d'.' -f1)
+            if [[ "$cpu_int" -lt "$CPU_LOW" && "$class" != "db.t4g.micro" && "$class" != "db.t3.micro" ]]; then
+                case "$class" in
+                    db.t4g.small|db.t3.small)     suggest="db.t4g.micro (~\$12/mo)" ;;
+                    db.t4g.medium|db.t3.medium)   suggest="db.t4g.small (~\$24/mo)" ;;
+                    db.t4g.large|db.t3.large)     suggest="db.t4g.medium (~\$48/mo)" ;;
+                    db.r6g.large|db.r5.large)     suggest="db.r6g.medium or db.t4g.large" ;;
+                    *)                            suggest="one size smaller" ;;
+                esac
+                verdict "warn" "SUGGESTION: Downsize to $suggest"
+            elif [[ "$cpu_int" -lt "$CPU_LOW" && ("$class" == "db.t4g.micro" || "$class" == "db.t3.micro") ]]; then
+                verdict "ok" "Already smallest instance — no further downsizing available"
+            fi
+        fi
+    done < <(echo "$rds_instances" | jq -r '.DBInstances[] | "\(.DBInstanceIdentifier)|\(.DBInstanceClass)|\(.Engine)|\(.EngineVersion)|\(.AllocatedStorage)|\(.MaxAllocatedStorage // "n/a")|\(.MultiAZ)|\(.StorageType)|\(.DBInstanceStatus)"')
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# ECS FARGATE TASKS
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  ECS FARGATE SERVICES${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+clusters=$(aws ecs list-clusters --query 'clusterArns[*]' --output json 2>/dev/null || echo '[]')
+while IFS= read -r cluster_arn; do
+    cluster_name=$(echo "$cluster_arn" | rev | cut -d'/' -f1 | rev)
+    services=$(aws ecs list-services --cluster "$cluster_name" --query 'serviceArns[*]' --output json 2>/dev/null || echo '[]')
+
+    while IFS= read -r service_arn; do
+        service_name=$(echo "$service_arn" | rev | cut -d'/' -f1 | rev)
+        service_info=$(aws ecs describe-services --cluster "$cluster_name" --services "$service_name" \
+            --query 'services[0].{desired:desiredCount,running:runningCount,taskDef:taskDefinition}' --output json 2>/dev/null || echo '{}')
+        task_def=$(echo "$service_info" | jq -r '.taskDef')
+        desired=$(echo "$service_info" | jq -r '.desired')
+        running=$(echo "$service_info" | jq -r '.running')
+        task_spec=$(aws ecs describe-task-definition --task-definition "$task_def" \
+            --query 'taskDefinition.{cpu:cpu,memory:memory,containers:containerDefinitions[*].name}' --output json 2>/dev/null || echo '{}')
+        cpu=$(echo "$task_spec" | jq -r '.cpu')
+        mem=$(echo "$task_spec" | jq -r '.memory')
+        containers=$(echo "$task_spec" | jq -r '.containers | join(", ")')
+
+        cpu_vcpu=$(echo "scale=2; $cpu / 1024" | bc)
+        mem_gb=$(echo "scale=2; $mem / 1024" | bc)
+        # Fargate us-east-1 Linux/x86 pricing (2026-01)
+        monthly_cost=$(printf '%.2f' "$(echo "($cpu_vcpu * 0.04048 + $mem_gb * 0.004445) * 730 * $desired" | bc -l)")
+
+        echo ""
+        echo -e "    ${BOLD}${cluster_name}/${service_name}${NC}"
+        echo -e "    ${DIM}Containers: $containers | Tasks: $running/$desired running | ${cpu} CPU (${cpu_vcpu} vCPU) / ${mem}MB (${mem_gb}GB) | ~\$${monthly_cost}/mo${NC}"
+
+        # CPU utilization
+        ecs_cpu_json=$(get_metric "AWS/ECS" "CPUUtilization" "Average" "ClusterName" "$cluster_name" "ServiceName" "$service_name")
+        ecs_cpu_avg=$(extract_stat "$ecs_cpu_json" "Average")
+        ecs_cpu_max_json=$(get_metric "AWS/ECS" "CPUUtilization" "Maximum" "ClusterName" "$cluster_name" "ServiceName" "$service_name")
+        ecs_cpu_max=$(extract_max "$ecs_cpu_max_json" "Maximum")
+        points=$(datapoint_count "$ecs_cpu_json")
+
+        if [[ -n "$ecs_cpu_avg" && "$points" -gt 0 ]]; then
+            print_bar "$ecs_cpu_avg" 100 "CPU avg"
+            if [[ -n "$ecs_cpu_max" ]]; then
+                printf "      %-22s %44.1f%s\n" "CPU peak" "$ecs_cpu_max" "%"
+            fi
+            ecs_cpu_int=$(echo "$ecs_cpu_avg" | cut -d'.' -f1)
+            if [[ "$ecs_cpu_int" -lt "$CPU_LOW" ]]; then
+                verdict "warn" "CPU avg ${ecs_cpu_avg}% — task may be over-provisioned"
+                findings_count=$((findings_count + 1))
+            elif [[ "$ecs_cpu_int" -gt "$CPU_HIGH" ]]; then
+                verdict "alert" "CPU avg ${ecs_cpu_avg}% — consider adding more CPU or tasks"
+                findings_count=$((findings_count + 1))
+            else
+                verdict "ok" "CPU utilization is healthy"
+            fi
+        else
+            verdict "info" "No CPU metrics available (service may be newly deployed)"
+        fi
+
+        # Memory utilization
+        ecs_mem_json=$(get_metric "AWS/ECS" "MemoryUtilization" "Average" "ClusterName" "$cluster_name" "ServiceName" "$service_name")
+        ecs_mem_avg=$(extract_stat "$ecs_mem_json" "Average")
+        ecs_mem_max_json=$(get_metric "AWS/ECS" "MemoryUtilization" "Maximum" "ClusterName" "$cluster_name" "ServiceName" "$service_name")
+        ecs_mem_max=$(extract_max "$ecs_mem_max_json" "Maximum")
+
+        if [[ -n "$ecs_mem_avg" ]]; then
+            used_mem_mb=$(echo "scale=0; $ecs_mem_avg * $mem / 100" | bc)
+            print_bar "$ecs_mem_avg" 100 "Memory avg"
+            printf "      %-22s %37s MB / %s MB\n" "" "${used_mem_mb}" "$mem"
+            if [[ -n "$ecs_mem_max" ]]; then
+                printf "      %-22s %44.1f%s\n" "Memory peak" "$ecs_mem_max" "%"
+            fi
+            ecs_mem_int=$(echo "$ecs_mem_avg" | cut -d'.' -f1)
+            if [[ "$ecs_mem_int" -lt "$MEM_LOW" ]]; then
+                verdict "warn" "Memory avg ${ecs_mem_avg}% — could reduce memory allocation"
+                findings_count=$((findings_count + 1))
+            elif [[ "$ecs_mem_int" -gt "$MEM_HIGH" ]]; then
+                verdict "alert" "Memory avg ${ecs_mem_avg}% — consider increasing memory"
+                findings_count=$((findings_count + 1))
+            else
+                verdict "ok" "Memory utilization is healthy"
+            fi
+        fi
+
+        # Fargate rightsizing suggestion
+        if [[ -n "$ecs_cpu_avg" && -n "$ecs_mem_avg" ]]; then
+            ecs_cpu_int=$(echo "$ecs_cpu_avg" | cut -d'.' -f1)
+            ecs_mem_int=$(echo "$ecs_mem_avg" | cut -d'.' -f1)
+            ecs_cpu_peak_int=$(echo "${ecs_cpu_max:-0}" | cut -d'.' -f1)
+            ecs_mem_peak_int=$(echo "${ecs_mem_max:-0}" | cut -d'.' -f1)
+
+            if [[ "$ecs_cpu_int" -lt "$CPU_LOW" && "$ecs_mem_int" -lt "$MEM_LOW" && "$cpu" != "256" ]]; then
+                # Suggest downsizing — find valid Fargate combo
+                case "${cpu}" in
+                    4096)  suggest="2048 CPU / $(echo "scale=0; $mem / 2" | bc) MB" ;;
+                    2048)  suggest="1024 CPU / $(echo "scale=0; $mem / 2" | bc) MB" ;;
+                    1024)  suggest="512 CPU / $(echo "scale=0; $mem / 2" | bc) MB" ;;
+                    512)   suggest="256 CPU / 512 MB" ;;
+                    *)     suggest="one size smaller" ;;
+                esac
+                new_cpu_vcpu=$(echo "scale=2; $cpu / 2 / 1024" | bc)
+                new_mem_gb=$(echo "scale=2; $mem / 2 / 1024" | bc)
+                new_cost=$(printf '%.2f' "$(echo "($new_cpu_vcpu * 0.04048 + $new_mem_gb * 0.004445) * 730 * $desired" | bc -l)")
+                savings=$(printf '%.2f' "$(echo "$monthly_cost - $new_cost" | bc -l)")
+                verdict "warn" "SUGGESTION: Downsize to $suggest (~\$${new_cost}/mo, save \$${savings}/mo)"
+            elif [[ "$ecs_cpu_int" -lt "$CPU_LOW" && "$ecs_mem_int" -lt "$MEM_LOW" && "$cpu" == "256" ]]; then
+                verdict "ok" "Already smallest Fargate size — no further downsizing"
+            elif [[ "$ecs_cpu_peak_int" -gt 90 || "$ecs_mem_peak_int" -gt 90 ]]; then
+                verdict "alert" "SUGGESTION: Peaks near limit — consider upsizing or adding tasks"
+            fi
+        fi
+    done < <(jq -r '.[]' <<<"$services")
+done < <(jq -r '.[]' <<<"$clusters")
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# APPLICATION LOAD BALANCERS
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  APPLICATION LOAD BALANCERS${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+albs=$(aws elbv2 describe-load-balancers --query "LoadBalancers[?Type==\`application\`]" --output json 2>/dev/null || echo '[]')
+alb_count=$(echo "$albs" | jq 'length')
+
+for i in $(seq 0 $((alb_count - 1))); do
+    alb_name=$(echo "$albs" | jq -r ".[$i].LoadBalancerName")
+    alb_arn=$(echo "$albs" | jq -r ".[$i].LoadBalancerArn")
+    alb_arn_suffix="${alb_arn#*loadbalancer/}"
+
+    echo ""
+    echo -e "    ${BOLD}$alb_name${NC}"
+
+    # Request count
+    req_json=$(get_metric "AWS/ApplicationELB" "RequestCount" "Sum" "LoadBalancer" "$alb_arn_suffix")
+    req_total=0
+    req_points=$(datapoint_count "$req_json")
+    if [[ "$req_points" -gt 0 ]]; then
+        req_total=$(echo "$req_json" | jq '[.Datapoints[].Sum] | add // 0' 2>/dev/null)
+        req_per_day=$(echo "scale=0; $req_total / $DAYS" | bc)
+        req_per_min=$(echo "scale=1; $req_per_day / 1440" | bc)
+        printf "      %-22s %30s total (%s/day, %s/min)\n" "Requests (${DAYS}d)" "$req_total" "$req_per_day" "$req_per_min"
+    fi
+
+    # 5xx errors
+    err_json=$(get_metric "AWS/ApplicationELB" "HTTPCode_Target_5XX_Count" "Sum" "LoadBalancer" "$alb_arn_suffix")
+    err_points=$(datapoint_count "$err_json")
+    if [[ "$err_points" -gt 0 ]]; then
+        err_total=$(echo "$err_json" | jq '[.Datapoints[].Sum] | add // 0' 2>/dev/null)
+        if [[ $(echo "$err_total > 0" | bc) -eq 1 && $(echo "$req_total > 0" | bc) -eq 1 ]]; then
+            err_pct=$(echo "scale=3; $err_total * 100 / $req_total" | bc)
+            printf "      %-22s %30s (%.3f%%)\n" "5xx errors (${DAYS}d)" "$err_total" "$err_pct"
+            err_check=$(echo "$err_pct > 1" | bc)
+            if [[ "$err_check" -eq 1 ]]; then
+                verdict "alert" "Error rate ${err_pct}% — investigate application health"
+                findings_count=$((findings_count + 1))
+            fi
+        else
+            printf "      %-22s %44s\n" "5xx errors (${DAYS}d)" "0"
+        fi
+    else
+        printf "      %-22s %44s\n" "5xx errors (${DAYS}d)" "0"
+    fi
+
+    # Response time
+    resp_json=$(get_metric "AWS/ApplicationELB" "TargetResponseTime" "Average" "LoadBalancer" "$alb_arn_suffix")
+    resp_avg=$(extract_stat "$resp_json" "Average")
+    resp_max_json=$(get_metric "AWS/ApplicationELB" "TargetResponseTime" "Maximum" "LoadBalancer" "$alb_arn_suffix")
+    resp_max=$(extract_max "$resp_max_json" "Maximum")
+    if [[ -n "$resp_avg" ]]; then
+        resp_ms=$(printf "%.1f" "$(echo "$resp_avg * 1000" | bc)")
+        resp_max_ms=$(printf "%.1f" "$(echo "${resp_max:-0} * 1000" | bc)")
+        printf "      %-22s %38s ms avg, %s ms peak\n" "Response time" "$resp_ms" "$resp_max_ms"
+        resp_check=$(echo "$resp_ms > 1000" | bc)
+        if [[ "$resp_check" -eq 1 ]]; then
+            verdict "alert" "Avg response time >1s — investigate performance"
+            findings_count=$((findings_count + 1))
+        fi
+    fi
+
+    # Active connections
+    conn_json=$(get_metric "AWS/ApplicationELB" "ActiveConnectionCount" "Average" "LoadBalancer" "$alb_arn_suffix")
+    conn_avg=$(extract_stat "$conn_json" "Average")
+    if [[ -n "$conn_avg" ]]; then
+        printf "      %-22s %40.1f avg\n" "Active connections" "$conn_avg"
+    fi
+
+    # Cost assessment
+    req_total_int=$(echo "$req_total" | cut -d'.' -f1)
+    if [[ "$req_total_int" -lt 1000 ]]; then
+        verdict "warn" "Very low traffic ($req_total requests in ${DAYS}d) — ALB costs ~\$16/mo regardless"
+        findings_count=$((findings_count + 1))
+    elif [[ "$req_total_int" -lt 10000 ]]; then
+        verdict "info" "Low traffic — ALB base cost dominates"
+    else
+        verdict "ok" "Traffic levels justify ALB"
+    fi
+done
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# NAT GATEWAY
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  NAT GATEWAYS${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+nat_gws=$(aws ec2 describe-nat-gateways --filter "Name=state,Values=available" --query 'NatGateways[*].{id:NatGatewayId,subnet:SubnetId,state:State}' --output json 2>/dev/null || echo '[]')
+nat_count=$(echo "$nat_gws" | jq 'length')
+
+if [[ "$nat_count" -eq 0 ]]; then
+    echo -e "    ${DIM}No NAT gateways found${NC}"
+else
+    for i in $(seq 0 $((nat_count - 1))); do
+        nat_id=$(echo "$nat_gws" | jq -r ".[$i].id")
+        echo ""
+        echo -e "    ${BOLD}$nat_id${NC}  ${DIM}(~\$32/mo base + \$0.045/GB)${NC}"
+
+        # Bytes processed
+        bytes_out_json=$(get_metric "AWS/NATGateway" "BytesOutToDestination" "Sum" "NatGatewayId" "$nat_id")
+        bytes_in_json=$(get_metric "AWS/NATGateway" "BytesInFromDestination" "Sum" "NatGatewayId" "$nat_id")
+        bytes_out=$(echo "$bytes_out_json" | jq '[.Datapoints[].Sum] | add // 0' 2>/dev/null)
+        bytes_in=$(echo "$bytes_in_json" | jq '[.Datapoints[].Sum] | add // 0' 2>/dev/null)
+
+        bytes_out_gb=$(echo "scale=2; $bytes_out / 1073741824" | bc)
+        bytes_in_gb=$(echo "scale=2; $bytes_in / 1073741824" | bc)
+        data_cost=$(echo "scale=2; ($bytes_out_gb + $bytes_in_gb) * 0.045" | bc)
+
+        printf "      %-22s %38s GB out, %s GB in\n" "Data processed (${DAYS}d)" "$bytes_out_gb" "$bytes_in_gb"
+        printf "      %-22s %43s\n" "Data cost (${DAYS}d)" "\$${data_cost}"
+
+        # Active connections
+        nat_conn_json=$(get_metric "AWS/NATGateway" "ActiveConnectionCount" "Average" "NatGatewayId" "$nat_id")
+        nat_conn=$(extract_stat "$nat_conn_json" "Average")
+        if [[ -n "$nat_conn" ]]; then
+            printf "      %-22s %40.0f avg\n" "Active connections" "$nat_conn"
+        fi
+
+        total_gb=$(echo "scale=2; $bytes_out_gb + $bytes_in_gb" | bc)
+        if [[ $(echo "$total_gb < 1" | bc) -eq 1 ]]; then
+            verdict "warn" "Very low data transfer — NAT GW costs \$32/mo minimum"
+            verdict "info" "Consider VPC endpoints for S3/DynamoDB to reduce NAT traffic"
+            findings_count=$((findings_count + 1))
+        fi
+    done
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# EC2 INSTANCES (non-Fargate)
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  EC2 INSTANCES${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+ec2_raw=$(aws ec2 describe-instances \
+    --filters "Name=instance-state-name,Values=running,stopped" \
+    --query "Reservations[*].Instances[*].{id:InstanceId,type:InstanceType,state:State.Name,name:Tags[?Key==\`Name\`].Value|[0],launch:LaunchTime}" \
+    --output json 2>/dev/null || echo '[]')
+ec2_instances=$(echo "$ec2_raw" | jq 'flatten')
+ec2_count=$(echo "$ec2_instances" | jq 'length')
+
+if [[ "$ec2_count" -eq 0 ]]; then
+    echo -e "    ${DIM}No EC2 instances found (good — using Fargate)${NC}"
+else
+    for i in $(seq 0 $((ec2_count - 1))); do
+        ec2_id=$(echo "$ec2_instances" | jq -r ".[$i].id")
+        ec2_type=$(echo "$ec2_instances" | jq -r ".[$i].type")
+        ec2_state=$(echo "$ec2_instances" | jq -r ".[$i].state")
+        ec2_name=$(echo "$ec2_instances" | jq -r ".[$i].name // \"(unnamed)\"")
+        ec2_launch=$(echo "$ec2_instances" | jq -r ".[$i].launch")
+
+        echo ""
+        echo -e "    ${BOLD}$ec2_name${NC} ($ec2_id, $ec2_type, $ec2_state)"
+        echo -e "    ${DIM}Launched: $ec2_launch${NC}"
+
+        if [[ "$ec2_state" == "stopped" ]]; then
+            verdict "warn" "Instance is stopped — still paying for EBS volumes"
+            verdict "info" "Consider terminating if no longer needed"
+            findings_count=$((findings_count + 1))
+            continue
+        fi
+
+        ec2_cpu_json=$(get_metric "AWS/EC2" "CPUUtilization" "Average" "InstanceId" "$ec2_id")
+        ec2_cpu_avg=$(extract_stat "$ec2_cpu_json" "Average")
+        if [[ -n "$ec2_cpu_avg" ]]; then
+            print_bar "$ec2_cpu_avg" 100 "CPU avg"
+            ec2_cpu_int=$(echo "$ec2_cpu_avg" | cut -d'.' -f1)
+            if [[ "$ec2_cpu_int" -lt "$CPU_LOW" ]]; then
+                verdict "warn" "CPU avg ${ec2_cpu_avg}% — instance is under-utilized"
+                findings_count=$((findings_count + 1))
+            fi
+        fi
+
+        # Network
+        net_in_json=$(get_metric "AWS/EC2" "NetworkIn" "Average" "InstanceId" "$ec2_id")
+        net_out_json=$(get_metric "AWS/EC2" "NetworkOut" "Average" "InstanceId" "$ec2_id")
+        net_in=$(extract_stat "$net_in_json" "Average")
+        net_out=$(extract_stat "$net_out_json" "Average")
+        if [[ -n "$net_in" && -n "$net_out" ]]; then
+            net_in_kb=$(echo "scale=1; $net_in / 1024" | bc)
+            net_out_kb=$(echo "scale=1; $net_out / 1024" | bc)
+            printf "      %-22s %35s KB/s in, %s KB/s out\n" "Network (avg)" "$net_in_kb" "$net_out_kb"
+        fi
+    done
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# CLOUDWATCH LOG GROUPS
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  CLOUDWATCH LOG GROUPS${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+log_groups=$(aws logs describe-log-groups --query "logGroups[?storedBytes > \`0\`].{name:logGroupName,bytes:storedBytes,retention:retentionInDays}" --output json 2>/dev/null || echo '[]')
+log_count=$(echo "$log_groups" | jq 'length')
+
+if [[ "$log_count" -gt 0 ]]; then
+    echo ""
+    printf "    ${BOLD}%-50s %10s %10s${NC}\n" "Log Group" "Size" "Retention"
+    echo -e "    ${DIM}$(printf '%*s' 70 '' | tr ' ' '─')${NC}"
+
+    while IFS='|' read -r name bytes retention; do
+        size_mb=$(echo "scale=1; $bytes / 1048576" | bc)
+        if [[ $(echo "$size_mb > 1024" | bc) -eq 1 ]]; then
+            size_display="$(echo "scale=1; $size_mb / 1024" | bc) GB"
+        else
+            size_display="${size_mb} MB"
+        fi
+        printf "    %-50s %10s %8s d\n" "$name" "$size_display" "$retention"
+    done < <(echo "$log_groups" | jq -r 'sort_by(-.bytes) | .[] | "\(.name)|\(.bytes)|\(.retention // "never")"')
+
+    total_bytes=$(echo "$log_groups" | jq '[.[].bytes] | add')
+    total_mb=$(echo "scale=1; $total_bytes / 1048576" | bc)
+    total_cost=$(echo "scale=2; $total_mb * 0.03 / 1024" | bc)
+    echo ""
+    printf "    %-50s %10s\n" "Total stored" "${total_mb} MB"
+    printf "    %-50s %10s\n" "Storage cost" "~\$${total_cost}/mo"
+
+    # Check for groups without retention
+    no_retention=$(aws logs describe-log-groups --query 'logGroups[?!retentionInDays].logGroupName' --output json 2>/dev/null | jq 'length')
+    if [[ "$no_retention" -gt 0 ]]; then
+        verdict "warn" "$no_retention log groups have no retention policy (logs kept forever)"
+        findings_count=$((findings_count + 1))
+    fi
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# SUMMARY
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+if [[ "$findings_count" -eq 0 ]]; then
+    echo -e "${BOLD}${GREEN}  No issues found — infrastructure looks well-sized!${NC}"
+elif [[ "$findings_count" -lt 4 ]]; then
+    echo -e "${BOLD}${YELLOW}  $findings_count findings — minor optimisation opportunities${NC}"
+else
+    echo -e "${BOLD}${RED}  $findings_count findings — review recommendations above${NC}"
+fi
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+echo ""
diff --git a/lib/aws/aws-security.sh b/lib/aws/aws-security.sh
new file mode 100755
index 0000000..90b2ac5
--- /dev/null
+++ b/lib/aws/aws-security.sh
@@ -0,0 +1,530 @@
+#!/usr/bin/env bash
+#
+# aws-security.sh - Security posture scan for common AWS services
+#
+
+set -euo pipefail
+
+# ---- CONFIGURATION ----
+AWS_PROFILE_NAME="${AWS_PROFILE_NAME:-default}"
+AWS_REGION="${AWS_REGION:-us-east-1}"
+# ---- END CONFIGURATION ----
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+source "$SCRIPT_DIR/_aws-common.sh"
+
+show_help() {
+    cat << EOF
+Usage:
+  $0
+
+Runs a read-only AWS security review covering WAF, security groups, IAM,
+S3 public access blocks, RDS public exposure, EBS encryption, secrets
+rotation, and CloudTrail logging.
+EOF
+}
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        -*)
+            echo -e "${RED}Error: unknown option '$1'${NC}"
+            show_help
+            exit 1
+            ;;
+        *)
+            echo -e "${RED}Error: unexpected argument '$1'${NC}"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+require_unix
+ensure_aws_cli
+require_cmd jq "Install jq: https://jqlang.github.io/jq/download/"
+require_cmd bc "Install bc via your package manager."
+require_aws_auth
+
+FINDINGS_FILE=$(mktemp)
+trap 'rm -f "$FINDINGS_FILE"' EXIT
+
+add_finding() {
+    local level="$1" resource="$2" rtype="$3" message="$4"
+    echo "${level}|${resource}|${rtype}|${message}" >> "$FINDINGS_FILE"
+}
+
+verdict() {
+    local level="$1" message="$2"
+    case "$level" in
+        ok)    echo -e "      ${GREEN}✓ $message${NC}" ;;
+        warn)  echo -e "      ${YELLOW}⚠ $message${NC}" ;;
+        alert) echo -e "      ${RED}✗ $message${NC}" ;;
+        info)  echo -e "      ${DIM}ℹ $message${NC}" ;;
+    esac
+}
+
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+echo -e "${BOLD}${BLUE}  AWS Security Scan${NC}"
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+echo -e "${DIM}  Region: ${AWS_DEFAULT_REGION:-$AWS_REGION}${NC}"
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# WAF ANALYSIS
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  WAF WEB ACLs${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+waf_list=$(aws wafv2 list-web-acls --scope REGIONAL --output json 2>/dev/null || echo '{"WebACLs":[]}')
+waf_count=$(echo "$waf_list" | jq '.WebACLs | length')
+
+if [[ "$waf_count" -eq 0 ]]; then
+    echo ""
+    verdict "alert" "No WAF configured — web applications have no WAF protection"
+    add_finding "alert" "WAF" "WAF" "No WAF configured"
+else
+    while IFS='|' read -r waf_name waf_arn waf_id; do
+        echo ""
+        echo -e "    ${BOLD}$waf_name${NC}"
+
+        resource_arns=$(aws wafv2 list-resources-for-web-acl --web-acl-arn "$waf_arn" --output json 2>/dev/null)
+        resource_count=$(echo "$resource_arns" | jq '.ResourceArns | length')
+        if [[ "$resource_count" -eq 0 ]]; then
+            verdict "alert" "Not associated with any resource — orphaned WAF (~\$8/mo wasted)"
+            add_finding "alert" "$waf_name" "WAF" "Orphaned WAF (~\$8/mo wasted)"
+        else
+            verdict "ok" "Protecting $resource_count resource(s)"
+            echo "$resource_arns" | jq -r '.ResourceArns[]' | while read -r res_arn; do
+                echo -e "      ${DIM}  → $(echo "$res_arn" | rev | cut -d'/' -f1 | rev)${NC}"
+            done
+        fi
+
+        acl_detail=$(aws wafv2 get-web-acl --name "$waf_name" --scope REGIONAL --id "$waf_id" --output json 2>/dev/null)
+        rule_count=$(echo "$acl_detail" | jq '.WebACL.Rules | length')
+
+        if [[ "$rule_count" -eq 0 ]]; then
+            verdict "alert" "No rules configured — all traffic allowed through"
+            add_finding "alert" "$waf_name" "WAF" "No rules configured"
+        else
+            managed_groups=$(echo "$acl_detail" | jq -r '
+                .WebACL.Rules[]
+                | select(.Statement.ManagedRuleGroupStatement != null)
+                | "\(.Statement.ManagedRuleGroupStatement.VendorName)/\(.Statement.ManagedRuleGroupStatement.Name)"
+            ' 2>/dev/null)
+            if [[ -n "$managed_groups" ]]; then
+                managed_count="$(printf '%s\n' "$managed_groups" | grep -c .)"
+            else
+                managed_count=0
+            fi
+            custom_count=$((rule_count - managed_count))
+            rate_rules=$(echo "$acl_detail" | jq '[.WebACL.Rules[] | select(.Statement.RateBasedStatement != null)] | length')
+
+            echo -e "      ${DIM}Rules: $rule_count total ($managed_count managed, $custom_count custom${rate_rules:+, $rate_rules rate-based})${NC}"
+
+            if [[ -n "$managed_groups" ]]; then
+                echo -e "      ${DIM}Managed rule groups:${NC}"
+                echo "$managed_groups" | while read -r group; do
+                    echo -e "        ${GREEN}✓${NC} $group"
+                done
+            else
+                verdict "info" "No AWS managed rule groups — consider AWSManagedRulesCommonRuleSet"
+                add_finding "info" "$waf_name" "WAF" "No managed rule groups"
+            fi
+
+            if [[ "$rate_rules" -eq 0 ]]; then
+                verdict "info" "No rate-based rules — consider adding rate limiting"
+                add_finding "info" "$waf_name" "WAF" "No rate-based rules"
+            fi
+        fi
+
+        # Traffic stats (last 24h)
+        now_ts=$(date -u +%Y-%m-%dT%H:%M:%S 2>/dev/null)
+        day_ago_ts=$(date -u -d "24 hours ago" +%Y-%m-%dT%H:%M:%S 2>/dev/null || date -u -v-24H +%Y-%m-%dT%H:%M:%S 2>/dev/null)
+
+        if [[ -n "$now_ts" && -n "$day_ago_ts" ]]; then
+            blocked=$(aws cloudwatch get-metric-statistics \
+                --namespace AWS/WAFV2 --metric-name BlockedRequests \
+                --start-time "$day_ago_ts" --end-time "$now_ts" \
+                --period 86400 --statistics Sum \
+                --dimensions "Name=WebACL,Value=$waf_name" "Name=Rule,Value=ALL" \
+                --output json 2>/dev/null | jq '[.Datapoints[].Sum] | add // 0')
+            allowed=$(aws cloudwatch get-metric-statistics \
+                --namespace AWS/WAFV2 --metric-name AllowedRequests \
+                --start-time "$day_ago_ts" --end-time "$now_ts" \
+                --period 86400 --statistics Sum \
+                --dimensions "Name=WebACL,Value=$waf_name" "Name=Rule,Value=ALL" \
+                --output json 2>/dev/null | jq '[.Datapoints[].Sum] | add // 0')
+
+            blocked_int=$(printf '%.0f' "$blocked")
+            allowed_int=$(printf '%.0f' "$allowed")
+            total=$((blocked_int + allowed_int))
+
+            printf "      %-22s %30s blocked / %s allowed (24h)\n" "Traffic" "$blocked_int" "$allowed_int"
+            if [[ "$total" -gt 0 ]]; then
+                block_pct=$(echo "scale=1; $blocked_int * 100 / $total" | bc)
+                printf "      %-22s %44s%%\n" "Block rate" "$block_pct"
+            fi
+        fi
+
+        # Cost breakdown: $5/ACL + $1/rule + $0.60/million requests
+        cost_acl="5.00"
+        cost_rules=$(echo "scale=2; $rule_count * 1" | bc)
+        if [[ -n "${total:-}" && "$total" -gt 0 ]]; then
+            cost_requests=$(echo "scale=2; $total * 30 / 1000000 * 0.60" | bc)
+        else
+            cost_requests="0.00"
+        fi
+        cost_total=$(echo "scale=2; $cost_acl + $cost_rules + $cost_requests" | bc)
+
+        echo ""
+        echo -e "      ${BOLD}Cost Breakdown${NC}"
+        printf "      %-22s %44s\n" "Web ACL" "\$$cost_acl/mo"
+        printf "      %-22s %44s\n" "Rules ($rule_count)" "\$$cost_rules/mo"
+        printf "      %-22s %44s\n" "Request processing" "~\$$cost_requests/mo"
+        echo -e "      ${DIM}$(printf '─%.0s' {1..52})${NC}"
+        printf "      ${BOLD}%-22s %44s${NC}\n" "Estimated total" "~\$$cost_total/mo"
+        add_finding "info" "$waf_name" "WAF" "Estimated cost: ~\$$cost_total/mo"
+    done < <(echo "$waf_list" | jq -r '.WebACLs[] | "\(.Name)|\(.ARN)|\(.Id)"')
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# SECURITY GROUPS
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  SECURITY GROUPS (open ingress)${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+
+# Fetch all security groups and filter for 0.0.0.0/0 or ::/0 ingress in jq
+# (AWS-side filter only supports IPv4 CIDR, so we filter both families client-side)
+raw_sgs=$(aws ec2 describe-security-groups \
+    --query 'SecurityGroups[*].{id:GroupId,name:GroupName,desc:Description,perms:IpPermissions}' \
+    --output json 2>/dev/null || echo '[]')
+open_sgs=$(echo "$raw_sgs" | jq '[.[] | select(.perms[]? | (.IpRanges[]?.CidrIp == "0.0.0.0/0") or (.Ipv6Ranges[]?.CidrIpv6 == "::/0"))]')
+open_sg_count=$(echo "$open_sgs" | jq 'length')
+
+if [[ "$open_sg_count" -eq 0 ]]; then
+    echo ""
+    verdict "ok" "No security groups with 0.0.0.0/0 ingress"
+else
+    echo ""
+    for i in $(seq 0 $((open_sg_count - 1))); do
+        sg_id=$(echo "$open_sgs" | jq -r ".[$i].id")
+        sg_name=$(echo "$open_sgs" | jq -r ".[$i].name")
+        sg_desc=$(echo "$open_sgs" | jq -r ".[$i].desc")
+
+        echo -e "    ${BOLD}$sg_name${NC} ($sg_id)"
+        echo -e "    ${DIM}$sg_desc${NC}"
+
+        echo "$open_sgs" | jq -r --argjson idx "$i" '
+            .[$idx].perms[] |
+            select(
+                (.IpRanges[]?.CidrIp == "0.0.0.0/0") or
+                (.Ipv6Ranges[]?.CidrIpv6 == "::/0")
+            ) |
+            "\(.FromPort // -1)|\(.ToPort // -1)"
+        ' 2>/dev/null | sort -u | while IFS='|' read -r from_port to_port; do
+            if [[ "$from_port" == "-1" ]]; then
+                port_label="ALL TRAFFIC"; level="alert"
+            elif [[ "$from_port" == "$to_port" ]]; then
+                port_label="$from_port"
+                case "$from_port" in
+                    22)   port_label="22 (SSH)"; level="alert" ;;
+                    3389) port_label="3389 (RDP)"; level="alert" ;;
+                    3306) port_label="3306 (MySQL)"; level="alert" ;;
+                    5432) port_label="5432 (PostgreSQL)"; level="alert" ;;
+                    80)   port_label="80 (HTTP)"; level="warn" ;;
+                    443)  port_label="443 (HTTPS)"; level="warn" ;;
+                    *)    level="warn" ;;
+                esac
+            else
+                port_label="$from_port-$to_port"; level="warn"
+            fi
+            verdict "$level" "Port $port_label open to the internet"
+            add_finding "$level" "$sg_name ($sg_id)" "SG" "Port $port_label open to the internet"
+        done
+        echo ""
+    done
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# IAM USERS
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  IAM USERS${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+iam_users=$(aws iam list-users --query 'Users[*].UserName' --output json 2>/dev/null || echo '[]')
+user_count=$(echo "$iam_users" | jq 'length')
+no_mfa_count=0
+
+if [[ "$user_count" -eq 0 ]]; then
+    echo -e "    ${DIM}No IAM users found${NC}"
+else
+    printf "    ${BOLD}%-25s %-8s %-15s %-10s${NC}\n" "User" "MFA" "Key Age" "Status"
+    echo -e "    ${DIM}$(printf '─%.0s' {1..60})${NC}"
+
+    for user in $(echo "$iam_users" | jq -r '.[]'); do
+        mfa_devices=$(aws iam list-mfa-devices --user-name "$user" --query 'MFADevices | length(@)' --output text 2>/dev/null || echo "0")
+        if [[ "$mfa_devices" -gt 0 ]]; then
+            mfa_status="${GREEN}Yes${NC}"
+        else
+            mfa_status="${RED}No${NC}"
+            no_mfa_count=$((no_mfa_count + 1))
+        fi
+
+        key_info=$(aws iam list-access-keys --user-name "$user" --output json 2>/dev/null)
+        active_keys=$(echo "$key_info" | jq '[.AccessKeyMetadata[] | select(.Status == "Active")] | length')
+
+        if [[ "$active_keys" -eq 0 ]]; then
+            printf "    %-25s %b     %-15s %-10s\n" "$user" "$mfa_status" "—" "—"
+        else
+            first=true
+            echo "$key_info" | jq -r '.AccessKeyMetadata[] | select(.Status == "Active") | "\(.AccessKeyId)|\(.CreateDate)"' | while IFS='|' read -r _ created; do
+                # GNU date (Linux) then BSD date (macOS) fallback
+                created_epoch=$(date -d "$created" +%s 2>/dev/null \
+                    || date -j -f "%Y-%m-%dT%H:%M:%S%z" "${created/Z/+0000}" +%s 2>/dev/null \
+                    || echo "0")
+                now_epoch=$(date +%s)
+                if [[ "$created_epoch" -gt 0 ]]; then
+                    age_days=$(( (now_epoch - created_epoch) / 86400 ))
+                else
+                    age_days=0
+                fi
+
+                age_color="" age_status="OK"
+                if [[ "$age_days" -gt 365 ]]; then
+                    age_color="$RED"; age_status="CRITICAL"
+                elif [[ "$age_days" -gt 90 ]]; then
+                    age_color="$YELLOW"; age_status="Rotate"
+                fi
+
+                if $first; then
+                    printf "    %-25s %b     ${age_color}%-15s %-10s${NC}\n" "$user" "$mfa_status" "${age_days}d" "$age_status"
+                    first=false
+                else
+                    printf "    %-25s %-8s ${age_color}%-15s %-10s${NC}\n" "" "" "${age_days}d" "$age_status"
+                fi
+            done
+        fi
+    done
+
+    echo ""
+    if [[ "$no_mfa_count" -gt 0 ]]; then
+        verdict "alert" "$no_mfa_count user(s) without MFA enabled"
+        add_finding "alert" "IAM" "IAM" "$no_mfa_count user(s) without MFA enabled"
+    else
+        verdict "ok" "All users have MFA enabled"
+    fi
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# S3 PUBLIC ACCESS
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  S3 PUBLIC ACCESS BLOCKS${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+s3_buckets=$(aws s3api list-buckets --query 'Buckets[*].Name' --output json 2>/dev/null || echo '[]')
+not_blocked_count=0
+
+for bucket in $(echo "$s3_buckets" | jq -r '.[]' 2>/dev/null); do
+    pab=$(aws s3api get-public-access-block --bucket "$bucket" 2>/dev/null || echo '{}')
+    block_acls=$(echo "$pab" | jq -r '.PublicAccessBlockConfiguration.BlockPublicAcls // false')
+    block_policy=$(echo "$pab" | jq -r '.PublicAccessBlockConfiguration.BlockPublicPolicy // false')
+    ignore_acls=$(echo "$pab" | jq -r '.PublicAccessBlockConfiguration.IgnorePublicAcls // false')
+    restrict=$(echo "$pab" | jq -r '.PublicAccessBlockConfiguration.RestrictPublicBuckets // false')
+
+    if [[ "$block_acls" == "true" && "$block_policy" == "true" && "$ignore_acls" == "true" && "$restrict" == "true" ]]; then
+        echo -e "    ${GREEN}✓${NC}  $bucket"
+    else
+        echo -e "    ${YELLOW}⚠${NC}  $bucket — ${YELLOW}partially open${NC}"
+        not_blocked_count=$((not_blocked_count + 1))
+    fi
+done
+
+echo ""
+if [[ "$not_blocked_count" -gt 0 ]]; then
+    verdict "warn" "$not_blocked_count bucket(s) without full public access blocks"
+    add_finding "warn" "S3" "S3" "$not_blocked_count bucket(s) without full public access blocks"
+else
+    verdict "ok" "All buckets have public access fully blocked"
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# RDS PUBLIC ACCESS
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  RDS PUBLIC ACCESSIBILITY${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+public_rds=$(aws rds describe-db-instances \
+    --query "DBInstances[?PubliclyAccessible==\`true\`].DBInstanceIdentifier" \
+    --output json 2>/dev/null || echo '[]')
+public_rds_count=$(echo "$public_rds" | jq 'length')
+
+if [[ "$public_rds_count" -eq 0 ]]; then
+    verdict "ok" "No publicly accessible RDS instances"
+else
+    for db_id in $(echo "$public_rds" | jq -r '.[]'); do
+        verdict "alert" "$db_id is publicly accessible"
+    done
+    add_finding "alert" "RDS" "RDS" "$public_rds_count publicly accessible database(s)"
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# EBS ENCRYPTION
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  EBS ENCRYPTION${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+unencrypted_ebs=$(aws ec2 describe-volumes \
+    --filters "Name=encrypted,Values=false" \
+    --query 'Volumes[*].{id:VolumeId,size:Size,type:VolumeType}' \
+    --output json 2>/dev/null || echo '[]')
+unencrypted_count=$(echo "$unencrypted_ebs" | jq 'length')
+
+if [[ "$unencrypted_count" -eq 0 ]]; then
+    verdict "ok" "All EBS volumes are encrypted"
+else
+    for i in $(seq 0 $((unencrypted_count - 1))); do
+        vol_id=$(echo "$unencrypted_ebs" | jq -r ".[$i].id")
+        vol_size=$(echo "$unencrypted_ebs" | jq -r ".[$i].size")
+        vol_type=$(echo "$unencrypted_ebs" | jq -r ".[$i].type")
+        verdict "warn" "$vol_id — $vol_type ${vol_size}GB — not encrypted"
+    done
+    add_finding "warn" "EBS" "EBS" "$unencrypted_count unencrypted EBS volume(s)"
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# SECRETS MANAGER ROTATION
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  SECRETS MANAGER ROTATION${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+secrets=$(aws secretsmanager list-secrets --output json 2>/dev/null || echo '{"SecretList":[]}')
+secret_count=$(echo "$secrets" | jq '.SecretList | length')
+no_rotation_count=0
+if [[ "$secret_count" -eq 0 ]]; then
+    echo -e "    ${DIM}No secrets found${NC}"
+else
+    while IFS='|' read -r name rotation_enabled _last_rotated; do
+        if [[ "$rotation_enabled" != "true" ]]; then
+            echo -e "    ${YELLOW}⚠${NC}  $name — ${YELLOW}No auto-rotation${NC}"
+            no_rotation_count=$((no_rotation_count + 1))
+        else
+            echo -e "    ${GREEN}✓${NC}  $name — Auto-rotation"
+        fi
+    done < <(echo "$secrets" | jq -r '.SecretList[] | "\(.Name)|\(.RotationEnabled // false)|\(.LastRotatedDate // "null")"')
+
+    echo ""
+    if [[ "$no_rotation_count" -gt 0 ]]; then
+        verdict "info" "$no_rotation_count secret(s) without automatic rotation"
+        add_finding "info" "Secrets Manager" "Secrets" "$no_rotation_count secret(s) without automatic rotation"
+    fi
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# CLOUDTRAIL
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${CYAN}  CLOUDTRAIL${NC}"
+echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+echo ""
+
+trails=$(aws cloudtrail describe-trails --output json 2>/dev/null || echo '{"trailList":[]}')
+trail_count=$(echo "$trails" | jq '.trailList | length')
+
+if [[ "$trail_count" -eq 0 ]]; then
+    verdict "alert" "No CloudTrail trails configured — API activity is not being logged"
+    add_finding "alert" "CloudTrail" "CloudTrail" "No CloudTrail trails"
+else
+    for i in $(seq 0 $((trail_count - 1))); do
+        trail_name=$(echo "$trails" | jq -r ".trailList[$i].Name")
+        is_multi=$(echo "$trails" | jq -r ".trailList[$i].IsMultiRegionTrail")
+        has_log_group=$(echo "$trails" | jq -r ".trailList[$i].CloudWatchLogsLogGroupArn // \"none\"")
+        s3_bucket=$(echo "$trails" | jq -r ".trailList[$i].S3BucketName")
+
+        status=$(aws cloudtrail get-trail-status --name "$trail_name" --output json 2>/dev/null)
+        is_logging=$(echo "$status" | jq -r '.IsLogging')
+
+        echo -e "    ${BOLD}$trail_name${NC}"
+        if [[ "$is_logging" == "true" ]]; then
+            verdict "ok" "Logging is active"
+        else
+            verdict "alert" "Logging is DISABLED"
+            add_finding "alert" "$trail_name" "CloudTrail" "Logging is disabled"
+        fi
+
+        if [[ "$is_multi" == "true" ]]; then
+            verdict "ok" "Multi-region trail"
+        else
+            verdict "info" "Single-region trail — consider multi-region"
+            add_finding "info" "$trail_name" "CloudTrail" "Single-region trail"
+        fi
+
+        if [[ "$has_log_group" == "none" ]]; then
+            verdict "warn" "No CloudWatch Logs integration"
+            add_finding "warn" "$trail_name" "CloudTrail" "No CloudWatch Logs integration"
+        fi
+
+        echo -e "      ${DIM}S3: $s3_bucket${NC}"
+    done
+fi
+
+echo ""
+
+# ═════════════════════════════════════════════════════════════
+# SUMMARY
+# ═════════════════════════════════════════════════════════════
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+
+alert_count=$(grep -c '^alert|' "$FINDINGS_FILE" 2>/dev/null || true)
+warn_count=$(grep -c '^warn|' "$FINDINGS_FILE" 2>/dev/null || true)
+info_count=$(grep -c '^info|' "$FINDINGS_FILE" 2>/dev/null || true)
+total_findings=$((alert_count + warn_count + info_count))
+
+if [[ "$total_findings" -eq 0 ]]; then
+    echo -e "${BOLD}${GREEN}  No security issues found!${NC}"
+elif [[ "$alert_count" -gt 0 ]]; then
+    echo -e "${BOLD}${RED}  $total_findings findings ($alert_count critical, $warn_count warnings, $info_count info)${NC}"
+else
+    echo -e "${BOLD}${YELLOW}  $total_findings findings ($warn_count warnings, $info_count info)${NC}"
+fi
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════════${NC}"
+
+if [[ -s "$FINDINGS_FILE" ]]; then
+    echo ""
+    echo -e "${BOLD}${CYAN}  FINDINGS SUMMARY${NC}"
+    echo -e "${DIM}  ─────────────────────────────────────────────────────────────${NC}"
+    for level in alert warn info; do
+        grep "^${level}|" "$FINDINGS_FILE" 2>/dev/null | while IFS='|' read -r lvl resource rtype message; do
+            case "$lvl" in
+                alert) icon="${RED}✗${NC}" ;;
+                warn)  icon="${YELLOW}⚠${NC}" ;;
+                info)  icon="${DIM}ℹ${NC}" ;;
+            esac
+            echo -e "    $icon  ${BOLD}$resource${NC} ($rtype): $message"
+        done
+    done
+fi
+
+echo ""
diff --git a/preflight-checks.sh b/preflight-checks.sh
index 9eccc53..735170c 100755
--- a/preflight-checks.sh
+++ b/preflight-checks.sh
@@ -38,6 +38,8 @@ _goat_now() {
     if [[ "$_GOAT_TIME_NS" == true ]]; then date +%s%N; else date +%s; fi
 }
 START_TIME=$(_goat_now)
+BATS_AUTO_INSTALL_ATTEMPTED=false
+BATS_AUTO_INSTALL_FAILED=false
 
 # ── Helpers ──────────────────────────────────────────────────────
 step() {
@@ -165,6 +167,35 @@ EXAMPLES:
 EOF
 }
 
+attempt_bats_auto_install() {
+    local installer_script="$REPO_ROOT/lib/tools/install-bats-core.sh"
+
+    if command -v bats &>/dev/null; then
+        return 0
+    fi
+
+    BATS_AUTO_INSTALL_ATTEMPTED=true
+    echo -e "  ${ARROW} Preparing bats-core${RESET} ${DIM}(not found; attempting auto-install)${RESET}"
+    echo ""
+
+    if [[ ! -x "$installer_script" ]]; then
+        BATS_AUTO_INSTALL_FAILED=true
+        warn "Installer not found or not executable: $installer_script"
+        echo ""
+        return 1
+    fi
+
+    if "$installer_script"; then
+        echo ""
+        return 0
+    fi
+
+    BATS_AUTO_INSTALL_FAILED=true
+    warn "Auto-install failed; falling back to skip"
+    echo ""
+    return 1
+}
+
 # ── Repo root & script discovery ─────────────────────────────────
 REPO_ROOT="$(git -C "$(dirname "$0")" rev-parse --show-toplevel)"
 FIX_MODE=false
@@ -407,15 +438,26 @@ else
     fi
 fi
 
+# Auto-install bats-core when available so the test check can run.
+attempt_bats_auto_install || true
+
 # 7. Bats tests
 step "Bats tests (tests/)"
 t=$(_goat_now)
 if ! command -v bats &>/dev/null; then
-    skip "bats not installed"
+    if [[ "$BATS_AUTO_INSTALL_FAILED" == true ]]; then
+        skip "bats auto-install failed"
+    else
+        skip "bats not installed"
+    fi
     echo -e "    ${DIM}Install: sudo apt install bats-core | brew install bats-core | ./lib/tools/install-bats-core.sh${RESET}"
 else
     if bats tests/ --recursive </dev/null; then
-        pass "$(elapsed_since "$t")"
+        if [[ "$BATS_AUTO_INSTALL_ATTEMPTED" == true ]]; then
+            pass "$(elapsed_since "$t") | auto-installed"
+        else
+            pass "$(elapsed_since "$t")"
+        fi
     else
         fail "Bats tests failed"
     fi
diff --git a/scripts/context-validate.sh b/scripts/context-validate.sh
new file mode 100755
index 0000000..43a00eb
--- /dev/null
+++ b/scripts/context-validate.sh
@@ -0,0 +1,252 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+BLUE='\033[0;34m'
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[0;33m'
+NC='\033[0m'
+
+log() {
+    echo -e "${BLUE}[context]${NC} $*"
+}
+
+success() {
+    echo -e "${GREEN}[ok]${NC} $*"
+}
+
+warn() {
+    echo -e "${YELLOW}[warn]${NC} $*"
+}
+
+fail() {
+    echo -e "${RED}[fail]${NC} $*" >&2
+    FAILURES=$((FAILURES + 1))
+}
+
+show_help() {
+    cat <<'EOF'
+Usage: ./scripts/context-validate.sh
+
+Validate the Codex workflow assets:
+  - AGENTS.md router targets
+  - playbook required sections
+  - lessons/footguns/ownership docs
+  - tasks runtime files
+  - Codex eval presence
+
+Exit codes:
+  0  All checks passed
+  1  One or more checks failed
+EOF
+}
+
+check_required_heading() {
+    local file="$1"
+    local pattern="$2"
+
+    if ! grep -qF "$pattern" "$file"; then
+        fail "Missing heading '$pattern' in ${file#"$REPO_ROOT"/}"
+    fi
+}
+
+is_optional_runtime_path() {
+    local path="$1"
+
+    case "$path" in
+        tasks/todo.md|tasks/handoff.md)
+            return 0
+            ;;
+        *)
+            return 1
+            ;;
+    esac
+}
+
+check_router_targets() {
+    local agents_file="$REPO_ROOT/AGENTS.md"
+    local in_router=false
+    local line raw_path path
+
+    while IFS= read -r line; do
+        if [[ "$line" == "## Router" ]]; then
+            in_router=true
+            continue
+        fi
+
+        if [[ "$in_router" != true ]]; then
+            continue
+        fi
+
+        if [[ -z "$line" ]]; then
+            break
+        fi
+
+        if [[ "$line" != \|* ]]; then
+            continue
+        fi
+
+        raw_path="$(printf '%s\n' "$line" | awk -F'|' '{print $3}' | xargs)"
+        if [[ -z "$raw_path" || "$raw_path" == "Path" || "$raw_path" == "---" ]]; then
+            continue
+        fi
+
+        path="${raw_path//\`/}"
+        if [[ "$path" == *"*"* ]]; then
+            if ! compgen -G "$REPO_ROOT/$path" > /dev/null; then
+                fail "Router target missing: $path"
+            fi
+        elif is_optional_runtime_path "$path" && [[ ! -e "$REPO_ROOT/$path" ]]; then
+            warn "Optional local runtime file absent in this checkout: $path"
+        elif [[ ! -e "$REPO_ROOT/$path" ]]; then
+            fail "Router target missing: $path"
+        fi
+    done < "$agents_file"
+}
+
+check_playbooks() {
+    local dir="$REPO_ROOT/docs/codex-playbooks"
+    local count
+
+    count=$(find "$dir" -maxdepth 1 -type f -name '*.md' | wc -l)
+    if [[ "$count" -ne 5 ]]; then
+        fail "Expected 5 playbooks in docs/codex-playbooks, found $count"
+    fi
+
+    check_required_heading "$dir/preflight.md" "## MUST"
+    check_required_heading "$dir/preflight.md" "## SHOULD"
+    check_required_heading "$dir/preflight.md" "## MAY"
+    check_required_heading "$dir/preflight.md" "## Output"
+
+    check_required_heading "$dir/research.md" "## Hard Gate"
+    check_required_heading "$dir/research.md" "### Files Involved"
+    check_required_heading "$dir/research.md" "### Request Flow"
+    check_required_heading "$dir/research.md" "### Boundaries Touched"
+    check_required_heading "$dir/research.md" "### Risks / Gotchas"
+
+    check_required_heading "$dir/debug-investigate.md" "## Hard Gate"
+    check_required_heading "$dir/debug-investigate.md" "## Workflow"
+    check_required_heading "$dir/debug-investigate.md" "## Diagnosis Template"
+
+    check_required_heading "$dir/audit.md" "### Discovery"
+    check_required_heading "$dir/audit.md" "### Verification"
+    check_required_heading "$dir/audit.md" "### Prioritisation"
+    check_required_heading "$dir/audit.md" "### Self-Check"
+
+    check_required_heading "$dir/code-review.md" "## Findings Order"
+    check_required_heading "$dir/code-review.md" "## Review Checklist"
+    check_required_heading "$dir/code-review.md" "## Output"
+}
+
+check_docs() {
+    local architecture_lines
+    local lessons_file="$REPO_ROOT/docs/lessons.md"
+    local split_file="$REPO_ROOT/docs/guidelines-ownership-split.md"
+
+    architecture_lines=$(wc -l < "$REPO_ROOT/docs/architecture.md")
+    if (( architecture_lines > 100 )); then
+        fail "docs/architecture.md exceeds 100 lines ($architecture_lines)"
+    fi
+
+    check_required_heading "$lessons_file" "## Patterns"
+    check_required_heading "$lessons_file" "## Entries"
+    check_required_heading "$split_file" "## Before / After Overlap Report"
+}
+
+check_footguns() {
+    local footguns_file="$REPO_ROOT/docs/footguns.md"
+    local entry_count ref_count
+    local ref clean_ref path line total_lines
+    local footgun_ref_regex
+
+    if grep -qi "none confirmed yet" "$footguns_file"; then
+        success "docs/footguns.md explicitly states none confirmed yet"
+        return
+    fi
+
+    entry_count=$(grep -c '^## Footgun:' "$footguns_file")
+    if (( entry_count == 0 )); then
+        fail "docs/footguns.md has no '## Footgun:' entries"
+        return
+    fi
+
+    footgun_ref_regex="\`[^\`]+:[0-9]+\`"
+    ref_count=$(grep -oE "$footgun_ref_regex" "$footguns_file" | wc -l)
+    if (( ref_count == 0 )); then
+        fail "docs/footguns.md has no backticked file:line evidence"
+        return
+    fi
+
+    while IFS= read -r ref; do
+        clean_ref="${ref#\`}"
+        clean_ref="${clean_ref%\`}"
+        path="${clean_ref%:*}"
+        line="${clean_ref##*:}"
+
+        if [[ ! -f "$REPO_ROOT/$path" ]]; then
+            fail "Footgun evidence path missing: $path"
+            continue
+        fi
+
+        total_lines=$(wc -l < "$REPO_ROOT/$path")
+        if (( line < 1 || line > total_lines )); then
+            fail "Footgun evidence out of range: $clean_ref"
+        fi
+    done < <(grep -oE "$footgun_ref_regex" "$footguns_file")
+}
+
+check_tasks() {
+    local todo_file="$REPO_ROOT/tasks/todo.md"
+    local handoff_file="$REPO_ROOT/tasks/handoff.md"
+
+    if [[ ! -f "$todo_file" ]]; then
+        warn "Optional local runtime file absent in this checkout: tasks/todo.md"
+    fi
+
+    if [[ ! -f "$handoff_file" ]]; then
+        warn "Optional local runtime file absent in this checkout: tasks/handoff.md"
+    fi
+}
+
+check_evals() {
+    local eval_dir="$REPO_ROOT/codex-evals"
+    local eval_count
+
+    [[ -f "$eval_dir/README.md" ]] || fail "Missing codex-evals/README.md"
+    eval_count=$(find "$eval_dir" -maxdepth 1 -type f -name '*.md' ! -name 'README.md' | wc -l)
+    if (( eval_count < 5 )); then
+        fail "Expected at least 5 Codex evals, found $eval_count"
+    fi
+}
+
+if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+    show_help
+    exit 0
+fi
+
+if [[ $# -gt 0 ]]; then
+    echo "Unknown argument: $1" >&2
+    show_help
+    exit 1
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(git -C "$SCRIPT_DIR/.." rev-parse --show-toplevel)"
+FAILURES=0
+
+log "Validating Codex workflow assets"
+check_router_targets
+check_playbooks
+check_docs
+check_footguns
+check_tasks
+check_evals
+
+if (( FAILURES > 0 )); then
+    echo ""
+    echo -e "${RED}[fail]${NC} Context validation failed with $FAILURES issue(s)" >&2
+    exit 1
+fi
+
+success "Context validation passed"
diff --git a/scripts/deny-dangerous.sh b/scripts/deny-dangerous.sh
new file mode 100755
index 0000000..6296404
--- /dev/null
+++ b/scripts/deny-dangerous.sh
@@ -0,0 +1,207 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+BLUE='\033[0;34m'
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[0;33m'
+NC='\033[0m'
+
+log() {
+    echo -e "${BLUE}[policy]${NC} $*"
+}
+
+success() {
+    echo -e "${GREEN}[ok]${NC} $*"
+}
+
+warn() {
+    echo -e "${YELLOW}[warn]${NC} $*"
+}
+
+error() {
+    echo -e "${RED}[error]${NC} $*" >&2
+}
+
+show_help() {
+    cat <<'EOF'
+Usage:
+  ./scripts/deny-dangerous.sh
+  ./scripts/deny-dangerous.sh --policy
+  ./scripts/deny-dangerous.sh --check "command to inspect"
+  ./scripts/deny-dangerous.sh --self-test
+
+This script documents the deny policy for human review, CI, and preflight.
+It does not intercept commands automatically.
+EOF
+}
+
+show_policy() {
+    cat <<'EOF'
+Blocked commands and edits:
+  - any git commit
+  - any git push
+  - git push --force or -f
+  - git commit --no-verify or -n
+  - rm -rf
+  - curl|wget ... | sh|bash
+  - chmod 777
+  - direct writes to .env files
+  - direct writes to docs/code-map.md
+
+Project-specific tooling rule:
+  - regenerate docs/code-map.md via ./lib/codegen/generate-code-map.sh
+    instead of hand-editing it
+EOF
+}
+
+normalise_command() {
+    printf '%s' "$1" | tr '\n' ' ' | tr -s ' '
+}
+
+is_write_command_for_path() {
+    local cmd="$1"
+    local path_fragment="$2"
+
+    if [[ "$cmd" != *"$path_fragment"* ]]; then
+        return 1
+    fi
+
+    if [[ "$cmd" == *">"*"$path_fragment"* || "$cmd" == *">>"*"$path_fragment"* ]]; then
+        return 0
+    fi
+
+    if [[ "$cmd" =~ (^|[[:space:]])(sed|perl|python|python3|ruby|node|tee|cp|mv|touch|vi|vim|nvim|nano|ed|ex)([[:space:]]|$) ]]; then
+        return 0
+    fi
+
+    return 1
+}
+
+is_allowed_codegen_regeneration() {
+    local cmd="$1"
+
+    [[ "$cmd" == *"generate-code-map.sh"* && "$cmd" == *"docs/code-map.md"* ]]
+}
+
+check_command() {
+    local raw_cmd="$1"
+    local cmd
+
+    cmd="$(normalise_command "$raw_cmd")"
+    BLOCK_REASON=""
+
+    if [[ "$cmd" =~ (^|[[:space:]])git[[:space:]]+push([[:space:]].*)?(--force|-f)([[:space:]]|$) ]]; then
+        BLOCK_REASON="force pushes are blocked"
+        return 1
+    fi
+
+    if [[ "$cmd" =~ (^|[[:space:]])git[[:space:]]+commit([[:space:]].*)?(--no-verify|-n)([[:space:]]|$) ]]; then
+        BLOCK_REASON="git commit with --no-verify is blocked"
+        return 1
+    fi
+
+    if [[ "$cmd" =~ (^|[[:space:]])git[[:space:]]+commit([[:space:]]|$) ]]; then
+        BLOCK_REASON="git commit requires explicit human request"
+        return 1
+    fi
+
+    if [[ "$cmd" =~ (^|[[:space:]])git[[:space:]]+push([[:space:]]|$) ]]; then
+        BLOCK_REASON="git push requires explicit human request"
+        return 1
+    fi
+
+    if [[ "$cmd" =~ (^|[[:space:]])rm[[:space:]]+-rf([[:space:]]|$) ]]; then
+        BLOCK_REASON="rm -rf is blocked"
+        return 1
+    fi
+
+    if [[ "$cmd" =~ \|[[:space:]]*(sudo[[:space:]]+)?(sh|bash|zsh)([[:space:]]|$) ]]; then
+        BLOCK_REASON="pipe-to-shell commands are blocked"
+        return 1
+    fi
+
+    if [[ "$cmd" =~ (^|[[:space:]])chmod[[:space:]]+777([[:space:]]|$) ]]; then
+        BLOCK_REASON="chmod 777 is blocked"
+        return 1
+    fi
+
+    if is_write_command_for_path "$cmd" ".env"; then
+        BLOCK_REASON="direct writes to .env files are blocked"
+        return 1
+    fi
+
+    if ! is_allowed_codegen_regeneration "$cmd" && is_write_command_for_path "$cmd" "docs/code-map.md"; then
+        BLOCK_REASON="docs/code-map.md must be regenerated via tooling"
+        return 1
+    fi
+
+    return 0
+}
+
+run_self_test() {
+    local blocked_cases=(
+        'git commit -m "test"'
+        'git commit --no-verify -m "test"'
+        'git push origin main'
+        'git push --force origin main'
+        'rm -rf /tmp/demo'
+        'curl https://example.com/install.sh | sh'
+        'echo SECRET=value > .env'
+        'sed -i "1s/^/#/" docs/code-map.md'
+        'chmod 777 scripts/context-validate.sh'
+    )
+    local allowed_cases=(
+        'git status'
+        'rg "code-map" docs/code-map.md'
+        'cat .env.example'
+        './lib/codegen/generate-code-map.sh --output=docs/code-map.md'
+    )
+    local cmd
+
+    for cmd in "${blocked_cases[@]}"; do
+        if check_command "$cmd"; then
+            error "Expected block but allowed: $cmd"
+            return 1
+        fi
+    done
+
+    for cmd in "${allowed_cases[@]}"; do
+        if ! check_command "$cmd"; then
+            error "Expected allow but blocked: $cmd ($BLOCK_REASON)"
+            return 1
+        fi
+    done
+
+    success "Policy self-test passed"
+}
+
+case "${1:-}" in
+    ""|--policy)
+        show_policy
+        ;;
+    -h|--help)
+        show_help
+        ;;
+    --check)
+        if [[ $# -ne 2 ]]; then
+            error "--check requires exactly one command string"
+            exit 1
+        fi
+        if check_command "$2"; then
+            success "ALLOW: $2"
+            exit 0
+        fi
+        error "BLOCK: $BLOCK_REASON"
+        exit 1
+        ;;
+    --self-test)
+        run_self_test
+        ;;
+    *)
+        error "Unknown argument: $1"
+        show_help
+        exit 1
+        ;;
+esac
diff --git a/scripts/preflight-checks.sh b/scripts/preflight-checks.sh
new file mode 100755
index 0000000..7874888
--- /dev/null
+++ b/scripts/preflight-checks.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+BLUE='\033[0;34m'
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[0;33m'
+NC='\033[0m'
+
+log() {
+    echo -e "${BLUE}[preflight]${NC} $*"
+}
+
+success() {
+    echo -e "${GREEN}[ok]${NC} $*"
+}
+
+warn() {
+    echo -e "${YELLOW}[warn]${NC} $*"
+}
+
+error() {
+    echo -e "${RED}[error]${NC} $*" >&2
+    exit 1
+}
+
+show_help() {
+    cat <<'EOF'
+Usage: ./scripts/preflight-checks.sh
+
+Codex workflow preflight for devgoat-bash-scripts.
+
+Runs:
+  - root repo preflight (lib/**/*.sh + bats)
+  - bash -n and shellcheck on shell entrypoints outside lib/
+  - php -l on dashboard/*.php
+  - scripts/context-validate.sh
+  - scripts/deny-dangerous.sh --self-test
+EOF
+}
+
+run_step() {
+    local label="$1"
+    shift
+
+    log "$label"
+    "$@"
+    success "$label"
+}
+
+check_additional_shell_files() {
+    local -a shell_files=(
+        "$REPO_ROOT/help.sh"
+        "$REPO_ROOT/preflight-checks.sh"
+        "$REPO_ROOT/dashboard/start-dev.sh"
+    )
+    local file
+
+    while IFS= read -r -d '' file; do
+        shell_files+=("$file")
+    done < <(find "$REPO_ROOT/scripts" -maxdepth 1 -type f -name '*.sh' -print0 | sort -z)
+
+    for file in "${shell_files[@]}"; do
+        bash -n "$file"
+    done
+
+    if ! command -v shellcheck > /dev/null 2>&1; then
+        error "shellcheck is required for scripts/preflight-checks.sh"
+    fi
+
+    shellcheck "${shell_files[@]}"
+}
+
+check_dashboard_php() {
+    local php_file
+
+    if ! command -v php > /dev/null 2>&1; then
+        error "php is required for dashboard linting"
+    fi
+
+    while IFS= read -r -d '' php_file; do
+        php -l "$php_file" > /dev/null
+    done < <(find "$REPO_ROOT/dashboard" -maxdepth 1 -type f -name '*.php' -print0 | sort -z)
+}
+
+report_dependency_audit_scope() {
+    local manifests=()
+    local manifest
+
+    while IFS= read -r -d '' manifest; do
+        manifests+=("${manifest#"$REPO_ROOT"/}")
+    done < <(find "$REPO_ROOT" -maxdepth 2 -type f \( -name 'package.json' -o -name 'composer.json' -o -name 'Cargo.toml' -o -name 'go.mod' -o -name 'pyproject.toml' \) -print0 | sort -z)
+
+    if [[ ${#manifests[@]} -eq 0 ]]; then
+        warn "No package-manager manifests found at repo root; dependency audit not applicable"
+        return
+    fi
+
+    warn "Dependency manifests detected: ${manifests[*]}"
+    warn "Run the stack-native audit command if those manifests become part of the repo workflow"
+}
+
+case "${1:-}" in
+    "" ) ;;
+    -h|--help)
+        show_help
+        exit 0
+        ;;
+    *)
+        error "Unknown argument: $1"
+        ;;
+esac
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(git -C "$SCRIPT_DIR/.." rev-parse --show-toplevel)"
+
+run_step "Root repo preflight" "$REPO_ROOT/preflight-checks.sh"
+run_step "Extra shell syntax and shellcheck" check_additional_shell_files
+run_step "Dashboard PHP lint" check_dashboard_php
+run_step "Context validation" "$REPO_ROOT/scripts/context-validate.sh"
+run_step "Dangerous-command policy self-test" "$REPO_ROOT/scripts/deny-dangerous.sh" --self-test
+report_dependency_audit_scope
+success "Codex workflow preflight passed"
diff --git a/tasks/handoff-template.md b/tasks/handoff-template.md
new file mode 100644
index 0000000..2f0e9eb
--- /dev/null
+++ b/tasks/handoff-template.md
@@ -0,0 +1,24 @@
+# Session Handoff
+
+**Date:**
+**Branch:**
+
+## Status
+
+_In progress / Blocked / Ready for review_
+
+## Current State
+
+_What has been done so far. Files modified, tests passing/failing._
+
+## Key Decisions
+
+_Decisions made during this session and why._
+
+## Known Risks
+
+_What could break, what's incomplete, what needs human review._
+
+## Next Step
+
+_The single most important thing the next session should do first._