BaseInfinity · BaseInfinity · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -265,6 +265,9 @@ jobs:
       - name: Run Cowork plugin drift tests
         run: ./tests/test-cowork-drift.sh
 
+      - name: Run skill graduation tests
+        run: ./tests/test-skill-graduations.sh
+
   # Clean up old bot comments on PR push (keeps PRs tidy)
   # Also runs on workflow_dispatch (no-op) so branch protection doesn't block auto-merge.
   cleanup-old-comments:

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -56,6 +56,7 @@ Thank you for your interest in improving the SDLC Wizard!
    ./tests/test-agents-md-interop.sh && \
    ./tests/test-self-pr-review-skip.sh && \
    ./tests/test-cowork-drift.sh && \
+   ./tests/test-skill-graduations.sh && \
    ./tests/e2e/run-simulation.sh && \
    ./tests/e2e/test-deterministic-checks.sh && \
    ./tests/e2e/test-scenario-rotation.sh && \
@@ -205,6 +206,7 @@ python3 -c "import yaml; yaml.safe_load(open('.github/workflows/ci.yml'))"
 ./tests/test-degradation-detection.sh
 ./tests/test-local-shepherd.sh
 ./tests/test-cowork-drift.sh
+./tests/test-skill-graduations.sh
 ./tests/e2e/run-simulation.sh
 ./tests/e2e/test-deterministic-checks.sh
 ./tests/e2e/test-scenario-rotation.sh

diff --git a/cowork/skills/sdlc/SKILL.md b/cowork/skills/sdlc/SKILL.md
@@ -102,7 +102,11 @@ State your confidence before presenting an approach:
 | FAILED 2x | Something's wrong | Codex for fresh perspective; if still stuck, STOP | **`/effort max` now** |
 | CONFUSED | Can't diagnose | Codex; if still confused, STOP and describe | **`/effort max` now** |
 
-**Dynamic effort bumping is NOT optional.** "Consider max effort" is the same as "ignore this." Bump BEFORE the next attempt, not after a third failure.
+**Effort bumping is NOT optional.** Bump BEFORE the next attempt, not after a third failure.
+
+**Confidence ramp:** Opus researches → Fable batch review → 95% list → /goal TDD → Codex check.
+
+**Advisor:** `advisor()` before plans; if down, spawn Fable subagent.
 
 ## Plan Mode
 
@@ -132,7 +136,7 @@ The loop goes back to PLANNING, not TDD RED. Run `/code-review`; issues at confi
 
 ## Cross-Model Review (REQUIRED for High-Stakes)
 
-**When to run:** high-stakes changes (auth, payments, data), releases/publishes, complex refactors. **When to skip (log justification):** trivial, hotfixes, risk < review cost. **Prerequisites:** Codex CLI (`npm i -g @openai/codex`) + OpenAI API key. **Reviewer at flagship tier (#233):** even on `opusplan` (Sonnet driver), reviewer runs `gpt-5.5` xhigh — adversarial diversity is the point.
+**When to run:** high-stakes changes (auth, payments, data), releases/publishes, complex refactors. **When to skip (log justification):** trivial, hotfixes, risk < review cost. **Prerequisites:** Codex CLI + OpenAI API key. **Reviewer:** `gpt-5.5` xhigh — adversarial diversity.
 
 PROTOCOL is universal across domains; only `review_instructions` and `verification_checklist` change.
 

diff --git a/skills/sdlc/SKILL.md b/skills/sdlc/SKILL.md
@@ -102,7 +102,11 @@ State your confidence before presenting an approach:
 | FAILED 2x | Something's wrong | Codex for fresh perspective; if still stuck, STOP | **`/effort max` now** |
 | CONFUSED | Can't diagnose | Codex; if still confused, STOP and describe | **`/effort max` now** |
 
-**Dynamic effort bumping is NOT optional.** "Consider max effort" is the same as "ignore this." Bump BEFORE the next attempt, not after a third failure.
+**Effort bumping is NOT optional.** Bump BEFORE the next attempt, not after a third failure.
+
+**Confidence ramp:** Opus researches → Fable batch review → 95% list → /goal TDD → Codex check.
+
+**Advisor:** `advisor()` before plans; if down, spawn Fable subagent.
 
 ## Plan Mode
 
@@ -132,7 +136,7 @@ The loop goes back to PLANNING, not TDD RED. Run `/code-review`; issues at confi
 
 ## Cross-Model Review (REQUIRED for High-Stakes)
 
-**When to run:** high-stakes changes (auth, payments, data), releases/publishes, complex refactors. **When to skip (log justification):** trivial, hotfixes, risk < review cost. **Prerequisites:** Codex CLI (`npm i -g @openai/codex`) + OpenAI API key. **Reviewer at flagship tier (#233):** even on `opusplan` (Sonnet driver), reviewer runs `gpt-5.5` xhigh — adversarial diversity is the point.
+**When to run:** high-stakes changes (auth, payments, data), releases/publishes, complex refactors. **When to skip (log justification):** trivial, hotfixes, risk < review cost. **Prerequisites:** Codex CLI + OpenAI API key. **Reviewer:** `gpt-5.5` xhigh — adversarial diversity.
 
 PROTOCOL is universal across domains; only `review_instructions` and `verification_checklist` change.
 

diff --git a/tests/test-skill-graduations.sh b/tests/test-skill-graduations.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+set -e  # abort on an unchecked command failure (commands tested by `if` are exempt)
+
+PASS=0  # running count of passed checks
+FAIL=0  # running count of failed checks
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"  # directory that holds this script
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"  # parent directory of SCRIPT_DIR
+SKILL="$PROJECT_ROOT/skills/sdlc/SKILL.md"  # the one file every check below inspects
+
+pass() { echo "  PASS: $1"; PASS=$((PASS + 1)); }  # print a PASS line for $1 and bump PASS
+fail() { echo "  FAIL: $1"; FAIL=$((FAIL + 1)); }  # print a FAIL line for $1 and bump FAIL
+
+echo "=== Skill Graduation Tests ==="
+echo ""
+
+# --- Confidence Ramp Pattern ---
+echo "--- Confidence Ramp Pattern ---"
+
+# Test 1: SKILL.md contains the phrase "confidence ramp" (case-insensitive match)
+if grep -qi "confidence ramp" "$SKILL"; then
+  pass "SKILL.md documents confidence ramp pattern"
+else
+  fail "SKILL.md missing confidence ramp pattern"
+fi
+
+# Test 2: Some line matches "batch...review" or "batch...consult" (case-sensitive; any line in the file, not only the ramp line)
+if grep -q "batch.*review\|batch.*consult" "$SKILL"; then
+  pass "confidence ramp includes batch review step"
+else
+  fail "confidence ramp missing batch review step"
+fi
+
+# Test 3: Confidence ramp line includes /goal and Codex check
+if grep -qi "confidence ramp" "$SKILL" | head -1 && grep -i "confidence ramp" "$SKILL" | grep -q "/goal" && grep -i "confidence ramp" "$SKILL" | grep -q "Codex"; then
+  pass "confidence ramp includes /goal + Codex check"
+else
+  fail "confidence ramp missing /goal or Codex check on the ramp line"
+fi
+
+echo ""
+echo "--- Advisor Auto-Fallback ---"
+
+# Test 4: Some line pairs "advisor" with "if down", "fallback", or "unavailable" (case-insensitive match)
+if grep -qi "advisor.*if down\|advisor.*fallback\|advisor.*unavailable\|fallback.*advisor" "$SKILL"; then
+  pass "SKILL.md documents advisor fallback"
+else
+  fail "SKILL.md missing advisor fallback"
+fi
+
+# Test 5: Some line pairs "Fable" with "subagent", "spawn", or "fallback" (case-sensitive; any line in the file)
+if grep -q "Fable.*subagent\|subagent.*Fable\|spawn.*Fable\|Fable.*fallback" "$SKILL"; then
+  pass "advisor fallback uses Fable subagent"
+else
+  fail "advisor fallback missing Fable subagent instruction"
+fi
+
+echo ""
+echo "--- Budget Check ---"
+
+# Test 6: SKILL.md is at most 20000 bytes (wc -c counts bytes, so each multi-byte character counts more than once)
+chars=$(wc -c < "$SKILL")
+if [ "$chars" -le 20000 ]; then
+  pass "SKILL.md is under 20K chars ($chars)"
+else
+  fail "SKILL.md exceeds 20K chars ($chars)"
+fi
+
+echo ""
+echo "=== Results: $PASS passed, $FAIL failed ==="
+[ "$FAIL" -eq 0 ] && exit 0 || exit 1  # exit status 0 only when no check failed