From cce15aafc9e68fe24c32e68eea73f49c10312604 Mon Sep 17 00:00:00 2001
From: hshum <hshum@users.noreply.github.com>
Date: Tue, 2 Jun 2026 12:08:46 -0700
Subject: [PATCH] =?UTF-8?q?feat(llm):=20eval-gated=20demote-down=20?=
 =?UTF-8?q?=E2=80=94=20the=20router=20cost=20lever?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Roadmap #2 of the LLM router. Adds DEMOTE-DOWN: a pool opts in with
`mode: "demote"` to default to its quality TARGET (heaviest) and drop to a
cheaper candidate ONLY on clearly-light turns, and ONLY to models eval-CLEARED
for the task class. This is the Prism cost lever for over-provisioned paths
(e.g. a persona router pinning Opus for every turn regardless of difficulty).

router (shared/llm/router.ts):
- `RouteMode = "escalate" | "demote"`; `TaskPool.mode?` (default "escalate" —
  every existing pool is UNCHANGED).
- `RouteDecision.demoted` (additive field).
- `DEMOTE_THRESHOLD` (0.25) — only clearly-trivial turns demote.
- `isDemoteCleared(taskClass, model, opts)`: a conservative static
  `DEMOTE_CLEARANCE` allowlist + a pluggable `RouteOptions.clearance` hook —
  the seam the live agentRunJudge / dogfood rolling-agreement feed plugs into.
- FAIL-SAFE: if no cheaper model is cleared, STAY on the target — quality is
  never dropped un-cleared. forceTarget always pins the target.
- `agent_reason` is reframed as the first demote pool (target Opus, demote ->
  Sonnet on light turns). It has NO live caller yet — the cache-sticky agent
  wiring (roadmap #3) will be the first — so this is behavior-preserving today.

Pure + DETERMINISTIC (no Date/random). Additive: the new optional `opts` param
and the `demoted` field don't touch the /ask or search.ts callers (tsc clean).

Tests (shared/llm/router.test.ts, +8 scenarios): demote a trivial agent turn ->
Sonnet; hard turn -> stays Opus; forceTarget -> Opus; fail-safe (un-cleared) ->
Opus; live-clearance override both directions; determinism; threshold boundary;
escalate pools never report demoted.

Docs: docs/architecture/LLM_ROUTER.md roadmap updated.

Verification: tsc --noEmit clean, 20 router tests pass, build clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/architecture/LLM_ROUTER.md |  23 +++---
 shared/llm/router.test.ts       |  76 ++++++++++++++++++++
 shared/llm/router.ts            | 119 +++++++++++++++++++++++++++-----
 3 files changed, 194 insertions(+), 24 deletions(-)

diff --git a/docs/architecture/LLM_ROUTER.md b/docs/architecture/LLM_ROUTER.md
index 3f7de15d..2711a169 100644
--- a/docs/architecture/LLM_ROUTER.md
+++ b/docs/architecture/LLM_ROUTER.md
@@ -39,7 +39,7 @@ Convex agents) and layered on top of this core later.
 | Direction | When | Quality safety |
 |---|---|---|
 | **Escalate-up** (floor → climb on complexity) | default path is already cheap (e.g. `/ask` floors at Haiku) | always safe — going up never lowers quality. **V1 ships this.** |
-| **Demote-down** (strong default → drop to cheaper on light turns) | path over-provisions (e.g. persona router pinning Opus for every banker turn) | **eval-GATED** — only demote once the cheaper model's measured agreement with the target stays above threshold (fed by `agentRunJudge` / dogfood scores). *Next layer.* |
+| **Demote-down** (strong default → drop to cheaper on light turns) | path over-provisions (e.g. persona router pinning Opus for every banker turn) | **eval-GATED** — only demote once the cheaper model's measured agreement with the target stays above threshold (fed by `agentRunJudge` / dogfood scores). **Mechanism shipped** via `mode: "demote"` + `isDemoteCleared` (static clearance now; live feed = roadmap #4). |
 
 ## Reliability invariants (`.claude/rules/agentic_reliability.md`)
 
@@ -72,12 +72,19 @@ UI) and stores `modelId` + `estimatedCostCents` on `liveEventAnswers`, so
 escalations and their cost are auditable per answer. `getAskTelemetry`
 aggregates them.
 
-## Roadmap (next PRs)
+## Roadmap
 
-1. Wire `classify` / `extract` / `synthesize` in `server/routes/search.ts`.
-2. Cache-sticky wrapper for `agent_reason` (FastAgent + Convex agents) — cache
+1. ✅ Wire `classify` / `extract` / `synthesize` in `server/routes/search.ts`.
+2. ✅ **Demote-down mechanism** — `mode: "demote"` pools default to the quality
+   target and drop to a cheaper candidate on clearly-light turns, gated by
+   `isDemoteCleared` (a conservative static `DEMOTE_CLEARANCE` table + a
+   pluggable `RouteOptions.clearance` hook). Fail-safe: nothing cleared → stay
+   on target. `agent_reason` is the first demote pool. *No live caller yet.*
+3. Cache-sticky wrapper for `agent_reason` (FastAgent + Convex agents) — cache
    the route per conversation, reuse on tool-result turns, switch only when the
-   expected win beats the cache-eviction cost.
-3. Eval-gated **demote-down**: per `(taskClass, model)` rolling agreement from
-   `agentRunJudge` / dogfood scores; demote only above threshold.
-4. Routing dashboard: cost + escalation rate per task class.
+   expected win beats the cache-eviction cost. **This wires the first live
+   demote caller.**
+4. Live eval feed: implement `RouteOptions.clearance` from `agentRunJudge` /
+   dogfood per-`(taskClass, model)` rolling agreement, replacing the static
+   `DEMOTE_CLEARANCE` table.
+5. Routing dashboard: cost + escalation/demote rate per task class.
diff --git a/shared/llm/router.test.ts b/shared/llm/router.test.ts
index b74e0708..6f59342e 100644
--- a/shared/llm/router.test.ts
+++ b/shared/llm/router.test.ts
@@ -15,6 +15,7 @@ import {
   getPools,
   ESCALATE_THRESHOLD,
   HEAVY_THRESHOLD,
+  DEMOTE_THRESHOLD,
 } from "./router";
 
 const FLOOR = "claude-haiku-4-5-20251001";
@@ -130,3 +131,78 @@ describe("LLM router — ops env override", () => {
     expect(r.model).toBe("claude-haiku-pinned");
   });
 });
+
+const SONNET = "claude-sonnet-4-6";
+const OPUS = "claude-opus-4-7";
+
+describe("LLM router — eval-gated DEMOTE-DOWN (the cost lever)", () => {
+  /**
+   * Persona: an over-provisioned agent path (e.g. persona router pins Opus for
+   * every turn). Goal: keep Opus for hard reasoning, but stop paying for it on
+   * trivial turns. agent_reason is a demote pool: target=Opus, demote→Sonnet.
+   */
+  it("demotes a trivially-light agent turn to the eval-cleared mid model", () => {
+    const r = routeLLM("agent_reason", { inputChars: 30, sourceCount: 0 });
+    expect(r.model).toBe(SONNET); // Sonnet is statically cleared for agent_reason
+    expect(r.demoted).toBe(true);
+    expect(r.escalated).toBe(false);
+    expect(r.score).toBeLessThan(DEMOTE_THRESHOLD);
+  });
+
+  it("STAYS on the quality target (Opus) for a hard agent turn", () => {
+    const r = routeLLM("agent_reason", { complexityHint: "high", inputChars: 700, multiEntity: true });
+    expect(r.model).toBe(OPUS);
+    expect(r.demoted).toBe(false);
+    expect(r.escalated).toBe(false); // demote pools never "escalate" — target IS the default
+  });
+
+  it("forceTarget pins Opus even on a trivial turn", () => {
+    const r = routeLLM("agent_reason", { inputChars: 5, forceTarget: true });
+    expect(r.model).toBe(OPUS);
+    expect(r.demoted).toBe(false);
+  });
+
+  /**
+   * The eval gate is FAIL-SAFE: if the cheaper model is NOT cleared, a light
+   * turn stays on the quality target. This is the "never sacrifice quality
+   * un-cleared" guarantee — the whole point of "maintain quality".
+   */
+  it("fail-safe: an un-cleared cheaper model is NOT demoted to (stays on target)", () => {
+    const r = routeLLM("agent_reason", { inputChars: 30 }, { clearance: () => false });
+    expect(r.model).toBe(OPUS); // nothing cleared → stay on Opus despite the light turn
+    expect(r.demoted).toBe(false);
+  });
+
+  it("a live clearance hook OVERRIDES the static table (both directions)", () => {
+    // Live feed says NO → stay on target even though the static table cleared Sonnet.
+    const blocked = routeLLM("agent_reason", { inputChars: 30 }, { clearance: () => false });
+    expect(blocked.model).toBe(OPUS);
+    // Live feed says YES → demote.
+    const allowed = routeLLM("agent_reason", { inputChars: 30 }, { clearance: () => true });
+    expect(allowed.model).toBe(SONNET);
+    expect(allowed.demoted).toBe(true);
+  });
+
+  it("is deterministic in demote mode — same turn routes identically", () => {
+    const sig = { inputChars: 40, sourceCount: 1 };
+    expect(routeLLM("agent_reason", sig)).toEqual(routeLLM("agent_reason", sig));
+  });
+
+  it("only demotes BELOW the demote threshold (boundary)", () => {
+    // medium complexity (0.25) is NOT < DEMOTE_THRESHOLD(0.25) → stays target.
+    const atThreshold = routeLLM("agent_reason", { complexityHint: "medium" });
+    expect(atThreshold.model).toBe(OPUS);
+    // clearly-light → demote.
+    const below = routeLLM("agent_reason", { inputChars: 50 });
+    expect(below.model).toBe(SONNET);
+  });
+
+  it("escalate pools are unaffected — they never report demoted", () => {
+    const r = routeLLM("ask_answer", askAnswerSignals("What time is the keynote?", 1));
+    expect(r.demoted).toBe(false);
+    expect(r.escalated).toBe(false);
+    // and an escalate pool's mode default holds
+    expect(getPools().ask_answer.mode ?? "escalate").toBe("escalate");
+    expect(getPools().agent_reason.mode).toBe("demote");
+  });
+});
diff --git a/shared/llm/router.ts b/shared/llm/router.ts
index 108918ee..59b85943 100644
--- a/shared/llm/router.ts
+++ b/shared/llm/router.ts
@@ -29,11 +29,14 @@
  *   - ESCALATE-UP (default → floor, climb on complexity) is ALWAYS quality-safe
  *     and is what V1 ships (e.g. ask_answer floors at Haiku, climbs to Sonnet on
  *     hard/analytical/multi-entity questions). No eval gate needed to go up.
- *   - DEMOTE-DOWN (default → strong, drop to cheaper on light turns) is the cost
- *     lever for over-provisioned paths (e.g. persona router pinning Opus). It is
- *     intentionally eval-GATED — only demote a task class to a cheaper model once
- *     that model's measured agreement with the target stays above threshold. That
- *     gate (fed by agentRunJudge / dogfood scores) is the next layer; not in V1.
+ *   - DEMOTE-DOWN (default → strong, drop to cheaper on clearly-light turns) is
+ *     the cost lever for over-provisioned paths (e.g. a persona router pinning
+ *     Opus for every turn). It is intentionally eval-GATED — only demote to a
+ *     cheaper model that is CLEARED for the task class (a static conservative
+ *     allowlist now; the live agentRunJudge/dogfood rolling-agreement feed plugs
+ *     in via RouteOptions.clearance). Pools opt in with `mode: "demote"`. A pool
+ *     with no cleared cheaper model stays on the target — quality never drops
+ *     un-cleared. agent_reason is the first demote pool (no live caller yet).
  *
  * Reliability (.claude/rules/agentic_reliability.md):
  *   - DETERMINISTIC: routeLLM is a pure function of (taskClass, signals, env).
@@ -48,6 +51,18 @@
 export type RouteProvider = "anthropic" | "google" | "openai" | "openrouter";
 export type RouteTier = "light" | "balanced" | "heavy";
 
+/**
+ * Routing direction for a pool:
+ *  - "escalate" (default): default to the cheap FLOOR, climb on complexity.
+ *    Always quality-safe — going up never lowers quality.
+ *  - "demote": default to the quality TARGET (heaviest), drop to a cheaper
+ *    candidate on clearly-light turns — but ONLY to models that are eval-CLEARED
+ *    for this task class. Fail-safe: nothing cleared → stay on target. This is
+ *    the cost lever for over-provisioned paths (e.g. a persona router that pins
+ *    Opus for every turn regardless of difficulty).
+ */
+export type RouteMode = "escalate" | "demote";
+
 export type TaskClass =
   | "ask_answer" // event /ask synthesis — Haiku floor, escalate to Sonnet on hard Qs
   | "classify" // query intent classification — cheapest, latency-critical
@@ -68,6 +83,8 @@ export interface RouteCandidate {
 export interface TaskPool {
   /** Ordered lightest -> heaviest. candidates[0] is the cost floor. */
   candidates: RouteCandidate[];
+  /** Routing direction. Defaults to "escalate". See RouteMode. */
+  mode?: RouteMode;
 }
 
 export interface RouteSignals {
@@ -92,8 +109,10 @@ export interface RouteDecision {
   tier: RouteTier;
   /** 0..1 complexity score that drove the decision. */
   score: number;
-  /** Did we route above the pool floor? */
+  /** Did we route above the pool floor (escalate mode)? */
   escalated: boolean;
+  /** Did we route BELOW the quality target (demote mode, eval-cleared)? */
+  demoted: boolean;
   reason: string;
 }
 
@@ -102,6 +121,39 @@ export interface RouteDecision {
 export const ESCALATE_THRESHOLD = 0.5;
 export const HEAVY_THRESHOLD = 0.8;
 
+// In demote-mode pools, only drop below the quality target on clearly-light
+// turns — conservative, so only obviously-trivial work is demoted.
+export const DEMOTE_THRESHOLD = 0.25;
+
+/**
+ * Static, CONSERVATIVE demote clearance: which cheaper models are known
+ * quality-safe to demote to for a given task class, keyed "taskClass::model".
+ * This is the SAFE default until the live eval feed (agentRunJudge / dogfood
+ * rolling agreement) is wired via RouteOptions.clearance. Only pairs we are
+ * confident hold quality on LIGHT turns belong here — when in doubt, omit it and
+ * the router stays on the target model.
+ */
+export const DEMOTE_CLEARANCE: Record<string, boolean> = {
+  // A capable mid model handles trivially-light agent turns; the heavy target
+  // (Opus) is reserved for genuinely hard reasoning.
+  "agent_reason::claude-sonnet-4-6": true,
+};
+
+export interface RouteOptions {
+  /**
+   * Live eval-clearance hook. Returns whether demoting to `model` is quality-safe
+   * for `taskClass` RIGHT NOW — e.g. backed by agentRunJudge rolling agreement
+   * against the target. When provided it OVERRIDES the static DEMOTE_CLEARANCE
+   * table. This is the seam the eval-feedback layer plugs into.
+   */
+  clearance?: (taskClass: TaskClass, model: string) => boolean;
+}
+
+function isDemoteCleared(taskClass: TaskClass, model: string, opts: RouteOptions): boolean {
+  if (opts.clearance) return opts.clearance(taskClass, model);
+  return DEMOTE_CLEARANCE[`${taskClass}::${model}`] === true;
+}
+
 /** Pure env read (no Date/random). Lets ops pin or retune without a deploy. */
 function envModel(name: string, fallback: string): string {
   const raw =
@@ -153,6 +205,10 @@ export function getPools(): Record<TaskClass, TaskPool> {
       ],
     },
     agent_reason: {
+      // DEMOTE pool: the default IS the quality target (Opus); only clearly-light
+      // turns drop to the eval-cleared mid model (Sonnet). This pool represents the
+      // over-provisioned persona/agent path that pins the top model for every turn.
+      mode: "demote",
       candidates: [
         { model: "claude-sonnet-4-6", provider: "anthropic", tier: "balanced", relCost: 1 },
         { model: "claude-opus-4-7", provider: "anthropic", tier: "heavy", relCost: 5 },
@@ -189,9 +245,10 @@ export function computeComplexityScore(signals: RouteSignals = {}): number {
 function decide(
   taskClass: TaskClass,
   c: RouteCandidate,
-  floor: RouteCandidate,
   score: number,
   reason: string,
+  escalated: boolean,
+  demoted: boolean,
 ): RouteDecision {
   return {
     taskClass,
@@ -199,7 +256,8 @@ function decide(
     provider: c.provider,
     tier: c.tier,
     score,
-    escalated: c.model !== floor.model || c.tier !== floor.tier,
+    escalated,
+    demoted,
     reason,
   };
 }
@@ -213,29 +271,58 @@ function decide(
  *     score >= ESCALATE_THRESHOLD -> next tier up from floor (if any)
  *     otherwise                  -> floor
  */
-export function routeLLM(taskClass: TaskClass, signals: RouteSignals = {}): RouteDecision {
+export function routeLLM(
+  taskClass: TaskClass,
+  signals: RouteSignals = {},
+  opts: RouteOptions = {},
+): RouteDecision {
   const pool = getPools()[taskClass];
   const candidates = pool.candidates;
   const floor = candidates[0];
   const heaviest = candidates[candidates.length - 1];
+  const mode: RouteMode = pool.mode ?? "escalate";
 
+  // forceTarget always lands on the quality target (heaviest), regardless of mode.
   if (signals.forceTarget) {
-    return decide(taskClass, heaviest, floor, 1, "forced quality target (high-stakes / caller override)");
+    const escalated = mode === "escalate" && heaviest.model !== floor.model;
+    return decide(taskClass, heaviest, 1, "forced quality target (high-stakes / caller override)", escalated, false);
   }
 
   const score = computeComplexityScore(signals);
+
+  if (mode === "demote") {
+    // Default IS the quality target (heaviest). Only on clearly-light turns drop
+    // to the cheapest demote-CLEARED candidate below the target. Fail-safe:
+    // nothing cleared → stay on target (never sacrifice quality un-cleared).
+    let chosen = heaviest;
+    if (score < DEMOTE_THRESHOLD) {
+      for (const c of candidates) {
+        if (c.model === heaviest.model) break; // reached the target — stop
+        if (isDemoteCleared(taskClass, c.model, opts)) {
+          chosen = c;
+          break;
+        }
+      }
+    }
+    const demoted = chosen.model !== heaviest.model;
+    const reason = demoted
+      ? `demoted to ${chosen.tier} (complexity ${score.toFixed(2)}, eval-cleared)`
+      : `target ${heaviest.tier} (complexity ${score.toFixed(2)})`;
+    return decide(taskClass, chosen, score, reason, false, demoted);
+  }
+
+  // escalate mode (default): climb from the floor on complexity.
   let chosen = floor;
   if (score >= HEAVY_THRESHOLD) {
     chosen = heaviest;
   } else if (score >= ESCALATE_THRESHOLD && candidates.length > 1) {
     chosen = candidates[1];
   }
-
-  const reason =
-    chosen.model === floor.model && chosen.tier === floor.tier
-      ? `floor ${floor.tier} (complexity ${score.toFixed(2)})`
-      : `escalated to ${chosen.tier} (complexity ${score.toFixed(2)})`;
-  return decide(taskClass, chosen, floor, score, reason);
+  const escalated = chosen.model !== floor.model || chosen.tier !== floor.tier;
+  const reason = escalated
+    ? `escalated to ${chosen.tier} (complexity ${score.toFixed(2)})`
+    : `floor ${floor.tier} (complexity ${score.toFixed(2)})`;
+  return decide(taskClass, chosen, score, reason, escalated, false);
 }
 
 // ── Per-task-class signal helpers ────────────────────────────────────────────