From 16b832a945d6c078a4dc78b5257999cc2f6fa8df Mon Sep 17 00:00:00 2001
From: hshum <hshum@users.noreply.github.com>
Date: Tue, 2 Jun 2026 12:22:01 -0700
Subject: [PATCH] feat(llm): LLM routing observability panel on the telemetry
 surface

Surface the EXISTING per-answer routing telemetry the /ask path persists on
liveEventAnswers (modelId / provider / agentMode / estimatedCostCents) so an
operator can see shared/llm/router.ts working in production: Haiku floor vs.
Sonnet escalation split, escalation rate, avg cost/answer, and provider-
fallback rate. Read-only + additive - does NOT touch routeLLM or the /ask
write path.

Backend (additive, bounded, honest):
- convex/events.ts: new GLOBAL query getAskRoutingTelemetry - a <=1000-row
  bounded scan over recent liveEventAnswers (BOUND via .take(cap)). Delegates
  the math to a pure aggregator so it is scenario-tested without a DB.
- shared/llm/askRoutingTelemetry.ts: pure aggregateAskRouting() - floor vs.
  escalated decided by the same modelId.includes("haiku") convention the cost
  estimator + router floor use (DETERMINISTIC, sorted breakdowns). Rates are
  null (not a fabricated 0%) when there is no denominator (HONEST_SCORES).

Frontend:
- src/features/telemetry/LlmRoutingPanel.tsx: glass-card panel with headline
  stats, model mix, provider mix, an honest "No routed /ask traffic yet" empty
  state, loading state, aria labels + reduced-motion-safe.
- Composed into AgentTelemetryDashboard (the named telemetry surface) +
  exported from the telemetry barrel.

Tests: shared/llm/askRoutingTelemetry.test.ts - 10 scenario-based tests
(operator floor/escalation split, true-0 vs null, cache/deterministic-only,
provider-fallback, cost-over-routed-only, adversarial pinned/blank models,
1000-row scale, determinism).

Verification: app tsc clean, convex tsc clean, vitest 22 pass (10 new + 12
router) + 78 existing event tests green, npm run build clean.

Note: AgentTelemetryDashboard is currently orphaned (no live route mounts it)
in the prod-parity build, so the panel compiles and is tested but will not be
visible until its host is routed. Reported for reviewer.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 convex/events.ts                              |  43 ++++
 shared/llm/askRoutingTelemetry.test.ts        | 220 ++++++++++++++++
 shared/llm/askRoutingTelemetry.ts             | 134 ++++++++++
 .../views/AgentTelemetryDashboard.tsx         |   4 +
 src/features/telemetry/LlmRoutingPanel.tsx    | 237 ++++++++++++++++++
 src/features/telemetry/index.ts               |   4 +
 6 files changed, 642 insertions(+)
 create mode 100644 shared/llm/askRoutingTelemetry.test.ts
 create mode 100644 shared/llm/askRoutingTelemetry.ts
 create mode 100644 src/features/telemetry/LlmRoutingPanel.tsx
diff --git a/convex/events.ts b/convex/events.ts
index 919bbb5cb..9ab178ff5 100644
--- a/convex/events.ts
+++ b/convex/events.ts
@@ -31,6 +31,7 @@ import { internal } from "./_generated/api";
 import { action, query, mutation, internalMutation, internalQuery } from "./_generated/server";
 import { enforceRateLimit } from "./scratchnodeRateLimit";
 import { routeLLM, askAnswerSignals } from "../shared/llm/router";
+import { aggregateAskRouting } from "../shared/llm/askRoutingTelemetry";
 import { rerankWithGemini, condenseQuery, type TriCandidate } from "../shared/search/triSearch";
 
 class ConvexError<T extends Record<string, unknown>> extends Error {
@@ -980,6 +981,48 @@ export const getAskTelemetry = query({
   },
 });
 
+/**
+ * LLM ROUTING observability (LLM Router roadmap #3) — a GLOBAL, read-only
+ * aggregate over recent `/ask` answers showing the `shared/llm/router.ts`
+ * `routeLLM("ask_answer", …)` decision in production: how often the cheap
+ * Haiku floor served the turn vs. how often it escalated to Sonnet, the avg
+ * estimated cost per answer, and the provider/agentMode mix.
+ *
+ * Unlike `getAskTelemetry` (per-event, host-facing), this is operator-facing
+ * and spans ALL events — it answers "is the router actually working?" for the
+ * `/?surface=telemetry` dashboard, which has no single-event context.
+ *
+ * Honesty (.claude/rules/agentic_reliability.md):
+ *   - BOUND: capped scan (≤1000 newest rows via `.take(cap)`); `capped` flag
+ *     surfaced when the window is full. No global time index exists on
+ *     liveEventAnswers, so this is a bounded table scan — the `.take()` is the
+ *     hard cap, never an unbounded read.
+ *   - HONEST_SCORES: every rate is computed from real rows. `escalationRate`
+ *     and `avgCostCents` are null (UI shows "no data yet") when there's no
+ *     denominator — never a fabricated 0% or $0.
+ *   - DETERMINISTIC: pure function of the rows. Floor vs. escalated is decided
+ *     by the same `modelId.includes("haiku")` convention used by
+ *     `estimateAnthropicCostCents` above and the router's Haiku floor.
+ *   - No private data: liveEventAnswers are public; never touches userNotes.
+ *
+ * NOTE: routed answers are those that actually reached a model — agentMode
+ * `provider` or `provider_fallback`. `cache` and `deterministic` answers never
+ * invoked routeLLM, so they're excluded from the routing (floor/escalated)
+ * denominator but still counted in the agentMode mix for completeness.
+ */
+export const getAskRoutingTelemetry = query({
+  args: { limit: v.optional(v.number()) },
+  handler: async (ctx, { limit }) => {
+    // BOUND: clamp to a whole number in [1, 1000]. `v.number()` also admits
+    // fractions and NaN, so floor first; NaN (and 0) collapse to the minimum of 1.
+    const cap = Math.min(Math.max(Math.floor(limit ?? 1000) || 1, 1), 1000);
+    // Newest-first read; `.take(cap)` is the hard cap, never an unbounded scan.
+    // The pure aggregator (shared/llm/askRoutingTelemetry.ts) does the math.
+    const rows = await ctx.db.query("liveEventAnswers").order("desc").take(cap);
+    return aggregateAskRouting(rows, rows.length >= cap);
+  },
+});
+
 export const getHostStatus = query({
   args: {
     eventId: v.id("liveEvents"),
diff --git a/shared/llm/askRoutingTelemetry.test.ts b/shared/llm/askRoutingTelemetry.test.ts
new file mode 100644
index 000000000..18ce61fd8
--- /dev/null
+++ b/shared/llm/askRoutingTelemetry.test.ts
@@ -0,0 +1,220 @@
+/**
+ * Scenario-based tests for the LLM-router observability aggregate
+ * (shared/llm/askRoutingTelemetry.ts).
+ *
+ * Per .claude/rules/scenario_testing.md each test names a persona + goal +
+ * prior state + scale + duration + edge cases. The panel on the telemetry
+ * surface renders these numbers verbatim, so the risks are:
+ *   - a fabricated 0%/"healthy" when there's no data (HONEST_SCORES),
+ *   - mis-counting cache/deterministic answers (that never hit the router) into
+ *     the floor/escalated denominator,
+ *   - non-deterministic breakdown ordering (UI jitter).
+ */
+import { describe, expect, it } from "vitest";
+import { aggregateAskRouting, tierForModelId, type AskRoutingRow } from "./askRoutingTelemetry";
+
+const HAIKU = "claude-haiku-4-5-20251001";
+const SONNET = "claude-sonnet-4-6";
+
+/** Build a row that defaults to a routed Haiku answer (provider mode, anthropic, cost 0); `partial` overrides any field. */
+function answer(partial: Partial<AskRoutingRow>): AskRoutingRow {
+  return { agentMode: "provider", provider: "anthropic", modelId: HAIKU, estimatedCostCents: 0, ...partial };
+}
+
+describe("aggregateAskRouting — model mix + escalation (operator view)", () => {
+  /**
+   * Persona:     Operator opens /?surface=telemetry mid-event.
+   * Goal:        See how often the router stayed on the Haiku floor vs. escalated.
+   * Prior state: 10 routed answers — 7 Haiku floor, 3 Sonnet escalations.
+   * Scale:       10 answers. Duration: single query.
+   * Expected:    escalationRate = 3/10 = 0.3; floor/escalated counts exact;
+   *              model mix sorted by count desc.
+   */
+  it("computes the floor-vs-escalated split from real model ids", () => {
+    const rows: AskRoutingRow[] = [
+      ...Array.from({ length: 7 }, () => answer({ modelId: HAIKU, estimatedCostCents: 0.01 })),
+      ...Array.from({ length: 3 }, () => answer({ modelId: SONNET, estimatedCostCents: 0.05 })),
+    ];
+    const t = aggregateAskRouting(rows, false);
+
+    expect(t.total).toBe(10);
+    expect(t.routedCount).toBe(10);
+    expect(t.floorCount).toBe(7);
+    expect(t.escalatedCount).toBe(3);
+    expect(t.escalationRate).toBe(0.3);
+    // model mix is sorted by count desc — Haiku floor first.
+    expect(t.modelMix[0]).toEqual({ modelId: HAIKU, count: 7, tier: "floor" });
+    expect(t.modelMix[1]).toEqual({ modelId: SONNET, count: 3, tier: "escalated" });
+  });
+
+  /**
+   * Persona:     Operator on a calm room — every question was a quick lookup.
+   * Goal:        Confirm the router did NOT over-escalate (cost discipline).
+   * Prior state: 5 routed answers, all Haiku.
+   * Expected:    escalationRate = 0 (a REAL zero, not a null) — there IS a
+   *              denominator (5 routed), the router genuinely never escalated.
+   */
+  it("reports a TRUE 0% escalation when the floor served every routed answer", () => {
+    const rows = Array.from({ length: 5 }, () => answer({ modelId: HAIKU }));
+    const t = aggregateAskRouting(rows, false);
+    expect(t.routedCount).toBe(5);
+    expect(t.escalationRate).toBe(0); // real 0, not null
+    expect(t.escalatedCount).toBe(0);
+  });
+});
+
+describe("aggregateAskRouting — HONEST_SCORES (no fabricated metrics)", () => {
+  /**
+   * Persona:     Operator opens telemetry for a brand-new deployment.
+   * Goal:        Must NOT see a fake "0% escalation / $0 healthy" from no data.
+   * Prior state: 0 answers.
+   * Expected:    rates are null (panel renders "—"), counts are 0.
+   */
+  it("empty input → rates are null, never a fabricated 0% or $0/answer", () => {
+    const t = aggregateAskRouting([], false);
+    expect(t.total).toBe(0);
+    expect(t.routedCount).toBe(0);
+    expect(t.escalationRate).toBeNull();
+    expect(t.providerFallbackRate).toBeNull();
+    expect(t.avgCostCents).toBeNull();
+    expect(t.totalCostCents).toBe(0);
+    expect(t.modelMix).toEqual([]);
+    expect(t.providerMix).toEqual([]);
+  });
+
+  /**
+   * Persona:     Operator on a room where every answer was served from cache /
+   *              the deterministic synthesizer (no model ever ran).
+   * Goal:        The router metrics must stay null — these answers never invoked
+   *              routeLLM, so there's no routing to report.
+   * Prior state: 4 cache + 2 deterministic answers, 0 provider attempts.
+   * Expected:    routedCount 0, escalationRate null, providerFallbackRate null,
+   *              but the agentMode mix still counts all 6 for completeness.
+   */
+  it("cache/deterministic-only traffic → routing rates null, agentMode mix still counted", () => {
+    const rows: AskRoutingRow[] = [
+      ...Array.from({ length: 4 }, () => ({ agentMode: "cache" as const, modelId: null, estimatedCostCents: 0 })),
+      ...Array.from({ length: 2 }, () => ({ agentMode: "deterministic" as const, modelId: null, estimatedCostCents: 0 })),
+    ];
+    const t = aggregateAskRouting(rows, false);
+    expect(t.total).toBe(6);
+    expect(t.routedCount).toBe(0);
+    expect(t.escalationRate).toBeNull();
+    expect(t.providerFallbackRate).toBeNull();
+    expect(t.avgCostCents).toBeNull();
+    expect(t.agentModes).toEqual({ provider: 0, provider_fallback: 0, cache: 4, deterministic: 2 });
+  });
+});
+
+describe("aggregateAskRouting — provider-fallback + cost", () => {
+  /**
+   * Persona:     Operator notices answers feel degraded.
+   * Goal:        See the provider-fallback rate — the headline degraded signal.
+   * Prior state: 8 provider + 2 provider_fallback (Anthropic primary fell back).
+   * Expected:    providerFallbackRate = 2/10 = 0.2; routedCount counts BOTH
+   *              (a fallback still reached a model).
+   */
+  it("provider-fallback rate = fallbacks / (provider + fallback)", () => {
+    const rows: AskRoutingRow[] = [
+      ...Array.from({ length: 8 }, () => answer({ agentMode: "provider", modelId: HAIKU })),
+      ...Array.from({ length: 2 }, () => answer({ agentMode: "provider_fallback", modelId: SONNET })),
+    ];
+    const t = aggregateAskRouting(rows, false);
+    expect(t.routedCount).toBe(10);
+    expect(t.providerFallbackRate).toBe(0.2);
+    expect(t.agentModes.provider).toBe(8);
+    expect(t.agentModes.provider_fallback).toBe(2);
+  });
+
+  /**
+   * Persona:     Finance-minded operator checking cost discipline.
+   * Goal:        Avg cost/answer should reflect ONLY routed answers (cache is free
+   *              and must not dilute the average toward $0).
+   * Prior state: 2 routed answers @ 0.10 + 0.30 cents, plus 3 free cache answers.
+   * Expected:    avgCostCents = 0.40/2 = 0.2 (cache excluded from the average).
+   */
+  it("avg cost is over ROUTED answers only — free cache hits don't dilute it", () => {
+    const rows: AskRoutingRow[] = [
+      answer({ estimatedCostCents: 0.1 }),
+      answer({ estimatedCostCents: 0.3 }),
+      { agentMode: "cache", modelId: null, estimatedCostCents: 0 },
+      { agentMode: "cache", modelId: null, estimatedCostCents: 0 },
+      { agentMode: "cache", modelId: null, estimatedCostCents: 0 },
+    ];
+    const t = aggregateAskRouting(rows, false);
+    expect(t.routedCount).toBe(2);
+    expect(t.totalCostCents).toBe(0.4);
+    expect(t.avgCostCents).toBe(0.2);
+  });
+});
+
+describe("aggregateAskRouting — adversarial + scale + determinism", () => {
+  /**
+   * Persona:     Adversarial / messy data — env-pinned heavy model, blank model
+   *              ids, weird providers, missing cost.
+   * Goal:        Never crash; classify a pinned non-Haiku model as escalated; a
+   *              blank model id as "other" (excluded from the escalation denom).
+   * Prior state: 1 opus (pinned heavy), 1 blank-model provider answer, 1 haiku.
+   * Expected:    escalation denom = floor + escalated = 1 haiku + 1 opus = 2; the
+   *              blank-model row is "other" and excluded; escalationRate = 1/2.
+   */
+  it("classifies pinned-heavy as escalated and blank model id as 'other'", () => {
+    const rows: AskRoutingRow[] = [
+      answer({ modelId: "claude-opus-4-7", provider: "anthropic" }),
+      answer({ modelId: "", provider: "anthropic" }), // unrecorded model
+      answer({ modelId: HAIKU, provider: "anthropic" }),
+    ];
+    const t = aggregateAskRouting(rows, false);
+    expect(t.routedCount).toBe(3);
+    expect(t.floorCount).toBe(1);
+    expect(t.escalatedCount).toBe(1);
+    // 1 escalated / (1 floor + 1 escalated) — the "other" row is excluded.
+    expect(t.escalationRate).toBe(0.5);
+    const other = t.modelMix.find((m) => m.modelId === "(unrecorded model)");
+    expect(other?.tier).toBe("other");
+  });
+
+  /**
+   * Long-running accumulation: a multi-day room scanned at the BOUND cap.
+   * Goal:        BOUND — the caller's `capped` flag is surfaced so the UI can say
+   *              "(capped at 1000)". The aggregate stays correct over a large slice.
+   * Prior state: 1000 routed answers (the cap), all Haiku, capped=true.
+   * Expected:    total 1000, capped true, escalationRate 0 (all floor), O(n) stable.
+   */
+  it("scale: stays correct + honest about truncation at the read cap", () => {
+    const rows = Array.from({ length: 1000 }, () => answer({ modelId: HAIKU }));
+    const t = aggregateAskRouting(rows, /* capped */ true);
+    expect(t.total).toBe(1000);
+    expect(t.capped).toBe(true);
+    expect(t.routedCount).toBe(1000);
+    expect(t.escalationRate).toBe(0);
+  });
+
+  /**
+   * Determinism (replay safety): identical rows → byte-identical output, and the
+   * breakdowns tie-break deterministically by key when counts are equal.
+   */
+  it("is deterministic: same rows in → identical sorted breakdowns out", () => {
+    const rows: AskRoutingRow[] = [
+      answer({ provider: "zeta", modelId: SONNET }),
+      answer({ provider: "alpha", modelId: HAIKU }),
+      answer({ provider: "alpha", modelId: SONNET }),
+      answer({ provider: "zeta", modelId: HAIKU }),
+    ];
+    const a = aggregateAskRouting(rows, false);
+    const b = aggregateAskRouting(rows, false);
+    expect(a).toEqual(b);
+    // Equal counts (2 each) → alpha before zeta (localeCompare tie-break).
+    expect(a.providerMix.map((p) => p.provider)).toEqual(["alpha", "zeta"]);
+  });
+});
+
+describe("tierForModelId", () => {
+  it("maps Haiku → floor, Sonnet/Opus → escalated, blank → other", () => {
+    expect(tierForModelId(HAIKU)).toBe("floor");
+    expect(tierForModelId("claude-haiku-pinned")).toBe("floor");
+    expect(tierForModelId(SONNET)).toBe("escalated");
+    expect(tierForModelId("claude-opus-4-7")).toBe("escalated");
+    expect(tierForModelId("")).toBe("other");
+  });
+});
diff --git a/shared/llm/askRoutingTelemetry.ts b/shared/llm/askRoutingTelemetry.ts
new file mode 100644
index 000000000..0e40d2921
--- /dev/null
+++ b/shared/llm/askRoutingTelemetry.ts
@@ -0,0 +1,134 @@
+/**
+ * shared/llm/askRoutingTelemetry.ts — pure aggregation for LLM-router
+ * observability (LLM Router roadmap #3).
+ *
+ * `convex/events.ts:getAskRoutingTelemetry` does a BOUNDED read of recent
+ * `liveEventAnswers` rows and hands them here. Keeping the math in a pure,
+ * dependency-free function (no `ctx.db`, no Convex types) means it can be
+ * scenario-tested directly with plain arrays — exactly like `router.ts` itself
+ * — and guarantees DETERMINISTIC output (sorted breakdowns, no Date/random).
+ *
+ * Floor vs. escalated is decided by the SAME `modelId.includes("haiku")`
+ * convention the cost estimator in events.ts and the router's Haiku floor use,
+ * so the panel can never disagree with what was actually billed/routed.
+ *
+ * Honesty (.claude/rules/agentic_reliability.md):
+ *   - HONEST_SCORES: rates are `null` (not a fabricated 0%) when there's no
+ *     denominator. The panel renders "—" for null.
+ *   - DETERMINISTIC: same rows in → same object out; breakdowns are sorted by
+ *     count then key.
+ *   - BOUND lives in the caller (`.take(cap)`); this function is O(n) over
+ *     whatever bounded slice it's given.
+ */
+
+/** The `liveEventAnswers` fields this aggregate reads; every field may be absent or null. */
+export interface AskRoutingRow {
+  agentMode?: "deterministic" | "provider" | "provider_fallback" | "cache" | null;
+  provider?: string | null;
+  modelId?: string | null;
+  estimatedCostCents?: number | null;
+}
+
+export type RouteTierBucket = "floor" | "escalated" | "other";
+
+export interface AskRoutingTelemetry {
+  /** Total /ask answers scanned (all modes). */
+  total: number;
+  /** Caller-supplied flag: the scan filled its read cap, so older rows may exist beyond it. */
+  capped: boolean;
+  /** Answers that actually reached a model (provider + provider_fallback). */
+  routedCount: number;
+  /** Routed answers whose model id contains "haiku" (the floor tier). */
+  floorCount: number;
+  /** Routed answers with any other non-blank model id (Sonnet / pinned heavy). */
+  escalatedCount: number;
+  /** escalated / (floor + escalated), 3 decimals. Null when no routed answer recorded a model. */
+  escalationRate: number | null;
+  /** provider_fallback / (provider + provider_fallback), 3 decimals. Null when no provider attempts. */
+  providerFallbackRate: number | null;
+  /** Avg estimated cost (cents) per routed answer, 4 decimals; a missing cost counts as 0. Null when nothing routed. */
+  avgCostCents: number | null;
+  /** Total estimated cost (cents) across routed answers, 4 decimals. */
+  totalCostCents: number;
+  /** Count by agentMode across ALL scanned answers; a missing agentMode is tallied as deterministic. */
+  agentModes: { provider: number; provider_fallback: number; cache: number; deterministic: number };
+  /** Routed-answer count per model id, sorted by count desc then id. */
+  modelMix: Array<{ modelId: string; count: number; tier: RouteTierBucket }>;
+  /** Routed-answer count per provider, sorted by count desc then provider. */
+  providerMix: Array<{ provider: string; count: number }>;
+}
+
+function round(x: number, p: number): number {
+  return Math.round(x * Math.pow(10, p)) / Math.pow(10, p); // keep `p` decimal places
+}
+
+/** Bucket a model id: blank → "other"; contains "haiku" (any case) → "floor"; else "escalated". */
+export function tierForModelId(modelId: string): RouteTierBucket {
+  if (!modelId) return "other";
+  if (modelId.toLowerCase().includes("haiku")) return "floor";
+  return "escalated";
+}
+
+/**
+ * Aggregate a bounded slice of recent /ask answers into the routing telemetry
+ * the panel renders. Tallies live in null-prototype objects so a provider or
+ * model literally named "constructor" / "__proto__" is counted like any other.
+ * @param rows   Rows to fold; only provider / provider_fallback rows count as routed.
+ * @param capped Passed through verbatim — the caller knows whether its read hit the cap.
+ * @returns Counts, rates (null when the denominator is 0) and count-desc sorted mixes.
+ */
+export function aggregateAskRouting(rows: readonly AskRoutingRow[], capped: boolean): AskRoutingTelemetry {
+  const agentModes = { provider: 0, provider_fallback: 0, cache: 0, deterministic: 0 };
+  const providers: Record<string, number> = Object.create(null);
+  const models: Record<string, { count: number; tier: RouteTierBucket }> = Object.create(null);
+  let floorCount = 0;
+  let escalatedCount = 0;
+  let routedCount = 0;
+  let routedCostCentsTotal = 0;
+
+  for (const r of rows) {
+    const mode = (r.agentMode ?? "deterministic") as keyof typeof agentModes;
+    // Own-property check: `in` would also match inherited keys such as "toString".
+    if (Object.prototype.hasOwnProperty.call(agentModes, mode)) agentModes[mode] += 1;
+    const reachedModel = mode === "provider" || mode === "provider_fallback";
+    if (!reachedModel) continue;
+
+    routedCount += 1;
+    routedCostCentsTotal += r.estimatedCostCents ?? 0;
+    const provider = (r.provider ?? "unknown").trim() || "unknown";
+    providers[provider] = (providers[provider] ?? 0) + 1;
+
+    const modelId = (r.modelId ?? "").trim();
+    const tier = tierForModelId(modelId);
+    if (tier === "floor") floorCount += 1;
+    else if (tier === "escalated") escalatedCount += 1;
+    const modelKey = modelId || "(unrecorded model)";
+    if (!models[modelKey]) models[modelKey] = { count: 0, tier };
+    models[modelKey].count += 1;
+  }
+
+  const tierDenom = floorCount + escalatedCount;
+  const fallbackAttempts = agentModes.provider + agentModes.provider_fallback;
+  const modelMix = Object.entries(models)
+    .map(([modelId, v]) => ({ modelId, count: v.count, tier: v.tier }))
+    .sort((a, b) => b.count - a.count || a.modelId.localeCompare(b.modelId));
+  const providerMix = Object.entries(providers)
+    .map(([provider, count]) => ({ provider, count }))
+    .sort((a, b) => b.count - a.count || a.provider.localeCompare(b.provider));
+
+  return {
+    total: rows.length,
+    capped,
+    routedCount,
+    floorCount,
+    escalatedCount,
+    escalationRate: tierDenom > 0 ? round(escalatedCount / tierDenom, 3) : null,
+    providerFallbackRate:
+      fallbackAttempts > 0 ? round(agentModes.provider_fallback / fallbackAttempts, 3) : null,
+    avgCostCents: routedCount > 0 ? round(routedCostCentsTotal / routedCount, 4) : null,
+    totalCostCents: round(routedCostCentsTotal, 4),
+    agentModes,
+    modelMix,
+    providerMix,
+  };
+}
diff --git a/src/features/monitoring/views/AgentTelemetryDashboard.tsx b/src/features/monitoring/views/AgentTelemetryDashboard.tsx
index bede4aa6d..aace6b9f4 100644
--- a/src/features/monitoring/views/AgentTelemetryDashboard.tsx
+++ b/src/features/monitoring/views/AgentTelemetryDashboard.tsx
@@ -44,6 +44,7 @@ import { PipelineRollupPanel } from "@/features/monitoring/components/PipelineRo
 import { JudgeHeatmap, createDemoJudgeHeatmapData } from "@/features/telemetry/JudgeHeatmap";
 import { CostWaterfall } from "@/features/telemetry/CostWaterfall";
 import { FailureClusters, createDemoFailureClusters } from "@/features/telemetry/FailureClusters";
+import { LlmRoutingPanel } from "@/features/telemetry/LlmRoutingPanel";
 import {
   useLiveEvalScorecard,
   useLiveTraceAggregates,
@@ -669,6 +670,9 @@ function AgentTelemetryDashboardInner() {
           </SurfaceCard>
         </SurfaceSection>
 
+        {/* LLM Routing — live router observability from real /ask answers */}
+        <LlmRoutingPanel />
+
         {/* Error log */}
         <SurfaceSection
           title="Error Log"
diff --git a/src/features/telemetry/LlmRoutingPanel.tsx b/src/features/telemetry/LlmRoutingPanel.tsx
new file mode 100644
index 000000000..24ad52e7c
--- /dev/null
+++ b/src/features/telemetry/LlmRoutingPanel.tsx
@@ -0,0 +1,237 @@
+/**
+ * LlmRoutingPanel — operator-facing observability for the NodeBench LLM Router.
+ *
+ * Surfaces the EXISTING per-answer routing telemetry that the `/ask` path
+ * persists on `liveEventAnswers` (modelId / provider / agentMode /
+ * estimatedCostCents) so an operator can SEE `shared/llm/router.ts`'s
+ * `routeLLM("ask_answer", …)` decision working in production: how often the
+ * cheap Haiku floor served the turn vs. escalated to Sonnet, avg cost/answer,
+ * and the provider-fallback rate.
+ *
+ * Pattern: read-only projection over a bounded aggregate.
+ * Data source: `api.events.getAskRoutingTelemetry` (convex/events.ts) — a
+ *   GLOBAL, ≤1000-row bounded scan. Additive; does NOT touch routeLLM or the
+ *   /ask write path.
+ *
+ * Honesty (.claude/rules/agentic_reliability.md → HONEST_SCORES):
+ *   - Every number comes from the query. Rates that have no denominator arrive
+ *     as `null` and render "—" / a "no data yet" empty state — never a fake 0%.
+ *   - Only the loading icon pulses, and `motion-reduce:animate-none` turns that off.
+ *
+ * See: shared/llm/router.ts, docs/architecture/LLM_ROUTER.md
+ */
+import { memo } from "react";
+import { useQuery } from "convex/react";
+import { Cpu, ArrowUpRight, ShieldAlert } from "lucide-react";
+import { api } from "../../../convex/_generated/api";
+import { cn } from "@/lib/utils";
+import { SurfaceSection, SurfaceCard, SurfaceGrid, SurfaceStat, SurfaceBadge } from "@/shared/ui/SurfacePrimitives";
+import type { AskRoutingTelemetry } from "shared/llm/askRoutingTelemetry";
+
+const TERRACOTTA = "#d97757";
+
+/** Render a 0..1 rate as a whole-number percent; null/undefined becomes an honest "—". */
+function pct(rate: number | null | undefined): string {
+  return rate == null ? "—" : `${Math.round(rate * 100)}%`;
+}
+
+/** Convert cents to a 4-decimal "$" string; null/undefined becomes an honest "—". */
+function centsToUsd(cents: number | null | undefined): string {
+  const dollars = cents == null ? null : cents / 100;
+  return dollars === null ? "—" : `$${dollars.toFixed(4)}`;
+}
+
+const SECTION_HEADER = "text-[11px] uppercase tracking-[0.2em] text-content-muted";
+
+/** Empty state — a static card explaining that no /ask answer has reached a provider yet. Takes no props. */
+const RoutingEmptyState = memo(function RoutingEmptyState() {
+  return (
+    <SurfaceCard data-agent-action="llm-routing-empty">
+      <div className="flex flex-col items-center gap-2 py-6 text-center">
+        <Cpu className="h-5 w-5 text-content-muted" aria-hidden="true" />
+        <p className="text-sm font-medium text-content-secondary">No routed /ask traffic yet</p>
+        <p className="max-w-md text-xs text-content-muted">
+          The LLM router records a decision on every <code className="font-mono">/ask</code> answer
+          that reaches a provider. Once attendees ask questions in a live event room, the model mix
+          (Haiku floor vs. Sonnet escalation), escalation rate, and cost per answer appear here.
+        </p>
+      </div>
+    </SurfaceCard>
+  );
+});
+
+/** One distribution row: label, a bar filled to round(count / total × 100)% (0% when total is 0), then "count (pct%)". */
+const DistributionRow = memo(function DistributionRow({
+  label,
+  count,
+  total,
+  accent,
+}: {
+  label: string;
+  count: number;
+  total: number;
+  accent?: string;
+}) {
+  const widthPct = total > 0 ? Math.round((count / total) * 100) : 0;
+  return (
+    <div className="flex items-center gap-3">
+      <span className="w-44 shrink-0 truncate font-mono text-xs text-content-secondary" title={label}>
+        {label}
+      </span>
+      <div
+        className="h-1.5 flex-1 overflow-hidden rounded-full bg-white/[0.04]"
+        role="presentation"
+      >
+        <div
+          className="h-full rounded-full"
+          style={{ width: `${widthPct}%`, backgroundColor: accent ?? "rgba(255,255,255,0.25)" }}
+        />
+      </div>
+      <span className="w-20 shrink-0 text-right text-xs tabular-nums text-content-muted">
+        {count} ({widthPct}%)
+      </span>
+    </div>
+  );
+});
+
+export interface LlmRoutingPanelProps {
+  /** Data to render instead of the live query result (tests/storybook); `null` shows the empty state, omitted/`undefined` uses live data. */
+  dataOverride?: AskRoutingTelemetry | null | undefined;
+}
+
+/**
+ * Routing observability panel, rendered as a `SurfaceSection`. `undefined` data
+ * → loading card; `null` or zero routed answers → empty state; otherwise the
+ * stat grid + model/provider distributions. `dataOverride` skips the live query.
+ */
+export const LlmRoutingPanel = memo(function LlmRoutingPanel({ dataOverride }: LlmRoutingPanelProps) {
+  const liveData = useQuery(api.events.getAskRoutingTelemetry, dataOverride !== undefined ? "skip" : {}) as AskRoutingTelemetry | undefined;
+  const data = dataOverride !== undefined ? dataOverride : liveData;
+
+  const isLoading = data === undefined;
+  const isEmpty = !!data && data.routedCount === 0;
+  const hasFallbacks = (data?.providerFallbackRate ?? 0) > 0; // strict boolean — a bare 0 in JSX would render as "0"
+  const action = (
+    <span className="text-xs text-content-muted tabular-nums">
+      {data ? `${data.total} /ask answers${data.capped ? " (capped at 1000)" : ""}` : "loading…"}
+    </span>
+  );
+
+  return (
+    <SurfaceSection
+      title="LLM Routing"
+      subtitle="How the router splits /ask answers between the Haiku floor and Sonnet escalation — live, from real answers."
+      action={action}
+      data-agent-id="llm-routing"
+    >
+      {isLoading ? (
+        <SurfaceCard data-agent-action="llm-routing-loading">
+          <div className="flex items-center gap-2 py-4 text-sm text-content-muted">
+            <Cpu className="h-4 w-4 animate-pulse motion-reduce:animate-none" aria-hidden="true" />
+            Loading routing telemetry…
+          </div>
+        </SurfaceCard>
+      ) : isEmpty || !data ? (
+        <RoutingEmptyState />
+      ) : (
+        <div className="flex flex-col gap-4" role="region" aria-label="LLM routing telemetry">
+          {/* Headline stats */}
+          <SurfaceGrid>
+            <SurfaceCard data-agent-action="llm-routing-routed">
+              <SurfaceStat
+                value={data.routedCount}
+                label="Routed /ask answers"
+                sublabel={`${data.total} total · ${data.agentModes.cache} cache · ${data.agentModes.deterministic} deterministic`}
+              />
+            </SurfaceCard>
+            <SurfaceCard data-agent-action="llm-routing-escalation">
+              <div className="flex flex-col gap-1">
+                <span
+                  className="text-2xl font-semibold tabular-nums"
+                  style={{ color: data.escalationRate === null ? undefined : TERRACOTTA }}
+                >
+                  {pct(data.escalationRate)}
+                </span>
+                <span className={SECTION_HEADER}>Escalation rate</span>
+                <span className="text-[10px] text-content-muted">
+                  {data.escalatedCount} Sonnet / {data.floorCount} Haiku floor
+                </span>
+              </div>
+            </SurfaceCard>
+            <SurfaceCard data-agent-action="llm-routing-cost">
+              <SurfaceStat
+                value={centsToUsd(data.avgCostCents)}
+                label="Avg cost / answer"
+                sublabel={`${centsToUsd(data.totalCostCents)} across routed answers`}
+              />
+            </SurfaceCard>
+            <SurfaceCard data-agent-action="llm-routing-fallback">
+              <div className="flex flex-col gap-1">
+                <span
+                  className={cn(
+                    "flex items-center gap-1.5 text-2xl font-semibold tabular-nums",
+                    hasFallbacks
+                      ? "text-amber-400"
+                      : "text-content",
+                  )}
+                >
+                  {hasFallbacks && (
+                    <ShieldAlert className="h-4 w-4" aria-hidden="true" />
+                  )}
+                  {pct(data.providerFallbackRate)}
+                </span>
+                <span className={SECTION_HEADER}>Provider-fallback rate</span>
+                <span className="text-[10px] text-content-muted">
+                  share of provider attempts that fell back
+                </span>
+              </div>
+            </SurfaceCard>
+          </SurfaceGrid>
+
+          {/* Model mix (floor vs escalated). Rows use a grid so the bar column stretches beside the badge. */}
+          <SurfaceCard data-agent-action="llm-routing-model-mix">
+            <div className="mb-3 flex items-center gap-2">
+              <ArrowUpRight className="h-4 w-4 text-content-muted" aria-hidden="true" />
+              <span className={SECTION_HEADER}>Model mix</span>
+            </div>
+            <div className="flex flex-col gap-2">
+              {data.modelMix.map((m) => (
+                <div key={m.modelId} className="grid grid-cols-[minmax(0,1fr)_auto] items-center gap-3">
+                  <DistributionRow
+                    label={m.modelId}
+                    count={m.count}
+                    total={data.routedCount}
+                    accent={m.tier === "escalated" ? TERRACOTTA : "rgba(255,255,255,0.35)"}
+                  />
+                  <SurfaceBadge tone={m.tier === "escalated" ? "warning" : m.tier === "floor" ? "info" : "neutral"}>
+                    {m.tier === "escalated" ? "escalated" : m.tier === "floor" ? "floor" : "other"}
+                  </SurfaceBadge>
+                </div>
+              ))}
+            </div>
+          </SurfaceCard>
+
+          {/* Provider / agentMode mix */}
+          <SurfaceCard data-agent-action="llm-routing-provider-mix">
+            <div className="mb-3 flex items-center gap-2">
+              <Cpu className="h-4 w-4 text-content-muted" aria-hidden="true" />
+              <span className={SECTION_HEADER}>Provider mix</span>
+            </div>
+            <div className="flex flex-col gap-2">
+              {data.providerMix.map((p) => (
+                <DistributionRow
+                  key={p.provider}
+                  label={p.provider}
+                  count={p.count}
+                  total={data.routedCount}
+                />
+              ))}
+            </div>
+          </SurfaceCard>
+        </div>
+      )}
+    </SurfaceSection>
+  );
+});
+
+export default LlmRoutingPanel;
diff --git a/src/features/telemetry/index.ts b/src/features/telemetry/index.ts
index 272b0be4a..82f89ae8f 100644
--- a/src/features/telemetry/index.ts
+++ b/src/features/telemetry/index.ts
@@ -36,6 +36,10 @@ export type {
 export { LiveDataBanner } from "./LiveDataBanner";
 export type { LiveDataBannerProps } from "./LiveDataBanner";
 
+// LLM Router observability (LLM Router roadmap #3)
+export { LlmRoutingPanel } from "./LlmRoutingPanel";
+export type { LlmRoutingPanelProps } from "./LlmRoutingPanel";
+
 // Phase 3-6: Flywheel visuals
 export { JudgeHeatmap, createDemoJudgeHeatmapData } from "./JudgeHeatmap";
 export type { JudgeHeatmapData, JudgeHeatmapProps, JudgeCell } from "./JudgeHeatmap";