From 16b832a945d6c078a4dc78b5257999cc2f6fa8df Mon Sep 17 00:00:00 2001 From: hshum Date: Tue, 2 Jun 2026 12:22:01 -0700 Subject: [PATCH] feat(llm): LLM routing observability panel on the telemetry surface Surface the EXISTING per-answer routing telemetry the /ask path persists on liveEventAnswers (modelId / provider / agentMode / estimatedCostCents) so an operator can see shared/llm/router.ts working in production: Haiku floor vs. Sonnet escalation split, escalation rate, avg cost/answer, and provider- fallback rate. Read-only + additive - does NOT touch routeLLM or the /ask write path. Backend (additive, bounded, honest): - convex/events.ts: new GLOBAL query getAskRoutingTelemetry - a <=1000-row bounded scan over recent liveEventAnswers (BOUND via .take(cap)). Delegates the math to a pure aggregator so it is scenario-tested without a DB. - shared/llm/askRoutingTelemetry.ts: pure aggregateAskRouting() - floor vs. escalated decided by the same modelId.includes("haiku") convention the cost estimator + router floor use (DETERMINISTIC, sorted breakdowns). Rates are null (not a fabricated 0%) when there is no denominator (HONEST_SCORES). Frontend: - src/features/telemetry/LlmRoutingPanel.tsx: glass-card panel with headline stats, model mix, provider mix, an honest "No routed /ask traffic yet" empty state, loading state, aria labels + reduced-motion-safe. - Composed into AgentTelemetryDashboard (the named telemetry surface) + exported from the telemetry barrel. Tests: shared/llm/askRoutingTelemetry.test.ts - 10 scenario-based tests (operator floor/escalation split, true-0 vs null, cache/deterministic-only, provider-fallback, cost-over-routed-only, adversarial pinned/blank models, 1000-row scale, determinism). Verification: app tsc clean, convex tsc clean, vitest 22 pass (10 new + 12 router) + 78 existing event tests green, npm run build clean. 
Note: AgentTelemetryDashboard is currently orphaned (no live route mounts it) in the prod-parity build, so the panel compiles + is tested but is not yet visually live until its host is routed. Reported for reviewer. Co-Authored-By: Claude Opus 4.8 (1M context) --- convex/events.ts | 43 ++++ shared/llm/askRoutingTelemetry.test.ts | 220 ++++++++++++++++ shared/llm/askRoutingTelemetry.ts | 134 ++++++++++ .../views/AgentTelemetryDashboard.tsx | 4 + src/features/telemetry/LlmRoutingPanel.tsx | 237 ++++++++++++++++++ src/features/telemetry/index.ts | 4 + 6 files changed, 642 insertions(+) create mode 100644 shared/llm/askRoutingTelemetry.test.ts create mode 100644 shared/llm/askRoutingTelemetry.ts create mode 100644 src/features/telemetry/LlmRoutingPanel.tsx diff --git a/convex/events.ts b/convex/events.ts index 919bbb5cb..9ab178ff5 100644 --- a/convex/events.ts +++ b/convex/events.ts @@ -31,6 +31,7 @@ import { internal } from "./_generated/api"; import { action, query, mutation, internalMutation, internalQuery } from "./_generated/server"; import { enforceRateLimit } from "./scratchnodeRateLimit"; import { routeLLM, askAnswerSignals } from "../shared/llm/router"; +import { aggregateAskRouting } from "../shared/llm/askRoutingTelemetry"; import { rerankWithGemini, condenseQuery, type TriCandidate } from "../shared/search/triSearch"; class ConvexError> extends Error { @@ -980,6 +981,48 @@ export const getAskTelemetry = query({ }, }); +/** + * LLM ROUTING observability (LLM Router roadmap #3) — a GLOBAL, read-only + * aggregate over recent `/ask` answers showing the `shared/llm/router.ts` + * `routeLLM("ask_answer", …)` decision in production: how often the cheap + * Haiku floor served the turn vs. how often it escalated to Sonnet, the avg + * estimated cost per answer, and the provider/agentMode mix. + * + * Unlike `getAskTelemetry` (per-event, host-facing), this is operator-facing + * and spans ALL events — it answers "is the router actually working?" 
/**
 * LLM routing observability: a global, read-only aggregate over the most
 * recent `liveEventAnswers` rows, returned in the shape produced by
 * `aggregateAskRouting` (floor vs. escalated split, escalation rate, average
 * estimated cost, provider / agentMode mix).
 *
 * Unlike the per-event `getAskTelemetry`, this query takes no event id and
 * reads across all events.
 *
 * @param limit Optional row cap. Clamped to the range 1..1000; a missing or
 *   NaN value falls back to 1000 and a fractional value is floored, so the
 *   count handed to `.take()` is always a whole number.
 * @returns The aggregate for the scanned rows. `capped` is true when the scan
 *   returned exactly `cap` rows, i.e. the window is full. A table holding
 *   exactly `cap` rows therefore also reports `capped: true`.
 */
export const getAskRoutingTelemetry = query({
  args: { limit: v.optional(v.number()) },
  handler: async (ctx, { limit }) => {
    const MAX_ROWS = 1000; // BOUND: hard ceiling on rows read per call.

    // `v.number()` admits NaN and fractions. NaN survives Math.min/Math.max
    // unchanged and a fraction is passed through as-is, so sanitize before
    // clamping.
    // NOTE(review): `.take()` presumably rejects non-integer counts — confirm
    // against the Convex query API.
    const requested =
      limit === undefined || Number.isNaN(limit) ? MAX_ROWS : Math.floor(limit);
    const cap = Math.min(Math.max(requested, 1), MAX_ROWS);

    // Bounded descending scan; `.take(cap)` is the only read and never
    // exceeds MAX_ROWS. All arithmetic lives in the pure aggregator.
    const rows = await ctx.db.query("liveEventAnswers").order("desc").take(cap);
    return aggregateAskRouting(rows, rows.length >= cap);
  },
});
+ */ + it("computes the floor-vs-escalated split from real model ids", () => { + const rows: AskRoutingRow[] = [ + ...Array.from({ length: 7 }, () => answer({ modelId: HAIKU, estimatedCostCents: 0.01 })), + ...Array.from({ length: 3 }, () => answer({ modelId: SONNET, estimatedCostCents: 0.05 })), + ]; + const t = aggregateAskRouting(rows, false); + + expect(t.total).toBe(10); + expect(t.routedCount).toBe(10); + expect(t.floorCount).toBe(7); + expect(t.escalatedCount).toBe(3); + expect(t.escalationRate).toBe(0.3); + // model mix is sorted by count desc — Haiku floor first. + expect(t.modelMix[0]).toEqual({ modelId: HAIKU, count: 7, tier: "floor" }); + expect(t.modelMix[1]).toEqual({ modelId: SONNET, count: 3, tier: "escalated" }); + }); + + /** + * Persona: Operator on a calm room — every question was a quick lookup. + * Goal: Confirm the router did NOT over-escalate (cost discipline). + * Prior state: 5 routed answers, all Haiku. + * Expected: escalationRate = 0 (a REAL zero, not a null) — there IS a + * denominator (5 routed), the router genuinely never escalated. + */ + it("reports a TRUE 0% escalation when the floor served every routed answer", () => { + const rows = Array.from({ length: 5 }, () => answer({ modelId: HAIKU })); + const t = aggregateAskRouting(rows, false); + expect(t.routedCount).toBe(5); + expect(t.escalationRate).toBe(0); // real 0, not null + expect(t.escalatedCount).toBe(0); + }); +}); + +describe("aggregateAskRouting — HONEST_SCORES (no fabricated metrics)", () => { + /** + * Persona: Operator opens telemetry for a brand-new deployment. + * Goal: Must NOT see a fake "0% escalation / $0 healthy" from no data. + * Prior state: 0 answers. + * Expected: rates are null (panel renders "—"), counts are 0. 
+ */ + it("empty input → rates are null, never a fabricated 0% or $0/answer", () => { + const t = aggregateAskRouting([], false); + expect(t.total).toBe(0); + expect(t.routedCount).toBe(0); + expect(t.escalationRate).toBeNull(); + expect(t.providerFallbackRate).toBeNull(); + expect(t.avgCostCents).toBeNull(); + expect(t.totalCostCents).toBe(0); + expect(t.modelMix).toEqual([]); + expect(t.providerMix).toEqual([]); + }); + + /** + * Persona: Operator on a room where every answer was served from cache / + * the deterministic synthesizer (no model ever ran). + * Goal: The router metrics must stay null — these answers never invoked + * routeLLM, so there's no routing to report. + * Prior state: 4 cache + 2 deterministic answers, 0 provider attempts. + * Expected: routedCount 0, escalationRate null, providerFallbackRate null, + * but the agentMode mix still counts all 6 for completeness. + */ + it("cache/deterministic-only traffic → routing rates null, agentMode mix still counted", () => { + const rows: AskRoutingRow[] = [ + ...Array.from({ length: 4 }, () => ({ agentMode: "cache" as const, modelId: null, estimatedCostCents: 0 })), + ...Array.from({ length: 2 }, () => ({ agentMode: "deterministic" as const, modelId: null, estimatedCostCents: 0 })), + ]; + const t = aggregateAskRouting(rows, false); + expect(t.total).toBe(6); + expect(t.routedCount).toBe(0); + expect(t.escalationRate).toBeNull(); + expect(t.providerFallbackRate).toBeNull(); + expect(t.avgCostCents).toBeNull(); + expect(t.agentModes).toEqual({ provider: 0, provider_fallback: 0, cache: 4, deterministic: 2 }); + }); +}); + +describe("aggregateAskRouting — provider-fallback + cost", () => { + /** + * Persona: Operator notices answers feel degraded. + * Goal: See the provider-fallback rate — the headline degraded signal. + * Prior state: 8 provider + 2 provider_fallback (Anthropic primary fell back). 
+ * Expected: providerFallbackRate = 2/10 = 0.2; routedCount counts BOTH + * (a fallback still reached a model). + */ + it("provider-fallback rate = fallbacks / (provider + fallback)", () => { + const rows: AskRoutingRow[] = [ + ...Array.from({ length: 8 }, () => answer({ agentMode: "provider", modelId: HAIKU })), + ...Array.from({ length: 2 }, () => answer({ agentMode: "provider_fallback", modelId: SONNET })), + ]; + const t = aggregateAskRouting(rows, false); + expect(t.routedCount).toBe(10); + expect(t.providerFallbackRate).toBe(0.2); + expect(t.agentModes.provider).toBe(8); + expect(t.agentModes.provider_fallback).toBe(2); + }); + + /** + * Persona: Finance-minded operator checking cost discipline. + * Goal: Avg cost/answer should reflect ONLY routed answers (cache is free + * and must not dilute the average toward $0). + * Prior state: 2 routed answers @ 0.10 + 0.30 cents, plus 3 free cache answers. + * Expected: avgCostCents = 0.40/2 = 0.2 (cache excluded from the average). + */ + it("avg cost is over ROUTED answers only — free cache hits don't dilute it", () => { + const rows: AskRoutingRow[] = [ + answer({ estimatedCostCents: 0.1 }), + answer({ estimatedCostCents: 0.3 }), + { agentMode: "cache", modelId: null, estimatedCostCents: 0 }, + { agentMode: "cache", modelId: null, estimatedCostCents: 0 }, + { agentMode: "cache", modelId: null, estimatedCostCents: 0 }, + ]; + const t = aggregateAskRouting(rows, false); + expect(t.routedCount).toBe(2); + expect(t.totalCostCents).toBe(0.4); + expect(t.avgCostCents).toBe(0.2); + }); +}); + +describe("aggregateAskRouting — adversarial + scale + determinism", () => { + /** + * Persona: Adversarial / messy data — env-pinned heavy model, blank model + * ids, weird providers, missing cost. + * Goal: Never crash; classify a pinned non-Haiku model as escalated; a + * blank model id as "other" (excluded from the escalation denom). + * Prior state: 1 opus (pinned heavy), 1 blank-model provider answer, 1 haiku. 
+ * Expected: escalation denom = floor + escalated = 1 haiku + 1 opus = 2; the + * blank-model row is "other" and excluded; escalationRate = 1/2. + */ + it("classifies pinned-heavy as escalated and blank model id as 'other'", () => { + const rows: AskRoutingRow[] = [ + answer({ modelId: "claude-opus-4-7", provider: "anthropic" }), + answer({ modelId: "", provider: "anthropic" }), // unrecorded model + answer({ modelId: HAIKU, provider: "anthropic" }), + ]; + const t = aggregateAskRouting(rows, false); + expect(t.routedCount).toBe(3); + expect(t.floorCount).toBe(1); + expect(t.escalatedCount).toBe(1); + // 1 escalated / (1 floor + 1 escalated) — the "other" row is excluded. + expect(t.escalationRate).toBe(0.5); + const other = t.modelMix.find((m) => m.modelId === "(unrecorded model)"); + expect(other?.tier).toBe("other"); + }); + + /** + * Long-running accumulation: a multi-day room scanned at the BOUND cap. + * Goal: BOUND — the caller's `capped` flag is surfaced so the UI can say + * "(capped at 1000)". The aggregate stays correct over a large slice. + * Prior state: 1000 routed answers (the cap), all Haiku, capped=true. + * Expected: total 1000, capped true, escalationRate 0 (all floor), O(n) stable. + */ + it("scale: stays correct + honest about truncation at the read cap", () => { + const rows = Array.from({ length: 1000 }, () => answer({ modelId: HAIKU })); + const t = aggregateAskRouting(rows, /* capped */ true); + expect(t.total).toBe(1000); + expect(t.capped).toBe(true); + expect(t.routedCount).toBe(1000); + expect(t.escalationRate).toBe(0); + }); + + /** + * Determinism (replay safety): identical rows → byte-identical output, and the + * breakdowns tie-break deterministically by key when counts are equal. 
+ */ + it("is deterministic: same rows in → identical sorted breakdowns out", () => { + const rows: AskRoutingRow[] = [ + answer({ provider: "zeta", modelId: SONNET }), + answer({ provider: "alpha", modelId: HAIKU }), + answer({ provider: "alpha", modelId: SONNET }), + answer({ provider: "zeta", modelId: HAIKU }), + ]; + const a = aggregateAskRouting(rows, false); + const b = aggregateAskRouting(rows, false); + expect(a).toEqual(b); + // Equal counts (2 each) → alpha before zeta (localeCompare tie-break). + expect(a.providerMix.map((p) => p.provider)).toEqual(["alpha", "zeta"]); + }); +}); + +describe("tierForModelId", () => { + it("maps Haiku → floor, Sonnet/Opus → escalated, blank → other", () => { + expect(tierForModelId(HAIKU)).toBe("floor"); + expect(tierForModelId("claude-haiku-pinned")).toBe("floor"); + expect(tierForModelId(SONNET)).toBe("escalated"); + expect(tierForModelId("claude-opus-4-7")).toBe("escalated"); + expect(tierForModelId("")).toBe("other"); + }); +}); diff --git a/shared/llm/askRoutingTelemetry.ts b/shared/llm/askRoutingTelemetry.ts new file mode 100644 index 000000000..0e40d2921 --- /dev/null +++ b/shared/llm/askRoutingTelemetry.ts @@ -0,0 +1,134 @@ +/** + * shared/llm/askRoutingTelemetry.ts — pure aggregation for LLM-router + * observability (LLM Router roadmap #3). + * + * `convex/events.ts:getAskRoutingTelemetry` does a BOUNDED read of recent + * `liveEventAnswers` rows and hands them here. Keeping the math in a pure, + * dependency-free function (no `ctx.db`, no Convex types) means it can be + * scenario-tested directly with plain arrays — exactly like `router.ts` itself + * — and guarantees DETERMINISTIC output (sorted breakdowns, no Date/random). + * + * Floor vs. escalated is decided by the SAME `modelId.includes("haiku")` + * convention the cost estimator in events.ts and the router's Haiku floor use, + * so the panel can never disagree with what was actually billed/routed. 
/**
 * Pure aggregation for LLM-router observability.
 *
 * Contract:
 * - No I/O and no clock/random reads: the same rows always produce the same
 *   object. Breakdowns are sorted by count (desc) and then by key using plain
 *   code-unit comparison, so the order does not depend on the host locale.
 * - Rates are `null` (never a fabricated 0) when their denominator is 0.
 * - The caller bounds the input; this module is a single pass over whatever
 *   slice it is given.
 */

/** The fields of a `liveEventAnswers` row this aggregate reads. */
export interface AskRoutingRow {
  agentMode?: "deterministic" | "provider" | "provider_fallback" | "cache" | null;
  provider?: string | null;
  modelId?: string | null;
  estimatedCostCents?: number | null;
}

export type RouteTierBucket = "floor" | "escalated" | "other";

export interface AskRoutingTelemetry {
  /** Total /ask answers scanned (all modes). */
  total: number;
  /** True when the scan hit the read cap (passed through from the caller). */
  capped: boolean;
  /** Answers that actually reached a model (provider + provider_fallback). */
  routedCount: number;
  /** Routed answers whose model id contains "haiku". */
  floorCount: number;
  /** Routed answers with a recorded model id that does not contain "haiku". */
  escalatedCount: number;
  /** escalated / (floor + escalated). Null when no routed answer recorded a model. */
  escalationRate: number | null;
  /** provider_fallback / (provider + provider_fallback). Null when no provider attempts. */
  providerFallbackRate: number | null;
  /** Avg estimated cost (cents) per routed answer. Null when nothing routed. */
  avgCostCents: number | null;
  /** Total estimated cost (cents) across routed answers. */
  totalCostCents: number;
  /** Count by agentMode across ALL scanned answers. */
  agentModes: { provider: number; provider_fallback: number; cache: number; deterministic: number };
  /** Routed-answer count per model id, sorted by count desc then id. */
  modelMix: Array<{ modelId: string; count: number; tier: RouteTierBucket }>;
  /** Routed-answer count per provider, sorted by count desc then provider. */
  providerMix: Array<{ provider: string; count: number }>;
}

type AgentModeCounts = AskRoutingTelemetry["agentModes"];

/** Round `x` to `p` decimal places. */
function round(x: number, p: number): number {
  const scale = 10 ** p;
  return Math.round(x * scale) / scale;
}

/** Locale-independent string ordering (UTF-16 code units) for stable tie-breaks. */
function compareKeys(a: string, b: string): number {
  if (a < b) return -1;
  return a > b ? 1 : 0;
}

/**
 * True only for the four own keys of the counter object. An `in` check would
 * also accept inherited names such as "toString".
 */
function isCountedMode(mode: string, counts: AgentModeCounts): mode is keyof AgentModeCounts {
  return Object.prototype.hasOwnProperty.call(counts, mode);
}

/**
 * Classify a model id into the floor / escalated / other buckets.
 *
 * @param modelId Model id as recorded on the answer. Null, undefined, empty
 *   and whitespace-only ids are "other" (no model recorded).
 * @returns "floor" when the id contains "haiku" (case-insensitive),
 *   "escalated" for any other non-blank id.
 */
export function tierForModelId(modelId: string | null | undefined): RouteTierBucket {
  const id = (modelId ?? "").trim();
  if (!id) return "other";
  return id.toLowerCase().includes("haiku") ? "floor" : "escalated";
}

/**
 * Aggregate a bounded slice of recent /ask answers into routing telemetry.
 *
 * Only rows whose agentMode is `provider` or `provider_fallback` count as
 * routed; every other row contributes to `total` and, when its mode is one of
 * the four known modes, to `agentModes`. A missing agentMode is counted as
 * `deterministic`.
 *
 * @param rows Rows to aggregate; not mutated.
 * @param capped Whether the caller's read hit its cap; echoed in the result.
 * @returns Counts, rates (null without a denominator) and sorted breakdowns.
 */
export function aggregateAskRouting(rows: readonly AskRoutingRow[], capped: boolean): AskRoutingTelemetry {
  const agentModes: AgentModeCounts = { provider: 0, provider_fallback: 0, cache: 0, deterministic: 0 };
  // Maps rather than object literals: keys come from stored data, and names
  // like "constructor" or "__proto__" must be counted like any other key.
  const providers = new Map<string, number>();
  const models = new Map<string, { count: number; tier: RouteTierBucket }>();
  let floorCount = 0;
  let escalatedCount = 0;
  let routedCount = 0;
  let routedCostCentsTotal = 0;

  for (const r of rows) {
    const mode: string = r.agentMode ?? "deterministic";
    if (isCountedMode(mode, agentModes)) agentModes[mode] += 1;

    if (mode !== "provider" && mode !== "provider_fallback") continue;
    routedCount += 1;

    // Missing, non-finite or negative estimates add nothing, so one bad row
    // cannot turn the total and the average into NaN.
    const cost = r.estimatedCostCents;
    if (typeof cost === "number" && Number.isFinite(cost) && cost > 0) {
      routedCostCentsTotal += cost;
    }

    const provider = (r.provider ?? "").trim() || "unknown";
    providers.set(provider, (providers.get(provider) ?? 0) + 1);

    const modelId = (r.modelId ?? "").trim();
    const tier = tierForModelId(modelId);
    if (tier === "floor") floorCount += 1;
    else if (tier === "escalated") escalatedCount += 1;

    const modelKey = modelId || "(unrecorded model)";
    const seen = models.get(modelKey);
    if (seen) seen.count += 1;
    else models.set(modelKey, { count: 1, tier });
  }

  // Rows without a recorded model ("other") are excluded from the escalation
  // denominator.
  const tierDenom = floorCount + escalatedCount;
  const fallbackAttempts = agentModes.provider + agentModes.provider_fallback;

  const modelMix = Array.from(models, ([modelId, m]) => ({ modelId, count: m.count, tier: m.tier })).sort(
    (a, b) => b.count - a.count || compareKeys(a.modelId, b.modelId),
  );
  const providerMix = Array.from(providers, ([provider, count]) => ({ provider, count })).sort(
    (a, b) => b.count - a.count || compareKeys(a.provider, b.provider),
  );

  return {
    total: rows.length,
    capped,
    routedCount,
    floorCount,
    escalatedCount,
    escalationRate: tierDenom > 0 ? round(escalatedCount / tierDenom, 3) : null,
    providerFallbackRate:
      fallbackAttempts > 0 ? round(agentModes.provider_fallback / fallbackAttempts, 3) : null,
    avgCostCents: routedCount > 0 ? round(routedCostCentsTotal / routedCount, 4) : null,
    totalCostCents: round(routedCostCentsTotal, 4),
    agentModes,
    modelMix,
    providerMix,
  };
}
+
+ + ); +}); + +/** A labeled horizontal bar in a small distribution list. */ +const DistributionRow = memo(function DistributionRow({ + label, + count, + total, + accent, +}: { + label: string; + count: number; + total: number; + accent?: string; +}) { + const widthPct = total > 0 ? Math.round((count / total) * 100) : 0; + return ( +
+ + {label} + +
+
+
+ + {count} ({widthPct}%) + +
+ ); +}); + +export interface LlmRoutingPanelProps { + /** Override the data (used in tests/storybook). When omitted, queries live data. */ + dataOverride?: AskRoutingTelemetry | null | undefined; +} + +/** + * The routing observability panel. Composes into the telemetry surface as a + * `SurfaceSection`. Loading (undefined) → skeleton-ish card; empty → empty + * state; data → stat grid + distributions. + */ +export const LlmRoutingPanel = memo(function LlmRoutingPanel({ dataOverride }: LlmRoutingPanelProps) { + const liveData = useQuery(api.events.getAskRoutingTelemetry, {}) as AskRoutingTelemetry | undefined; + const data = dataOverride !== undefined ? dataOverride : liveData; + + const isLoading = data === undefined; + const isEmpty = !!data && data.routedCount === 0; + + const action = ( + + {data ? `${data.total} /ask answers${data.capped ? " (capped at 1000)" : ""}` : "loading…"} + + ); + + return ( + + {isLoading ? ( + +
+
+
+ ) : isEmpty || !data ? ( + + ) : ( +
+ {/* Headline stats */} + + + + + +
+ + {pct(data.escalationRate)} + + Escalation rate + + {data.escalatedCount} Sonnet / {data.floorCount} Haiku floor + +
+
+ + + + +
+ 0 + ? "text-amber-400" + : "text-content", + )} + > + {data.providerFallbackRate && data.providerFallbackRate > 0 && ( + + Provider-fallback rate + + share of provider attempts that fell back + +
+
+
+ + {/* Model mix (floor vs escalated) */} + +
+
+
+ {data.modelMix.map((m) => ( +
+ + + {m.tier === "escalated" ? "escalated" : m.tier === "floor" ? "floor" : "other"} + +
+ ))} +
+
+ + {/* Provider / agentMode mix */} + +
+
+
+ {data.providerMix.map((p) => ( + + ))} +
+
+
+ )} +
+ ); +}); + +export default LlmRoutingPanel; diff --git a/src/features/telemetry/index.ts b/src/features/telemetry/index.ts index 272b0be4a..82f89ae8f 100644 --- a/src/features/telemetry/index.ts +++ b/src/features/telemetry/index.ts @@ -36,6 +36,10 @@ export type { export { LiveDataBanner } from "./LiveDataBanner"; export type { LiveDataBannerProps } from "./LiveDataBanner"; +// LLM Router observability (LLM Router roadmap #3) +export { LlmRoutingPanel } from "./LlmRoutingPanel"; +export type { LlmRoutingPanelProps } from "./LlmRoutingPanel"; + // Phase 3-6: Flywheel visuals export { JudgeHeatmap, createDemoJudgeHeatmapData } from "./JudgeHeatmap"; export type { JudgeHeatmapData, JudgeHeatmapProps, JudgeCell } from "./JudgeHeatmap";