// Wire types for /_plugin/spend/by-agent-user, declared together
// because they are only ever used as one nested response document:
// envelope → per-pair rows → per-provider slices.
type (
	// ProviderSpend is the share of one (agent, user) pairing's spend
	// that went to a single provider. It rides inside AgentUserSpend
	// so the canvas can size gateway→provider edges (and fill the
	// provider drawer) without issuing a second request.
	ProviderSpend struct {
		Provider     string  `json:"provider"`
		TotalCost    float64 `json:"total_cost"`
		RequestCount int64   `json:"request_count"`
	}

	// AgentUserSpend is a single result row: the totals for one
	// (agent-name × user-id) pairing. Log rows lacking either
	// dimension never produce a row (same policy as by-agent and
	// by-user).
	//
	// Providers splits the same totals by provider; it is a
	// breakdown of TotalCost / RequestCount, not an extra amount on
	// top — summing Providers[*].TotalCost gives TotalCost, and
	// likewise for RequestCount.
	AgentUserSpend struct {
		AgentName    string          `json:"agent_name"`
		UserID       string          `json:"user_id"`
		UserName     string          `json:"user_name"`
		TotalCost    float64         `json:"total_cost"`
		TotalTokens  int64           `json:"total_tokens"`
		RequestCount int64           `json:"request_count"`
		Providers    []ProviderSpend `json:"providers"`
	}

	// SpendByAgentUserResponse is the top-level JSON document
	// returned by /_plugin/spend/by-agent-user: the window that was
	// queried plus one AgentUserSpend per pairing.
	SpendByAgentUserResponse struct {
		Window  string           `json:"window"`
		Results []AgentUserSpend `json:"results"`
	}
)
type HistogramPoint struct { @@ -338,6 +375,130 @@ func (h *observabilityHandlers) spendByUser(w http.ResponseWriter, r *http.Reque writeJSON(w, http.StatusOK, out) } +// ─── /_plugin/spend/by-agent-user ──────────────────────────────────── +// +// Fan-out of by-agent × by-user in a single pass over the same +// 200k-row ceiling as the other rollups. The flowchart canvas needs +// one row per (agent, user) pair — calling by-agent once per user +// would be N round-trips and N×200k row scans; doing it server-side +// here is a single scan that buckets by a compound key. +// +// Same dim-filter contract as spendByAgent: the optional +// ?user_id= / ?agent_name= scope down the data set at the Bifrost +// query layer. With no filter you get the whole swarm matrix. +func (h *observabilityHandlers) spendByAgentUser(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + methodNotAllowed(w, http.MethodGet) + return + } + window, start, end, ok := parseWindow(w, r) + if !ok { + return + } + + logs, err := h.logs.searchAll(r.Context(), searchOpts{ + StartTime: &start, + EndTime: &end, + Metadata: metadataFilterFromQuery(r), + }, 1000, 200_000) + if err != nil { + writeUpstreamError(w, err, "spend.by_agent_user") + return + } + + // Compound-key bucket. Using "agent\x00user" as the map key + // avoids allocating a struct-keyed map (and any collision + // concern — \x00 is the one byte we know never appears in a + // metadata value). `byProvider` is a sub-bucket keyed on the + // log row's `provider` column (anthropic/openai/...), so the + // per-pair breakdown comes out of the same single pass — no + // extra Bifrost calls. 
+ type providerAgg struct { + cost float64 + count int64 + } + type agg struct { + agent string + user string + cost float64 + tokens int64 + count int64 + byProvider map[string]*providerAgg + } + by := map[string]*agg{} + for _, l := range logs { + name := l.Metadata["agent-name"] + uid := l.Metadata["user-id"] + if name == "" || uid == "" { + continue + } + k := name + "\x00" + uid + a, ok := by[k] + if !ok { + a = &agg{ + agent: name, + user: uid, + byProvider: map[string]*providerAgg{}, + } + by[k] = a + } + a.cost += l.Cost + a.count++ + // Provider is recorded on every Bifrost log row; unattributed + // rows would skip the loop above before reaching here. + // Defensive: fall back to "unknown" so a missing provider + // can't silently drop request counts from the breakdown. + prov := l.Provider + if prov == "" { + prov = "unknown" + } + pa, ok := a.byProvider[prov] + if !ok { + pa = &providerAgg{} + a.byProvider[prov] = pa + } + pa.cost += l.Cost + pa.count++ + } + + out := SpendByAgentUserResponse{ + Window: window, + Results: make([]AgentUserSpend, 0, len(by)), + } + for _, a := range by { + providers := make([]ProviderSpend, 0, len(a.byProvider)) + for prov, pa := range a.byProvider { + providers = append(providers, ProviderSpend{ + Provider: prov, + TotalCost: pa.cost, + RequestCount: pa.count, + }) + } + // Deterministic order: cost desc, then provider name as a + // stable tiebreaker. Keeps the SPA's render order stable + // across refreshes so list animations don't reshuffle. 
+ sort.Slice(providers, func(i, j int) bool { + if providers[i].TotalCost != providers[j].TotalCost { + return providers[i].TotalCost > providers[j].TotalCost + } + return providers[i].Provider < providers[j].Provider + }) + out.Results = append(out.Results, AgentUserSpend{ + AgentName: a.agent, + UserID: a.user, + UserName: a.user, // user_id == user_name in v2 (see spendByUser note) + TotalCost: a.cost, + TotalTokens: a.tokens, + RequestCount: a.count, + Providers: providers, + }) + } + sort.Slice(out.Results, func(i, j int) bool { + return out.Results[i].TotalCost > out.Results[j].TotalCost + }) + writeJSON(w, http.StatusOK, out) +} + // ─── /_plugin/histogram/cost ───────────────────────────────────────── // // Same agent-name-in-metadata story as by-agent: bucket+sum in Go. diff --git a/gateway/internal/adminapi/observability_test.go b/gateway/internal/adminapi/observability_test.go index 513981563..19a6a8e81 100644 --- a/gateway/internal/adminapi/observability_test.go +++ b/gateway/internal/adminapi/observability_test.go @@ -317,6 +317,86 @@ func TestSpendByUser(t *testing.T) { } } +// ─── spend.by-agent-user ───────────────────────────────────────────── + +func TestSpendByAgentUser(t *testing.T) { + now := time.Now().UTC() + bf := newFakeBifrost(t, sampleLogs(now)) + srv := newObservabilityTestServer(t, bf) + + resp := bearerGet(t, srv, "/_plugin/spend/by-agent-user?window=1h") + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("status %d", resp.StatusCode) + } + var out SpendByAgentUserResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + t.Fatal(err) + } + // Sample data: (coder, u_alice, 0.15), (web-search, u_bob, 0.02). 
+ if out.Window != "1h" || len(out.Results) != 2 { + t.Fatalf("unexpected: %+v", out) + } + if out.Results[0].AgentName != "coder" || out.Results[0].UserID != "u_alice" { + t.Errorf("first row: %+v", out.Results[0]) + } + if out.Results[0].TotalCost < 0.149 || out.Results[0].RequestCount != 2 { + t.Errorf("coder/alice agg: %+v", out.Results[0]) + } + if out.Results[1].AgentName != "web-search" || out.Results[1].UserID != "u_bob" { + t.Errorf("second row: %+v", out.Results[1]) + } + // Provider breakdown — sampleLogs gives (coder, alice) two + // anthropic rows and (web-search, bob) one openai row. + if len(out.Results[0].Providers) != 1 || + out.Results[0].Providers[0].Provider != "anthropic" || + out.Results[0].Providers[0].RequestCount != 2 { + t.Errorf("coder/alice providers: %+v", out.Results[0].Providers) + } + if len(out.Results[1].Providers) != 1 || + out.Results[1].Providers[0].Provider != "openai" || + out.Results[1].Providers[0].RequestCount != 1 { + t.Errorf("web-search/bob providers: %+v", out.Results[1].Providers) + } +} + +func TestSpendByAgentUser_FiltersUnattributed(t *testing.T) { + now := time.Now().UTC() + logs := []fakeLog{ + // no agent-name → excluded + { + ID: "a", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano), + Status: "success", Cost: 1.0, + Metadata: map[string]string{"user-id": "u_alice"}, + }, + // no user-id → excluded + { + ID: "b", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano), + Status: "success", Cost: 1.0, + Metadata: map[string]string{"agent-name": "coder"}, + }, + // both → included + { + ID: "c", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano), + Status: "success", Cost: 0.5, + Metadata: map[string]string{"agent-name": "coder", "user-id": "u_alice"}, + }, + } + bf := newFakeBifrost(t, logs) + srv := newObservabilityTestServer(t, bf) + + resp := bearerGet(t, srv, "/_plugin/spend/by-agent-user?window=1h") + defer resp.Body.Close() + var out SpendByAgentUserResponse + _ = 
json.NewDecoder(resp.Body).Decode(&out) + if len(out.Results) != 1 { + t.Fatalf("expected 1 row, got %d: %+v", len(out.Results), out.Results) + } + if out.Results[0].AgentName != "coder" || out.Results[0].UserID != "u_alice" { + t.Errorf("row: %+v", out.Results[0]) + } +} + // ─── histogram.cost ────────────────────────────────────────────────── func TestHistogramCost_ByAgent(t *testing.T) { diff --git a/gateway/internal/adminapi/server.go b/gateway/internal/adminapi/server.go index fec27cba0..176514c5f 100644 --- a/gateway/internal/adminapi/server.go +++ b/gateway/internal/adminapi/server.go @@ -232,6 +232,7 @@ func registerRoutes(mux *http.ServeMux, deps routeDeps) { obs := newObservabilityHandlers(deps.logstore) mux.HandleFunc("/_plugin/spend/by-agent", cookieOrBearer(obs.spendByAgent)) mux.HandleFunc("/_plugin/spend/by-user", cookieOrBearer(obs.spendByUser)) + mux.HandleFunc("/_plugin/spend/by-agent-user", cookieOrBearer(obs.spendByAgentUser)) mux.HandleFunc("/_plugin/histogram/cost", cookieOrBearer(obs.histogramCost)) // /_plugin/runs/ takes a trailing path segment as run-id mux.HandleFunc("/_plugin/runs/", cookieOrBearer(obs.runDetail)) diff --git a/gateway/internal/adminapi/ui/dist/index.html b/gateway/internal/adminapi/ui/dist/index.html index aad50684e..813e9946e 100644 --- a/gateway/internal/adminapi/ui/dist/index.html +++ b/gateway/internal/adminapi/ui/dist/index.html @@ -4,9 +4,9 @@ - Agent Gateway - - + Agent Mothership + +
diff --git a/gateway/internal/adminapi/ui/index.html b/gateway/internal/adminapi/ui/index.html index 525189d62..7d63a72c2 100644 --- a/gateway/internal/adminapi/ui/index.html +++ b/gateway/internal/adminapi/ui/index.html @@ -4,7 +4,7 @@ - Agent Gateway + Agent Mothership
diff --git a/gateway/internal/adminapi/ui/package.json b/gateway/internal/adminapi/ui/package.json index 1e3ef096a..74936f0da 100644 --- a/gateway/internal/adminapi/ui/package.json +++ b/gateway/internal/adminapi/ui/package.json @@ -15,6 +15,8 @@ "@preact/signals": "^2.0.2", "@tanstack/react-query": "^5.62.0", "preact": "^10.25.0", + "system-canvas": "^0.1.19", + "system-canvas-react": "^0.1.25", "uplot": "^1.6.31", "wouter": "^3.4.2", "wouter-preact": "^3.4.2" diff --git a/gateway/internal/adminapi/ui/src/api/queries.ts b/gateway/internal/adminapi/ui/src/api/queries.ts index 709276f58..facca0f03 100644 --- a/gateway/internal/adminapi/ui/src/api/queries.ts +++ b/gateway/internal/adminapi/ui/src/api/queries.ts @@ -13,6 +13,7 @@ import type { MeResponse, RunDetailResponse, SpendByAgentResponse, + SpendByAgentUserResponse, SpendByUserResponse, TrustOrg, UserDetailResponse, @@ -54,6 +55,24 @@ export function useSpendByAgent(window: Window, userID?: string) { }); } +// ─── /spend/by-agent-user ─────────────────────────────────────────── +// +// Single-pass (agent × user) crossing for the Canvas page. One +// round-trip replaces N parallel by-agent?user_id=… calls. 30s poll +// mirrors the other rollups so all three feel synchronized. 
+ +export function useSpendByAgentUser(window: Window) { + return useQuery({ + queryKey: ["spend", "by-agent-user", window], + queryFn: () => + apiFetch( + `/spend/by-agent-user?window=${encodeURIComponent(window)}` + ), + refetchInterval: 30_000, + staleTime: 10_000, + }); +} + // ─── /spend/by-user ───────────────────────────────────────────────── export function useSpendByUser(window: Window) { diff --git a/gateway/internal/adminapi/ui/src/api/types.ts b/gateway/internal/adminapi/ui/src/api/types.ts index f9790a90e..f8a1698e6 100644 --- a/gateway/internal/adminapi/ui/src/api/types.ts +++ b/gateway/internal/adminapi/ui/src/api/types.ts @@ -49,6 +49,37 @@ export interface SpendByUserResponse { results: UserSpend[]; } +// Per-provider slice inside an AgentUserSpend row. Lets the canvas +// drive gateway→provider edge widths from real spend; same Bifrost +// call that computes the (agent × user) rollup also fills these, +// so no extra round-trips. +export interface ProviderSpend { + provider: string; + total_cost: number; + request_count: number; +} + +// One row of /_plugin/spend/by-agent-user — the (agent × user) fan-out +// the Canvas page uses to render one box per pairing without N round +// trips. Rows missing either dim are excluded server-side. +// +// `providers` sums to `total_cost` / `request_count` and is sorted by +// cost desc, with provider name as a stable tiebreaker. 
+export interface AgentUserSpend { + agent_name: string; + user_id: string; + user_name: string; + total_cost: number; + total_tokens: number; + request_count: number; + providers: ProviderSpend[]; +} + +export interface SpendByAgentUserResponse { + window: string; + results: AgentUserSpend[]; +} + export interface HistogramPoint { ts: string; cost: number; diff --git a/gateway/internal/adminapi/ui/src/app.tsx b/gateway/internal/adminapi/ui/src/app.tsx index aa2e3651a..42fcda04e 100644 --- a/gateway/internal/adminapi/ui/src/app.tsx +++ b/gateway/internal/adminapi/ui/src/app.tsx @@ -17,6 +17,7 @@ import { UnauthorizedError } from "./api/client"; import { Shell } from "./components/layout/Shell"; import { AgentDetail } from "./pages/AgentDetail"; import { Agents } from "./pages/Agents"; +import { Canvas } from "./pages/Canvas"; import { Dashboard } from "./pages/Dashboard"; import { Login } from "./pages/Login"; import { NotFound } from "./pages/NotFound"; @@ -106,11 +107,16 @@ function AppShell() { - + + + + + + diff --git a/gateway/internal/adminapi/ui/src/components/icons.tsx b/gateway/internal/adminapi/ui/src/components/icons.tsx index 2a125b6a4..ec8ac213e 100644 --- a/gateway/internal/adminapi/ui/src/components/icons.tsx +++ b/gateway/internal/adminapi/ui/src/components/icons.tsx @@ -42,6 +42,25 @@ export function UserIcon(props: IconProps) { ); } +/** ChevronLeftIcon — used by the sidebar collapse button. */ +export function ChevronLeftIcon(props: IconProps) { + return ( + + + + ); +} + +/** ChevronRightIcon — used by the topbar expand button when the + * sidebar is hidden. Mirror of ChevronLeftIcon. */ +export function ChevronRightIcon(props: IconProps) { + return ( + + + + ); +} + /** BotIcon — friendly bot head (antenna, screen-face, side-ports). 
*/ export function BotIcon(props: IconProps) { return ( diff --git a/gateway/internal/adminapi/ui/src/components/layout/Shell.tsx b/gateway/internal/adminapi/ui/src/components/layout/Shell.tsx index e82697000..5110ab2b3 100644 --- a/gateway/internal/adminapi/ui/src/components/layout/Shell.tsx +++ b/gateway/internal/adminapi/ui/src/components/layout/Shell.tsx @@ -1,17 +1,52 @@ // Shell = sidebar + topbar + main outlet. Every authed page wraps // its content in {...}; login + standalone error // pages bypass this. +// +// Sidebar-collapse state lives here so both (renders the +// "collapse" chevron next to the brand) and (renders the +// "expand" chevron when the sidebar is hidden) can drive the same +// toggle. Persisted to localStorage so a refresh keeps the operator's +// chosen layout — the Canvas page in particular wants the full width +// for the flowchart-to-come. import type { ComponentChildren } from "preact"; +import { useCallback, useEffect, useState } from "preact/hooks"; import { Sidebar } from "./Sidebar"; import { Topbar } from "./Topbar"; +const STORAGE_KEY = "ui.sidebar.collapsed"; + +function readInitialCollapsed(): boolean { + // Guard SSR / non-browser contexts even though we never run there + // — the cost is one typeof check and it keeps the import safe to + // pull in from tests that stub window away. + if (typeof window === "undefined") return false; + try { + return window.localStorage.getItem(STORAGE_KEY) === "1"; + } catch { + return false; + } +} + export function Shell({ children }: { children: ComponentChildren }) { + const [collapsed, setCollapsed] = useState(readInitialCollapsed); + + useEffect(() => { + try { + window.localStorage.setItem(STORAGE_KEY, collapsed ? "1" : "0"); + } catch { + // localStorage unavailable (privacy mode, etc.) — non-fatal, + // the toggle still works in-memory. + } + }, [collapsed]); + + const toggle = useCallback(() => setCollapsed((v) => !v), []); + return ( -
- - +
+ {collapsed ? null : } +
{children}
); diff --git a/gateway/internal/adminapi/ui/src/components/layout/Sidebar.tsx b/gateway/internal/adminapi/ui/src/components/layout/Sidebar.tsx index 0195fb4e1..00ede6813 100644 --- a/gateway/internal/adminapi/ui/src/components/layout/Sidebar.tsx +++ b/gateway/internal/adminapi/ui/src/components/layout/Sidebar.tsx @@ -1,5 +1,7 @@ import { Link, useLocation } from "wouter-preact"; +import { ChevronLeftIcon } from "../icons"; + // Sidebar nav. Phase 8 has Dashboard + People + Agents; phase 9 // grows Sessions and Config — each is just another row here. // @@ -7,18 +9,30 @@ import { Link, useLocation } from "wouter-preact"; // governance plan: "every LLM call traces to a specific human." // The humans-first axis is the primary one; agents are tools. const NAV: { to: string; label: string }[] = [ - { to: "/", label: "Dashboard" }, + { to: "/", label: "Canvas" }, + { to: "/dashboard", label: "Dashboard" }, { to: "/people", label: "People" }, { to: "/agents", label: "Agents" }, ]; -export function Sidebar() { +export function Sidebar({ onCollapse }: { onCollapse?: () => void }) { const [loc] = useLocation(); return (