Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions gateway/internal/adminapi/observability.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,43 @@ type SpendByUserResponse struct {
Results []UserSpend `json:"results"`
}

// ProviderSpend is one provider's share of an AgentUserSpend row:
// the cost and request count that a single (agent, user) pairing
// sent to that provider. The canvas uses it to size gateway→provider
// edges (and to fill the provider drawer) without a second request;
// the handler fills it in the same pass that builds the parent row.
type ProviderSpend struct {
	// Provider is the provider label taken from the log row.
	Provider string `json:"provider"`
	// TotalCost is the summed cost of the pairing's rows for Provider.
	TotalCost float64 `json:"total_cost"`
	// RequestCount is the number of log rows behind TotalCost.
	RequestCount int64 `json:"request_count"`
}

// AgentUserSpend is one row of /_plugin/spend/by-agent-user: the
// spend of a single (agent-name, user-id) pairing, so the flowchart
// UI can draw one box per pairing from a single round-trip. Log rows
// that lack either dimension are left out (same policy as by-agent /
// by-user).
//
// Providers splits the row across providers. It is built from the
// same log rows as the top-line figures, so the Providers[*]
// RequestCount values add up to RequestCount and the TotalCost values
// add up to TotalCost (modulo floating-point summation order). It is
// a breakdown of the totals, not a replacement for them.
//
// NOTE(review): the handler currently copies the user id into
// UserName and never accumulates tokens, so TotalTokens is always 0
// on the wire.
type AgentUserSpend struct {
	AgentName    string          `json:"agent_name"`
	UserID       string          `json:"user_id"`
	UserName     string          `json:"user_name"`
	TotalCost    float64         `json:"total_cost"`
	TotalTokens  int64           `json:"total_tokens"`
	RequestCount int64           `json:"request_count"`
	Providers    []ProviderSpend `json:"providers"`
}

// SpendByAgentUserResponse is the JSON envelope returned by
// /_plugin/spend/by-agent-user.
type SpendByAgentUserResponse struct {
	// Window is the window value the handler parsed from the request.
	Window string `json:"window"`
	// Results holds one entry per (agent, user) pairing, highest
	// total cost first.
	Results []AgentUserSpend `json:"results"`
}

// HistogramPoint is one (timestamp, cost) datum in a per-dimension
// series. Timestamp is the bucket's start time in RFC3339.
type HistogramPoint struct {
Expand Down Expand Up @@ -338,6 +375,130 @@ func (h *observabilityHandlers) spendByUser(w http.ResponseWriter, r *http.Reque
writeJSON(w, http.StatusOK, out)
}

// ─── /_plugin/spend/by-agent-user ────────────────────────────────────
//
// spendByAgentUser serves GET /_plugin/spend/by-agent-user.
//
// Fan-out of by-agent × by-user in a single pass over the same
// 200k-row ceiling as the other rollups. The flowchart canvas needs
// one row per (agent, user) pair — calling by-agent once per user
// would be N round-trips and N×200k row scans; doing it server-side
// here is a single scan that buckets by a compound key.
//
// Same dim-filter contract as spendByAgent: the optional
// ?user_id= / ?agent_name= scope down the data set at the Bifrost
// query layer. With no filter you get the whole swarm matrix.
//
// Responses: non-GET methods are rejected via methodNotAllowed, a bad
// window is handled by parseWindow, a log-store failure goes through
// writeUpstreamError, and success is a 200 with a
// SpendByAgentUserResponse whose rows are ordered by total cost
// descending, then agent name, then user id.
func (h *observabilityHandlers) spendByAgentUser(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		methodNotAllowed(w, http.MethodGet)
		return
	}
	window, start, end, ok := parseWindow(w, r)
	if !ok {
		return
	}

	logs, err := h.logs.searchAll(r.Context(), searchOpts{
		StartTime: &start,
		EndTime:   &end,
		Metadata:  metadataFilterFromQuery(r),
	}, 1000, 200_000)
	if err != nil {
		writeUpstreamError(w, err, "spend.by_agent_user")
		return
	}

	// Compound-key bucket. Using "agent\x00user" as the map key
	// avoids allocating a struct-keyed map (and any collision
	// concern — \x00 is the one byte we know never appears in a
	// metadata value). `byProvider` is a sub-bucket keyed on the
	// log row's `provider` column (anthropic/openai/...), so the
	// per-pair breakdown comes out of the same single pass — no
	// extra Bifrost calls.
	type providerAgg struct {
		cost  float64
		count int64
	}
	type agg struct {
		agent string
		user  string
		cost  float64
		// NOTE(review): tokens is never accumulated in the loop
		// below, so TotalTokens is always 0 in the response. Wire it
		// to the log row's token count the same way spendByAgent
		// does — TODO confirm the field name there.
		tokens     int64
		count      int64
		byProvider map[string]*providerAgg
	}
	by := map[string]*agg{}
	for _, l := range logs {
		name := l.Metadata["agent-name"]
		uid := l.Metadata["user-id"]
		if name == "" || uid == "" {
			// Unattributed row: excluded from the matrix entirely.
			continue
		}
		k := name + "\x00" + uid
		a, ok := by[k]
		if !ok {
			a = &agg{
				agent:      name,
				user:       uid,
				byProvider: map[string]*providerAgg{},
			}
			by[k] = a
		}
		a.cost += l.Cost
		a.count++
		// Every row that reaches this point is counted in the pair
		// totals above, so it must land in exactly one provider
		// bucket too. Fall back to "unknown" when the row carries no
		// provider so the breakdown keeps adding up to the totals.
		prov := l.Provider
		if prov == "" {
			prov = "unknown"
		}
		pa, ok := a.byProvider[prov]
		if !ok {
			pa = &providerAgg{}
			a.byProvider[prov] = pa
		}
		pa.cost += l.Cost
		pa.count++
	}

	out := SpendByAgentUserResponse{
		Window:  window,
		Results: make([]AgentUserSpend, 0, len(by)),
	}
	for _, a := range by {
		providers := make([]ProviderSpend, 0, len(a.byProvider))
		for prov, pa := range a.byProvider {
			providers = append(providers, ProviderSpend{
				Provider:     prov,
				TotalCost:    pa.cost,
				RequestCount: pa.count,
			})
		}
		// Deterministic order: cost desc, then provider name as a
		// stable tiebreaker. Keeps the SPA's render order stable
		// across refreshes so list animations don't reshuffle.
		sort.Slice(providers, func(i, j int) bool {
			if providers[i].TotalCost != providers[j].TotalCost {
				return providers[i].TotalCost > providers[j].TotalCost
			}
			return providers[i].Provider < providers[j].Provider
		})
		out.Results = append(out.Results, AgentUserSpend{
			AgentName:    a.agent,
			UserID:       a.user,
			UserName:     a.user, // user_id == user_name in v2 (see spendByUser note)
			TotalCost:    a.cost,
			TotalTokens:  a.tokens,
			RequestCount: a.count,
			Providers:    providers,
		})
	}
	// Rows were appended in map-iteration order (random) and
	// sort.Slice is not stable, so cost alone would let equal-cost
	// pairs swap places between refreshes. (agent, user) is unique per
	// row, which makes this a total order.
	sort.Slice(out.Results, func(i, j int) bool {
		ri, rj := &out.Results[i], &out.Results[j]
		if ri.TotalCost != rj.TotalCost {
			return ri.TotalCost > rj.TotalCost
		}
		if ri.AgentName != rj.AgentName {
			return ri.AgentName < rj.AgentName
		}
		return ri.UserID < rj.UserID
	})
	writeJSON(w, http.StatusOK, out)
}

// ─── /_plugin/histogram/cost ─────────────────────────────────────────
//
// Same agent-name-in-metadata story as by-agent: bucket+sum in Go.
Expand Down
80 changes: 80 additions & 0 deletions gateway/internal/adminapi/observability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,86 @@ func TestSpendByUser(t *testing.T) {
}
}

// ─── spend.by-agent-user ─────────────────────────────────────────────

// TestSpendByAgentUser drives the by-agent-user endpoint against the
// shared sample logs and checks the envelope, the cost-descending row
// order, the per-pair aggregates, and the per-provider breakdown.
func TestSpendByAgentUser(t *testing.T) {
	now := time.Now().UTC()
	bf := newFakeBifrost(t, sampleLogs(now))
	srv := newObservabilityTestServer(t, bf)

	resp := bearerGet(t, srv, "/_plugin/spend/by-agent-user?window=1h")
	defer resp.Body.Close()
	if code := resp.StatusCode; code != http.StatusOK {
		t.Fatalf("status %d", code)
	}

	var out SpendByAgentUserResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		t.Fatal(err)
	}

	// Sample data: (coder, u_alice, 0.15), (web-search, u_bob, 0.02).
	if out.Window != "1h" || len(out.Results) != 2 {
		t.Fatalf("unexpected: %+v", out)
	}
	coder, search := out.Results[0], out.Results[1]

	if coder.AgentName != "coder" || coder.UserID != "u_alice" {
		t.Errorf("first row: %+v", coder)
	}
	if coder.TotalCost < 0.149 || coder.RequestCount != 2 {
		t.Errorf("coder/alice agg: %+v", coder)
	}
	if search.AgentName != "web-search" || search.UserID != "u_bob" {
		t.Errorf("second row: %+v", search)
	}

	// Provider breakdown — sampleLogs gives (coder, alice) two
	// anthropic rows and (web-search, bob) one openai row.
	if ps := coder.Providers; len(ps) != 1 ||
		ps[0].Provider != "anthropic" ||
		ps[0].RequestCount != 2 {
		t.Errorf("coder/alice providers: %+v", ps)
	}
	if ps := search.Providers; len(ps) != 1 ||
		ps[0].Provider != "openai" ||
		ps[0].RequestCount != 1 {
		t.Errorf("web-search/bob providers: %+v", ps)
	}
}

// TestSpendByAgentUser_FiltersUnattributed checks that log rows
// missing either the agent-name or the user-id metadata are dropped,
// and that their cost and request count do not leak into the one row
// that carries both.
func TestSpendByAgentUser_FiltersUnattributed(t *testing.T) {
	now := time.Now().UTC()
	logs := []fakeLog{
		// no agent-name → excluded
		{
			ID: "a", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano),
			Status: "success", Cost: 1.0,
			Metadata: map[string]string{"user-id": "u_alice"},
		},
		// no user-id → excluded
		{
			ID: "b", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano),
			Status: "success", Cost: 1.0,
			Metadata: map[string]string{"agent-name": "coder"},
		},
		// both → included
		{
			ID: "c", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano),
			Status: "success", Cost: 0.5,
			Metadata: map[string]string{"agent-name": "coder", "user-id": "u_alice"},
		},
	}
	bf := newFakeBifrost(t, logs)
	srv := newObservabilityTestServer(t, bf)

	resp := bearerGet(t, srv, "/_plugin/spend/by-agent-user?window=1h")
	defer resp.Body.Close()
	// Fail on the real cause (bad status / undecodable body) instead
	// of a misleading "expected 1 row, got 0" further down.
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("status %d", resp.StatusCode)
	}
	var out SpendByAgentUserResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		t.Fatal(err)
	}
	if len(out.Results) != 1 {
		t.Fatalf("expected 1 row, got %d: %+v", len(out.Results), out.Results)
	}
	row := out.Results[0]
	if row.AgentName != "coder" || row.UserID != "u_alice" {
		t.Errorf("row: %+v", row)
	}
	// Only log "c" (cost 0.5) may contribute; the two excluded rows
	// cost 1.0 each, so any leak pushes the total well past 0.5.
	if row.RequestCount != 1 || row.TotalCost < 0.499 || row.TotalCost > 0.501 {
		t.Errorf("excluded rows leaked into aggregate: %+v", row)
	}
}

// ─── histogram.cost ──────────────────────────────────────────────────

func TestHistogramCost_ByAgent(t *testing.T) {
Expand Down
1 change: 1 addition & 0 deletions gateway/internal/adminapi/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ func registerRoutes(mux *http.ServeMux, deps routeDeps) {
obs := newObservabilityHandlers(deps.logstore)
mux.HandleFunc("/_plugin/spend/by-agent", cookieOrBearer(obs.spendByAgent))
mux.HandleFunc("/_plugin/spend/by-user", cookieOrBearer(obs.spendByUser))
mux.HandleFunc("/_plugin/spend/by-agent-user", cookieOrBearer(obs.spendByAgentUser))
mux.HandleFunc("/_plugin/histogram/cost", cookieOrBearer(obs.histogramCost))
// /_plugin/runs/ takes a trailing path segment as run-id
mux.HandleFunc("/_plugin/runs/", cookieOrBearer(obs.runDetail))
Expand Down
6 changes: 3 additions & 3 deletions gateway/internal/adminapi/ui/dist/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=1280" />
<meta name="color-scheme" content="dark" />
<title>Agent Gateway</title>
<script type="module" crossorigin src="/_plugin/ui/assets/index-9rKIYUe6.js"></script>
<link rel="stylesheet" crossorigin href="/_plugin/ui/assets/index-C2F5f8hx.css">
<title>Agent Mothership</title>
<script type="module" crossorigin src="/_plugin/ui/assets/index-BuAZSFd2.js"></script>
<link rel="stylesheet" crossorigin href="/_plugin/ui/assets/index-BGvSJWak.css">
</head>
<body>
<div id="root"></div>
Expand Down
2 changes: 1 addition & 1 deletion gateway/internal/adminapi/ui/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=1280" />
<meta name="color-scheme" content="dark" />
<title>Agent Gateway</title>
<title>Agent Mothership</title>
</head>
<body>
<div id="root"></div>
Expand Down
2 changes: 2 additions & 0 deletions gateway/internal/adminapi/ui/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"@preact/signals": "^2.0.2",
"@tanstack/react-query": "^5.62.0",
"preact": "^10.25.0",
"system-canvas": "^0.1.19",
"system-canvas-react": "^0.1.25",
"uplot": "^1.6.31",
"wouter": "^3.4.2",
"wouter-preact": "^3.4.2"
Expand Down
19 changes: 19 additions & 0 deletions gateway/internal/adminapi/ui/src/api/queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import type {
MeResponse,
RunDetailResponse,
SpendByAgentResponse,
SpendByAgentUserResponse,
SpendByUserResponse,
TrustOrg,
UserDetailResponse,
Expand Down Expand Up @@ -54,6 +55,24 @@ export function useSpendByAgent(window: Window, userID?: string) {
});
}

// ─── /spend/by-agent-user ───────────────────────────────────────────
//
// Query hook for the Canvas page: fetches the whole (agent × user)
// matrix for `window` in one request instead of N parallel
// by-agent?user_id=… calls. Polls every 30s with a 10s stale time,
// the same cadence used by the other rollups so all three feel
// synchronized.

export function useSpendByAgentUser(window: Window) {
  // `window` is part of the query key, so each window value is cached
  // and refetched independently.
  const path = `/spend/by-agent-user?window=${encodeURIComponent(window)}`;
  const fetchMatrix = () => apiFetch<SpendByAgentUserResponse>(path);

  return useQuery({
    queryKey: ["spend", "by-agent-user", window],
    queryFn: fetchMatrix,
    refetchInterval: 30_000,
    staleTime: 10_000,
  });
}

// ─── /spend/by-user ─────────────────────────────────────────────────

export function useSpendByUser(window: Window) {
Expand Down
31 changes: 31 additions & 0 deletions gateway/internal/adminapi/ui/src/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,37 @@ export interface SpendByUserResponse {
results: UserSpend[];
}

// Per-provider slice inside an AgentUserSpend row. Lets the canvas
// drive gateway→provider edge widths from real spend. The server
// fills these in the same pass that computes the (agent × user)
// rollup, so no extra round-trips are needed.
export interface ProviderSpend {
// Provider label from the log row; the server substitutes "unknown"
// when a row carries none.
provider: string;
// Summed cost of the pairing's requests that went to this provider.
total_cost: number;
// Number of requests behind total_cost.
request_count: number;
}

// One row of /_plugin/spend/by-agent-user — the (agent × user) fan-out
// the Canvas page uses to render one box per pairing without N round
// trips. Rows missing either dim are excluded server-side.
//
// `providers` sums to `total_cost` / `request_count` and is sorted by
// cost desc, with provider name as a stable tiebreaker.
//
// NOTE(review): the server currently sends the user id as `user_name`
// and never accumulates tokens, so `total_tokens` arrives as 0.
export interface AgentUserSpend {
agent_name: string;
user_id: string;
user_name: string;
total_cost: number;
total_tokens: number;
request_count: number;
providers: ProviderSpend[];
}

// Response envelope for /_plugin/spend/by-agent-user.
export interface SpendByAgentUserResponse {
// The window the server applied to the query (e.g. "1h").
window: string;
// One entry per (agent, user) pairing; the server sorts by
// total_cost descending.
results: AgentUserSpend[];
}

export interface HistogramPoint {
ts: string;
cost: number;
Expand Down
8 changes: 7 additions & 1 deletion gateway/internal/adminapi/ui/src/app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { UnauthorizedError } from "./api/client";
import { Shell } from "./components/layout/Shell";
import { AgentDetail } from "./pages/AgentDetail";
import { Agents } from "./pages/Agents";
import { Canvas } from "./pages/Canvas";
import { Dashboard } from "./pages/Dashboard";
import { Login } from "./pages/Login";
import { NotFound } from "./pages/NotFound";
Expand Down Expand Up @@ -106,11 +107,16 @@ function AppShell() {
<People />
</Shell>
</Route>
<Route path="/">
<Route path="/dashboard">
<Shell>
<Dashboard />
</Shell>
</Route>
<Route path="/">
<Shell>
<Canvas />
</Shell>
</Route>
<Route>
<Shell>
<NotFound />
Expand Down
Loading
Loading