// ProviderSpend is the per-provider slice carried inside an
// AgentUserSpend row. It lets the canvas drive gateway→provider edge
// widths (and the provider drawer) from real spend without a second
// round-trip: the handler buckets each log row by its provider in the
// same pass that builds the per-pair totals.
type ProviderSpend struct {
	// Provider is the log row's provider name, or "unknown" when the
	// row carried none.
	Provider string `json:"provider"`
	// TotalCost is the summed cost of this provider's rows for the pair.
	TotalCost float64 `json:"total_cost"`
	// RequestCount is the number of log rows for this provider and pair.
	RequestCount int64 `json:"request_count"`
}

// AgentUserSpend is one row of /_plugin/spend/by-agent-user — the
// fan-out crossing of (agent-name × user-id) so the flowchart UI can
// render one box per pairing in a single round-trip. Rows missing
// either dimension are excluded.
//
// Providers is the breakdown across providers for this pairing. The
// sum of Providers[*].RequestCount equals RequestCount, and the sum of
// Providers[*].TotalCost equals TotalCost up to floating-point
// rounding (the two totals are accumulated in different orders). The
// breakdown is additive to the top-line totals, not a replacement.
type AgentUserSpend struct {
	AgentName    string          `json:"agent_name"`
	UserID       string          `json:"user_id"`
	UserName     string          `json:"user_name"`
	TotalCost    float64         `json:"total_cost"`
	TotalTokens  int64           `json:"total_tokens"`
	RequestCount int64           `json:"request_count"`
	Providers    []ProviderSpend `json:"providers"`
}

// SpendByAgentUserResponse is the envelope for
// /_plugin/spend/by-agent-user.
type SpendByAgentUserResponse struct {
	// Window echoes the window the request was evaluated over.
	Window string `json:"window"`
	// Results holds one entry per (agent, user) pairing.
	Results []AgentUserSpend `json:"results"`
}
type HistogramPoint struct { @@ -338,6 +375,130 @@ func (h *observabilityHandlers) spendByUser(w http.ResponseWriter, r *http.Reque writeJSON(w, http.StatusOK, out) } +// ─── /_plugin/spend/by-agent-user ──────────────────────────────────── +// +// Fan-out of by-agent × by-user in a single pass over the same +// 200k-row ceiling as the other rollups. The flowchart canvas needs +// one row per (agent, user) pair — calling by-agent once per user +// would be N round-trips and N×200k row scans; doing it server-side +// here is a single scan that buckets by a compound key. +// +// Same dim-filter contract as spendByAgent: the optional +// ?user_id= / ?agent_name= scope down the data set at the Bifrost +// query layer. With no filter you get the whole swarm matrix. +func (h *observabilityHandlers) spendByAgentUser(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + methodNotAllowed(w, http.MethodGet) + return + } + window, start, end, ok := parseWindow(w, r) + if !ok { + return + } + + logs, err := h.logs.searchAll(r.Context(), searchOpts{ + StartTime: &start, + EndTime: &end, + Metadata: metadataFilterFromQuery(r), + }, 1000, 200_000) + if err != nil { + writeUpstreamError(w, err, "spend.by_agent_user") + return + } + + // Compound-key bucket. Using "agent\x00user" as the map key + // avoids allocating a struct-keyed map (and any collision + // concern — \x00 is the one byte we know never appears in a + // metadata value). `byProvider` is a sub-bucket keyed on the + // log row's `provider` column (anthropic/openai/...), so the + // per-pair breakdown comes out of the same single pass — no + // extra Bifrost calls. 
+ type providerAgg struct { + cost float64 + count int64 + } + type agg struct { + agent string + user string + cost float64 + tokens int64 + count int64 + byProvider map[string]*providerAgg + } + by := map[string]*agg{} + for _, l := range logs { + name := l.Metadata["agent-name"] + uid := l.Metadata["user-id"] + if name == "" || uid == "" { + continue + } + k := name + "\x00" + uid + a, ok := by[k] + if !ok { + a = &agg{ + agent: name, + user: uid, + byProvider: map[string]*providerAgg{}, + } + by[k] = a + } + a.cost += l.Cost + a.count++ + // Provider is recorded on every Bifrost log row; unattributed + // rows would skip the loop above before reaching here. + // Defensive: fall back to "unknown" so a missing provider + // can't silently drop request counts from the breakdown. + prov := l.Provider + if prov == "" { + prov = "unknown" + } + pa, ok := a.byProvider[prov] + if !ok { + pa = &providerAgg{} + a.byProvider[prov] = pa + } + pa.cost += l.Cost + pa.count++ + } + + out := SpendByAgentUserResponse{ + Window: window, + Results: make([]AgentUserSpend, 0, len(by)), + } + for _, a := range by { + providers := make([]ProviderSpend, 0, len(a.byProvider)) + for prov, pa := range a.byProvider { + providers = append(providers, ProviderSpend{ + Provider: prov, + TotalCost: pa.cost, + RequestCount: pa.count, + }) + } + // Deterministic order: cost desc, then provider name as a + // stable tiebreaker. Keeps the SPA's render order stable + // across refreshes so list animations don't reshuffle. 
+ sort.Slice(providers, func(i, j int) bool { + if providers[i].TotalCost != providers[j].TotalCost { + return providers[i].TotalCost > providers[j].TotalCost + } + return providers[i].Provider < providers[j].Provider + }) + out.Results = append(out.Results, AgentUserSpend{ + AgentName: a.agent, + UserID: a.user, + UserName: a.user, // user_id == user_name in v2 (see spendByUser note) + TotalCost: a.cost, + TotalTokens: a.tokens, + RequestCount: a.count, + Providers: providers, + }) + } + sort.Slice(out.Results, func(i, j int) bool { + return out.Results[i].TotalCost > out.Results[j].TotalCost + }) + writeJSON(w, http.StatusOK, out) +} + // ─── /_plugin/histogram/cost ───────────────────────────────────────── // // Same agent-name-in-metadata story as by-agent: bucket+sum in Go. diff --git a/gateway/internal/adminapi/observability_test.go b/gateway/internal/adminapi/observability_test.go index 513981563..19a6a8e81 100644 --- a/gateway/internal/adminapi/observability_test.go +++ b/gateway/internal/adminapi/observability_test.go @@ -317,6 +317,86 @@ func TestSpendByUser(t *testing.T) { } } +// ─── spend.by-agent-user ───────────────────────────────────────────── + +func TestSpendByAgentUser(t *testing.T) { + now := time.Now().UTC() + bf := newFakeBifrost(t, sampleLogs(now)) + srv := newObservabilityTestServer(t, bf) + + resp := bearerGet(t, srv, "/_plugin/spend/by-agent-user?window=1h") + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("status %d", resp.StatusCode) + } + var out SpendByAgentUserResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + t.Fatal(err) + } + // Sample data: (coder, u_alice, 0.15), (web-search, u_bob, 0.02). 
+ if out.Window != "1h" || len(out.Results) != 2 { + t.Fatalf("unexpected: %+v", out) + } + if out.Results[0].AgentName != "coder" || out.Results[0].UserID != "u_alice" { + t.Errorf("first row: %+v", out.Results[0]) + } + if out.Results[0].TotalCost < 0.149 || out.Results[0].RequestCount != 2 { + t.Errorf("coder/alice agg: %+v", out.Results[0]) + } + if out.Results[1].AgentName != "web-search" || out.Results[1].UserID != "u_bob" { + t.Errorf("second row: %+v", out.Results[1]) + } + // Provider breakdown — sampleLogs gives (coder, alice) two + // anthropic rows and (web-search, bob) one openai row. + if len(out.Results[0].Providers) != 1 || + out.Results[0].Providers[0].Provider != "anthropic" || + out.Results[0].Providers[0].RequestCount != 2 { + t.Errorf("coder/alice providers: %+v", out.Results[0].Providers) + } + if len(out.Results[1].Providers) != 1 || + out.Results[1].Providers[0].Provider != "openai" || + out.Results[1].Providers[0].RequestCount != 1 { + t.Errorf("web-search/bob providers: %+v", out.Results[1].Providers) + } +} + +func TestSpendByAgentUser_FiltersUnattributed(t *testing.T) { + now := time.Now().UTC() + logs := []fakeLog{ + // no agent-name → excluded + { + ID: "a", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano), + Status: "success", Cost: 1.0, + Metadata: map[string]string{"user-id": "u_alice"}, + }, + // no user-id → excluded + { + ID: "b", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano), + Status: "success", Cost: 1.0, + Metadata: map[string]string{"agent-name": "coder"}, + }, + // both → included + { + ID: "c", Timestamp: now.Add(-10 * time.Minute).Format(time.RFC3339Nano), + Status: "success", Cost: 0.5, + Metadata: map[string]string{"agent-name": "coder", "user-id": "u_alice"}, + }, + } + bf := newFakeBifrost(t, logs) + srv := newObservabilityTestServer(t, bf) + + resp := bearerGet(t, srv, "/_plugin/spend/by-agent-user?window=1h") + defer resp.Body.Close() + var out SpendByAgentUserResponse + _ = 
json.NewDecoder(resp.Body).Decode(&out) + if len(out.Results) != 1 { + t.Fatalf("expected 1 row, got %d: %+v", len(out.Results), out.Results) + } + if out.Results[0].AgentName != "coder" || out.Results[0].UserID != "u_alice" { + t.Errorf("row: %+v", out.Results[0]) + } +} + // ─── histogram.cost ────────────────────────────────────────────────── func TestHistogramCost_ByAgent(t *testing.T) { diff --git a/gateway/internal/adminapi/server.go b/gateway/internal/adminapi/server.go index fec27cba0..176514c5f 100644 --- a/gateway/internal/adminapi/server.go +++ b/gateway/internal/adminapi/server.go @@ -232,6 +232,7 @@ func registerRoutes(mux *http.ServeMux, deps routeDeps) { obs := newObservabilityHandlers(deps.logstore) mux.HandleFunc("/_plugin/spend/by-agent", cookieOrBearer(obs.spendByAgent)) mux.HandleFunc("/_plugin/spend/by-user", cookieOrBearer(obs.spendByUser)) + mux.HandleFunc("/_plugin/spend/by-agent-user", cookieOrBearer(obs.spendByAgentUser)) mux.HandleFunc("/_plugin/histogram/cost", cookieOrBearer(obs.histogramCost)) // /_plugin/runs/ takes a trailing path segment as run-id mux.HandleFunc("/_plugin/runs/", cookieOrBearer(obs.runDetail)) diff --git a/gateway/internal/adminapi/ui/dist/index.html b/gateway/internal/adminapi/ui/dist/index.html index aad50684e..813e9946e 100644 --- a/gateway/internal/adminapi/ui/dist/index.html +++ b/gateway/internal/adminapi/ui/dist/index.html @@ -4,9 +4,9 @@ -