Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gateway/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ Routes today:
| GET | `/_plugin/spend/by-{agent,user}?window=…` | cookie or bearer | Phase-7 per-dim aggregations over `logs.db` via loopback to bifrost-http's `/api/logs`. |
| GET | `/_plugin/histogram/cost?window=…&bucket=…&dimension=…` | cookie or bearer | Time-bucketed cost series, grouped by dim. agent-name / run-id / session-id / realm-id / user-id. |
| GET | `/_plugin/runs/:run_id` | cookie or bearer | Drill-down: every call recorded for one run_id, paginated. |
| GET | `/_plugin/runs/:run_id/calls/:call_id` | cookie or bearer | Single call body — full input_history / output_message / params / tools / error_details / raw_response. |
| GET/POST/DELETE | `/_plugin/trust/*` | bearer | Phase-5 trust registry CRUD. |

### Admin UI (`/_plugin/ui/`)
Expand Down
73 changes: 73 additions & 0 deletions gateway/internal/adminapi/logstore_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
Expand Down Expand Up @@ -203,6 +204,78 @@ func (c *logstoreClient) searchAll(
return all, nil
}

// ─── single-log lookup ───────────────────────────────────────────────

// logstoreLogDetail mirrors the shape returned by Bifrost's
// GET /api/logs/{id} — the same row as a list entry plus the
// "heavy" TEXT columns that the list endpoint deliberately
// trims (full input_history, output_message, params, tools,
// error_details, raw_response).
//
// Every body field is delivered as a parsed JSON value (Bifrost's
// `xParsed` GORM-ignored fields serialize under the bare name, see
// framework/logstore/tables.go). We treat them as opaque
// json.RawMessage here because the plugin doesn't introspect them —
// it just relays the bytes to the SPA, which renders them as
// pretty-printed JSON. This keeps the plugin's schema coupling to
// Bifrost minimal: a new field in `schemas.ChatMessage` upstream is
// invisible to us.
type logstoreLogDetail struct {
	// Scalar row fields, decoded from the JSON keys named in the tags.
	ID         string  `json:"id"`
	Timestamp  string  `json:"timestamp"`
	Provider   string  `json:"provider"`
	Model      string  `json:"model"`
	Status     string  `json:"status"`
	Cost       float64 `json:"cost"`
	Latency    float64 `json:"latency"`
	CustomerID string  `json:"customer_id"`
	// Metadata is decoded as string→string. The call-detail handler
	// reads its "run-id" key to scope a row to a run.
	// NOTE(review): a non-string metadata value upstream would fail
	// decoding of the whole row — confirm Bifrost only emits strings.
	Metadata map[string]string `json:"metadata"`

	// Heavy body fields — present on /api/logs/{id}, absent on
	// /api/logs. Optional because some rows (errors, realtime
	// turns) won't populate every field.
	InputHistory  json.RawMessage `json:"input_history,omitempty"`
	OutputMessage json.RawMessage `json:"output_message,omitempty"`
	Params        json.RawMessage `json:"params,omitempty"`
	Tools         json.RawMessage `json:"tools,omitempty"`
	ErrorDetails  json.RawMessage `json:"error_details,omitempty"`
	// The remaining body fields are typed as plain strings rather than
	// json.RawMessage, so they are decoded as JSON strings.
	RawRequest     string `json:"raw_request,omitempty"`
	RawResponse    string `json:"raw_response,omitempty"`
	ContentSummary string `json:"content_summary,omitempty"`

	// TokenUsage carries the full provider-reported usage breakdown,
	// including cached read/write splits (Anthropic prompt-cache,
	// OpenAI cached_tokens). We pass it through as-is; the SPA
	// renders the relevant sub-fields without the plugin having to
	// stay in lockstep with provider-specific shape changes.
	TokenUsage json.RawMessage `json:"token_usage,omitempty"`
	// CacheDebug is Bifrost's *semantic* cache record (hit/miss,
	// similarity, threshold). Separate from prompt-cache tokens.
	CacheDebug json.RawMessage `json:"cache_debug,omitempty"`

	// Per-request descriptors. These carry no omitempty, so their zero
	// values are still written if this struct is ever re-encoded.
	StopReason      string `json:"stop_reason,omitempty"`
	Stream          bool   `json:"stream"`
	NumberOfRetries int    `json:"number_of_retries"`
	FallbackIndex   int    `json:"fallback_index"`
}

// findByID fetches the full detail row for a single log entry via
// Bifrost's GET /api/logs/{id}. Returns (nil, nil) on 404 so the
// handler layer can map that to its own 404 without a sentinel
// errors.Is dance.
//
// An id that is empty or a dot segment ("." / "..") is also reported
// as not found, without contacting upstream: url.PathEscape leaves
// such values untouched, so the request would address /api/logs/
// itself (or its parent) instead of a single row, and whatever came
// back would be decoded as if it were one.
//
// Any other upstream failure is returned unchanged.
func (c *logstoreClient) findByID(ctx context.Context, id string) (*logstoreLogDetail, error) {
	switch id {
	case "", ".", "..":
		// Cannot name a row; treat exactly like an upstream 404.
		return nil, nil
	}

	var out logstoreLogDetail
	if err := c.getJSON(ctx, "/api/logs/"+url.PathEscape(id), &out); err != nil {
		var ue *upstreamError
		if errors.As(err, &ue) && ue.status == http.StatusNotFound {
			return nil, nil
		}
		return nil, err
	}
	return &out, nil
}

// ─── user rankings (bifrost native) ──────────────────────────────────

// logstoreUserRanking mirrors UserRankingEntry from
Expand Down
172 changes: 161 additions & 11 deletions gateway/internal/adminapi/observability.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package adminapi

import (
"encoding/json"
"errors"
"net/http"
"sort"
Expand Down Expand Up @@ -96,6 +97,75 @@ type RunDetailResponse struct {
Stats RunStats `json:"stats"`
}

// CallDetailResponse is the envelope for
// /_plugin/runs/:run_id/calls/:call_id — the full request/response
// content for a single LLM call, fetched on-demand when the
// operator clicks a row in the RunDetail call log.
//
// Run-scoping rationale: we verify that the fetched log's
// metadata.run-id matches the URL's run_id before returning, so a
// caller can't enumerate other workspaces' logs by guessing IDs.
// Bifrost's /api/logs/{id} doesn't do this check itself (it just
// looks up by primary key), so the plugin enforces it.
//
// Body fields are pass-through json.RawMessage from Bifrost — the
// SPA pretty-prints them; the plugin doesn't introspect them. This
// keeps the schema coupling minimal: new fields upstream surface in
// the UI without code changes here.
type CallDetailResponse struct {
	// Row identity and headline fields. RunID is the run_id taken from
	// the request URL (the handler fills it from its runID argument),
	// not a value copied out of the row.
	ID         string            `json:"id"`
	RunID      string            `json:"run_id"`
	Timestamp  string            `json:"timestamp"`
	Provider   string            `json:"provider"`
	Model      string            `json:"model"`
	Status     string            `json:"status"`
	Cost       float64           `json:"cost"`
	Latency    float64           `json:"latency"`
	CustomerID string            `json:"customer_id"`
	Metadata   map[string]string `json:"metadata"`

	// Per-request descriptors stamped by Bifrost. `stop_reason`
	// tells the operator why the model stopped (stop, length,
	// content_filter, tool_calls, refusal). `stream` flags
	// streaming responses (which lack a single output_message and
	// surface their content via Bifrost's stream chunk replay).
	// Retries / fallback_index are zero on the happy path; non-zero
	// means Bifrost had to retry the call or fall back to a
	// different provider, which is useful provenance.
	StopReason      string `json:"stop_reason,omitempty"`
	Stream          bool   `json:"stream"`
	NumberOfRetries int    `json:"number_of_retries"`
	FallbackIndex   int    `json:"fallback_index"`

	// TokenUsage is the provider-reported usage breakdown
	// (BifrostLLMUsage). Includes prompt/completion/total totals
	// plus cached read/write splits (Anthropic prompt-cache,
	// OpenAI cached_tokens), audio/image token counts, and a
	// per-call cost record. Pass-through JSON — the SPA introspects
	// it. Bifrost's row-level prompt_tokens / completion_tokens /
	// total_tokens columns are denormalized helpers tagged
	// `json:"-"`, so this is the only place token data is on the
	// wire.
	TokenUsage json.RawMessage `json:"token_usage,omitempty"`

	// CacheDebug carries Bifrost's *semantic* cache verdict for
	// this call (hit/miss + similarity score). Distinct from
	// prompt-cache tokens, which live in TokenUsage above. Absent
	// when no semantic cache is configured for this swarm.
	CacheDebug json.RawMessage `json:"cache_debug,omitempty"`

	// All optional; missing on failures, realtime turns, or rows
	// recorded before a given column existed.
	InputHistory  json.RawMessage `json:"input_history,omitempty"`
	OutputMessage json.RawMessage `json:"output_message,omitempty"`
	Params        json.RawMessage `json:"params,omitempty"`
	Tools         json.RawMessage `json:"tools,omitempty"`
	ErrorDetails  json.RawMessage `json:"error_details,omitempty"`
	// Carried as plain strings, unlike the json.RawMessage fields
	// above, so they are emitted as JSON strings.
	RawRequest     string `json:"raw_request,omitempty"`
	RawResponse    string `json:"raw_response,omitempty"`
	ContentSummary string `json:"content_summary,omitempty"`
}

// ─── handler scaffold ────────────────────────────────────────────────

// observabilityHandlers carries the logstore client into the handler
Expand Down Expand Up @@ -391,28 +461,45 @@ func dimensionValue(l logstoreLog, dim string) string {

// ─── /_plugin/runs/:run_id ───────────────────────────────────────────
//
// Routed under `/_plugin/runs/` (subtree); the trailing segment is
// the run_id. We don't pull in a router library for this since
// phase 8 only has one such route — extracting the segment with
// string ops is shorter than gluing in a dependency.
// Routed under `/_plugin/runs/` (subtree); the trailing segments
// dispatch by shape:
//
// /_plugin/runs/{run_id} → runDetail (list)
// /_plugin/runs/{run_id}/calls/{call_id} → runCallDetail (body)
//
// Phase 6 will add /:id/state and /:id/kill under the same prefix;
// any other shape returns 404. We don't pull in a router library
// since the dispatch fits in a switch.
func (h *observabilityHandlers) runDetail(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
methodNotAllowed(w, http.MethodGet)
return
}
const prefix = "/_plugin/runs/"
rest := strings.TrimPrefix(r.URL.Path, prefix)
if rest == "" || strings.ContainsRune(rest, '/') {
// Phase 6 will introduce /_plugin/runs/:id/state and
// /_plugin/runs/:id/kill — both contain a slash after the
// id. Phase 8 doesn't serve those, so reject early with a
// crisp 404 rather than confusing the caller with a
// stripped-id query.
if rest == "" {
http.NotFound(w, r)
return
}
runID := rest

// Two valid shapes: "{run_id}" (list) and "{run_id}/calls/{call_id}".
parts := strings.Split(rest, "/")
switch {
case len(parts) == 1:
h.runDetailList(w, r, parts[0])
case len(parts) == 3 && parts[1] == "calls" && parts[2] != "":
h.runCallDetail(w, r, parts[0], parts[2])
default:
// Anything else (e.g. /:id/state, /:id/kill, trailing slash,
// 4-segment paths) is phase-6 territory or malformed; 404.
http.NotFound(w, r)
}
}

// runDetailList serves the paginated call-log envelope for a run.
// Extracted from runDetail's dispatch so the URL parsing stays one
// concern and the data path another.
func (h *observabilityHandlers) runDetailList(w http.ResponseWriter, r *http.Request, runID string) {
limit, offset, ok := parsePagination(w, r)
if !ok {
return
Expand Down Expand Up @@ -454,6 +541,69 @@ func (h *observabilityHandlers) runDetail(w http.ResponseWriter, r *http.Request
writeJSON(w, http.StatusOK, out)
}

// runCallDetail serves the full content of one LLM call. Backed by
// Bifrost's GET /api/logs/{id}, which (unlike /api/logs) returns
// the parsed input_history / output_message / params / tools /
// error_details / raw_response fields the list endpoint trims.
//
// The {run_id} segment isn't strictly needed to look up the row
// (Bifrost's primary key is the call id), but we verify it matches
// `metadata.run-id` on the row before returning. Two reasons:
//
//  1. Defence in depth — keeps the call-detail URL self-describing
//     and prevents the SPA from being tricked into enumerating
//     call IDs across runs.
//  2. Symmetric with /_plugin/runs/{id} which is already run-scoped.
//     Operators reasonably expect /runs/A/calls/X and /runs/B/calls/X
//     to give 404 for whichever doesn't actually contain X.
//
// Responses: 200 with a CallDetailResponse when the row exists and
// belongs to runID; 404 when runID is empty, the row is missing, or
// its run-id differs; otherwise whatever writeUpstreamError emits for
// the upstream failure.
func (h *observabilityHandlers) runCallDetail(w http.ResponseWriter, r *http.Request, runID, callID string) {
	if runID == "" {
		// A row that was never stamped with run-id reads back "" from
		// the metadata map, which would compare equal to an empty
		// runID and slip past the scope check below. Reject up front,
		// before spending an upstream round-trip.
		http.NotFound(w, r)
		return
	}

	row, err := h.logs.findByID(r.Context(), callID)
	if err != nil {
		writeUpstreamError(w, err, "runs.call_detail")
		return
	}
	if row == nil {
		http.NotFound(w, r)
		return
	}
	if row.Metadata["run-id"] != runID {
		// Either the caller guessed an ID from a different run, or
		// the run-id dim was never stamped. Either way we don't
		// want to leak the row's existence — 404 matches the
		// "row doesn't exist under this run" semantic the URL
		// implies.
		http.NotFound(w, r)
		return
	}

	writeJSON(w, http.StatusOK, CallDetailResponse{
		ID:              row.ID,
		RunID:           runID,
		Timestamp:       row.Timestamp,
		Provider:        row.Provider,
		Model:           row.Model,
		Status:          row.Status,
		Cost:            row.Cost,
		Latency:         row.Latency,
		CustomerID:      row.CustomerID,
		Metadata:        row.Metadata,
		StopReason:      row.StopReason,
		Stream:          row.Stream,
		NumberOfRetries: row.NumberOfRetries,
		FallbackIndex:   row.FallbackIndex,
		TokenUsage:      row.TokenUsage,
		CacheDebug:      row.CacheDebug,
		InputHistory:    row.InputHistory,
		OutputMessage:   row.OutputMessage,
		Params:          row.Params,
		Tools:           row.Tools,
		ErrorDetails:    row.ErrorDetails,
		RawRequest:      row.RawRequest,
		RawResponse:     row.RawResponse,
		ContentSummary:  row.ContentSummary,
	})
}

// ─── parameter parsing ───────────────────────────────────────────────

// parseWindow reads ?window=1h|24h|7d|30d and returns the canonical
Expand Down
Loading
Loading