From 49ce3adc7ce57db7c090e28903da23365c273e85 Mon Sep 17 00:00:00 2001 From: Evanfeenstra Date: Tue, 19 May 2026 17:55:32 -0700 Subject: [PATCH] organized user by user-id not uuid' --- gateway/.dockerignore | 35 +++++++++++++ gateway/internal/adminapi/observability.go | 60 ++++++++++++---------- 2 files changed, 68 insertions(+), 27 deletions(-) create mode 100644 gateway/.dockerignore diff --git a/gateway/.dockerignore b/gateway/.dockerignore new file mode 100644 index 000000000..3c693913c --- /dev/null +++ b/gateway/.dockerignore @@ -0,0 +1,35 @@ +# Keep the Docker build context small and deterministic — exclude +# anything Vite / Go / pytest / IDE tooling drops in the working +# tree that isn't a build input. The actual Dockerfile COPYs are +# explicit (file-by-file) so missing entries here can't cause an +# incorrect build, but pruning shrinks the upload to the daemon +# (and to CI's docker buildx) materially. + +# Compiled / build outputs. +build/ +internal/adminapi/ui/node_modules/ +internal/adminapi/ui/dist/assets/ +internal/adminapi/ui/.vite/ +*.so + +# Local Bifrost runtime state — never an input to the image build +# (the image seeds /app/data from data/config.json at boot). +data/config.db +data/config.db-* +data/logs.db +data/logs.db-* +data/logs/ + +# Environment overrides — secrets / per-dev settings. +.env +.env.* + +# OS / IDE. +.DS_Store +.idea/ +.vscode/ +*.swp + +# Git metadata — buildkit doesn't need it. +.git/ +.gitignore diff --git a/gateway/internal/adminapi/observability.go b/gateway/internal/adminapi/observability.go index 35b32f56d..b2a33928a 100644 --- a/gateway/internal/adminapi/observability.go +++ b/gateway/internal/adminapi/observability.go @@ -189,12 +189,28 @@ func (h *observabilityHandlers) spendByAgent(w http.ResponseWriter, r *http.Requ // ─── /_plugin/spend/by-user ────────────────────────────────────────── // -// `customer_id` is a first-class indexed column on Bifrost's logs -// table (v2 invariant: customer_id = user_id). 
We could call -// Bifrost's rankings endpoint, but it adds a "trends" calculation -// against the previous window that complicates the shape and is -// phase-9's job. Phase 8 just sums per-user from the same paged -// row scan we already do for by-agent. +// Identity source-of-truth +// ------------------------ +// We aggregate by `metadata["user-id"]` only, NOT by the indexed +// `customer_id` column. Two reasons: +// +// 1. The v2 invariant `customer_id = user_id` holds in theory, but +// `customer_id` is populated from the virtual key (whatever Hive +// issued), which for production traffic is a Hive UUID — not the +// human-readable identifier callers stamp on `x-bf-dim-user-id`. +// Mixing the two confuses the dashboard (UUID rows in the list, +// username rows on RunDetail) and breaks click-through (clicking +// a UUID queries by username and gets empty). +// +// 2. Post-phase-6 `metadata.user-id` is overwritten from the +// verified macaroon claim, so it becomes the cryptographically +// attested identity. Treating it as authoritative now means +// phase-6 lights up without any UI change. +// +// Rows with no `metadata.user-id` (e.g. ad-hoc curl without dim +// headers, or pre-onboarding traffic) are intentionally excluded — +// they'd otherwise pile up as orphan "unknown" entries that the +// dashboard can't do anything useful with anyway. func (h *observabilityHandlers) spendByUser(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { methodNotAllowed(w, http.MethodGet) @@ -222,12 +238,7 @@ func (h *observabilityHandlers) spendByUser(w http.ResponseWriter, r *http.Reque } by := map[string]*agg{} for _, l := range logs { - uid := l.CustomerID - if uid == "" { - // Fall back to the verified macaroon dim so logs from - // before phase-6 canonicalisation still attribute. 
- uid = l.Metadata["user-id"] - } + uid := l.Metadata["user-id"] if uid == "" { continue } @@ -365,22 +376,17 @@ func seriesTotal(s HistogramSeries) float64 { } // dimensionValue returns the value of `dim` from a logstoreLog. -// Metadata-backed dims (agent-name, run-id, session-id, realm-id) -// are pulled from l.Metadata; the native customer_id column is -// pulled from l.CustomerID. Unknown dimensions return "" — the -// caller already rejected those in parseDimensionParam, so this -// branch only fires for genuinely-empty rows. +// Every dim — including user-id — is read from l.Metadata. The +// `customer_id` column is intentionally NOT consulted: it carries +// the virtual key's customer (a Hive UUID in production), which +// disagrees with the human-readable `metadata.user-id` stamped by +// callers. See the source-of-truth note on spendByUser above. +// +// Unknown dimensions return "" — the caller already rejected those +// in parseDimensionParam, so an empty result here means the row +// genuinely has no value for the dim. func dimensionValue(l logstoreLog, dim string) string { - switch dim { - case "user-id": - // user-id and customer_id are aliased per v2. - if l.CustomerID != "" { - return l.CustomerID - } - return l.Metadata["user-id"] - default: - return l.Metadata[dim] - } + return l.Metadata[dim] } // ─── /_plugin/runs/:run_id ───────────────────────────────────────────