From 399f4463388f812b133dbe721c44e0851c52f07b Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Sat, 27 Jun 2026 12:44:35 +0000
Subject: [PATCH] =?UTF-8?q?perf(hub):=20seal=20run=20digest=20on=20crash/f?=
 =?UTF-8?q?ailure=20+=20session-less=20terminate=20(#118=20=C2=A74)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only the operator stop path (stopSessionInternal) finalized the run
digest at terminal time. A crash/failure (PATCH status=crashed|failed)
and a session-less terminate fell through without folding, so the FIRST
Insight open on those runs paid the full O(n) backfill (#118 bottleneck
#3/#4) on the user's read path.

Converge both missing terminal transitions through finalizeDigestOutcome
(fold current if stale, then O(1) outcome stamp) — same hook the stop
path already uses. Chose finalizeDigestOutcome's idempotent recompute
over the background worker's incremental foldDirtyAgent to avoid a
double-fold race with that worker.

- handlePatchAgent crashed/failed branch -> finalizeDigestOutcome
- applyAgentTerminationEffects session-less branch -> finalizeDigestOutcome
- test: a crashed agent's digest is sealed (watermark==max, outcome stamped)
  right after the PATCH, so no read-time backfill is owed

Co-Authored-By: Claude Opus 4.8
---
Review notes (this region follows the "---" separator, so it is commentary
only: it is not part of the commit message and not part of any hunk).

  * TestDigestSealedOnCrash drives only PATCH status=crashed. The "failed"
    status named in the commit message and the session-less branch of
    applyAgentTerminationEffects touched below are not exercised by the
    added test.
  * The test's pre-condition check discards the error returned by
    loadAgentDigest (`_, ok, _ :=`), and the outcome assertion only requires
    d.Outcome to be non-empty rather than a specific value.
  * handlePatchAgent passes r.Context() to finalizeDigestOutcome.
    NOTE(review): if that helper honours context cancellation, a client
    disconnect could abort the seal — confirm against its implementation,
    which is not shown in this patch.
  * Neither new call site uses a result from finalizeDigestOutcome.
    TODO confirm whether it can report an error that should be logged.

 .../server/digest_seal_on_terminal_test.go    | 56 +++++++++++++++++++
 hub/internal/server/handlers_agents.go        |  9 +++
 2 files changed, 65 insertions(+)
 create mode 100644 hub/internal/server/digest_seal_on_terminal_test.go

diff --git a/hub/internal/server/digest_seal_on_terminal_test.go b/hub/internal/server/digest_seal_on_terminal_test.go
new file mode 100644
index 00000000..edefb739
--- /dev/null
+++ b/hub/internal/server/digest_seal_on_terminal_test.go
@@ -0,0 +1,56 @@
+package server
+
+import (
+	"context"
+	"net/http"
+	"testing"
+)
+
+// TestDigestSealedOnCrash verifies the #118 §4 fold-on-close: when an agent
+// flips to a crash/failure terminal state (not the operator stop path), the run
+// digest is folded current + outcome-stamped right then, so the first Insight
+// open is an O(1) read rather than a full O(n) backfill.
+func TestDigestSealedOnCrash(t *testing.T) {
+	s, token := newA2ATestServer(t)
+	ctx := context.Background()
+	const sesID = "ses-crash"
+	const agentID = "agent-crash"
+
+	seedSessionWithAgent(t, s, defaultTeamID, sesID, agentID)
+	insertEventRow(t, s, agentID, sesID, 1, "text", `{"text":"a"}`)
+	insertEventRow(t, s, agentID, sesID, 2, "tool_call", `{"name":"read","id":"c1"}`)
+	insertEventRow(t, s, agentID, sesID, 3, "text", `{"text":"b"}`)
+
+	// Pre-condition: no digest row yet.
+	dr, err := s.digestReader(defaultTeamID)
+	if err != nil {
+		t.Fatalf("digestReader: %v", err)
+	}
+	if _, ok, _ := loadAgentDigest(ctx, dr, agentID); ok {
+		t.Fatal("digest unexpectedly present before terminal transition")
+	}
+
+	// Crash the agent via the same PATCH the host-runner reconcile uses.
+	status, body := doReq(t, s, token, http.MethodPatch,
+		"/v1/teams/"+defaultTeamID+"/agents/"+agentID+"/",
+		map[string]any{"status": "crashed"})
+	if status != http.StatusNoContent {
+		t.Fatalf("PATCH crashed: status=%d body=%s", status, body)
+	}
+
+	// Post-condition: digest exists, watermark caught up to the last event, and
+	// the terminal outcome is stamped — i.e. no read-time backfill is owed.
+	d, ok, err := loadAgentDigest(ctx, dr, agentID)
+	if err != nil {
+		t.Fatalf("loadAgentDigest: %v", err)
+	}
+	if !ok {
+		t.Fatal("digest not sealed after crash (no row) — fold-on-close missing")
+	}
+	if d.WatermarkSeq != 3 {
+		t.Fatalf("watermark = %d, want 3 (digest left stale after crash)", d.WatermarkSeq)
+	}
+	if d.Outcome == "" {
+		t.Fatal("digest outcome not stamped on crash")
+	}
+}
diff --git a/hub/internal/server/handlers_agents.go b/hub/internal/server/handlers_agents.go
index ee254f96..f69bd51a 100644
--- a/hub/internal/server/handlers_agents.go
+++ b/hub/internal/server/handlers_agents.go
@@ -404,6 +404,12 @@ func (s *Server) handlePatchAgent(w http.ResponseWriter, r *http.Request) {
 			   AND status = 'active'`,
 			NowUTC(), team, id)
 		_, _ = auth.RevokeAgentTokens(r.Context(), s.writeDB, id, NowUTC())
+		// Fold + stamp the run digest now (#118 §4). The operator stop path
+		// finalizes via stopSessionInternal; a crash/failure flows through
+		// here instead, so without this the first Insight open after the crash
+		// pays the full O(n) backfill. finalizeDigestOutcome brings the digest
+		// current off the read path.
+		s.finalizeDigestOutcome(r.Context(), team, id)
 	}
 	// ADR-029 D-3: auto-derive the linked task's status from the
 	// agent's terminal transition. Most-recent-spawn drives; older
@@ -454,6 +460,9 @@ func (s *Server) applyAgentTerminationEffects(ctx context.Context, team, id, rea
 	}
 	s.recordAudit(ctx, team, "agent.terminate", "agent", id,
 		"terminate "+handle, map[string]any{"handle": handle})
+	// Seal the run digest for the session-less terminate too (#118 §4) — the
+	// live-session branch above already finalizes via stopSessionInternal.
+	s.finalizeDigestOutcome(ctx, team, id)
 }
 
 // handleStopAgent is POST /v1/teams/{team}/agents/{agent}/stop — the