From eabe0eb602d7d97ca04c467af24863d06d98839a Mon Sep 17 00:00:00 2001 From: Ashish Huddar Date: Wed, 10 Jun 2026 11:08:32 +0530 Subject: [PATCH 1/4] fix(codex): deliver activity hooks via -c session flags, trust worktree at launch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex (0.136+) never loads hook config from AO's per-session worktrees: project-local .codex/ layers only load from trusted directories, and for linked git worktrees codex sources hook declarations from the matching folder in the root checkout — so the workspace-local .codex/hooks.json AO wrote was dead config and codex sessions never reported activity. Deliver the hooks on the launch/resume command instead: - -c 'hooks.=[...]' session-flag config for SessionStart, UserPromptSubmit, PermissionRequest, and Stop; the session-flags layer is not trust-gated and aggregates with the user's own hooks. The existing --dangerously-bypass-hook-trust flag lets them run without a persisted trust hash. - -c 'projects={""={trust_level="trusted"}}' (inline-table form; the dotted projects."".trust_level key is corrupted by codex's naive -c dot-split) so spawns into never-trusted repos don't hang invisibly on the interactive directory-trust prompt. Both the literal and symlink-resolved worktree paths are trusted. - -c notice.hide_rate_limit_model_nudge=true so the "switch to a cheaper model?" dialog can't hang a headless pane and swallow the spawn prompt. GetAgentHooks no longer writes workspace files (worktrees stay clean); it only strips entries older AO versions left in .codex/hooks.json, preserving user hooks. UninstallHooks/AreHooksInstalled now operate on those legacy files only. Verified with a real spawn into a fresh untrusted repo: activity transitions idle -> active -> idle hands-free, no .codex dir in the worktree, no hook delivery failures. Co-Authored-By: Claude Fable 5 --- .../internal/adapters/agent/codex/codex.go | 31 +- .../adapters/agent/codex/codex_test.go | 347 +++++++++++------- .../internal/adapters/agent/codex/hooks.go | 319 +++++++--------- docs/agent/README.md | 4 + 4 files changed, 368 insertions(+), 333 deletions(-) diff --git a/backend/internal/adapters/agent/codex/codex.go b/backend/internal/adapters/agent/codex/codex.go index 8f2ad62f..d078fb43 100644 --- a/backend/internal/adapters/agent/codex/codex.go +++ b/backend/internal/adapters/agent/codex/codex.go @@ -57,9 +57,10 @@ func (p *Plugin) GetConfigSpec(ctx context.Context) (ports.ConfigSpec, error) { } // GetLaunchCommand builds the argv to start a new Codex session, applying the -// no-update-check, hook-trust bypass, and approval flags, optional -// system-prompt instructions, and the initial prompt (passed after `--` so a -// leading "-" is not read as a flag). +// no-update-check, hook-trust bypass, and approval flags, AO's session-flag +// activity hooks, the workspace trust override, optional system-prompt +// instructions, and the initial prompt (passed after `--` so a leading "-" is +// not read as a flag). func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg ports.LaunchConfig) (cmd []string, err error) { binary, err := p.codexBinary(ctx) if err != nil { @@ -68,8 +69,11 @@ func (p *Plugin) GetLaunchCommand(ctx context.Context, cfg ports.LaunchConfig) ( cmd = []string{binary} appendNoUpdateCheckFlag(&cmd) + appendHideRateLimitNudgeFlag(&cmd) appendHookTrustBypassFlag(&cmd) appendApprovalFlags(&cmd, cfg.Permissions) + appendSessionHookFlags(&cmd) + appendWorkspaceTrustFlag(&cmd, cfg.WorkspacePath) if cfg.SystemPromptFile != "" { cmd = append(cmd, "-c", "model_instructions_file="+cfg.SystemPromptFile) @@ -111,11 +115,14 @@ func (p *Plugin) GetRestoreCommand(ctx context.Context, cfg ports.RestoreConfig) return nil, false, err } - cmd = make([]string, 0, 8) + cmd = make([]string, 0, 24) cmd = append(cmd, binary, "resume") appendNoUpdateCheckFlag(&cmd) + appendHideRateLimitNudgeFlag(&cmd) appendHookTrustBypassFlag(&cmd) appendApprovalFlags(&cmd, cfg.Permissions) + appendSessionHookFlags(&cmd) + appendWorkspaceTrustFlag(&cmd, cfg.Session.WorkspacePath) cmd = append(cmd, agentSessionID) return cmd, true, nil } @@ -226,11 +233,19 @@ func appendNoUpdateCheckFlag(cmd *[]string) { *cmd = append(*cmd, "-c", "check_for_update_on_startup=false") } +func appendHideRateLimitNudgeFlag(cmd *[]string) { + // When the account nears its rate limit, the Codex TUI interposes an + // interactive "switch to a cheaper model?" dialog before the first turn. + // In a headless AO pane that dialog hangs the session invisibly and + // swallows the auto-submitted spawn prompt, so suppress it. + *cmd = append(*cmd, "-c", "notice.hide_rate_limit_model_nudge=true") +} + func appendHookTrustBypassFlag(cmd *[]string) { - // AO installs deterministic workspace-local Codex hooks immediately before - // launch/restore. Without this flag, a fresh per-session worktree can skip - // those hooks until an interactive /hooks trust review happens, leaving AO - // without activity signals. + // AO's activity hooks ride the launch command as session-flag config (see + // appendSessionHookFlags) and carry no persisted trust hash in the user's + // `[hooks.state]`. Without this flag Codex would hold them for an + // interactive hooks review, leaving AO without activity signals. *cmd = append(*cmd, "--dangerously-bypass-hook-trust") } diff --git a/backend/internal/adapters/agent/codex/codex_test.go b/backend/internal/adapters/agent/codex/codex_test.go index 63757e3f..64803f47 100644 --- a/backend/internal/adapters/agent/codex/codex_test.go +++ b/backend/internal/adapters/agent/codex/codex_test.go @@ -6,20 +6,47 @@ import ( "os" "path/filepath" "reflect" + "runtime" "strings" "testing" "github.com/aoagents/agent-orchestrator/backend/internal/ports" ) +// canonicalTempDir returns a t.TempDir() with symlinks resolved so the +// workspace trust flag collapses to a single predictable entry (macOS TempDir +// lives under a /var -> /private/var symlink). +func canonicalTempDir(t *testing.T) string { + t.Helper() + dir, err := filepath.EvalSymlinks(t.TempDir()) + if err != nil { + t.Fatal(err) + } + return dir +} + +// sessionHookFlags mirrors the `-c` hook config appendSessionHookFlags emits, +// asserted literally so accidental format drift fails loudly: Codex parses +// these values as TOML. +func sessionHookFlags() []string { + return []string{ + "-c", `hooks.SessionStart=[{hooks=[{type="command",command="ao hooks codex session-start",timeout=30}]}]`, + "-c", `hooks.UserPromptSubmit=[{hooks=[{type="command",command="ao hooks codex user-prompt-submit",timeout=30}]}]`, + "-c", `hooks.PermissionRequest=[{hooks=[{type="command",command="ao hooks codex permission-request",timeout=30}]}]`, + "-c", `hooks.Stop=[{hooks=[{type="command",command="ao hooks codex stop",timeout=30}]}]`, + } +} + func TestGetLaunchCommandBuildsCrossPlatformArgv(t *testing.T) { plugin := &Plugin{resolvedBinary: "codex"} + workspace := canonicalTempDir(t) cmd, err := plugin.GetLaunchCommand(context.Background(), ports.LaunchConfig{ Permissions: ports.PermissionModeBypassPermissions, Prompt: "-fix this", SystemPromptFile: filepath.Join("tmp", "prompt with spaces.md"), SystemPrompt: "ignored", + WorkspacePath: workspace, }) if err != nil { t.Fatal(err) @@ -28,16 +55,38 @@ func TestGetLaunchCommandBuildsCrossPlatformArgv(t *testing.T) { want := []string{ "codex", "-c", "check_for_update_on_startup=false", + "-c", "notice.hide_rate_limit_model_nudge=true", "--dangerously-bypass-hook-trust", "--dangerously-bypass-approvals-and-sandbox", - "-c", "model_instructions_file=" + filepath.Join("tmp", "prompt with spaces.md"), - "--", "-fix this", } + want = append(want, sessionHookFlags()...) + want = append(want, + "-c", `projects={"`+workspace+`"={trust_level="trusted"}}`, + "-c", "model_instructions_file="+filepath.Join("tmp", "prompt with spaces.md"), + "--", "-fix this", + ) if !reflect.DeepEqual(cmd, want) { t.Fatalf("unexpected command\nwant: %#v\n got: %#v", want, cmd) } } +func TestGetLaunchCommandWithoutWorkspaceOmitsTrustFlag(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + + cmd, err := plugin.GetLaunchCommand(context.Background(), ports.LaunchConfig{}) + if err != nil { + t.Fatal(err) + } + for _, arg := range cmd { + if strings.HasPrefix(arg, "projects=") { + t.Fatalf("command %#v contains a projects trust flag without a workspace", cmd) + } + } + if !containsSubsequence(cmd, sessionHookFlags()) { + t.Fatalf("command %#v missing session hook flags", cmd) + } +} + func TestGetLaunchCommandMapsApprovalModes(t *testing.T) { tests := []struct { name string @@ -91,6 +140,61 @@ func TestGetLaunchCommandMapsApprovalModes(t *testing.T) { } } +func TestAppendWorkspaceTrustFlagCoversLiteralAndResolvedPaths(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("symlink creation needs extra privileges on Windows") + } + base := canonicalTempDir(t) + target := filepath.Join(base, "real") + if err := os.Mkdir(target, 0o755); err != nil { + t.Fatal(err) + } + link := filepath.Join(base, "link") + if err := os.Symlink(target, link); err != nil { + t.Fatal(err) + } + + var cmd []string + appendWorkspaceTrustFlag(&cmd, link) + want := []string{ + "-c", + `projects={"` + link + `"={trust_level="trusted"},"` + target + `"={trust_level="trusted"}}`, + } + if !reflect.DeepEqual(cmd, want) { + t.Fatalf("trust flag\nwant: %#v\n got: %#v", want, cmd) + } + + cmd = nil + appendWorkspaceTrustFlag(&cmd, target) + want = []string{"-c", `projects={"` + target + `"={trust_level="trusted"}}`} + if !reflect.DeepEqual(cmd, want) { + t.Fatalf("canonical-path trust flag\nwant: %#v\n got: %#v", want, cmd) + } + + cmd = nil + appendWorkspaceTrustFlag(&cmd, " ") + if cmd != nil { + t.Fatalf("blank workspace produced %#v, want no flag", cmd) + } +} + +func TestCodexTOMLBasicStringEscapes(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"plain", "\"plain\""}, + {"C:\\Users\\dev", "\"C:\\\\Users\\\\dev\""}, + {"with \"quotes\"", "\"with \\\"quotes\\\"\""}, + {"tab\there", "\"tab\\u0009here\""}, + } + for _, tt := range tests { + if got := codexTOMLBasicString(tt.in); got != tt.want { + t.Fatalf("codexTOMLBasicString(%q) = %s, want %s", tt.in, got, tt.want) + } + } +} + func TestGetPromptDeliveryStrategyIsInCommand(t *testing.T) { plugin := &Plugin{resolvedBinary: "codex"} @@ -115,16 +219,61 @@ func TestGetConfigSpecHasNoCustomFieldsYet(t *testing.T) { } } -func TestGetAgentHooksInstallsCodexHooks(t *testing.T) { +// legacyHooksJSON builds a hooks.json in the shape older AO versions wrote: +// AO-managed entries plus one user-defined Stop hook. +func legacyHooksJSON() string { + return `{ + "hooks": { + "Stop": [ + {"matcher": null, "hooks": [ + {"type": "command", "command": "custom stop hook", "timeout": 3}, + {"type": "command", "command": "ao hooks codex stop", "timeout": 30} + ]} + ], + "UserPromptSubmit": [ + {"matcher": null, "hooks": [ + {"type": "command", "command": "ao hooks codex user-prompt-submit", "timeout": 30} + ]} + ] + }, + "unmanagedKey": {"keep": true} +}` +} + +func TestGetAgentHooksWritesNothingIntoFreshWorkspace(t *testing.T) { plugin := &Plugin{resolvedBinary: "codex"} workspace := t.TempDir() - hooksDir := filepath.Join(workspace, ".codex") - if err := os.MkdirAll(hooksDir, 0o755); err != nil { + + cfg := ports.WorkspaceHookConfig{ + DataDir: t.TempDir(), + SessionID: "sess-1", + WorkspacePath: workspace, + } + if err := plugin.GetAgentHooks(context.Background(), cfg); err != nil { t.Fatal(err) } - hooksPath := filepath.Join(hooksDir, "hooks.json") - existing := `{"hooks":{"Stop":[{"matcher":null,"hooks":[{"type":"command","command":"custom stop hook","timeout":3}]}]}}` - if err := os.WriteFile(hooksPath, []byte(existing), 0o644); err != nil { + + if _, err := os.Stat(filepath.Join(workspace, codexHooksDirName)); !os.IsNotExist(err) { + t.Fatalf(".codex dir state = %v, want not-exist: hooks ride the launch command", err) + } +} + +func TestGetAgentHooksRequiresWorkspacePath(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + err := plugin.GetAgentHooks(context.Background(), ports.WorkspaceHookConfig{WorkspacePath: " "}) + if err == nil { + t.Fatal("expected error for blank WorkspacePath") + } +} + +func TestGetAgentHooksStripsLegacyAOEntries(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + workspace := t.TempDir() + hooksPath := filepath.Join(workspace, codexHooksDirName, codexHooksFileName) + if err := os.MkdirAll(filepath.Dir(hooksPath), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(hooksPath, []byte(legacyHooksJSON()), 0o644); err != nil { t.Fatal(err) } @@ -136,10 +285,6 @@ func TestGetAgentHooksInstallsCodexHooks(t *testing.T) { if err := plugin.GetAgentHooks(context.Background(), cfg); err != nil { t.Fatal(err) } - // A second install must not duplicate AO hook commands. - if err := plugin.GetAgentHooks(context.Background(), cfg); err != nil { - t.Fatal(err) - } data, err := os.ReadFile(hooksPath) if err != nil { @@ -149,51 +294,73 @@ func TestGetAgentHooksInstallsCodexHooks(t *testing.T) { if err := json.Unmarshal(data, &config); err != nil { t.Fatal(err) } - if config.Hooks == nil { - t.Fatalf("hooks config missing hooks object: %#v", config) - } for _, spec := range codexManagedHooks { - entries := config.Hooks[spec.Event] - if count := countCodexHookCommand(entries, spec.Command); count != 1 { - t.Fatalf("%s command count = %d, want 1 in %#v", spec.Event, count, entries) + if got := countCodexHookCommand(config.Hooks[spec.Event], spec.Command); got != 0 { + t.Fatalf("%s command %q count = %d after cleanup, want 0", spec.Event, spec.Command, got) } } - stopEntries := config.Hooks["Stop"] - if countCodexHookCommand(stopEntries, "custom stop hook") != 1 { - t.Fatalf("existing Stop hook was not preserved: %#v", stopEntries) + if countCodexHookCommand(config.Hooks["Stop"], "custom stop hook") != 1 { + t.Fatalf("user Stop hook not preserved: %#v", config.Hooks["Stop"]) + } + if _, ok := config.Hooks["UserPromptSubmit"]; ok { + t.Fatalf("UserPromptSubmit left behind after its only entry was AO's: %#v", config.Hooks) + } + if !strings.Contains(string(data), "unmanagedKey") { + t.Fatalf("top-level keys AO doesn't manage were dropped: %s", data) + } +} + +func TestGetAgentHooksLeavesFilesWithoutAOEntriesUntouched(t *testing.T) { + plugin := &Plugin{resolvedBinary: "codex"} + workspace := t.TempDir() + hooksPath := filepath.Join(workspace, codexHooksDirName, codexHooksFileName) + if err := os.MkdirAll(filepath.Dir(hooksPath), 0o755); err != nil { + t.Fatal(err) + } + seed := `{"hooks":{"Stop":[{"matcher":null,"hooks":[{"type":"command","command":"custom stop hook","timeout":3}]}]}}` + if err := os.WriteFile(hooksPath, []byte(seed), 0o644); err != nil { + t.Fatal(err) + } + + cfg := ports.WorkspaceHookConfig{ + DataDir: t.TempDir(), + SessionID: "sess-1", + WorkspacePath: workspace, + } + if err := plugin.GetAgentHooks(context.Background(), cfg); err != nil { + t.Fatal(err) } - configData, err := os.ReadFile(filepath.Join(workspace, ".codex", "config.toml")) + data, err := os.ReadFile(hooksPath) if err != nil { t.Fatal(err) } - if !strings.Contains(string(configData), codexHooksFeatureLine) { - t.Fatalf("config.toml missing hooks feature flag: %s", configData) + if string(data) != seed { + t.Fatalf("user-only hooks.json was rewritten\n--- before ---\n%s\n--- after ---\n%s", seed, data) } } -func TestUninstallHooksRemovesCodexHooks(t *testing.T) { +func TestUninstallHooksRemovesLegacyCodexHooks(t *testing.T) { plugin := &Plugin{resolvedBinary: "codex"} workspace := t.TempDir() - hooksPath := filepath.Join(workspace, ".codex", "hooks.json") + hooksPath := filepath.Join(workspace, codexHooksDirName, codexHooksFileName) ctx := context.Background() - cfg := ports.WorkspaceHookConfig{DataDir: t.TempDir(), SessionID: "sess-1", WorkspacePath: workspace} - // Pre-seed a user's own Stop hook; it must survive uninstall. - if err := os.MkdirAll(filepath.Dir(hooksPath), 0o755); err != nil { + // Missing file is a no-op. + if err := plugin.UninstallHooks(ctx, workspace); err != nil { t.Fatal(err) } - existing := `{"hooks":{"Stop":[{"matcher":null,"hooks":[{"type":"command","command":"custom stop hook","timeout":3}]}]}}` - if err := os.WriteFile(hooksPath, []byte(existing), 0o644); err != nil { + + if err := os.MkdirAll(filepath.Dir(hooksPath), 0o755); err != nil { t.Fatal(err) } - - if err := plugin.GetAgentHooks(ctx, cfg); err != nil { + if err := os.WriteFile(hooksPath, []byte(legacyHooksJSON()), 0o644); err != nil { t.Fatal(err) } + if installed, err := plugin.AreHooksInstalled(ctx, workspace); err != nil || !installed { - t.Fatalf("AreHooksInstalled after install = (%v, %v), want (true, nil)", installed, err) + t.Fatalf("AreHooksInstalled with legacy entries = (%v, %v), want (true, nil)", installed, err) } if err := plugin.UninstallHooks(ctx, workspace); err != nil { @@ -219,25 +386,17 @@ func TestUninstallHooksRemovesCodexHooks(t *testing.T) { if countCodexHookCommand(config.Hooks["Stop"], "custom stop hook") != 1 { t.Fatalf("user Stop hook not preserved: %#v", config.Hooks["Stop"]) } - - // The shared hooks feature flag in config.toml is left in place — it enables - // every Codex hook, not just AO's. - configData, err := os.ReadFile(filepath.Join(workspace, ".codex", "config.toml")) - if err != nil { - t.Fatal(err) - } - if !strings.Contains(string(configData), codexHooksFeatureLine) { - t.Fatalf("config.toml hooks feature flag removed by uninstall: %s", configData) - } } func TestGetRestoreCommandReadsAgentSessionID(t *testing.T) { plugin := &Plugin{resolvedBinary: "codex"} + workspace := canonicalTempDir(t) cmd, ok, err := plugin.GetRestoreCommand(context.Background(), ports.RestoreConfig{ Permissions: ports.PermissionModeAuto, Session: ports.SessionRef{ - Metadata: map[string]string{ports.MetadataKeyAgentSessionID: "thread-123"}, + Metadata: map[string]string{ports.MetadataKeyAgentSessionID: "thread-123"}, + WorkspacePath: workspace, }, }) if err != nil { @@ -250,11 +409,16 @@ func TestGetRestoreCommandReadsAgentSessionID(t *testing.T) { "codex", "resume", "-c", "check_for_update_on_startup=false", + "-c", "notice.hide_rate_limit_model_nudge=true", "--dangerously-bypass-hook-trust", "--ask-for-approval", "on-request", "-c", `approvals_reviewer="auto_review"`, - "thread-123", } + want = append(want, sessionHookFlags()...) + want = append(want, + "-c", `projects={"`+workspace+`"={trust_level="trusted"}}`, + "thread-123", + ) if !reflect.DeepEqual(cmd, want) { t.Fatalf("restore cmd\nwant: %#v\n got: %#v", want, cmd) } @@ -341,97 +505,6 @@ func TestSessionInfoFalseWhenNoHookMetadata(t *testing.T) { } } -func TestEnsureCodexHooksFeatureEnabledEdgeCases(t *testing.T) { - tests := []struct { - name string - seed *string // nil means do not create config.toml - wantHas []string - wantMiss []string - }{ - { - name: "missing config.toml is created with features block", - seed: nil, - wantHas: []string{"[features]", codexHooksFeatureLine}, - }, - { - name: "empty config.toml is populated with features block", - seed: strPtr(""), - wantHas: []string{"[features]", codexHooksFeatureLine}, - }, - { - name: "existing features block without hooks gains hooks=true", - seed: strPtr("[features]\nother = true\n"), - wantHas: []string{"[features]", codexHooksFeatureLine, "other = true"}, - }, - { - name: "hooks=true already present is a no-op", - seed: strPtr("[features]\nhooks = true\n"), - wantHas: []string{"[features]", codexHooksFeatureLine}, - wantMiss: []string{codexLegacyHookFeatureLine}, - }, - { - name: "legacy codex_hooks=true is replaced with hooks=true", - seed: strPtr("[features]\ncodex_hooks = true\n"), - wantHas: []string{"[features]", codexHooksFeatureLine}, - wantMiss: []string{codexLegacyHookFeatureLine}, - }, - { - name: "both hooks=true and legacy line keep only the new line", - seed: strPtr("[features]\nhooks = true\ncodex_hooks = true\n"), - wantHas: []string{"[features]", codexHooksFeatureLine}, - wantMiss: []string{codexLegacyHookFeatureLine}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - workspace := t.TempDir() - configDir := filepath.Join(workspace, codexHooksDirName) - configPath := filepath.Join(configDir, codexConfigFileName) - if tt.seed != nil { - if err := os.MkdirAll(configDir, 0o755); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(configPath, []byte(*tt.seed), 0o600); err != nil { - t.Fatal(err) - } - } - - // No-op check: snapshot the file content before and after for - // the cases the helper documents as no-ops. - var before []byte - if tt.seed != nil && strings.Contains(*tt.seed, codexHooksFeatureLine) && !strings.Contains(*tt.seed, codexLegacyHookFeatureLine) { - before = []byte(*tt.seed) - } - - if err := ensureCodexHooksFeatureEnabled(workspace); err != nil { - t.Fatalf("ensureCodexHooksFeatureEnabled: %v", err) - } - - data, err := os.ReadFile(configPath) - if err != nil { - t.Fatalf("read %s: %v", configPath, err) - } - got := string(data) - for _, want := range tt.wantHas { - if !strings.Contains(got, want) { - t.Fatalf("config.toml missing %q\n--- got ---\n%s", want, got) - } - } - for _, miss := range tt.wantMiss { - if strings.Contains(got, miss) { - t.Fatalf("config.toml unexpectedly contains %q\n--- got ---\n%s", miss, got) - } - } - if before != nil && string(data) != string(before) { - t.Fatalf("expected no-op, content changed\n--- before ---\n%s\n--- after ---\n%s", before, data) - } - }) - } -} - -func strPtr(s string) *string { return &s } - func contains(values []string, needle string) bool { for _, value := range values { if value == needle { diff --git a/backend/internal/adapters/agent/codex/hooks.go b/backend/internal/adapters/agent/codex/hooks.go index 88c2cef5..cd5a901c 100644 --- a/backend/internal/adapters/agent/codex/hooks.go +++ b/backend/internal/adapters/agent/codex/hooks.go @@ -13,17 +13,26 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/ports" ) +// Codex (0.136+) never loads hook config from AO's per-session worktrees, so +// AO's hooks ride the launch command as `-c` session-flag config instead of +// workspace files: +// +// - Project-local `.codex/` layers only load when the directory is trusted, +// and for linked git worktrees Codex sources hook declarations from the +// matching `.codex/` folder in the ROOT checkout, not the worktree. A +// hooks.json written into an AO worktree is therefore dead config. +// - Hooks passed as `-c 'hooks.=[...]'` land in Codex's session-flags +// config layer, which is not trust-gated and aggregates with (never +// replaces) the user's own hooks from `~/.codex`. They carry no persisted +// trust hash, so the launch command also passes +// `--dangerously-bypass-hook-trust` to let them run. const ( codexHooksDirName = ".codex" codexHooksFileName = "hooks.json" - codexConfigFileName = "config.toml" - codexHooksFeatureLine = "hooks = true" - codexLegacyHookFeatureLine = "codex_hooks = true" - - // codexHookCommandPrefix identifies the hook commands AO owns, so - // install skips duplicates and uninstall recognizes AO entries by - // prefix without an embedded template to diff against. + // codexHookCommandPrefix identifies the hook commands AO owns, so the + // legacy-file cleanup and uninstall recognize AO entries by prefix + // without an embedded template to diff against. codexHookCommandPrefix = "ao hooks codex " codexHookTimeout = 30 ) @@ -45,15 +54,15 @@ type codexHookEntry struct { Timeout int `json:"timeout,omitempty"` } -// codexHookSpec describes one hook AO installs, defined in code rather -// than read from an embedded hooks file. +// codexHookSpec describes one hook AO delivers via launch-command config. type codexHookSpec struct { Event string Command string } -// codexManagedHooks is the source of truth for the hooks AO installs. -// Codex groups every hook under the nil matcher. +// codexManagedHooks is the source of truth for the hooks AO delivers. Event +// names must not contain dots: they are spliced into a dotted `-c` key path, +// and Codex splits that path on every dot without honoring quoting. var codexManagedHooks = []codexHookSpec{ {Event: "SessionStart", Command: codexHookCommandPrefix + "session-start"}, {Event: "UserPromptSubmit", Command: codexHookCommandPrefix + "user-prompt-submit"}, @@ -61,9 +70,70 @@ var codexManagedHooks = []codexHookSpec{ {Event: "Stop", Command: codexHookCommandPrefix + "stop"}, } -// GetAgentHooks installs AO's Codex hooks into the worktree-local -// .codex/hooks.json file. Existing hook entries are preserved and duplicate -// AO commands are not appended. +// appendSessionHookFlags adds AO's activity hooks to the argv as `-c` +// session-flag config, one flag per managed event. +func appendSessionHookFlags(cmd *[]string) { + for _, spec := range codexManagedHooks { + flag := fmt.Sprintf(`hooks.%s=[{hooks=[{type="command",command=%s,timeout=%d}]}]`, + spec.Event, codexTOMLBasicString(spec.Command), codexHookTimeout) + *cmd = append(*cmd, "-c", flag) + } +} + +// appendWorkspaceTrustFlag marks the session's worktree as a trusted Codex +// project for this invocation only, so spawns into never-before-trusted repos +// don't hang on the interactive "Do you trust this directory?" prompt. +// +// The override is shaped as a single `projects={...}` value (not a dotted +// `projects."".trust_level` key) because Codex splits `-c` key paths on +// every dot without honoring quoted segments, which corrupts path keys. The +// inline table deep-merges with the user's persisted projects map. Both the +// literal and symlink-resolved paths are trusted because Codex looks trust up +// by the canonicalized cwd first and the literal path second (on macOS the two +// commonly differ, e.g. /tmp vs /private/tmp). +func appendWorkspaceTrustFlag(cmd *[]string, workspacePath string) { + path := strings.TrimSpace(workspacePath) + if path == "" { + return + } + keys := []string{path} + if resolved, err := filepath.EvalSymlinks(path); err == nil && resolved != path { + keys = append(keys, resolved) + } + entries := make([]string, 0, len(keys)) + for _, key := range keys { + entries = append(entries, codexTOMLBasicString(key)+`={trust_level="trusted"}`) + } + *cmd = append(*cmd, "-c", "projects={"+strings.Join(entries, ",")+"}") +} + +// codexTOMLBasicString renders s as a TOML basic string, escaping backslashes +// and quotes (Windows paths) plus control characters so the value survives +// Codex's TOML parse of the `-c` override. +func codexTOMLBasicString(s string) string { + var b strings.Builder + b.WriteByte('"') + for _, r := range s { + switch { + case r == '\\': + b.WriteString(`\\`) + case r == '"': + b.WriteString(`\"`) + case r < 0x20 || r == 0x7f: + fmt.Fprintf(&b, `\u%04X`, r) + default: + b.WriteRune(r) + } + } + b.WriteByte('"') + return b.String() +} + +// GetAgentHooks no longer installs workspace files — Codex never loads them +// from AO's worktrees (see the package comment above); the hooks ride the +// launch command instead. It still strips hook entries that older AO versions +// wrote into the worktree-local .codex/hooks.json so reused or restored +// worktrees don't keep dead AO config, preserving user-defined hooks. func (p *Plugin) GetAgentHooks(ctx context.Context, cfg ports.WorkspaceHookConfig) error { if err := ctx.Err(); err != nil { return err @@ -71,43 +141,15 @@ func (p *Plugin) GetAgentHooks(ctx context.Context, cfg ports.WorkspaceHookConfi if strings.TrimSpace(cfg.WorkspacePath) == "" { return errors.New("codex.GetAgentHooks: WorkspacePath is required") } - - hooksPath := codexHooksPath(cfg.WorkspacePath) - topLevel, rawHooks, err := readCodexHooks(hooksPath) - if err != nil { - return fmt.Errorf("codex.GetAgentHooks: %w", err) - } - - for event, specs := range groupCodexHooksByEvent() { - var existingGroups []codexMatcherGroup - if err := parseCodexHookType(rawHooks, event, &existingGroups); err != nil { - return fmt.Errorf("codex.GetAgentHooks: %w", err) - } - for _, spec := range specs { - if !codexHookCommandExists(existingGroups, spec.Command) { - entry := codexHookEntry{Type: "command", Command: spec.Command, Timeout: codexHookTimeout} - existingGroups = addCodexHook(existingGroups, entry) - } - } - if err := marshalCodexHookType(rawHooks, event, existingGroups); err != nil { - return fmt.Errorf("codex.GetAgentHooks: %w", err) - } - } - - if err := writeCodexHooks(hooksPath, topLevel, rawHooks); err != nil { + if err := removeLegacyWorkspaceHooks(cfg.WorkspacePath); err != nil { return fmt.Errorf("codex.GetAgentHooks: %w", err) } - - if err := ensureCodexHooksFeatureEnabled(cfg.WorkspacePath); err != nil { - return fmt.Errorf("codex.GetAgentHooks: enable hooks feature: %w", err) - } return nil } -// UninstallHooks removes AO's Codex hooks from the workspace-local +// UninstallHooks removes AO's legacy Codex hooks from the workspace-local // .codex/hooks.json file, leaving user-defined hooks untouched. A missing file -// is a no-op. The .codex/config.toml `hooks = true` feature flag is left in -// place because it enables every Codex hook, not just AO's. +// is a no-op. func (p *Plugin) UninstallHooks(ctx context.Context, workspacePath string) error { if err := ctx.Err(); err != nil { return err @@ -115,35 +157,53 @@ func (p *Plugin) UninstallHooks(ctx context.Context, workspacePath string) error if strings.TrimSpace(workspacePath) == "" { return errors.New("codex.UninstallHooks: workspacePath is required") } + if err := removeLegacyWorkspaceHooks(workspacePath); err != nil { + return fmt.Errorf("codex.UninstallHooks: %w", err) + } + return nil +} +// removeLegacyWorkspaceHooks strips AO-owned entries from a workspace-local +// hooks.json left behind by older AO versions. Files without one are untouched. +func removeLegacyWorkspaceHooks(workspacePath string) error { hooksPath := codexHooksPath(workspacePath) if _, err := os.Stat(hooksPath); errors.Is(err, os.ErrNotExist) { return nil } topLevel, rawHooks, err := readCodexHooks(hooksPath) if err != nil { - return fmt.Errorf("codex.UninstallHooks: %w", err) + return err } - for _, event := range codexManagedEvents() { + changed := false + for event, raw := range rawHooks { var groups []codexMatcherGroup - if err := parseCodexHookType(rawHooks, event, &groups); err != nil { - return fmt.Errorf("codex.UninstallHooks: %w", err) + if err := json.Unmarshal(raw, &groups); err != nil { + return fmt.Errorf("parse %s hooks: %w", event, err) + } + kept := removeCodexManagedHooks(groups) + if countCodexHooks(kept) == countCodexHooks(groups) { + continue } - groups = removeCodexManagedHooks(groups) - if err := marshalCodexHookType(rawHooks, event, groups); err != nil { - return fmt.Errorf("codex.UninstallHooks: %w", err) + changed = true + if len(kept) == 0 { + delete(rawHooks, event) + continue } + data, err := json.Marshal(kept) + if err != nil { + return fmt.Errorf("encode %s hooks: %w", event, err) + } + rawHooks[event] = data } - - if err := writeCodexHooks(hooksPath, topLevel, rawHooks); err != nil { - return fmt.Errorf("codex.UninstallHooks: %w", err) + if !changed { + return nil } - return nil + return writeCodexHooks(hooksPath, topLevel, rawHooks) } -// AreHooksInstalled reports whether any AO Codex hook is present in the -// workspace-local hooks file. A missing file means none are installed. +// AreHooksInstalled reports whether any legacy AO Codex hook is still present +// in the workspace-local hooks file. A missing file means none are installed. func (p *Plugin) AreHooksInstalled(ctx context.Context, workspacePath string) (bool, error) { if err := ctx.Err(); err != nil { return false, err @@ -161,10 +221,10 @@ func (p *Plugin) AreHooksInstalled(ctx context.Context, workspacePath string) (b return false, fmt.Errorf("codex.AreHooksInstalled: %w", err) } - for _, event := range codexManagedEvents() { + for event, raw := range rawHooks { var groups []codexMatcherGroup - if err := parseCodexHookType(rawHooks, event, &groups); err != nil { - return false, fmt.Errorf("codex.AreHooksInstalled: %w", err) + if err := json.Unmarshal(raw, &groups); err != nil { + return false, fmt.Errorf("codex.AreHooksInstalled: parse %s hooks: %w", event, err) } for _, group := range groups { for _, hook := range group.Hooks { @@ -236,32 +296,19 @@ func writeCodexHooks(hooksPath string, topLevel, rawHooks map[string]json.RawMes return nil } -// groupCodexHooksByEvent groups the managed hook specs by their Codex event so -// each event's array is rewritten once. -func groupCodexHooksByEvent() map[string][]codexHookSpec { - byEvent := map[string][]codexHookSpec{} - for _, spec := range codexManagedHooks { - byEvent[spec.Event] = append(byEvent[spec.Event], spec) - } - return byEvent +func isCodexManagedHook(command string) bool { + return strings.HasPrefix(command, codexHookCommandPrefix) } -// codexManagedEvents returns the distinct Codex events AO manages, in the -// order they first appear in codexManagedHooks. -func codexManagedEvents() []string { - seen := map[string]bool{} - events := make([]string, 0, len(codexManagedHooks)) - for _, spec := range codexManagedHooks { - if !seen[spec.Event] { - seen[spec.Event] = true - events = append(events, spec.Event) - } +// countCodexHooks totals the hook entries across groups so the legacy cleanup +// can tell whether stripping AO entries changed anything, including removals +// inside a group that survives. +func countCodexHooks(groups []codexMatcherGroup) int { + total := 0 + for _, group := range groups { + total += len(group.Hooks) } - return events -} - -func isCodexManagedHook(command string) bool { - return strings.HasPrefix(command, codexHookCommandPrefix) + return total } // removeCodexManagedHooks strips AO hook entries from every group, @@ -282,107 +329,3 @@ func removeCodexManagedHooks(groups []codexMatcherGroup) []codexMatcherGroup { } return result } - -func parseCodexHookType(rawHooks map[string]json.RawMessage, event string, target *[]codexMatcherGroup) error { - data, ok := rawHooks[event] - if !ok { - return nil - } - if err := json.Unmarshal(data, target); err != nil { - return fmt.Errorf("parse %s hooks: %w", event, err) - } - return nil -} - -func marshalCodexHookType(rawHooks map[string]json.RawMessage, event string, groups []codexMatcherGroup) error { - if len(groups) == 0 { - delete(rawHooks, event) - return nil - } - data, err := json.Marshal(groups) - if err != nil { - return fmt.Errorf("encode %s hooks: %w", event, err) - } - rawHooks[event] = data - return nil -} - -func codexHookCommandExists(groups []codexMatcherGroup, command string) bool { - for _, group := range groups { - for _, hook := range group.Hooks { - if hook.Command == command { - return true - } - } - } - return false -} - -func addCodexHook(groups []codexMatcherGroup, hook codexHookEntry) []codexMatcherGroup { - for i, group := range groups { - if group.Matcher == nil { - groups[i].Hooks = append(groups[i].Hooks, hook) - return groups - } - } - return append(groups, codexMatcherGroup{ - Matcher: nil, - Hooks: []codexHookEntry{hook}, - }) -} - -func ensureCodexHooksFeatureEnabled(workspacePath string) error { - configPath := filepath.Join(workspacePath, codexHooksDirName, codexConfigFileName) - data, err := os.ReadFile(configPath) //nolint:gosec // path built from caller-owned workspace dir - if err != nil && !errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("read config.toml: %w", err) - } - - content := string(data) - hasNew := containsCodexFeatureLine(content, codexHooksFeatureLine) - hasLegacy := containsCodexFeatureLine(content, codexLegacyHookFeatureLine) - switch { - case hasNew && hasLegacy: - content = stripCodexLegacyHookFeatureLine(content) - case hasNew: - return nil - case hasLegacy: - content = strings.Replace(content, codexLegacyHookFeatureLine, codexHooksFeatureLine, 1) - case strings.Contains(content, "[features]"): - content = strings.Replace(content, "[features]", "[features]\n"+codexHooksFeatureLine, 1) - default: - if content != "" && !strings.HasSuffix(content, "\n") { - content += "\n" - } - content += "\n[features]\n" + codexHooksFeatureLine + "\n" - } - - if err := os.MkdirAll(filepath.Dir(configPath), 0o750); err != nil { - return fmt.Errorf("create .codex directory: %w", err) - } - if err := hookutil.AtomicWriteFile(configPath, []byte(content), 0o600); err != nil { - return fmt.Errorf("write config.toml: %w", err) - } - return nil -} - -func containsCodexFeatureLine(content, line string) bool { - for raw := range strings.SplitSeq(content, "\n") { - if strings.TrimSpace(raw) == line { - return true - } - } - return false -} - -func stripCodexLegacyHookFeatureLine(content string) string { - idx := strings.Index(content, codexLegacyHookFeatureLine) - if idx < 0 { - return content - } - end := idx + len(codexLegacyHookFeatureLine) - if end < len(content) && content[end] == '\n' { - end++ - } - return content[:idx] + content[end:] -} diff --git a/docs/agent/README.md b/docs/agent/README.md index 55d1e506..669605c8 100644 --- a/docs/agent/README.md +++ b/docs/agent/README.md @@ -55,6 +55,8 @@ The original spawn prompt may remain in metadata as `prompt` for launch/debug fa Agent adapters install hooks into the worktree-local config owned by the native agent. +Codex is the exception: Codex (0.136+) only loads project-local `.codex/` hook config from trusted directories, and for linked git worktrees it sources hook declarations from the matching folder in the root checkout — never from AO's per-session worktree. The Codex adapter therefore passes its hooks on the launch command as `-c 'hooks.=[...]'` session-flag config (plus `--dangerously-bypass-hook-trust`, since session-flag hooks have no persisted trust hash), and marks the worktree as a trusted project for the invocation with `-c 'projects={""={trust_level="trusted"}}'` so spawns into never-before-trusted repos don't hang on Codex's interactive directory-trust prompt. Its `GetAgentHooks` writes nothing; it only strips entries older AO versions left in the worktree-local `.codex/hooks.json`. + Hook callbacks run through hidden AO CLI commands: ```text @@ -72,6 +74,8 @@ The callback: The spawn engine inserts the AO session row before launching the durability provider so early startup hooks can update an existing row. If launch fails after insertion, spawn deletes the row during rollback. +The hook commands are a bare `ao hooks ...` on purpose: worktree-committed hook files stay machine-portable, and adapters recognize their own entries by command prefix for install/dedup/uninstall. To make the bare `ao` resolve to the daemon that installed the hooks (not a foreign or legacy `ao` earlier on the user's PATH), the session manager pins each spawned session's `PATH` with the daemon executable's directory first. When the pin cannot be applied (executable unresolvable or not named `ao`), the daemon logs a warning at spawn. Hook delivery failures are best-effort appended to `hooks.log` under `AO_DATA_DIR` (agents swallow hook stderr), and `ao doctor` warns when the `ao` on PATH is not the running binary. + ## Restore Boundary Session display info and native restore are separate concerns. From 12230cf25e51ebb1996f6b88422383bba8d62c16 Mon Sep 17 00:00:00 2001 From: Ashish Huddar Date: Thu, 11 Jun 2026 09:38:28 +0530 Subject: [PATCH 2/4] feat(sessions): activity-signal watchdog + hook delivery hardening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A codex upgrade broke activity tracking silently: sessions showed a confident "idle" forever while the agent worked. This bundle makes hook delivery verifiable end to end and makes any future breakage loud instead of invisible. Watchdog (no_signal status): - sessions.first_signal_at (migration 0010) records the FIRST hook callback per spawn/restore — raw signal receipt, independent of the derived activity state. lifecycle.ApplyActivitySignal stamps it (and writes through same-state repeats until stamped, e.g. Codex SessionStart reporting idle on an idle-seeded row); MarkSpawned clears it so every relaunch re-proves its hook pipeline. - deriveStatus downgrades a live session with no receipt to the new no_signal display status after a 90s grace, instead of idle. Terminated/PR-derived statuses still win. The sessions CDC update trigger now also fires on first-signal receipt so the dashboard transition is pushed live. - frontend maps no_signal -> needs_you (a human should look at the pane). Hook callback hardening (re-landed from the closed redesign PR #156): - the session manager pins each spawned session's PATH with the daemon executable's directory first, so the bare `ao` in hook commands resolves to the daemon that installed them, with a spawn-time warning when the pin cannot apply. - `ao hooks` failures append to $AO_DATA_DIR/hooks.log (size-capped); `ao doctor` gains a hooks-log check that warns on failures from the last 24h, and an ao-binary identity check. Codex launch-surface canary: - `ao doctor` gains codex-launch-flags: it runs probes exported by the codex adapter (built from the same flag builders as the real spawn argv) against the installed binary, warning when codex rejects the hook-trust bypass flag or AO's -c session-flag overrides. - codex hook callback timeout drops 30s -> 5s so a hung daemon cannot stall the agent's turn. Docs: the agent PRD callback section now describes the implemented flow (derive state, POST /sessions/{id}/activity, hooks.log) instead of the unbuilt SQLite/metadata merge, and notes that hook-derived metadata persistence (codex resume) is still not implemented. Frontend note: main's renderer test suite has 7 pre-existing failing files and a vite-config typecheck error unrelated to this change; workspace.test.ts (the only frontend file touched) passes 26/26. Co-Authored-By: Claude Fable 5 --- .../internal/adapters/agent/codex/codex.go | 21 ++ .../adapters/agent/codex/codex_test.go | 32 ++- .../internal/adapters/agent/codex/hooks.go | 5 +- backend/internal/cli/doctor.go | 123 +++++++++++- backend/internal/cli/doctor_test.go | 190 +++++++++++++++++- backend/internal/cli/hooks.go | 61 +++++- backend/internal/cli/hooks_test.go | 97 +++++++++ backend/internal/daemon/lifecycle_wiring.go | 1 + backend/internal/domain/session.go | 28 ++- backend/internal/domain/status.go | 5 + backend/internal/lifecycle/manager.go | 13 +- backend/internal/lifecycle/manager_test.go | 49 +++++ backend/internal/service/session/service.go | 13 +- backend/internal/service/session/status.go | 24 ++- .../internal/service/session/status_test.go | 35 +++- backend/internal/session_manager/manager.go | 124 ++++++++++-- .../internal/session_manager/manager_test.go | 161 ++++++++++++++- .../session_manager/provision_test.go | 73 +++++++ backend/internal/storage/sqlite/gen/models.go | 1 + .../storage/sqlite/gen/sessions.sql.go | 20 +- .../migrations/0010_add_first_signal_at.sql | 59 ++++++ .../storage/sqlite/queries/sessions.sql | 12 +- .../storage/sqlite/store/session_store.go | 21 +- .../storage/sqlite/store/store_test.go | 79 +++----- docs/agent/README.md | 13 +- frontend/src/renderer/types/workspace.test.ts | 2 + frontend/src/renderer/types/workspace.ts | 5 + 27 files changed, 1149 insertions(+), 118 deletions(-) create mode 100644 backend/internal/storage/sqlite/migrations/0010_add_first_signal_at.sql diff --git a/backend/internal/adapters/agent/codex/codex.go b/backend/internal/adapters/agent/codex/codex.go index d078fb43..68b33d7d 100644 --- a/backend/internal/adapters/agent/codex/codex.go +++ b/backend/internal/adapters/agent/codex/codex.go @@ -229,6 +229,27 @@ func (p *Plugin) codexBinary(ctx context.Context) (string, error) { return binary, nil } +// DoctorLaunchProbes returns argv tails `ao doctor` runs against the installed +// codex binary to smoke-test the launch surface AO's hook delivery depends on. +// Probe 1 confirms --dangerously-bypass-hook-trust still exists (clap rejects +// unknown flags with a non-zero exit even alongside --version). Probe 2 loads +// codex's config with AO's `-c` session-flag overrides through the offline +// `features list` subcommand, so an override-parse regression surfaces as a +// non-zero exit or warning output. Both are built from the same flag builders +// the launch command uses, so the probes cannot drift from the real spawn argv. +func DoctorLaunchProbes() [][]string { + flagProbe := make([]string, 0, 2) + appendHookTrustBypassFlag(&flagProbe) + flagProbe = append(flagProbe, "--version") + + overrideProbe := []string{"features", "list"} + appendNoUpdateCheckFlag(&overrideProbe) + appendHideRateLimitNudgeFlag(&overrideProbe) + appendSessionHookFlags(&overrideProbe) + appendWorkspaceTrustFlag(&overrideProbe, os.TempDir()) + return [][]string{flagProbe, overrideProbe} +} + func appendNoUpdateCheckFlag(cmd *[]string) { *cmd = append(*cmd, "-c", "check_for_update_on_startup=false") } diff --git a/backend/internal/adapters/agent/codex/codex_test.go b/backend/internal/adapters/agent/codex/codex_test.go index 64803f47..dba7e2e2 100644 --- a/backend/internal/adapters/agent/codex/codex_test.go +++ b/backend/internal/adapters/agent/codex/codex_test.go @@ -30,10 +30,10 @@ func canonicalTempDir(t *testing.T) string { // these values as TOML. func sessionHookFlags() []string { return []string{ - "-c", `hooks.SessionStart=[{hooks=[{type="command",command="ao hooks codex session-start",timeout=30}]}]`, - "-c", `hooks.UserPromptSubmit=[{hooks=[{type="command",command="ao hooks codex user-prompt-submit",timeout=30}]}]`, - "-c", `hooks.PermissionRequest=[{hooks=[{type="command",command="ao hooks codex permission-request",timeout=30}]}]`, - "-c", `hooks.Stop=[{hooks=[{type="command",command="ao hooks codex stop",timeout=30}]}]`, + "-c", `hooks.SessionStart=[{hooks=[{type="command",command="ao hooks codex session-start",timeout=5}]}]`, + "-c", `hooks.UserPromptSubmit=[{hooks=[{type="command",command="ao hooks codex user-prompt-submit",timeout=5}]}]`, + "-c", `hooks.PermissionRequest=[{hooks=[{type="command",command="ao hooks codex permission-request",timeout=5}]}]`, + "-c", `hooks.Stop=[{hooks=[{type="command",command="ao hooks codex stop",timeout=5}]}]`, } } @@ -549,3 +549,27 @@ func countCodexHookCommand(entries []codexMatcherGroup, command string) int { } return count } + +func TestDoctorLaunchProbesMirrorLaunchFlags(t *testing.T) { + probes := DoctorLaunchProbes() + if len(probes) != 2 { + t.Fatalf("probes = %d, want 2", len(probes)) + } + if !reflect.DeepEqual(probes[0], []string{"--dangerously-bypass-hook-trust", "--version"}) { + t.Fatalf("flag probe = %#v", probes[0]) + } + override := probes[1] + if len(override) < 2 || override[0] != "features" || override[1] != "list" { + t.Fatalf("override probe must ride `features list`, got %#v", override) + } + joined := strings.Join(override, " ") + for _, want := range []string{ + "hooks.SessionStart=", "hooks.UserPromptSubmit=", "hooks.PermissionRequest=", "hooks.Stop=", + "notice.hide_rate_limit_model_nudge=true", + `projects={"`, + } { + if !strings.Contains(joined, want) { + t.Fatalf("override probe missing %q in %s", want, joined) + } + } +} diff --git a/backend/internal/adapters/agent/codex/hooks.go b/backend/internal/adapters/agent/codex/hooks.go index cd5a901c..031d28ae 100644 --- a/backend/internal/adapters/agent/codex/hooks.go +++ b/backend/internal/adapters/agent/codex/hooks.go @@ -34,7 +34,10 @@ const ( // legacy-file cleanup and uninstall recognize AO entries by prefix // without an embedded template to diff against. codexHookCommandPrefix = "ao hooks codex " - codexHookTimeout = 30 + // codexHookTimeout caps how long Codex waits on one AO hook callback. The + // callback is a loopback POST that normally returns in milliseconds; a + // tight cap keeps a hung daemon from stalling the agent's turn. + codexHookTimeout = 5 ) // codexHookFile is the on-disk shape of .codex/hooks.json. It is used by tests diff --git a/backend/internal/cli/doctor.go b/backend/internal/cli/doctor.go index 5a87a559..60b0e296 100644 --- a/backend/internal/cli/doctor.go +++ b/backend/internal/cli/doctor.go @@ -13,9 +13,11 @@ import ( "regexp" "strconv" "strings" + "time" "github.com/spf13/cobra" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/codex" "github.com/aoagents/agent-orchestrator/backend/internal/adapters/runtime/zellij" "github.com/aoagents/agent-orchestrator/backend/internal/config" ) @@ -141,7 +143,7 @@ func (c *commandContext) runDoctor(ctx context.Context) []doctorCheck { ) } - checks = append(checks, checkStore(cfg.DataDir)) + checks = append(checks, checkStore(cfg.DataDir), checkHooksLog(cfg.DataDir, time.Now())) st, err := c.inspectDaemon(ctx) if err != nil { @@ -167,11 +169,12 @@ func (c *commandContext) runDoctor(ctx context.Context) []doctorCheck { checks = append(checks, c.checkGit(ctx), c.checkZellij(ctx), + c.checkAOBinary(), ) for _, harness := range doctorHarnesses { checks = append(checks, c.checkHarness(ctx, harness)) } - checks = append(checks, c.checkGitHubToken(ctx)) + checks = append(checks, c.checkCodexLaunchFlags(ctx), c.checkGitHubToken(ctx)) return checks } @@ -221,6 +224,47 @@ func checkDataDirWritable(dataDir string) doctorCheck { return doctorCheck{Level: doctorPass, Section: doctorSectionCore, Name: "data-dir-write", Message: "write probe succeeded"} } +// checkAOBinary verifies the `ao` that workspace hooks would invoke. Agent +// adapters install hook commands as a bare `ao hooks `, so an +// `ao` earlier on PATH that is not this binary (e.g. a legacy CLI without the +// hooks command) fails every callback and silently kills activity tracking. +// The daemon pins PATH inside the sessions it spawns, so a mismatch here is a +// warning about every other context (manual runs, foreign panes), not a hard +// failure. +func (c *commandContext) checkAOBinary() doctorCheck { + const name = "ao-binary" + self, err := c.deps.Executable() + if err != nil { + return doctorCheck{Level: doctorWarn, Section: doctorSectionTools, Name: name, Message: fmt.Sprintf("could not resolve the running executable: %v", err)} + } + onPath, err := c.deps.LookPath("ao") + if err != nil || onPath == "" { + return doctorCheck{ + Level: doctorWarn, Section: doctorSectionTools, Name: name, + Message: "ao not found in PATH; workspace hooks invoke `ao hooks ` (daemon-spawned sessions pin PATH to the daemon binary and are unaffected)", + } + } + if sameBinary(self, onPath) { + return doctorCheck{Level: doctorPass, Section: doctorSectionTools, Name: name, Message: fmt.Sprintf("ao in PATH is this binary (%s)", onPath)} + } + return doctorCheck{ + Level: doctorWarn, Section: doctorSectionTools, Name: name, + Message: fmt.Sprintf("ao in PATH is %s, not this binary (%s); workspace hooks run `ao hooks` and a foreign ao breaks activity tracking outside daemon-spawned sessions", onPath, self), + } +} + +// sameBinary reports whether two paths name the same file, tolerating symlinks +// via os.SameFile and falling back to cleaned-path equality when either stat +// fails. +func sameBinary(a, b string) bool { + ai, aErr := os.Stat(a) + bi, bErr := os.Stat(b) + if aErr == nil && bErr == nil { + return os.SameFile(ai, bi) + } + return filepath.Clean(a) == filepath.Clean(b) +} + func (c *commandContext) checkGit(ctx context.Context) doctorCheck { path, err := c.deps.LookPath("git") if err != nil || path == "" { @@ -264,6 +308,48 @@ func (c *commandContext) checkZellij(ctx context.Context) doctorCheck { return doctorCheck{Level: doctorPass, Section: doctorSectionTools, Name: "zellij", Message: fmt.Sprintf("%s (version %s; require >= %s)", path, version, zellij.RequiredVersion())} } +// checkHooksLog surfaces recent agent hook delivery failures. `ao hooks` +// callbacks deliberately swallow errors (a hook must never break the user's +// agent), so $AO_DATA_DIR/hooks.log is the only place a dead activity feed +// becomes visible. Lines start with an RFC3339 timestamp (see appendHooksLog). +func checkHooksLog(dataDir string, now time.Time) doctorCheck { + const name = "hooks-log" + path := filepath.Join(dataDir, hooksLogName) + data, err := os.ReadFile(path) //nolint:gosec // path rooted in AO's own data dir + if errors.Is(err, fs.ErrNotExist) { + return doctorCheck{Level: doctorPass, Section: doctorSectionCore, Name: name, Message: "no hook delivery failures recorded"} + } + if err != nil { + return doctorCheck{Level: doctorWarn, Section: doctorSectionCore, Name: name, Message: err.Error()} + } + + recent := 0 + latest := "" + for line := range strings.SplitSeq(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + stamp, _, ok := strings.Cut(line, " ") + if !ok { + continue + } + ts, err := time.Parse(time.RFC3339, stamp) + if err != nil || now.Sub(ts) > 24*time.Hour { + continue + } + recent++ + latest = line + } + if recent == 0 { + return doctorCheck{Level: doctorPass, Section: doctorSectionCore, Name: name, Message: fmt.Sprintf("no hook delivery failures in the last 24h (%s)", path)} + } + return doctorCheck{ + Level: doctorWarn, Section: doctorSectionCore, Name: name, + Message: fmt.Sprintf("%d hook delivery failure(s) in the last 24h — activity tracking may be degraded; latest: %s (full log: %s)", recent, latest, path), + } +} + func (c *commandContext) checkHarness(ctx context.Context, harness harnessProbe) doctorCheck { path, err := c.deps.LookPath(harness.BinaryName) if err != nil || path == "" { @@ -291,6 +377,39 @@ func (c *commandContext) checkHarness(ctx context.Context, harness harnessProbe) return doctorCheck{Level: doctorPass, Section: doctorSectionAgents, Name: harness.Name, Message: fmt.Sprintf("%s resolves to %s (%s)", harness.BinaryName, path, version)} } +// checkCodexLaunchFlags smoke-tests AO's codex launch surface against the +// installed binary: the hook-trust bypass flag and the `-c` session-flag +// config AO injects at spawn (activity hooks, worktree trust, nudge +// suppression). Codex has no stable hook-config contract, so a codex upgrade +// can silently break activity tracking; this canary turns that breakage into +// a doctor warning. The probes come from the codex adapter itself so they +// cannot drift from the real spawn argv. +func (c *commandContext) checkCodexLaunchFlags(ctx context.Context) doctorCheck { + const name = "codex-launch-flags" + path, err := c.deps.LookPath("codex") + if err != nil || path == "" { + return doctorCheck{Level: doctorPass, Section: doctorSectionAgents, Name: name, Message: "skipped: codex not found in PATH"} + } + for _, probe := range codex.DoctorLaunchProbes() { + reqCtx, cancel := context.WithTimeout(ctx, probeTimeout) + out, err := c.deps.CommandOutput(reqCtx, path, probe...) + cancel() + if err != nil { + return doctorCheck{ + Level: doctorWarn, Section: doctorSectionAgents, Name: name, + Message: fmt.Sprintf("codex rejected AO's launch flags (`codex %s`: %v) — codex sessions may spawn without activity hooks; a codex CLI update likely changed its flag/config surface", strings.Join(probe, " "), err), + } + } + if strings.Contains(string(out), "unknown configuration field") { + return doctorCheck{ + Level: doctorWarn, Section: doctorSectionAgents, Name: name, + Message: fmt.Sprintf("codex no longer recognizes one of AO's config overrides (%s) — codex sessions may spawn without activity hooks", firstOutputLine(out)), + } + } + } + return doctorCheck{Level: doctorPass, Section: doctorSectionAgents, Name: name, Message: "codex accepts AO's hook/trust launch flags"} +} + func (c *commandContext) checkGitHubToken(ctx context.Context) doctorCheck { token, source, err := c.githubToken(ctx) if err != nil { diff --git a/backend/internal/cli/doctor_test.go b/backend/internal/cli/doctor_test.go index 34e013f1..c1bf1694 100644 --- a/backend/internal/cli/doctor_test.go +++ b/backend/internal/cli/doctor_test.go @@ -8,8 +8,11 @@ import ( "io" "net/http" "net/http/httptest" + "os" + "path/filepath" "strings" "testing" + "time" ) func TestDoctorChecksGitVersion(t *testing.T) { @@ -111,10 +114,15 @@ func TestDoctorChecksHarnessVersions(t *testing.T) { case "/bin/git": return []byte("git version 2.43.0\n"), nil case "/bin/claude", "/bin/codex": - if len(args) != 1 || args[0] != "--version" { - t.Fatalf("unexpected harness command: %s %v", name, args) + if len(args) == 1 && args[0] == "--version" { + return []byte(strings.TrimPrefix(name, "/bin/") + " 1.2.3\n"), nil + } + // The codex launch-flag canary probes the same binary. + if name == "/bin/codex" && len(args) > 0 && (args[0] == "--dangerously-bypass-hook-trust" || args[0] == "features") { + return []byte("ok\n"), nil } - return []byte(strings.TrimPrefix(name, "/bin/") + " 1.2.3\n"), nil + t.Fatalf("unexpected harness command: %s %v", name, args) + return nil, nil default: t.Fatalf("unexpected command: %s %v", name, args) return nil, nil @@ -284,6 +292,70 @@ func clearDoctorGitHubEnv(t *testing.T) { t.Setenv("GH_TOKEN", "") } +// TestDoctorChecksAOBinaryIdentity covers the `ao-binary` check: workspace +// hooks invoke a bare `ao hooks `, so doctor must surface when +// the `ao` on PATH is not the running binary (e.g. a legacy CLI without the +// hooks command shadowing the Go one). +func TestDoctorChecksAOBinaryIdentity(t *testing.T) { + dir := t.TempDir() + self := filepath.Join(dir, "ao") + other := filepath.Join(dir, "ao-legacy") + for _, p := range []string{self, other} { + if err := os.WriteFile(p, []byte("#!/bin/sh\n"), 0o755); err != nil { //nolint:gosec // test fixture must be executable-shaped + t.Fatal(err) + } + } + selfExe := func() (string, error) { return self, nil } + + cases := []struct { + name string + executable func() (string, error) + paths map[string]string + wantLevel doctorLevel + wantIn string + }{ + {"ao in PATH is this binary", selfExe, map[string]string{"ao": self}, doctorPass, "this binary"}, + {"ao in PATH is a different binary", selfExe, map[string]string{"ao": other}, doctorWarn, "not this binary"}, + {"ao missing from PATH", selfExe, map[string]string{}, doctorWarn, "not found in PATH"}, + {"running executable unresolvable", func() (string, error) { return "", errors.New("no exe") }, map[string]string{"ao": self}, doctorWarn, "could not resolve"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + deps := Deps{ + Executable: tc.executable, + LookPath: func(name string) (string, error) { + path, ok := tc.paths[name] + if !ok || path == "" { + return "", fmt.Errorf("%s missing", name) + } + return path, nil + }, + ProcessAlive: func(int) bool { return false }, + } + c := &commandContext{deps: deps.withDefaults()} + check := c.checkAOBinary() + if check.Level != tc.wantLevel || !strings.Contains(check.Message, tc.wantIn) { + t.Fatalf("ao-binary check = %+v, want level %s with %q", check, tc.wantLevel, tc.wantIn) + } + }) + } +} + +// TestDoctorIncludesAOBinaryCheck asserts runDoctor actually surfaces the +// ao-binary check, so the identity probe cannot silently fall out of the report. +func TestDoctorIncludesAOBinaryCheck(t *testing.T) { + setConfigEnv(t) + c := doctorContext(t, map[string]string{"git": "/bin/git"}, func(context.Context, string, ...string) ([]byte, error) { + return []byte("git version 2.43.0\n"), nil + }) + + // doctorContext's LookPath has no "ao", so the check lands as a WARN. + check := findDoctorCheck(t, c.runDoctor(context.Background()), "ao-binary") + if check.Level != doctorWarn || !strings.Contains(check.Message, "not found in PATH") { + t.Fatalf("ao-binary check = %+v, want WARN for missing ao", check) + } +} + func doctorContext(t *testing.T, paths map[string]string, commandOutput func(context.Context, string, ...string) ([]byte, error)) *commandContext { t.Helper() clearDoctorGitHubEnv(t) @@ -331,3 +403,115 @@ func findDoctorCheck(t *testing.T, checks []doctorCheck, name string) doctorChec t.Fatalf("doctor check %q not found in %+v", name, checks) return doctorCheck{} } + +func codexCanaryFake(t *testing.T, probeOutput string, probeErr error) func(context.Context, string, ...string) ([]byte, error) { + t.Helper() + return func(_ context.Context, name string, args ...string) ([]byte, error) { + switch { + case name == "/bin/git": + return []byte("git version 2.43.0\n"), nil + case name == "/bin/codex" && len(args) == 1 && args[0] == "--version": + return []byte("codex-cli 0.136.0\n"), nil + case name == "/bin/codex": + return []byte(probeOutput), probeErr + default: + t.Fatalf("unexpected command: %s %v", name, args) + return nil, nil + } + } +} + +func TestDoctorCodexLaunchFlagsPass(t *testing.T) { + setConfigEnv(t) + c := doctorContext(t, map[string]string{"git": "/bin/git", "codex": "/bin/codex"}, codexCanaryFake(t, "ok\n", nil)) + + check := findDoctorCheck(t, c.runDoctor(context.Background()), "codex-launch-flags") + if check.Level != doctorPass || !strings.Contains(check.Message, "accepts") { + t.Fatalf("canary = %+v, want PASS accepts", check) + } +} + +func TestDoctorCodexLaunchFlagsWarnOnRejectedFlag(t *testing.T) { + setConfigEnv(t) + c := doctorContext(t, map[string]string{"git": "/bin/git", "codex": "/bin/codex"}, + codexCanaryFake(t, "error: unexpected argument '--dangerously-bypass-hook-trust' found\n", errors.New("exit status 2"))) + + check := findDoctorCheck(t, c.runDoctor(context.Background()), "codex-launch-flags") + if check.Level != doctorWarn || !strings.Contains(check.Message, "rejected AO's launch flags") { + t.Fatalf("canary = %+v, want WARN rejected flags", check) + } +} + +func TestDoctorCodexLaunchFlagsWarnOnUnknownConfigField(t *testing.T) { + setConfigEnv(t) + c := doctorContext(t, map[string]string{"git": "/bin/git", "codex": "/bin/codex"}, + codexCanaryFake(t, "unknown configuration field `hooks` in -c/--config override\n", nil)) + + check := findDoctorCheck(t, c.runDoctor(context.Background()), "codex-launch-flags") + if check.Level != doctorWarn || !strings.Contains(check.Message, "no longer recognizes") { + t.Fatalf("canary = %+v, want WARN unknown config field", check) + } +} + +func TestDoctorCodexLaunchFlagsSkippedWithoutCodex(t *testing.T) { + setConfigEnv(t) + c := doctorContext(t, map[string]string{"git": "/bin/git"}, func(context.Context, string, ...string) ([]byte, error) { + return []byte("git version 2.43.0\n"), nil + }) + + check := findDoctorCheck(t, c.runDoctor(context.Background()), "codex-launch-flags") + if check.Level != doctorPass || !strings.Contains(check.Message, "skipped") { + t.Fatalf("canary = %+v, want skipped PASS", check) + } +} + +func TestDoctorHooksLogStates(t *testing.T) { + gitOnly := func(context.Context, string, ...string) ([]byte, error) { + return []byte("git version 2.43.0\n"), nil + } + + t.Run("missing log passes", func(t *testing.T) { + setConfigEnv(t) + c := doctorContext(t, map[string]string{"git": "/bin/git"}, gitOnly) + check := findDoctorCheck(t, c.runDoctor(context.Background()), "hooks-log") + if check.Level != doctorPass || !strings.Contains(check.Message, "no hook delivery failures") { + t.Fatalf("hooks-log = %+v, want PASS no failures", check) + } + }) + + t.Run("recent failures warn", func(t *testing.T) { + cfg := setConfigEnv(t) + writeHooksLogLines(t, cfg.dataDir, + time.Now().Add(-48*time.Hour).UTC().Format(time.RFC3339)+" session=old ao hooks codex stop: stale", + time.Now().Add(-time.Hour).UTC().Format(time.RFC3339)+" session=mer-1 ao hooks codex stop: connection refused", + ) + c := doctorContext(t, map[string]string{"git": "/bin/git"}, gitOnly) + check := findDoctorCheck(t, c.runDoctor(context.Background()), "hooks-log") + if check.Level != doctorWarn || !strings.Contains(check.Message, "1 hook delivery failure") || !strings.Contains(check.Message, "connection refused") { + t.Fatalf("hooks-log = %+v, want WARN with recent count and latest line", check) + } + }) + + t.Run("only stale failures pass", func(t *testing.T) { + cfg := setConfigEnv(t) + writeHooksLogLines(t, cfg.dataDir, + time.Now().Add(-72*time.Hour).UTC().Format(time.RFC3339)+" session=old ao hooks codex stop: stale", + ) + c := doctorContext(t, map[string]string{"git": "/bin/git"}, gitOnly) + check := findDoctorCheck(t, c.runDoctor(context.Background()), "hooks-log") + if check.Level != doctorPass || !strings.Contains(check.Message, "last 24h") { + t.Fatalf("hooks-log = %+v, want PASS stale-only", check) + } + }) +} + +func writeHooksLogLines(t *testing.T, dataDir string, lines ...string) { + t.Helper() + if err := os.MkdirAll(dataDir, 0o750); err != nil { + t.Fatal(err) + } + content := strings.Join(lines, "\n") + "\n" + if err := os.WriteFile(filepath.Join(dataDir, hooksLogName), []byte(content), 0o600); err != nil { + t.Fatal(err) + } +} diff --git a/backend/internal/cli/hooks.go b/backend/internal/cli/hooks.go index a5a991ce..2ad06e00 100644 --- a/backend/internal/cli/hooks.go +++ b/backend/internal/cli/hooks.go @@ -6,8 +6,10 @@ import ( "io" "net/url" "os" + "path/filepath" "regexp" "strings" + "time" "github.com/spf13/cobra" @@ -19,6 +21,17 @@ import ( // before it reaches the loopback URL keeps it from steering the request. var sessionIDPattern = regexp.MustCompile(`^[A-Za-z0-9_-]+$`) +const ( + // hooksLogName is the file under AO_DATA_DIR where hook delivery failures + // are appended. Agent hook runners swallow stderr, so without a durable + // sink a dead activity feed (e.g. an unreachable daemon) stays invisible. + hooksLogName = "hooks.log" + // maxHooksLogBytes caps hooks.log: an append against a file already past + // the cap truncates it first, so a persistently failing hook cannot grow + // the file without bound. + maxHooksLogBytes = 1 << 20 +) + // setActivityAPIRequest mirrors the daemon's SetActivityRequest body for // POST /api/v1/sessions/{id}/activity. The CLI keeps its own copy so it need // not import httpd. @@ -56,10 +69,10 @@ func (c *commandContext) runHook(ctx context.Context, agent, event string) error } payload, err := io.ReadAll(c.deps.In) if err != nil { - // Surface read errors to stderr for parity with the daemon-error path, - // but keep the empty payload and exit 0: a failed hook must not break - // the agent. The deriver tolerates an empty payload. - _, _ = fmt.Fprintf(c.deps.Err, "ao hooks %s %s: read stdin: %v\n", agent, event, err) + // Surface read errors for parity with the daemon-error path, but keep + // the empty payload and exit 0: a failed hook must not break the + // agent. The deriver tolerates an empty payload. + c.reportHookFailure(agent, event, sessionID, fmt.Errorf("read stdin: %w", err)) } state, ok := activitydispatch.Derive(agent, event, payload) @@ -70,9 +83,43 @@ func (c *commandContext) runHook(ctx context.Context, agent, event string) error path := "sessions/" + url.PathEscape(sessionID) + "/activity" if err := c.postJSON(ctx, path, setActivityAPIRequest{State: string(state)}, nil); err != nil { - // Report to stderr (the agent's hook runner captures it) for diagnosis, - // but exit 0: a failed activity report must not disrupt the agent. - _, _ = fmt.Fprintf(c.deps.Err, "ao hooks %s %s: %v\n", agent, event, err) + // Surface the failure for diagnosis, but exit 0: a failed activity + // report must not disrupt the agent. + c.reportHookFailure(agent, event, sessionID, err) } return nil } + +// reportHookFailure surfaces a hook delivery failure without breaking the +// agent: stderr for the agent's hook runner, plus a best-effort append to +// $AO_DATA_DIR/hooks.log so the failure can be diagnosed after the fact. +func (c *commandContext) reportHookFailure(agent, event, sessionID string, cause error) { + msg := fmt.Sprintf("ao hooks %s %s: %v", agent, event, cause) + _, _ = fmt.Fprintln(c.deps.Err, msg) + dataDir := strings.TrimSpace(os.Getenv("AO_DATA_DIR")) + if dataDir == "" { + return + } + line := fmt.Sprintf("%s session=%s %s\n", time.Now().UTC().Format(time.RFC3339), sessionID, msg) + appendHooksLog(dataDir, line) +} + +// appendHooksLog appends one line to the hooks log, truncating first when the +// file has outgrown maxHooksLogBytes. Errors are dropped: this sink is itself +// best-effort and has nowhere better to report. +func appendHooksLog(dataDir, line string) { + if err := os.MkdirAll(dataDir, 0o750); err != nil { + return + } + path := filepath.Join(dataDir, hooksLogName) + flags := os.O_APPEND | os.O_CREATE | os.O_WRONLY + if info, err := os.Stat(path); err == nil && info.Size() > maxHooksLogBytes { + flags = os.O_TRUNC | os.O_CREATE | os.O_WRONLY + } + f, err := os.OpenFile(path, flags, 0o600) //nolint:gosec // path is rooted in AO's own data dir + if err != nil { + return + } + defer func() { _ = f.Close() }() + _, _ = f.WriteString(line) +} diff --git a/backend/internal/cli/hooks_test.go b/backend/internal/cli/hooks_test.go index 57fb90a1..25723f87 100644 --- a/backend/internal/cli/hooks_test.go +++ b/backend/internal/cli/hooks_test.go @@ -2,9 +2,13 @@ package cli import ( "encoding/json" + "errors" "io" + "io/fs" "net/http" "net/http/httptest" + "os" + "path/filepath" "strings" "testing" ) @@ -210,6 +214,99 @@ func TestHooks_DaemonDownIsBestEffort(t *testing.T) { } } +// TestHooks_DeliveryFailureGoesToHooksLog covers the durable failure sink: +// agents swallow hook stderr, so a delivery failure must also land in +// $AO_DATA_DIR/hooks.log — and a delivered hook must not write the file at all. +func TestHooks_DeliveryFailureGoesToHooksLog(t *testing.T) { + cases := []struct { + name string + status int + body string + wantLog bool + wantIn []string + }{ + { + name: "daemon error is appended", + status: http.StatusInternalServerError, + body: `{"error":"internal","code":"BOOM","message":"boom"}`, + wantLog: true, + wantIn: []string{"ao hooks claude-code session-end", "session=ao-7"}, + }, + { + name: "successful delivery writes nothing", + status: http.StatusOK, + body: `{"ok":true}`, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Setenv("AO_SESSION_ID", "ao-7") + cfg := setConfigEnv(t) + srv, _ := activityServer(t, tc.status, tc.body) + writeRunFileFor(t, cfg, srv) + + _, _, err := executeCLI(t, Deps{ + In: strings.NewReader(`{"reason":"logout"}`), + ProcessAlive: func(int) bool { return true }, + }, "hooks", "claude-code", "session-end") + if err != nil { + t.Fatalf("hooks must exit 0, got: %v", err) + } + + logPath := filepath.Join(cfg.dataDir, "hooks.log") + data, err := os.ReadFile(logPath) + if !tc.wantLog { + if !errors.Is(err, fs.ErrNotExist) { + t.Fatalf("hooks.log should not exist after a delivered hook, got err=%v data=%q", err, data) + } + return + } + if err != nil { + t.Fatalf("hooks.log not written: %v", err) + } + for _, want := range tc.wantIn { + if !strings.Contains(string(data), want) { + t.Errorf("hooks.log missing %q:\n%s", want, data) + } + } + }) + } +} + +// TestHooks_HooksLogTruncatesPastCap asserts the size guard: an append against +// a hooks.log already past the cap truncates it first, so a persistently +// failing hook cannot grow the file without bound. +func TestHooks_HooksLogTruncatesPastCap(t *testing.T) { + t.Setenv("AO_SESSION_ID", "ao-7") + cfg := setConfigEnv(t) // no run file written: every delivery fails + logPath := filepath.Join(cfg.dataDir, "hooks.log") + if err := os.MkdirAll(cfg.dataDir, 0o750); err != nil { + t.Fatal(err) + } + oversized := strings.Repeat("x", maxHooksLogBytes+1) + if err := os.WriteFile(logPath, []byte(oversized), 0o600); err != nil { + t.Fatal(err) + } + + _, _, err := executeCLI(t, Deps{ + In: strings.NewReader(`{"reason":"logout"}`), + }, "hooks", "claude-code", "session-end") + if err != nil { + t.Fatalf("hooks must exit 0, got: %v", err) + } + + data, err := os.ReadFile(logPath) + if err != nil { + t.Fatal(err) + } + if len(data) > maxHooksLogBytes { + t.Fatalf("hooks.log = %d bytes, want truncated below the %d cap", len(data), maxHooksLogBytes) + } + if !strings.Contains(string(data), "ao hooks claude-code session-end") { + t.Errorf("truncated hooks.log missing the new failure line:\n%s", data) + } +} + func TestHooks_DaemonErrorIsSwallowed(t *testing.T) { t.Setenv("AO_SESSION_ID", "ao-7") cfg := setConfigEnv(t) diff --git a/backend/internal/daemon/lifecycle_wiring.go b/backend/internal/daemon/lifecycle_wiring.go index 47251f27..61264a4c 100644 --- a/backend/internal/daemon/lifecycle_wiring.go +++ b/backend/internal/daemon/lifecycle_wiring.go @@ -78,6 +78,7 @@ func startSession(cfg config.Config, runtime ports.Runtime, store *sqlite.Store, Messenger: messenger, Lifecycle: lcm, DataDir: cfg.DataDir, + Logger: log, }) scmProvider, err := newGitHubSCMProvider(log) if err != nil { diff --git a/backend/internal/domain/session.go b/backend/internal/domain/session.go index cc4e6ea1..7e289b37 100644 --- a/backend/internal/domain/session.go +++ b/backend/internal/domain/session.go @@ -36,17 +36,23 @@ type SessionMetadata struct { // facts: identity, agent harness, activity_state, is_terminated, and operational // metadata. The user-facing Status is derived from these facts plus PR facts. type SessionRecord struct { - ID SessionID `json:"id"` - ProjectID ProjectID `json:"projectId"` - IssueID IssueID `json:"issueId,omitempty"` - Kind SessionKind `json:"kind"` - Harness AgentHarness `json:"harness,omitempty"` - DisplayName string `json:"displayName,omitempty"` - Activity Activity `json:"activity"` - IsTerminated bool `json:"isTerminated"` - Metadata SessionMetadata `json:"-"` - CreatedAt time.Time `json:"createdAt"` - UpdatedAt time.Time `json:"updatedAt"` + ID SessionID `json:"id"` + ProjectID ProjectID `json:"projectId"` + IssueID IssueID `json:"issueId,omitempty"` + Kind SessionKind `json:"kind"` + Harness AgentHarness `json:"harness,omitempty"` + DisplayName string `json:"displayName,omitempty"` + Activity Activity `json:"activity"` + // FirstSignalAt is when the FIRST agent hook callback arrived for the + // current spawn/restore: raw signal receipt, independent of the derived + // activity state. Zero means no hook has ever reported, which deriveStatus + // surfaces as StatusNoSignal after a grace period. Internal fact, not part + // of the API read model. + FirstSignalAt time.Time `json:"-"` + IsTerminated bool `json:"isTerminated"` + Metadata SessionMetadata `json:"-"` + CreatedAt time.Time `json:"createdAt"` + UpdatedAt time.Time `json:"updatedAt"` } // Session is the read-model returned across the API boundary: a SessionRecord diff --git a/backend/internal/domain/status.go b/backend/internal/domain/status.go index e42a0ed0..4b6cf254 100644 --- a/backend/internal/domain/status.go +++ b/backend/internal/domain/status.go @@ -18,4 +18,9 @@ const ( StatusNeedsInput SessionStatus = "needs_input" StatusIdle SessionStatus = "idle" StatusTerminated SessionStatus = "terminated" + // StatusNoSignal marks a live session whose agent has never delivered a + // hook callback for the current spawn/restore: AO cannot tell whether the + // agent is working or stuck (broken hook pipeline, blocked interactive + // prompt). Rendered instead of a confident idle. + StatusNoSignal SessionStatus = "no_signal" ) diff --git a/backend/internal/lifecycle/manager.go b/backend/internal/lifecycle/manager.go index d0bd9c24..10554121 100644 --- a/backend/internal/lifecycle/manager.go +++ b/backend/internal/lifecycle/manager.go @@ -85,10 +85,17 @@ func (m *Manager) ApplyActivitySignal(ctx context.Context, id domain.SessionID, } next := cur act := domain.Activity{State: s.State, LastActivityAt: timeOr(s.Timestamp, now)} - if sameActivity(cur.Activity, act) { + // A same-state repeat is still a write when it is the FIRST signal for + // this spawn: the receipt itself is a durable fact (it clears the + // no_signal display status), e.g. Codex's SessionStart reports idle on + // an idle-seeded row. + if sameActivity(cur.Activity, act) && !cur.FirstSignalAt.IsZero() { return cur, false } next.Activity = act + if next.FirstSignalAt.IsZero() { + next.FirstSignalAt = timeOr(s.Timestamp, now) + } if s.State == domain.ActivityExited { next.IsTerminated = true } @@ -110,6 +117,10 @@ func (m *Manager) MarkSpawned(ctx context.Context, id domain.SessionID, metadata now := m.clock() rec.IsTerminated = false rec.Activity = domain.Activity{State: domain.ActivityIdle, LastActivityAt: now} + // Each spawn/restore must re-prove its hook pipeline: clear the receipt so + // a relaunch with broken hooks degrades to no_signal instead of inheriting + // a stale "signals worked once" fact. + rec.FirstSignalAt = time.Time{} rec.Metadata = mergeMetadata(rec.Metadata, metadata) rec.UpdatedAt = now return m.store.UpdateSession(ctx, rec) diff --git a/backend/internal/lifecycle/manager_test.go b/backend/internal/lifecycle/manager_test.go index 5a87298a..e739fa01 100644 --- a/backend/internal/lifecycle/manager_test.go +++ b/backend/internal/lifecycle/manager_test.go @@ -497,3 +497,52 @@ func TestPRObservation_RetriesAfterMessengerFailure(t *testing.T) { t.Fatalf("want retry to send once, got %v", msg.msgs) } } + +func TestActivity_FirstSignalStampsReceipt(t *testing.T) { + m, st, _ := newManager() + st.sessions["mer-1"] = domain.SessionRecord{ID: "mer-1", ProjectID: "mer", Activity: domain.Activity{State: domain.ActivityIdle, LastActivityAt: time.Now()}} + // A same-state repeat (idle on an idle-seeded row) must still write: the + // receipt itself is the durable fact that clears no_signal. + if err := m.ApplyActivitySignal(ctx, "mer-1", ports.ActivitySignal{Valid: true, State: domain.ActivityIdle}); err != nil { + t.Fatal(err) + } + got := st.sessions["mer-1"] + if got.FirstSignalAt.IsZero() { + t.Fatalf("first signal not stamped: %+v", got) + } + stamped := got.FirstSignalAt + // Later signals must not move the receipt. + if err := m.ApplyActivitySignal(ctx, "mer-1", ports.ActivitySignal{Valid: true, State: domain.ActivityActive, Timestamp: time.Now().Add(time.Minute)}); err != nil { + t.Fatal(err) + } + if got := st.sessions["mer-1"]; !got.FirstSignalAt.Equal(stamped) { + t.Fatalf("first signal moved: %v -> %v", stamped, got.FirstSignalAt) + } +} + +func TestActivity_SameStateRepeatAfterReceiptIsNoOp(t *testing.T) { + m, st, _ := newManager() + rec := working("mer-1") + rec.FirstSignalAt = time.Now() + st.sessions["mer-1"] = rec + before := st.sessions["mer-1"] + if err := m.ApplyActivitySignal(ctx, "mer-1", ports.ActivitySignal{Valid: true, State: domain.ActivityActive}); err != nil { + t.Fatal(err) + } + if st.sessions["mer-1"] != before { + t.Fatalf("same-state repeat after receipt must not rewrite: %+v", st.sessions["mer-1"]) + } +} + +func TestMarkSpawnedClearsFirstSignal(t *testing.T) { + m, st, _ := newManager() + rec := working("mer-1") + rec.FirstSignalAt = time.Now().Add(-time.Hour) + st.sessions["mer-1"] = rec + if err := m.MarkSpawned(ctx, "mer-1", domain.SessionMetadata{}); err != nil { + t.Fatal(err) + } + if got := st.sessions["mer-1"]; !got.FirstSignalAt.IsZero() { + t.Fatalf("spawn/restore must clear the receipt, got %+v", got) + } +} diff --git a/backend/internal/service/session/service.go b/backend/internal/service/session/service.go index de5dd2f6..f0ccf548 100644 --- a/backend/internal/service/session/service.go +++ b/backend/internal/service/session/service.go @@ -301,7 +301,16 @@ func (s *Service) toSession(ctx context.Context, rec domain.SessionRecord) (doma return domain.Session{}, fmt.Errorf("pr facts %s: %w", rec.ID, err) } if !ok { - return domain.Session{SessionRecord: rec, Status: deriveStatus(rec, nil), TerminalHandleID: rec.Metadata.RuntimeHandleID}, nil + return domain.Session{SessionRecord: rec, Status: deriveStatus(rec, nil, s.now()), TerminalHandleID: rec.Metadata.RuntimeHandleID}, nil } - return domain.Session{SessionRecord: rec, Status: deriveStatus(rec, &pr), TerminalHandleID: rec.Metadata.RuntimeHandleID}, nil + return domain.Session{SessionRecord: rec, Status: deriveStatus(rec, &pr, s.now()), TerminalHandleID: rec.Metadata.RuntimeHandleID}, nil +} + +// now tolerates a zero-value Service (tests construct the struct literally +// without going through New, which is where clock gets its default). +func (s *Service) now() time.Time { + if s.clock == nil { + return time.Now() + } + return s.clock() } diff --git a/backend/internal/service/session/status.go b/backend/internal/service/session/status.go index ee929852..a3ebdcba 100644 --- a/backend/internal/service/session/status.go +++ b/backend/internal/service/session/status.go @@ -1,8 +1,21 @@ package session -import "github.com/aoagents/agent-orchestrator/backend/internal/domain" +import ( + "time" -func deriveStatus(rec domain.SessionRecord, pr *domain.PRFacts) domain.SessionStatus { + "github.com/aoagents/agent-orchestrator/backend/internal/domain" +) + +// noSignalGrace is how long after spawn/restore a session may stay silent +// before its idle reading is downgraded to StatusNoSignal. It covers the +// agent's TUI boot plus the gap to its first hook callback (Codex fires +// SessionStart/UserPromptSubmit on the first turn, seconds after an +// auto-submitted prompt); past it, a session that has never signaled is +// indistinguishable from one with a broken hook pipeline, and the dashboard +// must not claim a confident "idle". +const noSignalGrace = 90 * time.Second + +func deriveStatus(rec domain.SessionRecord, pr *domain.PRFacts, now time.Time) domain.SessionStatus { if rec.IsTerminated { if pr != nil && pr.Merged { return domain.StatusMerged @@ -26,6 +39,13 @@ func deriveStatus(rec domain.SessionRecord, pr *domain.PRFacts) domain.SessionSt if rec.Activity.State == domain.ActivityActive { return domain.StatusWorking } + + // No hook callback has ever arrived for this spawn/restore. The seeded + // LastActivityAt marks the launch, so once the grace passes the honest + // status is "no signal", not "idle". + if rec.FirstSignalAt.IsZero() && now.Sub(rec.Activity.LastActivityAt) > noSignalGrace { + return domain.StatusNoSignal + } return domain.StatusIdle } diff --git a/backend/internal/service/session/status_test.go b/backend/internal/service/session/status_test.go index 3543214b..bfcc21e6 100644 --- a/backend/internal/service/session/status_test.go +++ b/backend/internal/service/session/status_test.go @@ -2,12 +2,29 @@ package session import ( "testing" + "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" ) +var statusNow = time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC) + +// statusRec builds a session whose agent HAS delivered a hook signal; the +// no-signal cases below zero FirstSignalAt explicitly. func statusRec(activity domain.ActivityState, terminated bool) domain.SessionRecord { - return domain.SessionRecord{Activity: domain.Activity{State: activity}, IsTerminated: terminated} + return domain.SessionRecord{ + Activity: domain.Activity{State: activity, LastActivityAt: statusNow}, + FirstSignalAt: statusNow, + IsTerminated: terminated, + } +} + +// silentRec builds a live session that has never delivered a hook signal, +// seeded (spawned/restored) `age` before the derivation time. +func silentRec(age time.Duration) domain.SessionRecord { + return domain.SessionRecord{ + Activity: domain.Activity{State: domain.ActivityIdle, LastActivityAt: statusNow.Add(-age)}, + } } func statusPR(facts domain.PRFacts) *domain.PRFacts { return &facts } @@ -31,10 +48,24 @@ func TestServiceDerivesStatusFromSessionFactsAndPR(t *testing.T) { {"pr-open", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{}), domain.StatusPROpen}, {"working", statusRec(domain.ActivityActive, false), nil, domain.StatusWorking}, {"idle", statusRec(domain.ActivityIdle, false), nil, domain.StatusIdle}, + + // A live session whose agent never signaled is no_signal once the + // grace passes — never a confident idle. + {"no-signal-after-grace", silentRec(2 * noSignalGrace), nil, domain.StatusNoSignal}, + // Right after spawn the agent legitimately hasn't called back yet. + {"silent-within-grace-is-idle", silentRec(10 * time.Second), nil, domain.StatusIdle}, + // Termination and PR facts outrank the missing-signal downgrade. + { + "no-signal-terminated-wins", + domain.SessionRecord{Activity: domain.Activity{State: domain.ActivityExited, LastActivityAt: statusNow.Add(-2 * noSignalGrace)}, IsTerminated: true}, + nil, + domain.StatusTerminated, + }, + {"no-signal-pr-wins", silentRec(2 * noSignalGrace), statusPR(domain.PRFacts{}), domain.StatusPROpen}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := deriveStatus(tt.rec, tt.pr); got != tt.want { + if got := deriveStatus(tt.rec, tt.pr, statusNow); got != tt.want { t.Fatalf("got %q want %q", got, tt.want) } }) diff --git a/backend/internal/session_manager/manager.go b/backend/internal/session_manager/manager.go index 17f7fe10..4e2fd2e7 100644 --- a/backend/internal/session_manager/manager.go +++ b/backend/internal/session_manager/manager.go @@ -6,6 +6,7 @@ import ( "context" "errors" "fmt" + "log/slog" "os" "os/exec" "path/filepath" @@ -38,6 +39,12 @@ const ( EnvDataDir = "AO_DATA_DIR" ) +// hookBinaryName is the executable name the workspace hook commands invoke: +// every agent adapter installs a bare `ao hooks `. The session +// PATH pin (hookPATH) only works when the daemon's own executable carries this +// name, since prepending its directory must change what `ao` resolves to. +const hookBinaryName = "ao" + type lifecycleRecorder interface { MarkSpawned(ctx context.Context, id domain.SessionID, metadata domain.SessionMetadata) error MarkTerminated(ctx context.Context, id domain.SessionID) error @@ -80,6 +87,11 @@ type Manager struct { // they don't need real binaries on PATH. Returns ports.ErrAgentBinaryNotFound // when the binary is missing so the sentinel propagates through toAPIError. lookPath func(string) (string, error) + // executable resolves the daemon's own binary (os.Executable in + // production); its directory is prepended to spawned sessions' PATH so the + // workspace hook commands resolve back to this daemon. Tests inject a stub. + executable func() (string, error) + logger *slog.Logger } // Deps are the collaborators a Session Manager needs; New wires them together. @@ -98,21 +110,30 @@ type Deps struct { // Production wiring leaves this nil and the manager defaults to // exec.LookPath; tests inject a stub so they need not seed real binaries. LookPath func(string) (string, error) + // Executable overrides os.Executable for the session PATH pin (see + // hookPATH). Production wiring leaves this nil; tests inject a stub so they + // control what the test binary appears to be. + Executable func() (string, error) + // Logger receives spawn-time diagnostics (e.g. when the session PATH + // cannot be pinned to the daemon binary). Nil defaults to slog.Default(). + Logger *slog.Logger } // New builds a Session Manager from its dependencies, defaulting the clock to // time.Now when Deps.Clock is nil. func New(d Deps) *Manager { m := &Manager{ - runtime: d.Runtime, - agents: d.Agents, - workspace: d.Workspace, - store: d.Store, - messenger: d.Messenger, - lcm: d.Lifecycle, - dataDir: d.DataDir, - clock: d.Clock, - lookPath: d.LookPath, + runtime: d.Runtime, + agents: d.Agents, + workspace: d.Workspace, + store: d.Store, + messenger: d.Messenger, + lcm: d.Lifecycle, + dataDir: d.DataDir, + clock: d.Clock, + lookPath: d.LookPath, + executable: d.Executable, + logger: d.Logger, } if m.clock == nil { m.clock = time.Now @@ -120,12 +141,19 @@ func New(d Deps) *Manager { if m.lookPath == nil { m.lookPath = exec.LookPath } + if m.executable == nil { + m.executable = os.Executable + } + if m.logger == nil { + m.logger = slog.Default() + } return m } // Spawn creates the session row (which assigns the "{project}-{n}" id), then the -// workspace and runtime, then reports completion to the LCM. A failure after the -// row exists parks it as terminated and rolls back what was built. +// workspace and runtime, then reports completion to the LCM. If workspace +// materialization fails the still-seed row is deleted outright; a later failure +// parks the row as terminated and rolls back what was built. func (m *Manager) Spawn(ctx context.Context, cfg ports.SpawnConfig) (domain.SessionRecord, error) { project, err := m.loadProject(ctx, cfg.ProjectID) if err != nil { @@ -160,7 +188,10 @@ func (m *Manager) Spawn(ctx context.Context, cfg ports.SpawnConfig) (domain.Sess BaseBranch: project.Config.WithDefaults().DefaultBranch, }) if err != nil { - m.markSpawnFailedTerminated(ctx, id) + // Nothing observable exists yet — no worktree, no runtime — so the seed + // row is deleted outright instead of accumulating as a terminated orphan + // in session lists (e.g. when gitworktree refuses the branch). + m.rollbackSpawnSeedRow(ctx, id) return domain.SessionRecord{}, fmt.Errorf("spawn %s: workspace: %w", id, err) } @@ -208,7 +239,7 @@ func (m *Manager) Spawn(ctx context.Context, cfg ports.SpawnConfig) (domain.Sess SessionID: id, WorkspacePath: ws.Path, Argv: argv, - Env: spawnEnv(id, cfg.ProjectID, cfg.IssueID, m.dataDir, project.Config.Env), + Env: m.runtimeEnv(id, cfg.ProjectID, cfg.IssueID, project.Config.Env), }) if err != nil { _ = m.workspace.Destroy(ctx, ws) @@ -278,11 +309,26 @@ func roleOverride(kind domain.SessionKind, cfg domain.ProjectConfig) domain.Role // markSpawnFailedTerminated best-effort parks an orphaned spawn as terminated. // A phantom half-spawned row is worse than a terminal one; we only delete the -// row when nothing observable has landed yet (seed state) via rollbackSpawn. +// row when nothing observable has landed yet (seed state) via rollbackSpawn or +// rollbackSpawnSeedRow. func (m *Manager) markSpawnFailedTerminated(ctx context.Context, id domain.SessionID) { _ = m.lcm.MarkTerminated(ctx, id) } +// rollbackSpawnSeedRow best-effort removes the row of a spawn that failed +// before anything observable (worktree, runtime) was built, so failed spawns +// don't accumulate terminated rows in session lists. DeleteSession only removes +// rows still in seed state; if the row has progressed or the delete itself +// fails, fall back to parking it terminated so a phantom row never looks live. +// (Kill is not a usable fallback here: it refuses seed rows with +// ErrIncompleteHandle before recording terminal intent.) +func (m *Manager) rollbackSpawnSeedRow(ctx context.Context, id domain.SessionID) { + if deleted, err := m.store.DeleteSession(ctx, id); err == nil && deleted { + return + } + m.markSpawnFailedTerminated(ctx, id) +} + // rollbackSpawn deletes a session row when it is still in seed state — used // when an out-of-band step that happens AFTER `Spawn` returns (e.g. PR claim // over HTTP) has failed and the caller wants the partially-spawned session @@ -393,7 +439,7 @@ func (m *Manager) Restore(ctx context.Context, id domain.SessionID) (domain.Sess SessionID: id, WorkspacePath: ws.Path, Argv: argv, - Env: spawnEnv(id, rec.ProjectID, rec.IssueID, m.dataDir, project.Config.Env), + Env: m.runtimeEnv(id, rec.ProjectID, rec.IssueID, project.Config.Env), }) if err != nil { return domain.SessionRecord{}, fmt.Errorf("restore %s: runtime: %w", id, err) @@ -557,6 +603,54 @@ func spawnEnv(id domain.SessionID, project domain.ProjectID, issue domain.IssueI return env } +// runtimeEnv is spawnEnv plus the hook PATH pin: the session's PATH puts the +// running daemon's own directory first, so the bare `ao` in workspace hook +// commands resolves to the daemon that installed them rather than whatever +// `ao` is first on the inherited PATH (e.g. a legacy CLI without the hooks +// command, which fails every callback and silently kills activity tracking). +// When the pin cannot be applied the inherited PATH is kept and a warning is +// logged so the degradation isn't silent. +func (m *Manager) runtimeEnv(id domain.SessionID, project domain.ProjectID, issue domain.IssueID, projectEnv map[string]string) map[string]string { + env := spawnEnv(id, project, issue, m.dataDir, projectEnv) + path, err := hookPATH(m.executable, os.Getenv, projectEnv) + if err != nil { + m.logger.Warn("session PATH not pinned to the daemon binary; `ao hooks` callbacks may resolve to a different ao and activity tracking will stall", + "session", id, "error", err) + return env + } + env["PATH"] = path + return env +} + +// hookPATH builds the PATH value pinned into a spawned session: the daemon +// executable's directory prepended to the base PATH (the project's PATH +// override when set, else the daemon's inherited PATH — matching what the +// runtime would have exported anyway). An error means the pin cannot be +// applied: the executable is unresolvable, or is not named "ao", in which case +// prepending its directory would not change what `ao` resolves to. +func hookPATH(executable func() (string, error), getenv func(string) string, projectEnv map[string]string) (string, error) { + exe, err := executable() + if err != nil { + return "", fmt.Errorf("resolve daemon executable: %w", err) + } + name := filepath.Base(exe) + if runtime.GOOS == "windows" { + name = strings.TrimSuffix(strings.ToLower(name), ".exe") + } + if name != hookBinaryName { + return "", fmt.Errorf("daemon executable %s is not named %q", exe, hookBinaryName) + } + base := projectEnv["PATH"] + if base == "" { + base = getenv("PATH") + } + dir := filepath.Dir(exe) + if base == "" { + return dir, nil + } + return dir + string(os.PathListSeparator) + base, nil +} + // provisionWorkspace applies the project's per-workspace setup after the // worktree exists: symlink shared files from the project repo, then run any // post-create commands. Either failing aborts the spawn so a half-provisioned diff --git a/backend/internal/session_manager/manager_test.go b/backend/internal/session_manager/manager_test.go index 13aca200..b15cd596 100644 --- a/backend/internal/session_manager/manager_test.go +++ b/backend/internal/session_manager/manager_test.go @@ -1,9 +1,13 @@ package sessionmanager import ( + "bytes" "context" "errors" "fmt" + "log/slog" + "os" + "path/filepath" "strings" "testing" "time" @@ -15,10 +19,11 @@ import ( var ctx = context.Background() type fakeStore struct { - sessions map[domain.SessionID]domain.SessionRecord - pr map[domain.SessionID]domain.PRFacts - projects map[string]domain.ProjectRecord - num int + sessions map[domain.SessionID]domain.SessionRecord + pr map[domain.SessionID]domain.PRFacts + projects map[string]domain.ProjectRecord + num int + deleteErr error } func newFakeStore() *fakeStore { @@ -59,6 +64,9 @@ func (f *fakeStore) ListAllSessions(context.Context) ([]domain.SessionRecord, er return out, nil } func (f *fakeStore) DeleteSession(_ context.Context, id domain.SessionID) (bool, error) { + if f.deleteErr != nil { + return false, f.deleteErr + } rec, ok := f.sessions[id] if !ok { return false, nil @@ -164,6 +172,7 @@ type singleAgent struct{ agent ports.Agent } func (s singleAgent) Agent(domain.AgentHarness) (ports.Agent, bool) { return s.agent, true } type fakeWorkspace struct { + createErr error destroyErr error destroyed int lastCfg ports.WorkspaceConfig @@ -173,6 +182,9 @@ type fakeWorkspace struct { } func (w *fakeWorkspace) Create(_ context.Context, cfg ports.WorkspaceConfig) (ports.WorkspaceInfo, error) { + if w.createErr != nil { + return ports.WorkspaceInfo{}, w.createErr + } w.lastCfg = cfg path := w.path if path == "" { @@ -290,6 +302,41 @@ func TestSpawn_RollsBackOnRuntimeFailure(t *testing.T) { t.Fatal("orphaned spawn should be terminated") } } + +// TestSpawn_DeletesSeedRowOnWorkspaceFailure covers the failed-spawn cleanup: +// when workspace materialization fails (e.g. gitworktree refuses a branch +// checked out elsewhere), nothing observable was built, so the seed row is +// deleted outright rather than parked as a terminated orphan that clutters +// session lists. +func TestSpawn_DeletesSeedRowOnWorkspaceFailure(t *testing.T) { + m, st, rt, ws := newManager() + ws.createErr = ports.ErrWorkspaceBranchCheckedOutElsewhere + _, err := m.Spawn(ctx, ports.SpawnConfig{ProjectID: "mer", Kind: domain.KindWorker}) + if !errors.Is(err, ports.ErrWorkspaceBranchCheckedOutElsewhere) { + t.Fatalf("err = %v, want ports.ErrWorkspaceBranchCheckedOutElsewhere", err) + } + if rec, present := st.sessions["mer-1"]; present { + t.Fatalf("seed row must be deleted, got %+v", rec) + } + if rt.created != 0 { + t.Fatal("runtime.Create must not run when workspace materialization fails") + } +} + +// TestSpawn_ParksRowTerminatedWhenSeedDeleteFails asserts the fallback: if the +// seed-row delete itself fails, the failed spawn still parks the row as +// terminated so it never looks live. +func TestSpawn_ParksRowTerminatedWhenSeedDeleteFails(t *testing.T) { + m, st, _, ws := newManager() + ws.createErr = ports.ErrWorkspaceBranchNotFetched + st.deleteErr = errors.New("db locked") + if _, err := m.Spawn(ctx, ports.SpawnConfig{ProjectID: "mer", Kind: domain.KindWorker}); !errors.Is(err, ports.ErrWorkspaceBranchNotFetched) { + t.Fatalf("err = %v, want ports.ErrWorkspaceBranchNotFetched", err) + } + if !st.sessions["mer-1"].IsTerminated { + t.Fatal("row must fall back to terminated when the seed delete fails") + } +} func TestKill_TearsDownRuntimeAndWorkspace(t *testing.T) { m, st, rt, ws := newManager() st.sessions["mer-1"] = mkLive("mer-1") @@ -522,6 +569,112 @@ func TestSpawn_RejectsMissingAgentBinary(t *testing.T) { } } +// pathPinManager builds a manager whose Executable dep is stubbed, plus a +// buffer capturing its log output, for the hook PATH pin tests. +func pathPinManager(executable func() (string, error)) (*Manager, *fakeStore, *fakeRuntime, *bytes.Buffer) { + st := newFakeStore() + rt := &fakeRuntime{} + logBuf := &bytes.Buffer{} + lookPath := func(string) (string, error) { return "/bin/true", nil } + m := New(Deps{ + Runtime: rt, Agents: fakeAgents{}, Workspace: &fakeWorkspace{}, Store: st, + Messenger: &fakeMessenger{}, Lifecycle: &fakeLCM{store: st}, + LookPath: lookPath, Executable: executable, + Logger: slog.New(slog.NewTextHandler(logBuf, nil)), + }) + return m, st, rt, logBuf +} + +// TestSpawnAndRestore_PinHookPATHToDaemonBinary covers the activity-tracking +// fix: the spawned session's PATH must put the daemon executable's directory +// first, so the bare `ao` in the workspace hook commands resolves to the +// daemon that installed them, not a foreign `ao` earlier on the user's PATH +// (e.g. the legacy TypeScript CLI, which has no `hooks` command and silently +// kills activity tracking). +func TestSpawnAndRestore_PinHookPATHToDaemonBinary(t *testing.T) { + daemonExe := filepath.Join(t.TempDir(), "ao") + want := filepath.Dir(daemonExe) + string(os.PathListSeparator) + "/usr/bin" + executable := func() (string, error) { return daemonExe, nil } + + cases := []struct { + name string + launch func(m *Manager, st *fakeStore) error + }{ + { + name: "spawn", + launch: func(m *Manager, _ *fakeStore) error { + _, err := m.Spawn(ctx, ports.SpawnConfig{ProjectID: "mer", Kind: domain.KindWorker}) + return err + }, + }, + { + name: "restore", + launch: func(m *Manager, st *fakeStore) error { + seedTerminal(st, "mer-1", domain.SessionMetadata{WorkspacePath: "/ws/mer-1", Branch: "b", AgentSessionID: "agent-x"}) + _, err := m.Restore(ctx, "mer-1") + return err + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Setenv("PATH", "/usr/bin") + m, st, rt, _ := pathPinManager(executable) + if err := tc.launch(m, st); err != nil { + t.Fatal(err) + } + if got := rt.lastCfg.Env["PATH"]; got != want { + t.Fatalf("runtime env PATH = %q, want %q", got, want) + } + }) + } +} + +// TestSpawn_HookPATHPinUnavailable asserts the degraded path is loud, not +// silent: when the daemon executable cannot anchor `ao` resolution, PATH is +// left to the runtime's inherited default and a warning is logged. +func TestSpawn_HookPATHPinUnavailable(t *testing.T) { + cases := []struct { + name string + executable func() (string, error) + }{ + {"executable unresolvable", func() (string, error) { return "", errors.New("no exe") }}, + {"executable not named ao", func() (string, error) { return "/opt/aod/ao-daemon", nil }}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + m, _, rt, logBuf := pathPinManager(tc.executable) + if _, err := m.Spawn(ctx, ports.SpawnConfig{ProjectID: "mer", Kind: domain.KindWorker}); err != nil { + t.Fatal(err) + } + if got, ok := rt.lastCfg.Env["PATH"]; ok { + t.Fatalf("runtime env PATH = %q, want unset when the pin cannot be applied", got) + } + if !strings.Contains(logBuf.String(), "not pinned") { + t.Fatalf("expected a 'not pinned' warning in the log, got %q", logBuf.String()) + } + }) + } +} + +// TestSpawn_ProjectPATHIsPinBase asserts a project's PATH override survives the +// pin as its base rather than being clobbered or clobbering: the daemon dir +// still comes first. +func TestSpawn_ProjectPATHIsPinBase(t *testing.T) { + daemonExe := filepath.Join(t.TempDir(), "ao") + m, st, rt, _ := pathPinManager(func() (string, error) { return daemonExe, nil }) + st.projects["mer"] = domain.ProjectRecord{ID: "mer", Config: domain.ProjectConfig{ + Env: map[string]string{"PATH": "/proj/bin"}, + }} + if _, err := m.Spawn(ctx, ports.SpawnConfig{ProjectID: "mer", Kind: domain.KindWorker}); err != nil { + t.Fatal(err) + } + want := filepath.Dir(daemonExe) + string(os.PathListSeparator) + "/proj/bin" + if got := rt.lastCfg.Env["PATH"]; got != want { + t.Fatalf("runtime env PATH = %q, want %q", got, want) + } +} + func TestSpawn_KeepsExplicitBranch(t *testing.T) { m, st, _, _ := newManager() s, err := m.Spawn(ctx, ports.SpawnConfig{ProjectID: "mer", Kind: domain.KindWorker, Branch: "feature/x"}) diff --git a/backend/internal/session_manager/provision_test.go b/backend/internal/session_manager/provision_test.go index becdce95..ba04606a 100644 --- a/backend/internal/session_manager/provision_test.go +++ b/backend/internal/session_manager/provision_test.go @@ -2,6 +2,7 @@ package sessionmanager import ( "context" + "errors" "os" "path/filepath" "testing" @@ -26,6 +27,78 @@ func TestSpawnEnvProjectVarsCannotOverrideInternal(t *testing.T) { } } +func TestHookPATH(t *testing.T) { + sep := string(os.PathListSeparator) + daemonExe := filepath.Join("/opt", "aod", "ao") + daemonDir := filepath.Dir(daemonExe) + exeOK := func() (string, error) { return daemonExe, nil } + + cases := []struct { + name string + executable func() (string, error) + daemonPATH string + projectEnv map[string]string + want string + wantErr bool + }{ + { + name: "prepends daemon dir to inherited PATH", + executable: exeOK, + daemonPATH: "/usr/bin" + sep + "/bin", + want: daemonDir + sep + "/usr/bin" + sep + "/bin", + }, + { + name: "project PATH override is the base", + executable: exeOK, + daemonPATH: "/usr/bin", + projectEnv: map[string]string{"PATH": "/proj/bin"}, + want: daemonDir + sep + "/proj/bin", + }, + { + name: "empty base PATH yields the daemon dir alone", + executable: exeOK, + want: daemonDir, + }, + { + name: "unresolvable executable fails", + executable: func() (string, error) { return "", errors.New("no exe") }, + daemonPATH: "/usr/bin", + wantErr: true, + }, + { + // A daemon binary not named "ao" cannot anchor `ao` resolution by + // having its directory prepended, so the pin must be refused. + name: "executable not named ao fails", + executable: func() (string, error) { return filepath.Join("/opt", "aod", "ao-daemon"), nil }, + daemonPATH: "/usr/bin", + wantErr: true, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + getenv := func(key string) string { + if key == "PATH" { + return tc.daemonPATH + } + return "" + } + got, err := hookPATH(tc.executable, getenv, tc.projectEnv) + if tc.wantErr { + if err == nil { + t.Fatalf("hookPATH = %q, want error", got) + } + return + } + if err != nil { + t.Fatalf("hookPATH: %v", err) + } + if got != tc.want { + t.Fatalf("hookPATH = %q, want %q", got, tc.want) + } + }) + } +} + func TestEffectiveHarnessAndAgentConfig(t *testing.T) { cfg := domain.ProjectConfig{ AgentConfig: domain.AgentConfig{Model: "base", Permissions: domain.PermissionModeAuto}, diff --git a/backend/internal/storage/sqlite/gen/models.go b/backend/internal/storage/sqlite/gen/models.go index 694d1196..f8d614bf 100644 --- a/backend/internal/storage/sqlite/gen/models.go +++ b/backend/internal/storage/sqlite/gen/models.go @@ -129,6 +129,7 @@ type Session struct { CreatedAt time.Time UpdatedAt time.Time DisplayName string + FirstSignalAt sql.NullTime } type SessionWorktree struct { diff --git a/backend/internal/storage/sqlite/gen/sessions.sql.go b/backend/internal/storage/sqlite/gen/sessions.sql.go index 830eba7e..920e17f1 100644 --- a/backend/internal/storage/sqlite/gen/sessions.sql.go +++ b/backend/internal/storage/sqlite/gen/sessions.sql.go @@ -7,6 +7,7 @@ package gen import ( "context" + "database/sql" "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" @@ -15,7 +16,7 @@ import ( const getSession = `-- name: GetSession :one SELECT id, project_id, num, issue_id, kind, harness, activity_state, activity_last_at, is_terminated, branch, workspace_path, - runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name + runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name, first_signal_at FROM sessions WHERE id = ? ` @@ -40,6 +41,7 @@ func (q *Queries) GetSession(ctx context.Context, id domain.SessionID) (Session, &i.CreatedAt, &i.UpdatedAt, &i.DisplayName, + &i.FirstSignalAt, ) return i, err } @@ -47,10 +49,10 @@ func (q *Queries) GetSession(ctx context.Context, id domain.SessionID) (Session, const insertSession = `-- name: InsertSession :exec INSERT INTO sessions ( id, project_id, num, issue_id, kind, harness, display_name, - activity_state, activity_last_at, is_terminated, + activity_state, activity_last_at, first_signal_at, is_terminated, branch, workspace_path, runtime_handle_id, agent_session_id, prompt, created_at, updated_at -) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ` type InsertSessionParams struct { @@ -63,6 +65,7 @@ type InsertSessionParams struct { DisplayName string ActivityState domain.ActivityState ActivityLastAt time.Time + FirstSignalAt sql.NullTime IsTerminated bool Branch string WorkspacePath string @@ -84,6 +87,7 @@ func (q *Queries) InsertSession(ctx context.Context, arg InsertSessionParams) er arg.DisplayName, arg.ActivityState, arg.ActivityLastAt, + arg.FirstSignalAt, arg.IsTerminated, arg.Branch, arg.WorkspacePath, @@ -99,7 +103,7 @@ func (q *Queries) InsertSession(ctx context.Context, arg InsertSessionParams) er const listAllSessions = `-- name: ListAllSessions :many SELECT id, project_id, num, issue_id, kind, harness, activity_state, activity_last_at, is_terminated, branch, workspace_path, - runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name + runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name, first_signal_at FROM sessions ORDER BY project_id, num ` @@ -130,6 +134,7 @@ func (q *Queries) ListAllSessions(ctx context.Context) ([]Session, error) { &i.CreatedAt, &i.UpdatedAt, &i.DisplayName, + &i.FirstSignalAt, ); err != nil { return nil, err } @@ -147,7 +152,7 @@ func (q *Queries) ListAllSessions(ctx context.Context) ([]Session, error) { const listSessionsByProject = `-- name: ListSessionsByProject :many SELECT id, project_id, num, issue_id, kind, harness, activity_state, activity_last_at, is_terminated, branch, workspace_path, - runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name + runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name, first_signal_at FROM sessions WHERE project_id = ? ORDER BY num ` @@ -178,6 +183,7 @@ func (q *Queries) ListSessionsByProject(ctx context.Context, projectID domain.Pr &i.CreatedAt, &i.UpdatedAt, &i.DisplayName, + &i.FirstSignalAt, ); err != nil { return nil, err } @@ -248,7 +254,7 @@ func (q *Queries) SessionIsSeed(ctx context.Context, id domain.SessionID) (bool, const updateSession = `-- name: UpdateSession :exec UPDATE sessions SET issue_id = ?, kind = ?, harness = ?, display_name = ?, - activity_state = ?, activity_last_at = ?, is_terminated = ?, + activity_state = ?, activity_last_at = ?, first_signal_at = ?, is_terminated = ?, branch = ?, workspace_path = ?, runtime_handle_id = ?, agent_session_id = ?, prompt = ?, updated_at = ? WHERE id = ? @@ -261,6 +267,7 @@ type UpdateSessionParams struct { DisplayName string ActivityState domain.ActivityState ActivityLastAt time.Time + FirstSignalAt sql.NullTime IsTerminated bool Branch string WorkspacePath string @@ -279,6 +286,7 @@ func (q *Queries) UpdateSession(ctx context.Context, arg UpdateSessionParams) er arg.DisplayName, arg.ActivityState, arg.ActivityLastAt, + arg.FirstSignalAt, arg.IsTerminated, arg.Branch, arg.WorkspacePath, diff --git a/backend/internal/storage/sqlite/migrations/0010_add_first_signal_at.sql b/backend/internal/storage/sqlite/migrations/0010_add_first_signal_at.sql new file mode 100644 index 00000000..ae3ffc99 --- /dev/null +++ b/backend/internal/storage/sqlite/migrations/0010_add_first_signal_at.sql @@ -0,0 +1,59 @@ +-- +goose Up +-- first_signal_at records when the FIRST agent hook callback arrived for a +-- session: raw signal receipt, independent of the derived activity state. +-- NULL means no hook has ever reported for the current spawn/restore; the +-- session service derives the "no_signal" display status from it so a broken +-- hook pipeline (agent upgrade, PATH problem, blocked interactive prompt) +-- surfaces as "no activity signal" instead of a confident "idle". +-- +-- Backfill existing rows from activity_last_at: sessions created before this +-- column are treated as having signaled so an upgrade doesn't flip every +-- historical session to no_signal. +-- +goose StatementBegin +ALTER TABLE sessions ADD COLUMN first_signal_at TIMESTAMP; +-- +goose StatementEnd +-- +goose StatementBegin +UPDATE sessions SET first_signal_at = activity_last_at; +-- +goose StatementEnd + +-- Recreate the sessions update CDC trigger so the first hook receipt also +-- fans out a session_updated event: the first signal often repeats the seeded +-- activity state (e.g. Codex SessionStart reports idle on an idle-seeded row), +-- and without this clause the dashboard would keep showing no_signal until the +-- next real state change. +-- +goose StatementBegin +DROP TRIGGER sessions_cdc_update; +-- +goose StatementEnd +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_update +AFTER UPDATE ON sessions +WHEN OLD.activity_state <> NEW.activity_state + OR OLD.is_terminated <> NEW.is_terminated + OR (OLD.first_signal_at IS NULL AND NEW.first_signal_at IS NOT NULL) +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_updated', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +DROP TRIGGER sessions_cdc_update; +-- +goose StatementEnd +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_update +AFTER UPDATE ON sessions +WHEN OLD.activity_state <> NEW.activity_state + OR OLD.is_terminated <> NEW.is_terminated +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_updated', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd +-- +goose StatementBegin +ALTER TABLE sessions DROP COLUMN first_signal_at; +-- +goose StatementEnd diff --git a/backend/internal/storage/sqlite/queries/sessions.sql b/backend/internal/storage/sqlite/queries/sessions.sql index 7b4329e4..5c66c07c 100644 --- a/backend/internal/storage/sqlite/queries/sessions.sql +++ b/backend/internal/storage/sqlite/queries/sessions.sql @@ -4,15 +4,15 @@ SELECT COALESCE(MAX(num), 0) + 1 AS next FROM sessions WHERE project_id = ?; -- name: InsertSession :exec INSERT INTO sessions ( id, project_id, num, issue_id, kind, harness, display_name, - activity_state, activity_last_at, is_terminated, + activity_state, activity_last_at, first_signal_at, is_terminated, branch, workspace_path, runtime_handle_id, agent_session_id, prompt, created_at, updated_at -) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); -- name: UpdateSession :exec UPDATE sessions SET issue_id = ?, kind = ?, harness = ?, display_name = ?, - activity_state = ?, activity_last_at = ?, is_terminated = ?, + activity_state = ?, activity_last_at = ?, first_signal_at = ?, is_terminated = ?, branch = ?, workspace_path = ?, runtime_handle_id = ?, agent_session_id = ?, prompt = ?, updated_at = ? WHERE id = ?; @@ -20,19 +20,19 @@ WHERE id = ?; -- name: GetSession :one SELECT id, project_id, num, issue_id, kind, harness, activity_state, activity_last_at, is_terminated, branch, workspace_path, - runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name + runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name, first_signal_at FROM sessions WHERE id = ?; -- name: ListSessionsByProject :many SELECT id, project_id, num, issue_id, kind, harness, activity_state, activity_last_at, is_terminated, branch, workspace_path, - runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name + runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name, first_signal_at FROM sessions WHERE project_id = ? ORDER BY num; -- name: ListAllSessions :many SELECT id, project_id, num, issue_id, kind, harness, activity_state, activity_last_at, is_terminated, branch, workspace_path, - runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name + runtime_handle_id, agent_session_id, prompt, created_at, updated_at, display_name, first_signal_at FROM sessions ORDER BY project_id, num; diff --git a/backend/internal/storage/sqlite/store/session_store.go b/backend/internal/storage/sqlite/store/session_store.go index 751b9ebf..84f17a50 100644 --- a/backend/internal/storage/sqlite/store/session_store.go +++ b/backend/internal/storage/sqlite/store/session_store.go @@ -180,7 +180,8 @@ func rowToRecord(row gen.Session) domain.SessionRecord { State: row.ActivityState, LastActivityAt: row.ActivityLastAt, }, - IsTerminated: row.IsTerminated, + FirstSignalAt: nullTimeToTime(row.FirstSignalAt), + IsTerminated: row.IsTerminated, Metadata: domain.SessionMetadata{ Branch: row.Branch, WorkspacePath: row.WorkspacePath, @@ -205,6 +206,7 @@ func recordToInsert(rec domain.SessionRecord, num int64) gen.InsertSessionParams DisplayName: rec.DisplayName, ActivityState: activity.State, ActivityLastAt: activity.LastActivityAt, + FirstSignalAt: timeToNullTime(rec.FirstSignalAt), IsTerminated: rec.IsTerminated, Branch: rec.Metadata.Branch, WorkspacePath: rec.Metadata.WorkspacePath, @@ -226,6 +228,7 @@ func recordToUpdate(rec domain.SessionRecord) gen.UpdateSessionParams { DisplayName: rec.DisplayName, ActivityState: activity.State, ActivityLastAt: activity.LastActivityAt, + FirstSignalAt: timeToNullTime(rec.FirstSignalAt), IsTerminated: rec.IsTerminated, Branch: rec.Metadata.Branch, WorkspacePath: rec.Metadata.WorkspacePath, @@ -236,6 +239,22 @@ func recordToUpdate(rec domain.SessionRecord) gen.UpdateSessionParams { } } +// nullTimeToTime / timeToNullTime bridge the nullable first_signal_at column +// to the domain's zero-time convention (zero = no signal received yet). +func nullTimeToTime(t sql.NullTime) time.Time { + if !t.Valid { + return time.Time{} + } + return t.Time +} + +func timeToNullTime(t time.Time) sql.NullTime { + if t.IsZero() { + return sql.NullTime{} + } + return sql.NullTime{Time: t, Valid: true} +} + func normalActivity(a domain.Activity, fallback time.Time) domain.Activity { if a.State == "" { a.State = domain.ActivityIdle diff --git a/backend/internal/storage/sqlite/store/store_test.go b/backend/internal/storage/sqlite/store/store_test.go index 61b8a479..d34017a0 100644 --- a/backend/internal/storage/sqlite/store/store_test.go +++ b/backend/internal/storage/sqlite/store/store_test.go @@ -269,6 +269,39 @@ func TestSessionUpdateActivityAndTermination(t *testing.T) { } } +func TestSessionFirstSignalRoundTrip(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + + // Fresh sessions have no signal receipt: NULL round-trips as zero time. + got, _, _ := s.GetSession(ctx, r.ID) + if !got.FirstSignalAt.IsZero() { + t.Fatalf("fresh session has receipt: %v", got.FirstSignalAt) + } + + stamp := time.Now().UTC().Truncate(time.Second) + got.FirstSignalAt = stamp + if err := s.UpdateSession(ctx, got); err != nil { + t.Fatal(err) + } + again, _, _ := s.GetSession(ctx, r.ID) + if !again.FirstSignalAt.Equal(stamp) { + t.Fatalf("receipt not persisted: got %v want %v", again.FirstSignalAt, stamp) + } + + // Clearing it (spawn/restore re-proves the hook pipeline) round-trips too. + again.FirstSignalAt = time.Time{} + if err := s.UpdateSession(ctx, again); err != nil { + t.Fatal(err) + } + final, _, _ := s.GetSession(ctx, r.ID) + if !final.FirstSignalAt.IsZero() { + t.Fatalf("receipt not cleared: %v", final.FirstSignalAt) + } +} + func TestPRCRUD(t *testing.T) { s := newTestStore(t) ctx := context.Background() @@ -660,49 +693,3 @@ func TestConcurrentSessionCreateAssignsUniqueNums(t *testing.T) { t.Fatalf("created %d sessions, want %d", len(all), n) } } - -func TestSessionWorktreesRoundTrip(t *testing.T) { - s := newTestStore(t) - ctx := context.Background() - seedProject(t, s, "ws") - rec, err := s.CreateSession(ctx, sampleRecord("ws")) - if err != nil { - t.Fatalf("create session: %v", err) - } - rows := []domain.SessionWorktreeRecord{ - {SessionID: rec.ID, RepoName: domain.RootWorkspaceRepoName, Branch: "ao/ws-1", BaseSHA: "root-base", WorktreePath: "/managed/ws/ws-1", State: "active"}, - {SessionID: rec.ID, RepoName: "api", Branch: "ao/ws-1", BaseSHA: "api-base", WorktreePath: "/managed/ws/ws-1/api", PreservedRef: "refs/ao/preserved/ws-1", State: "removed"}, - } - for _, row := range rows { - if err := s.UpsertSessionWorktree(ctx, row); err != nil { - t.Fatalf("upsert worktree %s: %v", row.RepoName, err) - } - } - got, err := s.ListSessionWorktrees(ctx, rec.ID) - if err != nil { - t.Fatalf("list worktrees: %v", err) - } - if !reflect.DeepEqual(got, rows) { - t.Fatalf("worktrees = %#v, want %#v", got, rows) - } - one, ok, err := s.GetSessionWorktree(ctx, rec.ID, "api") - if err != nil || !ok || one.PreservedRef != "refs/ao/preserved/ws-1" { - t.Fatalf("get api = %#v ok=%v err=%v", one, ok, err) - } - rows[1].State = "active" - rows[1].PreservedRef = "" - if err := s.UpsertSessionWorktree(ctx, rows[1]); err != nil { - t.Fatalf("update api worktree: %v", err) - } - one, ok, err = s.GetSessionWorktree(ctx, rec.ID, "api") - if err != nil || !ok || one.State != "active" || one.PreservedRef != "" { - t.Fatalf("updated api = %#v ok=%v err=%v", one, ok, err) - } - if err := s.DeleteSessionWorktrees(ctx, rec.ID); err != nil { - t.Fatalf("delete worktrees: %v", err) - } - got, err = s.ListSessionWorktrees(ctx, rec.ID) - if err != nil || len(got) != 0 { - t.Fatalf("after delete = %#v err=%v", got, err) - } -} diff --git a/docs/agent/README.md b/docs/agent/README.md index 669605c8..e9dc6b06 100644 --- a/docs/agent/README.md +++ b/docs/agent/README.md @@ -66,11 +66,14 @@ ao hooks The callback: 1. Reads the native hook JSON payload from stdin. -2. Reads the AO session id from `AO_SESSION_ID`. -3. Opens the AO SQLite store (`ao.db`) in the data dir — `AO_DATA_DIR`, default `/agent-orchestrator/data`. -4. Merges normalized metadata into the matching session row. -5. Publishes `session.updated` when metadata changed. -6. Prints `{}` and exits 0 for successful no-op cases, including non-AO sessions or missing rows. +2. Reads the AO session id from `AO_SESSION_ID` (exits 0 immediately for non-AO sessions). +3. Derives a normalized activity state from the agent + event (`activitydispatch.Derive`); events with no activity meaning report nothing. +4. POSTs the state to the daemon at `POST /api/v1/sessions/{id}/activity`; the daemon owns the store and fans out `session.updated` via CDC. +5. Always exits 0 — a failed delivery must never break the user's agent. Failures are appended to `hooks.log` under `AO_DATA_DIR` and surfaced by the `hooks-log` check in `ao doctor`. + +The daemon also records the FIRST callback per spawn/restore (`first_signal_at`); a live session that has never signaled past a grace period derives the `no_signal` display status instead of a confident `idle`, so a broken hook pipeline is visible on the dashboard. + +Persisting hook-derived metadata (`agentSessionId`, `title`, `summary`) into the session row is **not implemented yet** — until it is, adapters whose restore needs the native session id (e.g. `codex resume`) fall back to a fresh launch. The spawn engine inserts the AO session row before launching the durability provider so early startup hooks can update an existing row. If launch fails after insertion, spawn deletes the row during rollback. diff --git a/frontend/src/renderer/types/workspace.test.ts b/frontend/src/renderer/types/workspace.test.ts index 822eb30a..ccbfb1b0 100644 --- a/frontend/src/renderer/types/workspace.test.ts +++ b/frontend/src/renderer/types/workspace.test.ts @@ -26,6 +26,7 @@ function sessionWith(overrides: Partial): WorkspaceSession { describe("toSessionStatus", () => { it("passes through a known status", () => { expect(toSessionStatus("mergeable")).toBe("mergeable"); + expect(toSessionStatus("no_signal")).toBe("no_signal"); }); it("overrides to terminated when the session is terminated", () => { @@ -50,6 +51,7 @@ describe("workerDisplayStatus", () => { ["needs_input", "needs_you"], ["changes_requested", "needs_you"], ["review_pending", "needs_you"], + ["no_signal", "needs_you"], ["ci_failed", "ci_failed"], ["approved", "mergeable"], ["mergeable", "mergeable"], diff --git a/frontend/src/renderer/types/workspace.ts b/frontend/src/renderer/types/workspace.ts index ff0dc8ba..70ad080b 100644 --- a/frontend/src/renderer/types/workspace.ts +++ b/frontend/src/renderer/types/workspace.ts @@ -10,6 +10,7 @@ export type SessionStatus = | "merged" | "needs_input" | "idle" + | "no_signal" | "terminated"; const sessionStatuses = new Set([ @@ -24,6 +25,7 @@ const sessionStatuses = new Set([ "merged", "needs_input", "idle", + "no_signal", "terminated", ]); @@ -99,6 +101,9 @@ export function workerDisplayStatus(session: WorkspaceSession): WorkerDisplaySta case "needs_input": case "changes_requested": case "review_pending": + // no_signal: the daemon has never heard from this agent — a human + // should look at the pane, so it surfaces as needs-you. + case "no_signal": return "needs_you"; case "ci_failed": return "ci_failed"; From bd7009b7b575bca038cabaf001225fa78293a624 Mon Sep 17 00:00:00 2001 From: Ashish Huddar Date: Thu, 11 Jun 2026 10:11:37 +0530 Subject: [PATCH 3/4] test(store): restore TestSessionWorktreesRoundTrip lost in the re-landing port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The branch ported store_test.go wholesale from the closed redesign branch, whose copy predates #165 — silently dropping the session-worktrees round-trip test #165 added. Restore main's file and re-apply only this branch's addition (TestSessionFirstSignalRoundTrip). No other ported file lost main-side content (audited per-file against main; the remaining deletions are this branch's intended refactors). Co-Authored-By: Claude Fable 5 --- .../storage/sqlite/store/store_test.go | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/backend/internal/storage/sqlite/store/store_test.go b/backend/internal/storage/sqlite/store/store_test.go index d34017a0..9571ef5c 100644 --- a/backend/internal/storage/sqlite/store/store_test.go +++ b/backend/internal/storage/sqlite/store/store_test.go @@ -693,3 +693,49 @@ func TestConcurrentSessionCreateAssignsUniqueNums(t *testing.T) { t.Fatalf("created %d sessions, want %d", len(all), n) } } + +func TestSessionWorktreesRoundTrip(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "ws") + rec, err := s.CreateSession(ctx, sampleRecord("ws")) + if err != nil { + t.Fatalf("create session: %v", err) + } + rows := []domain.SessionWorktreeRecord{ + {SessionID: rec.ID, RepoName: domain.RootWorkspaceRepoName, Branch: "ao/ws-1", BaseSHA: "root-base", WorktreePath: "/managed/ws/ws-1", State: "active"}, + {SessionID: rec.ID, RepoName: "api", Branch: "ao/ws-1", BaseSHA: "api-base", WorktreePath: "/managed/ws/ws-1/api", PreservedRef: "refs/ao/preserved/ws-1", State: "removed"}, + } + for _, row := range rows { + if err := s.UpsertSessionWorktree(ctx, row); err != nil { + t.Fatalf("upsert worktree %s: %v", row.RepoName, err) + } + } + got, err := s.ListSessionWorktrees(ctx, rec.ID) + if err != nil { + t.Fatalf("list worktrees: %v", err) + } + if !reflect.DeepEqual(got, rows) { + t.Fatalf("worktrees = %#v, want %#v", got, rows) + } + one, ok, err := s.GetSessionWorktree(ctx, rec.ID, "api") + if err != nil || !ok || one.PreservedRef != "refs/ao/preserved/ws-1" { + t.Fatalf("get api = %#v ok=%v err=%v", one, ok, err) + } + rows[1].State = "active" + rows[1].PreservedRef = "" + if err := s.UpsertSessionWorktree(ctx, rows[1]); err != nil { + t.Fatalf("update api worktree: %v", err) + } + one, ok, err = s.GetSessionWorktree(ctx, rec.ID, "api") + if err != nil || !ok || one.State != "active" || one.PreservedRef != "" { + t.Fatalf("updated api = %#v ok=%v err=%v", one, ok, err) + } + if err := s.DeleteSessionWorktrees(ctx, rec.ID); err != nil { + t.Fatalf("delete worktrees: %v", err) + } + got, err = s.ListSessionWorktrees(ctx, rec.ID) + if err != nil || len(got) != 0 { + t.Fatalf("after delete = %#v err=%v", got, err) + } +} From eac71e5802ae6d5afe760a31483ec8d18d06f721 Mon Sep 17 00:00:00 2001 From: Ashish Huddar Date: Thu, 11 Jun 2026 10:24:37 +0530 Subject: [PATCH 4/4] fix(status): only derive no_signal for harnesses that have a hook pipeline Review finding: the no_signal downgrade had no harness-capability gate, but first_signal_at can only ever be stamped by an `ao hooks` callback. Ten spawnable harnesses (amp, aider, crush, grok, kimi, devin, auggie, continue, vibe, pi) install no hooks at all, so every live session of theirs would have flipped from idle to a permanent no_signal -> needs_you after the 90s grace. The session service now takes a SignalCapable predicate; daemon wiring injects activitydispatch.SupportsHarness (the deriver registry is the source of truth for "this harness can signal"). Left nil, the service never claims no_signal. A new dispatch test pins that every deriver token is a known harness name. Also from the same review: - lifecycle/manager.go and the 0010 migration claimed Codex's SessionStart reports idle as the first signal; both codex and claude-code derivers deliberately return no signal for session-start, so the comments now cite a real case (a lost "active" POST followed by a Stop hook landing idle). - docs/agent/README.md documents the gate and the restore caveat: a restored session the user never prompts has nothing to signal, so it shows no_signal after the grace until a receipt-only session-start signal exists. - 0010 migration uses DROP TRIGGER IF EXISTS per house style. Co-Authored-By: Claude Fable 5 --- .../agent/activitydispatch/dispatch.go | 11 ++++ .../agent/activitydispatch/dispatch_test.go | 33 ++++++++++ backend/internal/daemon/lifecycle_wiring.go | 11 +++- backend/internal/lifecycle/manager.go | 5 +- backend/internal/service/session/service.go | 25 +++++++- backend/internal/service/session/status.go | 24 ++++---- .../internal/service/session/status_test.go | 60 +++++++++++++------ .../migrations/0010_add_first_signal_at.sql | 11 ++-- docs/agent/README.md | 2 +- 9 files changed, 141 insertions(+), 41 deletions(-) create mode 100644 backend/internal/adapters/agent/activitydispatch/dispatch_test.go diff --git a/backend/internal/adapters/agent/activitydispatch/dispatch.go b/backend/internal/adapters/agent/activitydispatch/dispatch.go index a386668c..812e15e2 100644 --- a/backend/internal/adapters/agent/activitydispatch/dispatch.go +++ b/backend/internal/adapters/agent/activitydispatch/dispatch.go @@ -57,3 +57,14 @@ func Derive(agent, event string, payload []byte) (domain.ActivityState, bool) { } return derive(event, payload) } + +// SupportsHarness reports whether a harness has an activity pipeline at all: +// a registered deriver here means its adapter installs `ao hooks ` +// callbacks that can reach the daemon. Status derivation uses this to decide +// whether prolonged silence is suspicious (no_signal) or simply all a hook-less +// harness can ever report (idle). Harness names and `ao hooks` agent tokens are +// the same strings by convention. +func SupportsHarness(h domain.AgentHarness) bool { + _, ok := Derivers[string(h)] + return ok +} diff --git a/backend/internal/adapters/agent/activitydispatch/dispatch_test.go b/backend/internal/adapters/agent/activitydispatch/dispatch_test.go new file mode 100644 index 00000000..68a007ff --- /dev/null +++ b/backend/internal/adapters/agent/activitydispatch/dispatch_test.go @@ -0,0 +1,33 @@ +package activitydispatch + +import ( + "testing" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" +) + +// Every deriver key must be a known harness name: SupportsHarness equates the +// two, so a token that drifts from its harness constant would silently report +// the harness as hook-less. +func TestDeriverTokensAreKnownHarnesses(t *testing.T) { + for token := range Derivers { + if !domain.AgentHarness(token).IsKnown() { + t.Errorf("deriver token %q is not a known AgentHarness", token) + } + } +} + +func TestSupportsHarness(t *testing.T) { + for _, h := range []domain.AgentHarness{domain.HarnessCodex, domain.HarnessClaudeCode, domain.HarnessOpenCode} { + if !SupportsHarness(h) { + t.Errorf("SupportsHarness(%q) = false, want true", h) + } + } + // Harnesses whose adapters install no hooks must read as unsupported so + // their silence never derives no_signal. + for _, h := range []domain.AgentHarness{domain.HarnessAmp, domain.HarnessAider, domain.HarnessCrush, domain.AgentHarness("")} { + if SupportsHarness(h) { + t.Errorf("SupportsHarness(%q) = true, want false", h) + } + } +} diff --git a/backend/internal/daemon/lifecycle_wiring.go b/backend/internal/daemon/lifecycle_wiring.go index 61264a4c..b4d40905 100644 --- a/backend/internal/daemon/lifecycle_wiring.go +++ b/backend/internal/daemon/lifecycle_wiring.go @@ -7,6 +7,7 @@ import ( "path/filepath" "github.com/aoagents/agent-orchestrator/backend/internal/adapters" + "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/activitydispatch" agentregistry "github.com/aoagents/agent-orchestrator/backend/internal/adapters/agent/registry" "github.com/aoagents/agent-orchestrator/backend/internal/adapters/workspace/gitworktree" "github.com/aoagents/agent-orchestrator/backend/internal/config" @@ -84,7 +85,15 @@ func startSession(cfg config.Config, runtime ports.Runtime, store *sqlite.Store, if err != nil { logSCMProviderDisabled(log, err) } - return sessionsvc.NewWithDeps(sessionsvc.Deps{Manager: mgr, Store: store, PRClaimer: store, SCM: scmProvider}), nil + return sessionsvc.NewWithDeps(sessionsvc.Deps{ + Manager: mgr, + Store: store, + PRClaimer: store, + SCM: scmProvider, + // no_signal only makes sense for harnesses whose adapters install + // activity hooks; the deriver registry is the source of truth for that. + SignalCapable: activitydispatch.SupportsHarness, + }), nil } // runtimeMessageSender is the narrow part of the concrete runtime needed by diff --git a/backend/internal/lifecycle/manager.go b/backend/internal/lifecycle/manager.go index 10554121..925af553 100644 --- a/backend/internal/lifecycle/manager.go +++ b/backend/internal/lifecycle/manager.go @@ -87,8 +87,9 @@ func (m *Manager) ApplyActivitySignal(ctx context.Context, id domain.SessionID, act := domain.Activity{State: s.State, LastActivityAt: timeOr(s.Timestamp, now)} // A same-state repeat is still a write when it is the FIRST signal for // this spawn: the receipt itself is a durable fact (it clears the - // no_signal display status), e.g. Codex's SessionStart reports idle on - // an idle-seeded row. + // no_signal display status). Hook deliveries are best-effort, so the + // first to ARRIVE may match the seeded state — e.g. a turn's "active" + // POST is lost and its Stop hook lands idle on the idle-seeded row. if sameActivity(cur.Activity, act) && !cur.FirstSignalAt.IsZero() { return cur, false } diff --git a/backend/internal/service/session/service.go b/backend/internal/service/session/service.go index f0ccf548..4d33ae41 100644 --- a/backend/internal/service/session/service.go +++ b/backend/internal/service/session/service.go @@ -67,6 +67,11 @@ type Service struct { prClaimer ports.PRClaimer scm scmProvider clock func() time.Time + // signalCapable reports whether a harness has a hook pipeline that can + // deliver activity signals at all. Only capable harnesses are eligible for + // the no_signal downgrade — a hook-less harness staying silent forever is + // normal, not a broken pipeline. nil means "unknown": never downgrade. + signalCapable func(domain.AgentHarness) bool } // New wires a controller-facing session service over an internal session Manager. @@ -83,11 +88,15 @@ type Deps struct { PRClaimer ports.PRClaimer SCM scmProvider Clock func() time.Time + // SignalCapable gates the no_signal status downgrade per harness; daemon + // wiring passes activitydispatch.SupportsHarness. Left nil, no session is + // ever downgraded to no_signal. + SignalCapable func(domain.AgentHarness) bool } // NewWithDeps wires a session service with optional PR-claim dependencies. func NewWithDeps(d Deps) *Service { - s := &Service{manager: d.Manager, store: d.Store, prClaimer: d.PRClaimer, scm: d.SCM, clock: d.Clock} + s := &Service{manager: d.Manager, store: d.Store, prClaimer: d.PRClaimer, scm: d.SCM, clock: d.Clock, signalCapable: d.SignalCapable} if s.prClaimer == nil { if w, ok := d.Store.(ports.PRClaimer); ok { s.prClaimer = w @@ -301,9 +310,9 @@ func (s *Service) toSession(ctx context.Context, rec domain.SessionRecord) (doma return domain.Session{}, fmt.Errorf("pr facts %s: %w", rec.ID, err) } if !ok { - return domain.Session{SessionRecord: rec, Status: deriveStatus(rec, nil, s.now()), TerminalHandleID: rec.Metadata.RuntimeHandleID}, nil + return domain.Session{SessionRecord: rec, Status: deriveStatus(rec, nil, s.now(), s.harnessSignals(rec.Harness)), TerminalHandleID: rec.Metadata.RuntimeHandleID}, nil } - return domain.Session{SessionRecord: rec, Status: deriveStatus(rec, &pr, s.now()), TerminalHandleID: rec.Metadata.RuntimeHandleID}, nil + return domain.Session{SessionRecord: rec, Status: deriveStatus(rec, &pr, s.now(), s.harnessSignals(rec.Harness)), TerminalHandleID: rec.Metadata.RuntimeHandleID}, nil } // now tolerates a zero-value Service (tests construct the struct literally @@ -314,3 +323,13 @@ func (s *Service) now() time.Time { } return s.clock() } + +// harnessSignals tolerates a zero-value Service the same way now does. Without +// an injected capability predicate the service cannot tell a broken pipeline +// from a hook-less harness, so it never claims no_signal. +func (s *Service) harnessSignals(h domain.AgentHarness) bool { + if s.signalCapable == nil { + return false + } + return s.signalCapable(h) +} diff --git a/backend/internal/service/session/status.go b/backend/internal/service/session/status.go index a3ebdcba..4510f7f5 100644 --- a/backend/internal/service/session/status.go +++ b/backend/internal/service/session/status.go @@ -8,14 +8,18 @@ import ( // noSignalGrace is how long after spawn/restore a session may stay silent // before its idle reading is downgraded to StatusNoSignal. It covers the -// agent's TUI boot plus the gap to its first hook callback (Codex fires -// SessionStart/UserPromptSubmit on the first turn, seconds after an -// auto-submitted prompt); past it, a session that has never signaled is -// indistinguishable from one with a broken hook pipeline, and the dashboard -// must not claim a confident "idle". +// agent's TUI boot plus the gap to the first activity-bearing hook callback +// (for Codex that is UserPromptSubmit, seconds after the auto-submitted spawn +// prompt — its SessionStart hook fires earlier but carries no activity state); +// past it, a silent session is indistinguishable from one with a broken hook +// pipeline, and the dashboard must not claim a confident "idle". const noSignalGrace = 90 * time.Second -func deriveStatus(rec domain.SessionRecord, pr *domain.PRFacts, now time.Time) domain.SessionStatus { +// deriveStatus computes the display status. signalCapable says whether this +// session's harness has an activity hook pipeline at all; only then can +// prolonged silence mean the pipeline is broken (no_signal) rather than the +// permanent, normal silence of a hook-less harness. +func deriveStatus(rec domain.SessionRecord, pr *domain.PRFacts, now time.Time, signalCapable bool) domain.SessionStatus { if rec.IsTerminated { if pr != nil && pr.Merged { return domain.StatusMerged @@ -40,10 +44,10 @@ func deriveStatus(rec domain.SessionRecord, pr *domain.PRFacts, now time.Time) d return domain.StatusWorking } - // No hook callback has ever arrived for this spawn/restore. The seeded - // LastActivityAt marks the launch, so once the grace passes the honest - // status is "no signal", not "idle". - if rec.FirstSignalAt.IsZero() && now.Sub(rec.Activity.LastActivityAt) > noSignalGrace { + // No hook callback has ever arrived for this spawn/restore even though the + // harness has a hook pipeline. The seeded LastActivityAt marks the launch, + // so once the grace passes the honest status is "no signal", not "idle". + if signalCapable && rec.FirstSignalAt.IsZero() && now.Sub(rec.Activity.LastActivityAt) > noSignalGrace { return domain.StatusNoSignal } return domain.StatusIdle diff --git a/backend/internal/service/session/status_test.go b/backend/internal/service/session/status_test.go index bfcc21e6..f7a96e2a 100644 --- a/backend/internal/service/session/status_test.go +++ b/backend/internal/service/session/status_test.go @@ -34,40 +34,62 @@ func TestServiceDerivesStatusFromSessionFactsAndPR(t *testing.T) { name string rec domain.SessionRecord pr *domain.PRFacts - want domain.SessionStatus + // hookless marks a harness with no activity pipeline (signalCapable + // false): silence is its permanent normal state, never no_signal. + hookless bool + want domain.SessionStatus }{ - {"terminated", statusRec(domain.ActivityExited, true), nil, domain.StatusTerminated}, - {"merged-pr", statusRec(domain.ActivityIdle, true), statusPR(domain.PRFacts{Merged: true}), domain.StatusMerged}, - {"needs-input", statusRec(domain.ActivityWaitingInput, false), statusPR(domain.PRFacts{CI: domain.CIFailing}), domain.StatusNeedsInput}, - {"ci-failed", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{CI: domain.CIFailing}), domain.StatusCIFailed}, - {"draft", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Draft: true}), domain.StatusDraft}, - {"changes-requested", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Review: domain.ReviewChangesRequest}), domain.StatusChangesRequested}, - {"mergeable", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Mergeability: domain.MergeMergeable}), domain.StatusMergeable}, - {"approved", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Review: domain.ReviewApproved}), domain.StatusApproved}, - {"review-pending", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Review: domain.ReviewRequired}), domain.StatusReviewPending}, - {"pr-open", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{}), domain.StatusPROpen}, - {"working", statusRec(domain.ActivityActive, false), nil, domain.StatusWorking}, - {"idle", statusRec(domain.ActivityIdle, false), nil, domain.StatusIdle}, + {"terminated", statusRec(domain.ActivityExited, true), nil, false, domain.StatusTerminated}, + {"merged-pr", statusRec(domain.ActivityIdle, true), statusPR(domain.PRFacts{Merged: true}), false, domain.StatusMerged}, + {"needs-input", statusRec(domain.ActivityWaitingInput, false), statusPR(domain.PRFacts{CI: domain.CIFailing}), false, domain.StatusNeedsInput}, + {"ci-failed", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{CI: domain.CIFailing}), false, domain.StatusCIFailed}, + {"draft", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Draft: true}), false, domain.StatusDraft}, + {"changes-requested", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Review: domain.ReviewChangesRequest}), false, domain.StatusChangesRequested}, + {"mergeable", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Mergeability: domain.MergeMergeable}), false, domain.StatusMergeable}, + {"approved", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Review: domain.ReviewApproved}), false, domain.StatusApproved}, + {"review-pending", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{Review: domain.ReviewRequired}), false, domain.StatusReviewPending}, + {"pr-open", statusRec(domain.ActivityIdle, false), statusPR(domain.PRFacts{}), false, domain.StatusPROpen}, + {"working", statusRec(domain.ActivityActive, false), nil, false, domain.StatusWorking}, + {"idle", statusRec(domain.ActivityIdle, false), nil, false, domain.StatusIdle}, - // A live session whose agent never signaled is no_signal once the - // grace passes — never a confident idle. - {"no-signal-after-grace", silentRec(2 * noSignalGrace), nil, domain.StatusNoSignal}, + // A live session whose hook-capable agent never signaled is no_signal + // once the grace passes — never a confident idle. + {"no-signal-after-grace", silentRec(2 * noSignalGrace), nil, false, domain.StatusNoSignal}, + // A hook-less harness can never signal: its silence stays idle forever + // instead of degrading into a false "needs you". + {"hookless-silent-stays-idle", silentRec(2 * noSignalGrace), nil, true, domain.StatusIdle}, // Right after spawn the agent legitimately hasn't called back yet. - {"silent-within-grace-is-idle", silentRec(10 * time.Second), nil, domain.StatusIdle}, + {"silent-within-grace-is-idle", silentRec(10 * time.Second), nil, false, domain.StatusIdle}, // Termination and PR facts outrank the missing-signal downgrade. { "no-signal-terminated-wins", domain.SessionRecord{Activity: domain.Activity{State: domain.ActivityExited, LastActivityAt: statusNow.Add(-2 * noSignalGrace)}, IsTerminated: true}, nil, + false, domain.StatusTerminated, }, - {"no-signal-pr-wins", silentRec(2 * noSignalGrace), statusPR(domain.PRFacts{}), domain.StatusPROpen}, + {"no-signal-pr-wins", silentRec(2 * noSignalGrace), statusPR(domain.PRFacts{}), false, domain.StatusPROpen}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := deriveStatus(tt.rec, tt.pr, statusNow); got != tt.want { + if got := deriveStatus(tt.rec, tt.pr, statusNow, !tt.hookless); got != tt.want { t.Fatalf("got %q want %q", got, tt.want) } }) } } + +// Without an injected capability predicate the service must never claim +// no_signal; with one, capability follows the predicate per harness. +func TestHarnessSignalsCapabilityGate(t *testing.T) { + if (&Service{}).harnessSignals(domain.HarnessCodex) { + t.Fatal("zero-value Service reports signal-capable; want incapable (never no_signal)") + } + s := NewWithDeps(Deps{SignalCapable: func(h domain.AgentHarness) bool { return h == domain.HarnessCodex }}) + if !s.harnessSignals(domain.HarnessCodex) { + t.Fatal("harnessSignals(codex) = false with codex-capable predicate") + } + if s.harnessSignals(domain.HarnessAmp) { + t.Fatal("harnessSignals(amp) = true with codex-only predicate") + } +} diff --git a/backend/internal/storage/sqlite/migrations/0010_add_first_signal_at.sql b/backend/internal/storage/sqlite/migrations/0010_add_first_signal_at.sql index ae3ffc99..080c44e9 100644 --- a/backend/internal/storage/sqlite/migrations/0010_add_first_signal_at.sql +++ b/backend/internal/storage/sqlite/migrations/0010_add_first_signal_at.sql @@ -17,12 +17,13 @@ UPDATE sessions SET first_signal_at = activity_last_at; -- +goose StatementEnd -- Recreate the sessions update CDC trigger so the first hook receipt also --- fans out a session_updated event: the first signal often repeats the seeded --- activity state (e.g. Codex SessionStart reports idle on an idle-seeded row), --- and without this clause the dashboard would keep showing no_signal until the +-- fans out a session_updated event: hook deliveries are best-effort, so the +-- first signal to arrive may repeat the seeded activity state (a lost "active" +-- POST followed by a Stop hook landing idle on the idle-seeded row), and +-- without this clause the dashboard would keep showing no_signal until the -- next real state change. -- +goose StatementBegin -DROP TRIGGER sessions_cdc_update; +DROP TRIGGER IF EXISTS sessions_cdc_update; -- +goose StatementEnd -- +goose StatementBegin CREATE TRIGGER sessions_cdc_update @@ -40,7 +41,7 @@ END; -- +goose Down -- +goose StatementBegin -DROP TRIGGER sessions_cdc_update; +DROP TRIGGER IF EXISTS sessions_cdc_update; -- +goose StatementEnd -- +goose StatementBegin CREATE TRIGGER sessions_cdc_update diff --git a/docs/agent/README.md b/docs/agent/README.md index e9dc6b06..efc39c40 100644 --- a/docs/agent/README.md +++ b/docs/agent/README.md @@ -71,7 +71,7 @@ The callback: 4. POSTs the state to the daemon at `POST /api/v1/sessions/{id}/activity`; the daemon owns the store and fans out `session.updated` via CDC. 5. Always exits 0 — a failed delivery must never break the user's agent. Failures are appended to `hooks.log` under `AO_DATA_DIR` and surfaced by the `hooks-log` check in `ao doctor`. -The daemon also records the FIRST callback per spawn/restore (`first_signal_at`); a live session that has never signaled past a grace period derives the `no_signal` display status instead of a confident `idle`, so a broken hook pipeline is visible on the dashboard. +The daemon also records the FIRST callback per spawn/restore (`first_signal_at`); a live session that has never signaled past a grace period derives the `no_signal` display status instead of a confident `idle`, so a broken hook pipeline is visible on the dashboard. The downgrade only applies to harnesses with a registered activity deriver (`activitydispatch.SupportsHarness`, injected into the session service at daemon wiring) — for a hook-less adapter, permanent silence is normal and stays `idle`. Known limitation: neither Codex nor Claude Code derives an activity state from `SessionStart`, so a restored session the user never prompts has nothing to signal and shows `no_signal` once the grace passes; a receipt-only session-start signal would close that gap. Persisting hook-derived metadata (`agentSessionId`, `title`, `summary`) into the session row is **not implemented yet** — until it is, adapters whose restore needs the native session id (e.g. `codex resume`) fall back to a fresh launch.