diff --git a/cmd/agentsview/main_test.go b/cmd/agentsview/main_test.go index 59b4850e7..0c9d30a78 100644 --- a/cmd/agentsview/main_test.go +++ b/cmd/agentsview/main_test.go @@ -592,10 +592,49 @@ func TestCollectWatchRootsHermesSessionsWatchesStateDBParent(t *testing.T) { assert.Equal(t, []string{sessionsDir}, roots[1].dirs) } -func TestCollectWatchRootsUsesProviderWatchPlan(t *testing.T) { +func TestCollectWatchRootsUsesCoworkProviderRecursiveRoot(t *testing.T) { root := t.TempDir() - for _, dir := range []string{"brain", "conversations", "implicit"} { - require.NoError(t, os.Mkdir(filepath.Join(root, dir), 0o755), "mkdir %s", dir) + cfg := config.Config{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCowork: {root}, + }, + } + + roots, unwatchedDirs := collectWatchRoots(cfg) + + require.Empty(t, unwatchedDirs, "cowork root should be watched directly") + got, ok := findCollectedWatchRoot(roots, root) + require.True(t, ok, "cowork provider WatchPlan root not collected") + assert.False(t, got.shallow, + "cowork provider recursive WatchPlan must override legacy ShallowWatch") + assert.Equal(t, []string{root}, got.dirs) +} + +func TestCollectWatchRootsUsesGeminiProviderMetadataRoot(t *testing.T) { + root := t.TempDir() + tmpRoot := filepath.Join(root, "tmp") + require.NoError(t, os.Mkdir(tmpRoot, 0o755), "mkdir tmp") + cfg := config.Config{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {root}, + }, + } + + roots, unwatchedDirs := collectWatchRoots(cfg) + + require.Empty(t, unwatchedDirs, "all gemini provider roots exist") + metadataRoot, ok := findCollectedWatchRoot(roots, root) + require.True(t, ok, "gemini provider metadata root not collected") + assert.True(t, metadataRoot.shallow) + tmp, ok := findCollectedWatchRoot(roots, tmpRoot) + require.True(t, ok, "gemini provider recursive tmp root not collected") + assert.False(t, tmp.shallow) +} + +func TestCollectWatchRootsUsesAntigravityCLIHistoryRoot(t *testing.T) { + root := t.TempDir() + 
for _, subdir := range []string{"brain", "conversations", "implicit"} { + require.NoError(t, os.Mkdir(filepath.Join(root, subdir), 0o755)) } cfg := config.Config{ AgentDirs: map[parser.AgentType][]string{ @@ -605,16 +644,44 @@ func TestCollectWatchRootsUsesProviderWatchPlan(t *testing.T) { roots, unwatchedDirs := collectWatchRoots(cfg) - require.Empty(t, unwatchedDirs, "unwatched dirs before watcher setup") - require.Len(t, roots, 4) - assert.Equal(t, filepath.Join(root, "brain"), roots[0].root) - assert.False(t, roots[0].shallow) - assert.Equal(t, filepath.Join(root, "conversations"), roots[1].root) - assert.True(t, roots[1].shallow) - assert.Equal(t, root, roots[2].root) - assert.True(t, roots[2].shallow, "history.jsonl root should be watched shallowly") - assert.Equal(t, filepath.Join(root, "implicit"), roots[3].root) - assert.True(t, roots[3].shallow) + require.Empty(t, unwatchedDirs, "all antigravity cli provider roots exist") + historyRoot, ok := findCollectedWatchRoot(roots, root) + require.True(t, ok, "antigravity cli history.jsonl root not collected") + assert.True(t, historyRoot.shallow) + conversations, ok := findCollectedWatchRoot( + roots, filepath.Join(root, "conversations"), + ) + require.True(t, ok, "antigravity cli conversations root not collected") + assert.True(t, conversations.shallow) + brain, ok := findCollectedWatchRoot(roots, filepath.Join(root, "brain")) + require.True(t, ok, "antigravity cli brain root not collected") + assert.False(t, brain.shallow) +} + +func TestMissingWatchRootCoverageDoesNotTreatShallowAncestorAsRecursive(t *testing.T) { + root := filepath.Clean(filepath.Join(t.TempDir(), "state")) + shallowRoots := []watchRoot{{root: root, shallow: true}} + recursiveRoots := []watchRoot{{root: root, shallow: false}} + + assert.True(t, + pathCoveredByAnyWatchRootCreation(filepath.Join(root, "sessions"), shallowRoots), + "shallow roots can observe immediate child creation") + assert.False(t, + 
pathCoveredByAnyWatchRootCreation(filepath.Join(root, "nested", "sessions"), shallowRoots), + "shallow ancestors must not be treated like recursive watches") + assert.True(t, + pathCoveredByAnyWatchRootCreation(filepath.Join(root, "nested", "sessions"), recursiveRoots), + "recursive roots cover nested missing roots") +} + +func findCollectedWatchRoot(roots []watchRoot, path string) (watchRoot, bool) { + path = filepath.Clean(path) + for _, root := range roots { + if filepath.Clean(root.root) == path { + return root, true + } + } + return watchRoot{}, false } func TestResyncCoversSignals(t *testing.T) { diff --git a/cmd/agentsview/parse_diff.go b/cmd/agentsview/parse_diff.go index c9c9d8248..2b14da3c6 100644 --- a/cmd/agentsview/parse_diff.go +++ b/cmd/agentsview/parse_diff.go @@ -268,9 +268,6 @@ func parseDiffAgentSupported(def parser.AgentDef) bool { if !def.FileBased { return false } - if def.DiscoverFunc != nil { - return true - } switch parser.ProviderMigrationModes()[def.Type] { case parser.ProviderMigrationProviderAuthoritative: _, ok := parser.ProviderFactoryByType(def.Type) diff --git a/cmd/agentsview/parse_diff_test.go b/cmd/agentsview/parse_diff_test.go index 3b9567167..78dfba982 100644 --- a/cmd/agentsview/parse_diff_test.go +++ b/cmd/agentsview/parse_diff_test.go @@ -163,6 +163,28 @@ func TestParseDiffAgentTypes(t *testing.T) { } } +func TestParseDiffSupportedAgentsIncludesProviderAuthoritativeAgents(t *testing.T) { + supported := parseDiffSupportedAgents() + modes := parser.ProviderMigrationModes() + // Build the expected set from the registry so the contract covers every + // current file-based, provider-authoritative agent and stays correct as + // the migration manifest changes, rather than a hand-maintained subset. 
+ checked := 0 + for _, def := range parser.Registry { + if !def.FileBased || + modes[def.Type] != parser.ProviderMigrationProviderAuthoritative { + continue + } + checked++ + assert.True(t, parseDiffAgentSupported(def), + "parse-diff support must include provider-authoritative %s", def.Type) + assert.Contains(t, supported, string(def.Type), + "parse-diff supported list must include %s", def.Type) + } + require.Positive(t, checked, + "expected at least one file-based provider-authoritative agent") +} + func TestParseDiff_EmptyArchiveRunsClean(t *testing.T) { isolateParseDiffEnv(t) diff --git a/cmd/agentsview/token_use.go b/cmd/agentsview/token_use.go index 8d3799603..18cda016d 100644 --- a/cmd/agentsview/token_use.go +++ b/cmd/agentsview/token_use.go @@ -43,10 +43,9 @@ const ( // multiple suffix matches exist without an exact row, the // most recent wins and an ambiguity warning is emitted. // 3. Canonical disk probe: when input begins with a registered -// agent prefix, strip the prefix and call that agent's -// FindSourceFunc so a truly canonical-but-unsynced ID on disk -// still resolves. -// 4. Raw disk probe: call every file-based agent's FindSourceFunc +// agent prefix, strip the prefix and ask that agent's disk source +// lookup so a truly canonical-but-unsynced ID on disk still resolves. +// 4. Raw disk probe: ask every file-based agent's disk source lookup // with the raw input; the first hit yields "<agent>:<raw>". // 5. No match anywhere: returned unchanged with known=false. // @@ -124,13 +123,9 @@ func resolveRawSessionID( } // agentHasDiskSourceLookup reports whether a session source can be located on -// disk by raw ID for the agent: via the legacy AgentDef FindSourceFunc hook, or -// via a provider-authoritative provider's FindSource for agents whose lookup was -// folded onto the provider (e.g. Codex). +// disk by raw ID for the agent, via its provider-authoritative provider's +// FindSource.
func agentHasDiskSourceLookup(def parser.AgentDef) bool { - if def.FindSourceFunc != nil { - return true - } if parser.ProviderMigrationModes()[def.Type] != parser.ProviderMigrationProviderAuthoritative { return false @@ -140,13 +135,9 @@ func agentHasDiskSourceLookup(def parser.AgentDef) bool { } // findAgentSourceFile resolves a raw agent session ID to an on-disk source path -// under dir, using the legacy FindSourceFunc when present and otherwise the -// provider's FindSource (RawSessionID lookup). Returns "" when no source -// resolves or the agent has no on-disk lookup. +// under dir via the provider's FindSource (RawSessionID lookup). Returns "" +// when no source resolves or the agent has no on-disk lookup. func findAgentSourceFile(def parser.AgentDef, dir, rawID string) string { - if def.FindSourceFunc != nil { - return def.FindSourceFunc(dir, rawID) - } factory, ok := parser.ProviderFactoryByType(def.Type) if !ok { return "" diff --git a/cmd/agentsview/token_use_test.go b/cmd/agentsview/token_use_test.go index 862fb1e07..13c53ae77 100644 --- a/cmd/agentsview/token_use_test.go +++ b/cmd/agentsview/token_use_test.go @@ -228,9 +228,8 @@ func TestResolveSessionID_CanonicalCodexID_OnDiskNotInDB(t *testing.T) { ctx := context.Background() // Canonical "codex:<uuid>" not yet synced but present on - // disk must resolve via the canonical disk probe — which - // strips the prefix before calling FindSourceFunc (the - // underlying finder rejects colon-bearing IDs). + // disk must resolve via the canonical disk probe, which strips + // the prefix before asking the agent source lookup.
codexDir := filepath.Join(t.TempDir(), "codex-sessions") uuid := "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" dayDir := filepath.Join(codexDir, "2026", "04", "17") @@ -249,6 +248,39 @@ func TestResolveSessionID_CanonicalCodexID_OnDiskNotInDB(t *testing.T) { assert.True(t, known, "canonical disk probe") } +func TestResolveSessionID_ProviderAuthoritativeCursorOnDiskNotInDB(t *testing.T) { + d := newTestDB(t) + ctx := context.Background() + + cursorDir := t.TempDir() + rawID := "provider-cursor" + transcriptPath := filepath.Join( + cursorDir, + "Users-fiona-Documents-demo", + "agent-transcripts", + rawID+".jsonl", + ) + require.NoError(t, os.MkdirAll(filepath.Dir(transcriptPath), 0o755)) + require.NoError(t, os.WriteFile( + transcriptPath, + []byte(`{"role":"user","content":"hi"}`+"\n"), + 0o644, + )) + + agentDirs := map[parser.AgentType][]string{ + parser.AgentCursor: {cursorDir}, + } + got, known := resolveRawSessionID(ctx, d, agentDirs, rawID) + assert.Equal(t, "cursor:"+rawID, got, + "provider FindSource should resolve unsynced raw cursor IDs") + assert.True(t, known, "provider disk probe") + + got, known = resolveRawSessionID(ctx, d, agentDirs, "cursor:"+rawID) + assert.Equal(t, "cursor:"+rawID, got, + "canonical provider ID should resolve via provider FindSource") + assert.True(t, known, "canonical provider disk probe") +} + func TestResolveSessionID_RawOpenClawCollidesWithCodexPrefix(t *testing.T) { d := newTestDB(t) ctx := context.Background() @@ -291,6 +323,29 @@ func TestResolveSessionID_UnderscoreID_NoFalseMatch(t *testing.T) { assert.True(t, known) } +func TestAgentHasDiskSourceLookupIncludesProviderAuthoritativeAgents(t *testing.T) { + for _, agent := range []parser.AgentType{ + parser.AgentGptme, + parser.AgentPi, + parser.AgentOMP, + parser.AgentWorkBuddy, + parser.AgentCortex, + parser.AgentKimi, + parser.AgentQwenPaw, + parser.AgentOpenHands, + parser.AgentCursor, + parser.AgentVibe, + parser.AgentClaude, + parser.AgentCowork, + parser.AgentHermes, + } { + 
def, ok := parser.AgentByType(agent) + require.True(t, ok, "agent %s", agent) + assert.True(t, agentHasDiskSourceLookup(def), + "token-use disk probe must include provider-authoritative %s", agent) + } +} + func TestUsageExitCode_TokenData(t *testing.T) { u := &db.SessionUsage{HasTokenData: true} assert.Equal(t, tokenUseExitOK, usageExitCode(u)) diff --git a/docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md b/docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md index 931c15484..7290044c7 100644 --- a/docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md +++ b/docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md @@ -1,5 +1,20 @@ # Provider Dual-Run Harness Implementation Plan +> **Status:** Superseded for the `provider-explicit-registry` stack tip. This +> plan records the historical root-harness slice that was implemented on the +> lower `provider-facade-core` branch. Do not execute these steps against the +> final stack tip: `ProviderMigrationLegacyOnly` has been removed there, +> concrete parse-capable providers are expected to be `provider-authoritative`, +> and Claude.ai / ChatGPT are `import-only`. The stack-tip legacy cleanup in +> kata issue `n489` is complete; remaining provider-facade tracking lives in the +> caller and provider-group tasks listed in the provider facade design spec. +> +> For new provider migrations after this stack, use the current +> `internal/parser/provider_migration.go` contract: a lower branch may use +> `shadow-compare` as a transitional mode while parity is established, but the +> explicit-registry tip must reject `"legacy-only"` and keep the final manifest +> authoritative. + > **For agentic workers:** REQUIRED SUB-SKILL: Use > superpowers:subagent-driven-development (recommended) or > superpowers:executing-plans to implement this plan task-by-task. 
Steps use diff --git a/internal/config/config_test.go b/internal/config/config_test.go index ae57484cf..6f989af3f 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -272,9 +272,13 @@ func TestDefault_IncludesCodexArchivedSessionsDir(t *testing.T) { } func TestDefault_SkipsAiderUntilConfigured(t *testing.T) { + t.Setenv("AIDER_DIR", "") cfg, err := Default() require.NoError(t, err) + // Aider has no safe default root: a passive viewer must not enumerate + // $HOME (macOS privacy prompts), so it stays unresolved until the user + // opts in via AIDER_DIR or aider_dirs. assert.Empty(t, cfg.ResolveDirs(parser.AgentAider)) assert.False(t, cfg.IsUserConfigured(parser.AgentAider)) } diff --git a/internal/importer/importer.go b/internal/importer/importer.go index f0ed43ecd..8d4685270 100644 --- a/internal/importer/importer.go +++ b/internal/importer/importer.go @@ -111,7 +111,20 @@ func ImportClaudeAI( } }() - err := parser.ParseClaudeAIExport(r, func( + provider, ok := parser.NewProvider( + parser.AgentClaudeAI, parser.ProviderConfig{}, + ) + if !ok { + return stats, fmt.Errorf("claude.ai provider unavailable") + } + exporter, ok := provider.(parser.ClaudeAIExportParser) + if !ok { + return stats, fmt.Errorf( + "claude.ai provider does not support exports", + ) + } + + err := exporter.ParseClaudeAIExport(r, func( result parser.ParseResult, ) error { if ctx.Err() != nil { @@ -279,7 +292,20 @@ func ImportChatGPT( assetsDir: assetsDir, } - err := parser.ParseChatGPTExport(dir, resolver, + provider, ok := parser.NewProvider( + parser.AgentChatGPT, parser.ProviderConfig{}, + ) + if !ok { + return stats, fmt.Errorf("chatgpt provider unavailable") + } + exporter, ok := provider.(parser.ChatGPTExportParser) + if !ok { + return stats, fmt.Errorf( + "chatgpt provider does not support exports", + ) + } + + err := exporter.ParseChatGPTExport(dir, resolver, func(result parser.ParseResult) error { if ctx.Err() != nil { return ctx.Err() diff --git 
a/internal/parser/aider.go b/internal/parser/aider.go index cb25ab519..18567df71 100644 --- a/internal/parser/aider.go +++ b/internal/parser/aider.go @@ -36,7 +36,7 @@ import ( // sessiondex Rust adapter, which emits one session per run via a // path#idx key). agentsview already supports multiple sessions per // physical file via the virtual-path fan-out pattern used by Shelley and -// Zed, so aider reuses it: DiscoverAiderSessions returns the single +// Zed, so aider reuses it: discoverAiderSessions returns the single // physical file and the sync engine fans it out to one ParseResult per // run. A run with no parseable turns (e.g. a header-only run) yields no // session. Edited files are best-effort, taken from aider's own @@ -616,23 +616,23 @@ func buildAiderRunSession( return sess, messages } -// ParseAiderRun parses a single run (by positional index) out of a +// parseAiderRun parses a single run (by positional index) out of a // history file into one session. The physical file is read and split on // every call; callers parsing every run of a file should prefer -// ParseAiderRuns, which reads the file once. Returns (nil, nil, nil) +// parseAiderRuns, which reads the file once. Returns (nil, nil, nil) // when the run does not exist or has no parseable turns. -func ParseAiderRun( +func parseAiderRun( path string, idx int, machine string, ) (*ParsedSession, []ParsedMessage, error) { - return ParseAiderRunWithID(path, "", idx, machine) + return parseAiderRunWithID(path, "", idx, machine) } -// ParseAiderRunWithID is ParseAiderRun with an explicit canonical identity +// parseAiderRunWithID is parseAiderRun with an explicit canonical identity // path used to derive the stable session ID. idPath should be the run's // canonical physical history path (e.g. the remote path during SSH sync); // pass "" to fall back to the on-disk path, which is the local behavior. // The file is always read from path; only the ID hash uses idPath. 
-func ParseAiderRunWithID( +func parseAiderRunWithID( path, idPath string, idx int, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) @@ -657,16 +657,16 @@ func ParseAiderRunWithID( return sess, msgs, nil } -// ParseAiderRuns reads a history file once and parses every run into its +// parseAiderRuns reads a history file once and parses every run into its // own ParseResult, in file order. Runs with no parseable turns are // dropped. Returns nil for an unreadable or run-less file. This is the -// fan-out entry point used by the sync engine; ParseAiderRun is the +// fan-out entry point used by the sync engine; parseAiderRun is the // single-run lookup used when resolving one virtual path. -func ParseAiderRuns(path, machine string) ([]ParseResult, error) { - return ParseAiderRunsWithID(path, "", machine) +func parseAiderRuns(path, machine string) ([]ParseResult, error) { + return parseAiderRunsWithID(path, "", machine) } -// ParseAiderRunsWithID is ParseAiderRuns with an explicit canonical +// parseAiderRunsWithID is parseAiderRuns with an explicit canonical // identity path used to derive stable session IDs for every run. idPath // should be the file's canonical physical history path (e.g. the remote // path during SSH sync, where path is a random temp extraction dir); pass @@ -674,7 +674,7 @@ func ParseAiderRuns(path, machine string) ([]ParseResult, error) { // file is always read from path; only the per-run ID hash uses idPath, so // the IDs stay stable across syncs that extract the file to a different // temp location. 
-func ParseAiderRunsWithID(path, idPath, machine string) ([]ParseResult, error) { +func parseAiderRunsWithID(path, idPath, machine string) ([]ParseResult, error) { info, err := os.Stat(path) if err != nil { return nil, fmt.Errorf("stat %s: %w", path, err) @@ -701,7 +701,7 @@ func ParseAiderRunsWithID(path, idPath, machine string) ([]ParseResult, error) { return results, nil } -// DiscoverAiderSessions walks root looking for .aider.chat.history.md +// discoverAiderSessions walks root looking for .aider.chat.history.md // files. aider is rootless (no central store), so this is a bounded, // depth-capped, symlink-safe walk: it descends at most aiderMaxWalkDepth // levels, never follows symlinks, skips a fixed set of large vendor / @@ -712,7 +712,7 @@ func ParseAiderRunsWithID(path, idPath, machine string) ([]ParseResult, error) { // surfaced, so a partial scan still indexes whatever it found. Each // discovered physical file is fanned out into one session per run by the // sync engine. -func DiscoverAiderSessions(root string) []DiscoveredFile { +func discoverAiderSessions(root string) []DiscoveredFile { if root == "" { return nil } @@ -859,18 +859,18 @@ func aiderShouldSkipProtectedHomeDirs(root, home, goos string) bool { return filepath.Clean(root) == filepath.Clean(home) } -// FindAiderSourceFile resolves a single aider run's virtual source path +// findAiderSourceFile resolves a single aider run's virtual source path // ("<historyPath>#<idx>") from a root directory and a raw session ID (the // per-run hash). It re-runs the bounded discovery walk to find candidate // history files, then, for each, reads and splits it once to recompute // the per-run IDs and match rawID. It returns the matching virtual path, // or "" when nothing under root produces rawID. The physical file is // stat-ed via os.Stat (not re-walked) for the per-run parse downstream.
-func FindAiderSourceFile(root, rawID string) string { +func findAiderSourceFile(root, rawID string) string { if root == "" || rawID == "" { return "" } - for _, f := range DiscoverAiderSessions(root) { + for _, f := range discoverAiderSessions(root) { if path, ok := AiderVirtualPathForRawID(f.Path, rawID); ok { return path } diff --git a/internal/parser/aider_omp_reasonix_provider_test.go b/internal/parser/aider_omp_reasonix_provider_test.go new file mode 100644 index 000000000..30b94e68e --- /dev/null +++ b/internal/parser/aider_omp_reasonix_provider_test.go @@ -0,0 +1,373 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- OMP ------------------------------------------------------------------- + +// TestOMPProviderParsesWithOMPIdentity verifies the OhMyPi agent is served by +// the parameterized Pi provider: it discovers the same JSONL layout but stamps +// the omp agent type and omp: session ID prefix. 
+func TestOMPProviderParsesWithOMPIdentity(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "encoded-cwd", "session-omp.jsonl") + writeSourceFile(t, sourcePath, piProviderFixture("session-omp")) + + provider, ok := NewProvider(AgentOMP, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentOMP, discovered[0].Provider) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: discovered[0], + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + sess := outcome.Results[0].Result.Session + assert.Equal(t, "omp:session-omp", sess.ID) + assert.Equal(t, AgentOMP, sess.Agent) +} + +// --- Reasonix -------------------------------------------------------------- + +func writeReasonixSession(t *testing.T, dir, sessionID string) string { + t.Helper() + transcript := filepath.Join(dir, sessionID+".jsonl") + writeSourceFile(t, transcript, strings.Join([]string{ + `{"role":"user","content":"explain the bug"}`, + `{"role":"assistant","content":"here is the fix","reasoning_content":"think"}`, + }, "\n")) + meta := transcript + ".meta" + writeSourceFile(t, meta, `{"id":"`+sessionID+ + `","model":"claude","topic_title":"Bug fix","workspace_root":"/home/u/proj",`+ + `"created_at":"2026-02-01T10:00:00Z","updated_at":"2026-02-01T10:05:00Z"}`) + return transcript +} + +func TestReasonixProviderDiscoverAndParse(t *testing.T) { + root := t.TempDir() + transcript := writeReasonixSession( + t, filepath.Join(root, "projects", "proj", "sessions"), "session-123", + ) + + provider, ok := NewProvider(AgentReasonix, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentReasonix, 
discovered[0].Provider) + assert.Equal(t, transcript, discovered[0].DisplayPath) + assert.Equal(t, "proj", discovered[0].ProjectHint) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: discovered[0], + Fingerprint: SourceFingerprint{Hash: "deadbeef"}, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + sess := outcome.Results[0].Result.Session + assert.Equal(t, "reasonix:session-123", sess.ID) + assert.Equal(t, AgentReasonix, sess.Agent) + assert.Equal(t, "Bug fix", sess.SessionName) + assert.Equal(t, "deadbeef", sess.File.Hash) +} + +// TestReasonixProviderFingerprintFoldsSidecar verifies the composite +// fingerprint sums the transcript and its .jsonl.meta sidecar sizes and takes +// the later mtime, mirroring the legacy reasonixEffectiveInfo. +func TestReasonixProviderFingerprintFoldsSidecar(t *testing.T) { + root := t.TempDir() + transcript := writeReasonixSession( + t, filepath.Join(root, "sessions"), "session-fp", + ) + + provider, ok := NewProvider(AgentReasonix, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + transcriptInfo, err := os.Stat(transcript) + require.NoError(t, err) + metaInfo, err := os.Stat(transcript + ".meta") + require.NoError(t, err) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + + fp, err := provider.Fingerprint(context.Background(), discovered[0]) + require.NoError(t, err) + assert.Equal(t, transcriptInfo.Size()+metaInfo.Size(), fp.Size, + "composite size must include the sidecar") + assert.NotEmpty(t, fp.Hash) +} + +func TestReasonixProviderFingerprintHashChangesForSidecarOnlyChange(t *testing.T) { + root := t.TempDir() + transcript := writeReasonixSession( + t, filepath.Join(root, "sessions"), "session-fp-hash", + ) + + provider, ok := NewProvider(AgentReasonix, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + 
require.NoError(t, err) + require.Len(t, discovered, 1) + + before, err := provider.Fingerprint(context.Background(), discovered[0]) + require.NoError(t, err) + + writeSourceFile(t, transcript+".meta", `{"id":"session-fp-hash","model":"gpt-4.1",`+ + `"topic_title":"Updated","workspace_root":"/home/u/other",`+ + `"created_at":"2026-02-01T10:00:00Z","updated_at":"2026-02-01T10:10:00Z"}`) + + after, err := provider.Fingerprint(context.Background(), discovered[0]) + require.NoError(t, err) + assert.NotEqual(t, before.Hash, after.Hash, + "metadata-only changes must affect the composite fingerprint hash") +} + +// TestReasonixProviderChangedPathSidecar verifies a .jsonl.meta sidecar event +// classifies against its sibling transcript. +func TestReasonixProviderChangedPathSidecar(t *testing.T) { + root := t.TempDir() + transcript := writeReasonixSession( + t, filepath.Join(root, "projects", "proj", "sessions"), "session-cp", + ) + + provider, ok := NewProvider(AgentReasonix, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + sources, err := provider.SourcesForChangedPath(context.Background(), ChangedPathRequest{ + Path: transcript + ".meta", + EventKind: "write", + }) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, transcript, sources[0].DisplayPath) + assert.Equal(t, "proj", sources[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "session-cp", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, transcript, found.DisplayPath) +} + +func TestReasonixProviderChangedPathLayouts(t *testing.T) { + root := t.TempDir() + tests := []struct { + name string + dir string + sessionID string + wantProject string + }{ + { + name: "project bare", + dir: filepath.Join(root, "projects", "proj", "sessions"), + sessionID: "project-bare", + wantProject: "proj", + }, + { + name: "project nested", + dir: filepath.Join(root, "projects", "proj", "sessions", 
"project-nested"), + sessionID: "project-nested", + wantProject: "proj", + }, + { + name: "global", + dir: filepath.Join(root, "sessions"), + sessionID: "global-session", + }, + { + name: "archive", + dir: filepath.Join(root, "archive"), + sessionID: "archive-session", + }, + { + name: "subagent", + dir: filepath.Join(root, "sessions", "subagents"), + sessionID: "subagent-session", + }, + } + + provider, ok := NewProvider(AgentReasonix, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + transcript := writeReasonixSession(t, tt.dir, tt.sessionID) + sources, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: transcript, EventKind: "write"}, + ) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, transcript, sources[0].DisplayPath) + assert.Equal(t, tt.wantProject, sources[0].ProjectHint) + }) + } +} + +func TestReasonixProviderChangedPathDeletedSidecarAndTranscript(t *testing.T) { + root := t.TempDir() + transcript := writeReasonixSession( + t, filepath.Join(root, "projects", "proj", "sessions"), "session-delete", + ) + meta := transcript + ".meta" + provider, ok := NewProvider(AgentReasonix, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + require.NoError(t, os.Remove(meta)) + sources, err := provider.SourcesForChangedPath(context.Background(), ChangedPathRequest{ + Path: meta, + EventKind: "remove", + }) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, transcript, sources[0].DisplayPath, + "deleted sidecar events must reparse the live transcript") + + require.NoError(t, os.Remove(transcript)) + sources, err = provider.SourcesForChangedPath(context.Background(), ChangedPathRequest{ + Path: transcript, + EventKind: "remove", + }) + require.NoError(t, err) + require.Len(t, sources, 1, + "deleted transcripts remain candidates for the engine's remove filter") + assert.Equal(t, transcript, 
sources[0].DisplayPath) +} + +// --- Aider ----------------------------------------------------------------- + +func writeAiderProviderHistory(t *testing.T, repo string) string { + t.Helper() + path := filepath.Join(repo, AiderHistoryFileName()) + content := "# aider chat started at 2026-06-09 14:01:00\n" + + "#### first prompt\nanswer one\n" + + "# aider chat started at 2026-06-09 15:30:00\n" + + "#### second prompt\nanswer two\n" + writeSourceFile(t, path, content) + return path +} + +func TestAiderProviderDiscoverAndFanOut(t *testing.T) { + root := t.TempDir() + historyPath := writeAiderProviderHistory(t, filepath.Join(root, "myrepo")) + + provider, ok := NewProvider(AgentAider, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentAider, discovered[0].Provider) + assert.Equal(t, historyPath, discovered[0].DisplayPath) + + fp, err := provider.Fingerprint(context.Background(), discovered[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: discovered[0], + Fingerprint: fp, + }) + require.NoError(t, err) + assert.True(t, outcome.ForceReplace, "aider fan-out force-replaces") + require.Len(t, outcome.Results, 2, "two content runs produce two sessions") + for i, r := range outcome.Results { + hp, idx, ok := ParseAiderVirtualPath(r.Result.Session.File.Path) + require.True(t, ok) + assert.Equal(t, historyPath, hp) + assert.Equal(t, i, idx) + assert.True(t, strings.HasPrefix(r.Result.Session.ID, "aider:")) + } +} + +// TestAiderProviderFindSourceByRawID resolves a per-run session ID back to its +// virtual run source, then parses just that run. 
+func TestAiderProviderFindSourceByRawID(t *testing.T) { + root := t.TempDir() + writeAiderProviderHistory(t, filepath.Join(root, "myrepo")) + + provider, ok := NewProvider(AgentAider, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + outcome, err := provider.Parse(context.Background(), ParseRequest{Source: discovered[0]}) + require.NoError(t, err) + require.Len(t, outcome.Results, 2) + + rawID := strings.TrimPrefix(outcome.Results[1].Result.Session.ID, "aider:") + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: rawID, + }) + require.NoError(t, err) + require.True(t, ok) + _, idx, ok := ParseAiderVirtualPath(found.DisplayPath) + require.True(t, ok) + assert.Equal(t, 1, idx, "the second run resolves to run index 1") + + single, err := provider.Parse(context.Background(), ParseRequest{Source: found}) + require.NoError(t, err) + require.Len(t, single.Results, 1) + assert.Equal(t, outcome.Results[1].Result.Session.ID, + single.Results[0].Result.Session.ID) +} + +// TestAiderProviderRemoteIdentityStable verifies the PathRewriter-seeded +// identity keeps per-run session IDs stable when the same history file is read +// from different (temp) locations, mirroring SSH remote sync. 
+func TestAiderProviderRemoteIdentityStable(t *testing.T) { + rootA := t.TempDir() + rootB := t.TempDir() + writeAiderProviderHistory(t, filepath.Join(rootA, "myrepo")) + writeAiderProviderHistory(t, filepath.Join(rootB, "myrepo")) + + canonical := "host:/home/wes/myrepo/" + AiderHistoryFileName() + rewriter := func(string) string { return canonical } + + idsFor := func(root string) []string { + provider, ok := NewProvider(AgentAider, ProviderConfig{ + Roots: []string{root}, + PathRewriter: rewriter, + }) + require.True(t, ok) + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + outcome, err := provider.Parse(context.Background(), ParseRequest{Source: discovered[0]}) + require.NoError(t, err) + ids := make([]string, 0, len(outcome.Results)) + for _, r := range outcome.Results { + ids = append(ids, r.Result.Session.ID) + } + return ids + } + + assert.Equal(t, idsFor(rootA), idsFor(rootB), + "the canonical identity must keep run IDs stable across extraction dirs") +} diff --git a/internal/parser/aider_provider.go b/internal/parser/aider_provider.go new file mode 100644 index 000000000..0a34c312b --- /dev/null +++ b/internal/parser/aider_provider.go @@ -0,0 +1,274 @@ +package parser + +import ( + "fmt" + "os" + "path/filepath" + "strconv" +) + +// Aider appends every run to a single history file +// (.aider.chat.history.md). It is a multi-session container provider: +// discovery surfaces the history file as one source and Parse fans it out into +// one session per run, addressed by "#" virtual paths. All +// behavior is wired into the shared multi-session-container base via options. +// +// The PathRewriter (identity) maps an on-disk history path to its canonical +// stored form during remote sync, so per-run session IDs stay stable across +// syncs that extract the file to a different temp directory. It is threaded +// into the option closures by the build func and is nil for local sync. 
+func newAiderProviderFactory(def AgentDef) ProviderFactory { + return newMultiSessionProviderFactory( + def, + aiderProviderCapabilities(), + func(cfg ProviderConfig) multiSessionContainerSourceSet { + identity := cfg.PathRewriter + return newMultiSessionContainerSourceSet( + AgentAider, + cfg.Roots, + withContainerDiscovery(aiderDiscoverContainers), + withWatchRoots(aiderWatchRoots), + withChangedPathClassifier(aiderClassifyPath), + withMemberLookup( + func(root, rawID string) (multiSessionMatch, bool) { + return aiderFindMember(root, rawID, identity) + }, + ), + // A canonical remote-sync path (rewritten identity) must map + // back onto a local history file before it can classify. + withStoredPathFallback( + func(root, path string) (multiSessionMatch, bool) { + return aiderStoredPathFallback(root, path, identity) + }, + ), + withFingerprint(aiderFingerprintSource), + withContainerParse( + func(src multiSessionSource, req ParseRequest) ([]ParseResult, error) { + return aiderParseContainer(src, req.Machine, identity) + }, + ), + withMemberParse( + func(src multiSessionSource, req ParseRequest) (*ParseResult, error) { + return aiderParseMember(src, req.Machine, identity) + }, + ), + // Every run shares the history file's content hash, so a write + // re-parses and re-stamps every run. + withContainerHashStamping(), + ) + }, + ) +} + +func aiderDiscoverContainers(root string) []string { + sessions := discoverAiderSessions(root) + out := make([]string, 0, len(sessions)) + for _, df := range sessions { + out = append(out, df.Path) + } + return out +} + +func aiderWatchRoots(roots []string) []WatchRoot { + // Aider is rootless: history files live anywhere under a root. The legacy + // config marks it ShallowWatch, so watch each root non-recursively for the + // history filename and rely on periodic full-sync discovery for nested + // files, matching the prior behavior. 
+ out := make([]WatchRoot, 0, len(roots)) + for _, root := range roots { + out = append(out, WatchRoot{ + Path: root, + Recursive: false, + IncludeGlobs: []string{aiderHistoryFile}, + DebounceKey: string(AgentAider) + ":history:" + root, + }) + } + return out +} + +// aiderClassifyPath maps a stored or changed path to its history-file container +// and run. A virtual "#" path resolves to that single run; a bare +// history file resolves to the whole container (MemberID == ""). aider performs +// no existence check, so allowMissing is unused. +func aiderClassifyPath(root, path string, _ bool) (multiSessionMatch, bool) { + root = filepath.Clean(root) + if historyPath, idx, ok := ParseAiderVirtualPath(path); ok { + historyPath = filepath.Clean(historyPath) + if _, ok := relUnder(root, historyPath); !ok { + return multiSessionMatch{}, false + } + return multiSessionMatch{ + Path: path, + Container: historyPath, + MemberID: strconv.Itoa(idx), + }, true + } + path = filepath.Clean(path) + if filepath.Base(path) != aiderHistoryFile { + return multiSessionMatch{}, false + } + if _, ok := relUnder(root, path); !ok { + return multiSessionMatch{}, false + } + return multiSessionMatch{Path: path, Container: path}, true +} + +func aiderFindMember( + root, rawID string, identity func(string) string, +) (multiSessionMatch, bool) { + if rawID == "" { + return multiSessionMatch{}, false + } + for _, df := range discoverAiderSessions(root) { + virtualPath, ok := aiderVirtualPathForRawIDWithID( + df.Path, aiderIdentityForPath(df.Path, identity), rawID, + ) + if !ok { + continue + } + if match, ok := aiderClassifyPath(root, virtualPath, false); ok { + return match, true + } + } + return multiSessionMatch{}, false +} + +func aiderStoredPathFallback( + root, path string, identity func(string) string, +) (multiSessionMatch, bool) { + if path == "" { + return multiSessionMatch{}, false + } + for _, df := range discoverAiderSessions(root) { + localPath, ok := 
localAiderPathForCanonicalHint(df.Path, path, identity) + if !ok { + continue + } + if match, ok := aiderClassifyPath(root, localPath, false); ok { + return match, true + } + } + return multiSessionMatch{}, false +} + +// localAiderPathForCanonicalHint maps a canonical hint (the identity path, or a +// virtual run path built on it) back to the corresponding local history path or +// local virtual run path. It mirrors the legacy provider helper of the same +// name. +func localAiderPathForCanonicalHint( + historyPath, hint string, identity func(string) string, +) (string, bool) { + idPath := aiderIdentityForPath(historyPath, identity) + if idPath == "" { + idPath = aiderAbsPath(historyPath) + } + if hint == idPath { + return historyPath, true + } + hintHistoryPath, idx, ok := ParseAiderVirtualPath(hint) + if !ok || hintHistoryPath != idPath { + return "", false + } + return AiderVirtualPath(historyPath, idx), true +} + +// aiderIdentityForPath returns the canonical identity path used to seed per-run +// session IDs: the rewritten path during remote sync, or "" locally (which +// makes the parser fall back to the on-disk absolute path). It mirrors the +// legacy Engine.aiderIdentityPath / aiderProvider.identityPath. 
+func aiderIdentityForPath(historyPath string, identity func(string) string) string { + if identity == nil { + return "" + } + return identity(historyPath) +} + +func aiderVirtualPathForRawIDWithID( + historyPath string, + idPath string, + rawID string, +) (string, bool) { + data, err := os.ReadFile(historyPath) + if err != nil { + return "", false + } + runs := splitAiderRuns(string(data)) + ordinals := aiderEqualHeaderOrdinals(runs) + identity := aiderIdentityPath(historyPath, idPath) + for idx, run := range runs { + if aiderRawID(identity, run.rawHeader, ordinals[idx]) == rawID { + return AiderVirtualPath(historyPath, idx), true + } + } + return "", false +} + +func aiderFingerprintSource(src multiSessionSource) (SourceFingerprint, error) { + info, err := os.Stat(src.Container) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", src.Container, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf( + "stat %s: source is a directory", src.Container, + ) + } + hash, err := hashJSONLSourceFile(src.Container) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + Hash: hash, + }, nil +} + +func aiderParseMember( + src multiSessionSource, machine string, identity func(string) string, +) (*ParseResult, error) { + idx, err := strconv.Atoi(src.MemberID) + if err != nil { + return nil, fmt.Errorf("invalid aider run index %q: %w", src.MemberID, err) + } + idPath := aiderIdentityForPath(src.Container, identity) + sess, msgs, err := parseAiderRunWithID(src.Container, idPath, idx, machine) + if err != nil { + return nil, err + } + if sess == nil { + return nil, nil + } + return &ParseResult{Session: *sess, Messages: msgs}, nil +} + +func aiderParseContainer( + src multiSessionSource, machine string, identity func(string) string, +) ([]ParseResult, error) { + idPath := aiderIdentityForPath(src.Container, identity) + return 
parseAiderRunsWithID(src.Container, idPath, machine) +} + +func aiderProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilityNotApplicable, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilitySupported, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilitySupported, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + }, + } +} diff --git a/internal/parser/aider_provider_test.go b/internal/parser/aider_provider_test.go new file mode 100644 index 000000000..288a302a0 --- /dev/null +++ b/internal/parser/aider_provider_test.go @@ -0,0 +1,78 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAiderProviderFindSourceUsesCanonicalIdentity(t *testing.T) { + root := t.TempDir() + repo := filepath.Join(root, "repo") + require.NoError(t, os.MkdirAll(repo, 0o755)) + historyPath := filepath.Join(repo, AiderHistoryFileName()) + require.NoError(t, os.WriteFile(historyPath, []byte(strings.Join([]string{ + "# aider chat started at 2026-06-09 14:01:00", + "#### canonical prompt", + "canonical answer", + }, "\n")+"\n"), 0o644)) + + remoteHistoryPath := "/remote/repo/" + AiderHistoryFileName() + rewriter := func(path string) string { + if history, idx, ok := ParseAiderVirtualPath(path); ok && history == historyPath { + return AiderVirtualPath(remoteHistoryPath, idx) + } + if path == historyPath { + return remoteHistoryPath + } + return path + } + provider, ok := 
NewProvider(AgentAider, ProviderConfig{ + Roots: []string{root}, + Machine: "remote-host", + PathRewriter: rewriter, + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: discovered[0], + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0].Result + rawID := strings.TrimPrefix(result.Session.ID, "aider:") + localVirtualPath := result.Session.File.Path + remoteVirtualPath := rewriter(localVirtualPath) + + foundByRawID, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: rawID}, + ) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, localVirtualPath, foundByRawID.DisplayPath) + + foundByStoredPath, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{StoredFilePath: remoteVirtualPath}, + ) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, localVirtualPath, foundByStoredPath.DisplayPath) + + foundByFingerprintKey, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{FingerprintKey: remoteVirtualPath}, + ) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, localVirtualPath, foundByFingerprintKey.DisplayPath) +} diff --git a/internal/parser/aider_test.go b/internal/parser/aider_test.go index 267adbc1b..b003b9e39 100644 --- a/internal/parser/aider_test.go +++ b/internal/parser/aider_test.go @@ -25,7 +25,7 @@ func fixtureAider() string { // its own StartedAt and FirstMessage. The header-only trailing run // contributes no session. 
func TestParseAiderRunsPerRun(t *testing.T) { - results, err := ParseAiderRuns(fixtureAider(), "testmachine") + results, err := parseAiderRuns(fixtureAider(), "testmachine") require.NoError(t, err) // Three runs in the file, but the trailing header-only run has no // turns, so only two sessions are emitted. @@ -95,7 +95,7 @@ func TestParseAiderRunsPerRun(t *testing.T) { // TestParseAiderRunSingle parses one run out of a file by index. func TestParseAiderRunSingle(t *testing.T) { - sess, msgs, err := ParseAiderRun(fixtureAider(), 1, "m") + sess, msgs, err := parseAiderRun(fixtureAider(), 1, "m") require.NoError(t, err) require.NotNil(t, sess) require.NotEmpty(t, msgs) @@ -104,13 +104,13 @@ func TestParseAiderRunSingle(t *testing.T) { time.Date(2026, 6, 9, 15, 30, 0, 0, time.UTC), sess.StartedAt) // The trailing header-only run (index 2) yields no session. - sess2, msgs2, err := ParseAiderRun(fixtureAider(), 2, "m") + sess2, msgs2, err := parseAiderRun(fixtureAider(), 2, "m") require.NoError(t, err) assert.Nil(t, sess2) assert.Empty(t, msgs2) // Out-of-range indices are tolerated, not errors. 
- sess3, _, err := ParseAiderRun(fixtureAider(), 99, "m") + sess3, _, err := parseAiderRun(fixtureAider(), 99, "m") require.NoError(t, err) assert.Nil(t, sess3) } @@ -132,7 +132,7 @@ func TestAiderSessionIDStableOnAppend(t *testing.T) { "#### second prompt\nanswer two\n" require.NoError(t, os.WriteFile(path, []byte(base), 0o644)) - before, err := ParseAiderRuns(path, "m") + before, err := parseAiderRuns(path, "m") require.NoError(t, err) require.Len(t, before, 2) id0, id1 := before[0].Session.ID, before[1].Session.ID @@ -143,7 +143,7 @@ func TestAiderSessionIDStableOnAppend(t *testing.T) { "#### third prompt\nanswer three\n" require.NoError(t, os.WriteFile(path, []byte(appended), 0o644)) - after, err := ParseAiderRuns(path, "m") + after, err := parseAiderRuns(path, "m") require.NoError(t, err) require.Len(t, after, 3) @@ -170,7 +170,7 @@ func TestAiderSessionIDStableOnEarlyRemoval(t *testing.T) { "#### second prompt\nanswer two\n" require.NoError(t, os.WriteFile(path, []byte(run0+run1), 0o644)) - before, err := ParseAiderRuns(path, "m") + before, err := parseAiderRuns(path, "m") require.NoError(t, err) require.Len(t, before, 2) secondID := before[1].Session.ID @@ -178,7 +178,7 @@ func TestAiderSessionIDStableOnEarlyRemoval(t *testing.T) { // Remove the first run; the second run is now positionally index 0 but // must keep its original ID. 
require.NoError(t, os.WriteFile(path, []byte(run1), 0o644)) - after, err := ParseAiderRuns(path, "m") + after, err := parseAiderRuns(path, "m") require.NoError(t, err) require.Len(t, after, 1) assert.Equal(t, secondID, after[0].Session.ID, @@ -200,14 +200,14 @@ func TestAiderEqualHeaderRunsGetStableDistinctIDs(t *testing.T) { "#### prompt b\nanswer b\n" require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - r1, err := ParseAiderRuns(path, "m") + r1, err := parseAiderRuns(path, "m") require.NoError(t, err) require.Len(t, r1, 2) assert.NotEqual(t, r1[0].Session.ID, r1[1].Session.ID, "equal-header runs disambiguate by ordinal") // Stable across re-parse. - r2, err := ParseAiderRuns(path, "m") + r2, err := parseAiderRuns(path, "m") require.NoError(t, err) require.Len(t, r2, 2) assert.Equal(t, r1[0].Session.ID, r2[0].Session.ID) @@ -218,7 +218,7 @@ func TestAiderEqualHeaderRunsGetStableDistinctIDs(t *testing.T) { // test for SSH sync. During remote sync the history file is extracted to a // RANDOM local temp dir, so hashing the on-disk path would re-key the run on // every sync. Passing a canonical identity path (the remote physical path) -// to ParseAiderRunsWithID must produce the SAME ID regardless of where the +// to parseAiderRunsWithID must produce the SAME ID regardless of where the // file physically lives, while the plain on-disk parse (local behavior) // produces DIFFERENT IDs for the two locations. func TestAiderSessionIDStableAcrossExtractionDirs(t *testing.T) { @@ -245,10 +245,10 @@ func TestAiderSessionIDStableAcrossExtractionDirs(t *testing.T) { // syncs regardless of the temp extraction dir. 
const identity = "host:/home/wes/myrepo/.aider.chat.history.md" - withIDa, err := ParseAiderRunsWithID(pathA, identity, "m") + withIDa, err := parseAiderRunsWithID(pathA, identity, "m") require.NoError(t, err) require.Len(t, withIDa, 2) - withIDb, err := ParseAiderRunsWithID(pathB, identity, "m") + withIDb, err := parseAiderRunsWithID(pathB, identity, "m") require.NoError(t, err) require.Len(t, withIDb, 2) @@ -260,11 +260,11 @@ func TestAiderSessionIDStableAcrossExtractionDirs(t *testing.T) { // Sanity: the ID is derived from the identity path, not the temp path. // Without an identity path (local behavior), the two extraction paths // produce DIFFERENT IDs -- exactly the instability the identity path - // fixes. ParseAiderRuns is the empty-identity passthrough. - localA, err := ParseAiderRuns(pathA, "m") + // fixes. parseAiderRuns is the empty-identity passthrough. + localA, err := parseAiderRuns(pathA, "m") require.NoError(t, err) require.Len(t, localA, 2) - localB, err := ParseAiderRuns(pathB, "m") + localB, err := parseAiderRuns(pathB, "m") require.NoError(t, err) require.Len(t, localB, 2) assert.NotEqual(t, localA[0].Session.ID, localB[0].Session.ID, @@ -272,8 +272,8 @@ func TestAiderSessionIDStableAcrossExtractionDirs(t *testing.T) { assert.NotEqual(t, withIDa[0].Session.ID, localA[0].Session.ID, "identity-path ID differs from on-disk-path ID") - // ParseAiderRunWithID (single-run) must agree with the fan-out variant. - single, _, err := ParseAiderRunWithID(pathB, identity, 0, "m") + // parseAiderRunWithID (single-run) must agree with the fan-out variant. 
+ single, _, err := parseAiderRunWithID(pathB, identity, 0, "m") require.NoError(t, err) require.NotNil(t, single) assert.Equal(t, withIDa[0].Session.ID, single.ID, @@ -298,7 +298,7 @@ func TestAiderSameHeaderEarlyRemovalRekeysSiblings(t *testing.T) { runC := hdr + "#### prompt c\nanswer c\n" require.NoError(t, os.WriteFile(path, []byte(runA+runB+runC), 0o644)) - before, err := ParseAiderRuns(path, "m") + before, err := parseAiderRuns(path, "m") require.NoError(t, err) require.Len(t, before, 3) idB := before[1].Session.ID @@ -306,7 +306,7 @@ func TestAiderSameHeaderEarlyRemovalRekeysSiblings(t *testing.T) { // Remove the first same-header run; runs b and c each shift down one // equal-header ordinal. require.NoError(t, os.WriteFile(path, []byte(runB+runC), 0o644)) - after, err := ParseAiderRuns(path, "m") + after, err := parseAiderRuns(path, "m") require.NoError(t, err) require.Len(t, after, 2) @@ -334,7 +334,7 @@ func TestDiscoverAiderFindsFilesAtMaxDepth(t *testing.T) { require.NoError(t, os.WriteFile(tooDeepFile, []byte(hist), 0o644)) var paths []string - for _, f := range DiscoverAiderSessions(root) { + for _, f := range discoverAiderSessions(root) { paths = append(paths, f.Path) } assert.Contains(t, paths, atCapFile, @@ -371,7 +371,7 @@ func TestAiderRawIDAtDetectsShiftedIndex(t *testing.T) { assert.False(t, ok, "stale index 1 no longer recomputes to a run") // Re-resolution by raw ID finds run1 at its new index 0. 
- resolved := FindAiderSourceFile(dir, id1) + resolved := findAiderSourceFile(dir, id1) assert.Equal(t, AiderVirtualPath(path, 0), resolved, "re-resolving by raw ID locates the run at its shifted index") } @@ -503,7 +503,7 @@ func TestParseAiderRunsEmptyAndGarbage(t *testing.T) { require.NoError(t, os.WriteFile(path, []byte(c.content), 0o644)) - results, err := ParseAiderRuns(path, "m") + results, err := parseAiderRuns(path, "m") require.NoError(t, err) // never panics, never hard-errors assert.Len(t, results, c.wantCount) }) @@ -562,15 +562,17 @@ func TestListAiderRunMetas(t *testing.T) { } // TestAiderRegistryOptInDiscovery pins that Aider is not discovered by -// default. Aider has no central store; a rootless home scan can trigger macOS -// privacy prompts and is not trustworthy for always-on sync. Users must -// opt in with AIDER_DIR or aider_dirs. +// default. Aider has no central store, and a rootless $HOME scan can trigger +// macOS privacy prompts during passive background refreshes, so users must opt +// in with AIDER_DIR or aider_dirs. ShallowWatch must stay true so a configured +// broad root is watched only at the root, relying on the periodic sync. func TestAiderRegistryOptInDiscovery(t *testing.T) { def, ok := AgentByType(AgentAider) require.True(t, ok, "AgentAider missing from Registry") - assert.Empty(t, def.DefaultDirs) + assert.Empty(t, def.DefaultDirs, + "aider must not be discovered by default; opt in via AIDER_DIR/aider_dirs") assert.True(t, def.ShallowWatch, - "aider must not recursively watch a broad opt-in root") + "aider must watch an opt-in broad root shallowly, not recurse all of it") // The shallow-watch contract relies on no static subdir or custom // watch-roots wiring overriding it. 
assert.Empty(t, def.WatchSubdirs) @@ -597,13 +599,13 @@ func TestDiscoverAiderSessions(t *testing.T) { filepath.Join(skip, ".aider.chat.history.md"), []byte("# aider chat started at 2026-06-09 14:01:00\n"), 0o644)) - files := DiscoverAiderSessions(root) + files := discoverAiderSessions(root) require.Len(t, files, 1, "found repo file, skipped node_modules") assert.Equal(t, AgentAider, files[0].Agent) assert.Equal(t, aiderHistoryFile, filepath.Base(files[0].Path)) // Empty root is tolerated. - assert.Empty(t, DiscoverAiderSessions("")) + assert.Empty(t, discoverAiderSessions("")) } func TestAiderShouldSkipProtectedHomeDirsOnlyOnDarwinHomeRoot(t *testing.T) { @@ -669,8 +671,8 @@ func TestDiscoverAiderSessionsSkipsMacOSProtectedDirs(t *testing.T) { []byte("# aider chat started at 2026-06-09 14:01:00\n"), 0o644)) } - files := DiscoverAiderSessions(root) - assert.Empty(t, files, "broad home discovery must not enter macOS TCC-protected folders") + files := discoverAiderSessions(root) + assert.Empty(t, files, "default home discovery must not enter macOS TCC-protected folders") } func TestDiscoverAiderSessionsAllowsExplicitProtectedRoot(t *testing.T) { @@ -683,7 +685,7 @@ func TestDiscoverAiderSessionsAllowsExplicitProtectedRoot(t *testing.T) { filepath.Join(repo, ".aider.chat.history.md"), []byte("# aider chat started at 2026-06-09 14:01:00\n"), 0o644)) - files := DiscoverAiderSessions(documentsRoot) + files := discoverAiderSessions(documentsRoot) require.Len(t, files, 1, "explicit Aider roots should still be scanned") assert.Equal(t, filepath.Join(repo, ".aider.chat.history.md"), files[0].Path) } @@ -706,7 +708,7 @@ func TestAiderWalkBudget(t *testing.T) { 0o644)) start := time.Now() - files := DiscoverAiderSessions(root) + files := discoverAiderSessions(root) elapsed := time.Since(start) assert.Less(t, elapsed, aiderWalkBudget, "a normal walk finishes well under budget") @@ -725,13 +727,13 @@ func TestFindAiderSourceFile(t *testing.T) { require.NoError(t, 
os.WriteFile(hist, []byte(content), 0o644)) // Parse the runs to learn the real per-run raw IDs. - results, err := ParseAiderRuns(hist, "m") + results, err := parseAiderRuns(hist, "m") require.NoError(t, err) require.Len(t, results, 2) for i, r := range results { rawID := r.Session.ID[len(aiderIDPrefix):] - found := FindAiderSourceFile(root, rawID) + found := findAiderSourceFile(root, rawID) require.NotEmpty(t, found, "run %d should resolve", i) gotPath, gotIdx, ok := ParseAiderVirtualPath(found) require.True(t, ok) @@ -739,6 +741,6 @@ func TestFindAiderSourceFile(t *testing.T) { assert.Equal(t, i, gotIdx, "run %d resolves to run index %d", i, i) } - assert.Empty(t, FindAiderSourceFile(root, "nonexistent-id")) - assert.Empty(t, FindAiderSourceFile("", "anything")) + assert.Empty(t, findAiderSourceFile(root, "nonexistent-id")) + assert.Empty(t, findAiderSourceFile("", "anything")) } diff --git a/internal/parser/chatgpt.go b/internal/parser/chatgpt.go index 9ee655798..790bbc3c9 100644 --- a/internal/parser/chatgpt.go +++ b/internal/parser/chatgpt.go @@ -68,9 +68,24 @@ type chatGPTMeta struct { ModelSlug string `json:"model_slug"` } +// ChatGPTExportParser is implemented by the ChatGPT import-only provider to +// stream a ChatGPT data export. ChatGPT sessions are never discovered or +// synced from disk; they only enter the archive through a one-shot import, so +// this entry point lives on the provider rather than the Discover/Parse path. +// Callers obtain it via NewProvider(AgentChatGPT, ...) and a type assertion. +type ChatGPTExportParser interface { + // ParseChatGPTExport reads all conversations-*.json files from dir and + // calls onConversation for each non-empty conversation. + ParseChatGPTExport( + dir string, + assets AssetResolver, + onConversation func(ParseResult) error, + ) error +} + // ParseChatGPTExport reads all conversations-*.json files from dir // and calls onConversation for each non-empty conversation. 
-func ParseChatGPTExport( +func (p *chatGPTImportOnlyProvider) ParseChatGPTExport( dir string, assets AssetResolver, onConversation func(ParseResult) error, diff --git a/internal/parser/chatgpt_test.go b/internal/parser/chatgpt_test.go index 8ccaa6f00..b2d734e3f 100644 --- a/internal/parser/chatgpt_test.go +++ b/internal/parser/chatgpt_test.go @@ -92,7 +92,7 @@ func TestParseChatGPTExport(t *testing.T) { ]`) var results []ParseResult - err := ParseChatGPTExport(dir, nil, func(r ParseResult) error { + err := parseChatGPTExport(dir, nil, func(r ParseResult) error { results = append(results, r) return nil }) @@ -214,7 +214,7 @@ func TestParseChatGPTExport_ToolCalls(t *testing.T) { ]`) var results []ParseResult - err := ParseChatGPTExport(dir, nil, func(r ParseResult) error { + err := parseChatGPTExport(dir, nil, func(r ParseResult) error { results = append(results, r) return nil }) @@ -294,7 +294,7 @@ func TestParseChatGPTExport_Thinking(t *testing.T) { ]`) var results []ParseResult - err := ParseChatGPTExport(dir, nil, func(r ParseResult) error { + err := parseChatGPTExport(dir, nil, func(r ParseResult) error { results = append(results, r) return nil }) @@ -384,7 +384,7 @@ func TestParseChatGPTExport_SystemMessage(t *testing.T) { ]`) var results []ParseResult - err := ParseChatGPTExport(dir, nil, func(r ParseResult) error { + err := parseChatGPTExport(dir, nil, func(r ParseResult) error { results = append(results, r) return nil }) @@ -408,7 +408,7 @@ func TestParseChatGPTExport_SystemMessage(t *testing.T) { func TestParseChatGPTExport_EmptyDir(t *testing.T) { dir := t.TempDir() - err := ParseChatGPTExport(dir, nil, func(r ParseResult) error { + err := parseChatGPTExport(dir, nil, func(r ParseResult) error { return nil }) require.Error(t, err) @@ -459,7 +459,7 @@ func TestParseChatGPTExport_MultipleShards(t *testing.T) { shard("conv-b", "Second")) var results []ParseResult - err := ParseChatGPTExport(dir, nil, func(r ParseResult) error { + err := 
parseChatGPTExport(dir, nil, func(r ParseResult) error { results = append(results, r) return nil }) @@ -724,7 +724,7 @@ func TestParseChatGPTExport_WebSearch(t *testing.T) { ]`) var results []ParseResult - err := ParseChatGPTExport(dir, nil, func(r ParseResult) error { + err := parseChatGPTExport(dir, nil, func(r ParseResult) error { results = append(results, r) return nil }) diff --git a/internal/parser/claude_ai.go b/internal/parser/claude_ai.go index 65d6b5b53..eb72385ba 100644 --- a/internal/parser/claude_ai.go +++ b/internal/parser/claude_ai.go @@ -33,10 +33,25 @@ type claudeAIBlock struct { Thinking string `json:"thinking"` } +// ClaudeAIExportParser is implemented by the Claude.ai import-only provider to +// stream a Claude.ai conversations export. Claude.ai sessions are never +// discovered or synced from disk; they only enter the archive through a +// one-shot import, so this entry point lives on the provider rather than the +// Discover/Parse path. Callers obtain it via NewProvider(AgentClaudeAI, ...) +// and a type assertion. +type ClaudeAIExportParser interface { + // ParseClaudeAIExport streams a Claude.ai conversations.json export and + // calls onConversation for each non-empty conversation. + ParseClaudeAIExport( + r io.Reader, + onConversation func(ParseResult) error, + ) error +} + // ParseClaudeAIExport streams a Claude.ai conversations.json // export and calls onConversation for each non-empty // conversation. 
-func ParseClaudeAIExport( +func (p *claudeAIImportOnlyProvider) ParseClaudeAIExport( r io.Reader, onConversation func(ParseResult) error, ) error { diff --git a/internal/parser/claude_ai_test.go b/internal/parser/claude_ai_test.go index 5616e4a63..0c1a81e94 100644 --- a/internal/parser/claude_ai_test.go +++ b/internal/parser/claude_ai_test.go @@ -72,7 +72,7 @@ const testExportJSON = `[ func TestParseClaudeAIExport(t *testing.T) { var results []ParseResult - err := ParseClaudeAIExport( + err := parseClaudeAIExport( strings.NewReader(testExportJSON), func(r ParseResult) error { results = append(results, r) @@ -148,7 +148,7 @@ func TestParseClaudeAIExport_ContentBlocks(t *testing.T) { }]` var results []ParseResult - err := ParseClaudeAIExport( + err := parseClaudeAIExport( strings.NewReader(input), func(r ParseResult) error { results = append(results, r) @@ -174,7 +174,7 @@ func TestParseClaudeAIExport_ContentBlocks(t *testing.T) { func TestParseClaudeAIExport_EmptyArray(t *testing.T) { var results []ParseResult - err := ParseClaudeAIExport( + err := parseClaudeAIExport( strings.NewReader("[]"), func(r ParseResult) error { results = append(results, r) @@ -186,7 +186,7 @@ func TestParseClaudeAIExport_EmptyArray(t *testing.T) { } func TestParseClaudeAIExport_InvalidJSON(t *testing.T) { - err := ParseClaudeAIExport( + err := parseClaudeAIExport( strings.NewReader("{not json"), func(r ParseResult) error { return nil }, ) diff --git a/internal/parser/codex_provider_test.go b/internal/parser/codex_provider_test.go index f86267e43..ec5b48fcf 100644 --- a/internal/parser/codex_provider_test.go +++ b/internal/parser/codex_provider_test.go @@ -189,6 +189,47 @@ func TestCodexProviderFindSourcePinsExactArchivedDuplicate(t *testing.T) { assert.Equal(t, livePath, found.DisplayPath) } +func TestCodexProviderFindSourcePreferStoredSourceKeepsArchivedDuplicate(t *testing.T) { + base := t.TempDir() + liveRoot := filepath.Join(base, "sessions") + archivedRoot := filepath.Join(base, 
"archived_sessions") + uuid := "019eb791-cf7d-75c1-8439-9ed74c1229e6" + livePath := writeCodexProviderSession(t, liveRoot, uuid, "live") + archivedPath := writeCodexProviderArchivedSession( + t, archivedRoot, uuid, "archived", + ) + + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{archivedRoot, liveRoot}, + }) + require.True(t, ok) + + // PreferStoredSource pins the stored archived duplicate even when a fresh + // source is required, instead of canonicalizing to the live duplicate. + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: archivedPath, + FullSessionID: "codex:" + uuid, + RequireFreshSource: true, + PreferStoredSource: true, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, archivedPath, found.DisplayPath, + "PreferStoredSource must preserve the stored archived path") + + // Without the hint, RequireFreshSource canonicalizes to the live duplicate, + // which is exactly the behavior PreferStoredSource opts out of. 
+ found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: archivedPath, + FullSessionID: "codex:" + uuid, + RequireFreshSource: true, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, livePath, found.DisplayPath, + "RequireFreshSource without PreferStoredSource canonicalizes to live") +} + func TestCodexProviderFindSourceAcceptsLegacyShapedStoredPath(t *testing.T) { root := t.TempDir() sessionID := "test-uuid" diff --git a/internal/parser/directory_jsonl_source_set_test.go b/internal/parser/directory_jsonl_source_set_test.go new file mode 100644 index 000000000..1a76f3bd3 --- /dev/null +++ b/internal/parser/directory_jsonl_source_set_test.go @@ -0,0 +1,87 @@ +package parser + +import ( + "context" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDirectoryJSONLSourceSetDiscoversProjectFiles(t *testing.T) { + root := t.TempDir() + writeSourceFile(t, filepath.Join(root, "project-b", "session-b.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "project-a", "session-a.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "project-a", "nested", "skip.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "root.jsonl"), "{}\n") + + sources := newDirectoryJSONLSourceSet(AgentQwen, []string{root}) + + discovered, err := sources.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.Equal(t, []string{"project-a", "project-b"}, sourceProjects(discovered)) + assert.Equal(t, []string{ + filepath.Join(root, "project-a", "session-a.jsonl"), + filepath.Join(root, "project-b", "session-b.jsonl"), + }, sourceDisplayPaths(discovered)) + + found, ok, err := sources.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "session-b", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, filepath.Join(root, "project-b", "session-b.jsonl"), 
found.DisplayPath) +} + +func TestDirectoryJSONLSourceSetComposesPathFilters(t *testing.T) { + root := t.TempDir() + writeSourceFile(t, filepath.Join(root, "project", "session-keep.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "project", "ignore.jsonl"), "{}\n") + + sources := newDirectoryJSONLSourceSet(AgentIflow, []string{root}, + withIncludePath(func(root, path string) bool { + return strings.HasPrefix(filepath.Base(path), "session-") + }), + withProjectHint(func(root, path string) string { + return "custom-" + filepath.Base(filepath.Dir(path)) + }), + ) + + discovered, err := sources.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, "custom-project", discovered[0].ProjectHint) + assert.Equal(t, filepath.Join(root, "project", "session-keep.jsonl"), discovered[0].DisplayPath) +} + +func TestDirectoryJSONLSourceSetClassifiesDeletedProjectFiles(t *testing.T) { + root := t.TempDir() + sources := newDirectoryJSONLSourceSet(AgentCommandCode, []string{root}) + + changed, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "project", "deleted.jsonl"), + EventKind: "remove", + WatchRoot: root, + }, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, "project", changed[0].ProjectHint) + assert.Equal(t, "project/deleted.jsonl", changed[0].Opaque.(JSONLSource).RelPath) + + deep, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "project", "nested", "ignored.jsonl"), + EventKind: "remove", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, deep) +} diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index e02ded303..e6751bc6f 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -55,8 +55,8 @@ type DiscoveredFile struct { Project string // pre-extracted project name Agent AgentType // which agent 
this file belongs to ForceParse bool // bypass stored-state skips for sidecar-driven refreshes - ProviderSource *SourceRef // provider-owned source identity, when known - ProviderProcess bool // true when this caller may parse via ProviderSource + ProviderSource *SourceRef // provider-owned source identity, when discovered by provider path + ProviderProcess bool // true when this sync caller may parse through ProviderSource } // OpenCodeSourceMode identifies the usable OpenCode storage diff --git a/internal/parser/hermes_provider.go b/internal/parser/hermes_provider.go index 9496a8666..15bc9e609 100644 --- a/internal/parser/hermes_provider.go +++ b/internal/parser/hermes_provider.go @@ -7,6 +7,7 @@ import ( "fmt" "hash" "io" + "log" "os" "path/filepath" "sort" @@ -236,14 +237,22 @@ func (s hermesSourceSet) FindSource( if stateDB, _, ok := hermesStatePaths(root); ok && IsValidSessionID(req.RawSessionID) { found, err := hermesStateDBHasSession(stateDB, req.RawSessionID) - if err != nil { - return SourceRef{}, false, err - } - if !found { + switch { + case err != nil: + // Mirror parseArchive: an unreadable or schema-incompatible + // state.db falls back to transcripts rather than aborting the + // lookup, so a valid transcript session next to a bad state.db + // stays resolvable for resync. + log.Printf( + "hermes: state db lookup failed for %s: %v; "+ + "falling back to transcripts", stateDB, err, + ) + case !found: continue - } - if source, ok := s.sourceRef(root, stateDB); ok { - return source, true, nil + default: + if source, ok := s.sourceRef(root, stateDB); ok { + return source, true, nil + } } } transcriptRoot := hermesTranscriptRoot(root) diff --git a/internal/parser/hermes_provider_test.go b/internal/parser/hermes_provider_test.go index 1beec4f7d..ba9d2a160 100644 --- a/internal/parser/hermes_provider_test.go +++ b/internal/parser/hermes_provider_test.go @@ -453,6 +453,36 @@ func TestHermesProviderFindSourceDoesNotReturnStateDBForMissingRawID(t *testing. 
assert.Empty(t, source) } +func TestHermesProviderFindSourceFallsBackToTranscriptWhenStateDBUnreadable(t *testing.T) { + root := t.TempDir() + sessionsDir := filepath.Join(root, "sessions") + require.NoError(t, os.MkdirAll(sessionsDir, 0o755)) + + // A present-but-unreadable state.db: hermesStateDBHasSession opens it + // lazily, then errors on the first query because the bytes are not a + // SQLite database. parseArchive logs and falls back to transcripts in this + // case, so FindSource must do the same rather than aborting the lookup. + stateDB := filepath.Join(root, "state.db") + writeSourceFile(t, stateDB, "not a sqlite database") + + transcriptPath := filepath.Join(sessionsDir, "freshchild.jsonl") + writeSourceFile(t, transcriptPath, hermesProviderJSONLFixture("transcript question")) + + provider, ok := NewProvider(AgentHermes, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "freshchild", + }) + + require.NoError(t, err, "unreadable state.db must not abort transcript lookup") + require.True(t, ok, "valid transcript next to a bad state.db must be found") + assert.Equal(t, transcriptPath, source.DisplayPath) +} + func hermesProviderJSONLFixture(firstMessage string) string { return `{"role":"session_meta","platform":"cli","timestamp":"2026-05-14T10:00:00.000000"}` + "\n" + `{"role":"user","content":"` + firstMessage + `","timestamp":"2026-05-14T10:01:00.000000"}` + "\n" + diff --git a/internal/parser/hermes_test.go b/internal/parser/hermes_test.go index c1bd94ea4..ce0967cd7 100644 --- a/internal/parser/hermes_test.go +++ b/internal/parser/hermes_test.go @@ -1223,11 +1223,7 @@ func TestHermesRegistryEntry(t *testing.T) { assert.Equal(t, "hermes:", found.IDPrefix) assert.True(t, found.FileBased) assert.Contains(t, found.DefaultDirs, ".hermes/sessions") - // Hermes is provider-authoritative: discovery and source lookup live on 
the - // hermesProvider, not on legacy AgentDef hooks. The watch-root resolvers - // stay because they are provider-owned and consumed by watcher setup. - assert.Nil(t, found.DiscoverFunc) - assert.Nil(t, found.FindSourceFunc) + // The watch-root resolvers are provider-owned and consumed by watcher setup. assert.NotNil(t, found.WatchRootsFunc) assert.NotNil(t, found.ShallowWatchRootsFunc) } diff --git a/internal/parser/import_only_provider.go b/internal/parser/import_only_provider.go new file mode 100644 index 000000000..06c9f6d1b --- /dev/null +++ b/internal/parser/import_only_provider.go @@ -0,0 +1,57 @@ +package parser + +import "context" + +type importOnlyProviderFactory struct { + def AgentDef +} + +func newImportOnlyProviderFactory(def AgentDef) ProviderFactory { + return importOnlyProviderFactory{def: cloneAgentDef(def)} +} + +func (f importOnlyProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f importOnlyProviderFactory) Capabilities() Capabilities { + return Capabilities{} +} + +func (f importOnlyProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + base := importOnlyProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Config: cfg, + }, + } + + switch f.def.Type { + case AgentChatGPT: + return &chatGPTImportOnlyProvider{importOnlyProvider: base} + case AgentClaudeAI: + return &claudeAIImportOnlyProvider{importOnlyProvider: base} + default: + return &base + } +} + +type importOnlyProvider struct { + ProviderBase +} + +func (p *importOnlyProvider) Parse( + context.Context, + ParseRequest, +) (ParseOutcome, error) { + return ParseOutcome{}, p.unsupported(ProviderFeatureParse) +} + +type chatGPTImportOnlyProvider struct { + importOnlyProvider +} + +type claudeAIImportOnlyProvider struct { + importOnlyProvider +} diff --git a/internal/parser/import_only_provider_test.go b/internal/parser/import_only_provider_test.go new file mode 100644 index 000000000..1c9868b44 --- /dev/null +++ 
b/internal/parser/import_only_provider_test.go @@ -0,0 +1,20 @@ +package parser + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestImportOnlyProviderExportCapabilitiesAreAgentSpecific(t *testing.T) { + chatGPTProvider, ok := NewProvider(AgentChatGPT, ProviderConfig{}) + require.True(t, ok) + assert.Implements(t, (*ChatGPTExportParser)(nil), chatGPTProvider) + assert.NotImplements(t, (*ClaudeAIExportParser)(nil), chatGPTProvider) + + claudeAIProvider, ok := NewProvider(AgentClaudeAI, ProviderConfig{}) + require.True(t, ok) + assert.Implements(t, (*ClaudeAIExportParser)(nil), claudeAIProvider) + assert.NotImplements(t, (*ChatGPTExportParser)(nil), claudeAIProvider) +} diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 571368c69..ff4db1812 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -46,6 +46,13 @@ type ProviderFactory interface { type ProviderConfig struct { Roots []string Machine string + // PathRewriter maps an on-disk source path to its canonical stored form. + // It is non-nil only during remote (SSH) sync, where source files are read + // from a temporary extraction directory but must keep a stable identity + // across syncs. Providers whose session IDs are derived from the source + // path (Aider) use it to seed those IDs from the canonical remote path + // rather than the changing temp path. Most providers ignore it. + PathRewriter func(string) string } // Clone returns an independent config snapshot. @@ -59,8 +66,11 @@ func (cfg ProviderConfig) RootsCopy() []string { return append([]string(nil), cfg.Roots...) } -// Provider is the target parser/source facade. Providers own source shape and -// return normalized parser results for the sync engine to persist. +// Provider is the target parser/source facade. 
Providers own source shape, +// source identity, freshness, and lookup; the engine consumes SourceRefs, +// SourceFingerprints, and normalized ParseResults without knowing whether the +// backing data is a file, virtual DB row, sidecar set, remote canonical path, or +// multi-session container. type Provider interface { Definition() AgentDef Capabilities() Capabilities @@ -142,7 +152,10 @@ func (b ProviderBase) unsupported(feature string) error { } } -// SourceRef is the engine-visible handle for provider-owned source data. +// SourceRef is the engine-visible handle for provider-owned source data. It is +// the only source identity the engine should carry between discovery, changed +// path classification, lookup, fingerprinting, parsing, skip-cache checks, and +// persisted session metadata. type SourceRef struct { // Provider identifies the provider that created this source and must match // the provider instance used for subsequent operations. @@ -150,26 +163,38 @@ type SourceRef struct { // Key is stable within the provider across process restarts. It is suitable // for dedupe and diagnostics, but not necessarily for DB freshness checks. Key string - // DisplayPath is human-readable and may be a virtual path. + // DisplayPath is human-readable and may be a virtual path. For filesystem + // sources it is usually the path users expect to inspect. For shared stores + // it may be a provider virtual path such as "#". DisplayPath string - // FingerprintKey is the persisted lookup key for skip-cache and parser data - // version checks. Migrated providers should keep it compatible with legacy - // file_path values whenever practical. + // FingerprintKey is the persisted lookup key for source metadata, + // skip-cache, and parser data-version freshness checks. 
Providers should set + // it to the same identity they store in ParsedSession.File.Path whenever + // practical, and migrated providers must keep it compatible with legacy + // file_path values unless a documented provider-specific transition handles + // old rows. FingerprintKey string // ProjectHint is advisory metadata for UI grouping and may be empty. ProjectHint string // Opaque is provider-owned in-memory state. The engine must not persist, // compare, inspect, or log it, and providers must not require it for lookup - // from persisted rows. + // from persisted rows. Any source that needs to survive a process restart + // must be recoverable from Key, DisplayPath, FingerprintKey, + // FindSourceRequest, or discovery. Opaque any } -// WatchPlan describes provider-owned filesystem watch roots. +// WatchPlan describes provider-owned filesystem watch roots. Provider +// WatchPlans are authoritative for migrated providers; legacy AgentDef watch +// fields are fallback compatibility only. type WatchPlan struct { Roots []WatchRoot } -// WatchRoot is one filesystem root the engine should watch. +// WatchRoot is one filesystem root the engine should watch. Recursive roots +// observe nested source creation. Non-recursive roots observe only direct child +// changes and must not be treated as covering missing nested provider roots +// unless caller-specific creation handling documents that equivalence. type WatchRoot struct { Path string Recursive bool @@ -183,26 +208,54 @@ type WatchRoot struct { type ChangedPathRequest struct { Path string EventKind string + // WatchRoot is the provider WatchRoot that observed Path. Providers should + // use it to scope classification and avoid returning sources from unrelated + // configured roots that happen to match the same basename or raw ID. WatchRoot string // StoredSourcePaths are optional provider-persisted source paths already // known to the caller for this watch root. 
Providers that model a shared // physical file as virtual per-session sources use these to emit tombstone // sources when a DB row or DB file has disappeared and can no longer be - // rediscovered from current metadata. + // rediscovered from current metadata. Hints are advisory: providers must + // still validate ownership against the changed path/watch root before + // emitting them. StoredSourcePaths []string } -// FindSourceRequest contains persisted source hints for provider-owned lookup. +// FindSourceRequest contains lookup inputs and persisted source hints for +// provider-owned source resolution. RawSessionID and FullSessionID identify the +// requested logical session. StoredFilePath and FingerprintKey are advisory +// hints from the archive, not authoritative filesystem paths; providers should +// try them first when useful, but provider-owned identity decides whether the +// source belongs to the requested session. type FindSourceRequest struct { - RawSessionID string - FullSessionID string - StoredFilePath string - FingerprintKey string + RawSessionID string + FullSessionID string + // StoredFilePath is the persisted sessions.file_path value, which may be a + // provider virtual path and may be stale after a move, deletion, or remote + // sync identity rewrite. + StoredFilePath string + // FingerprintKey is the persisted source freshness key when the caller has + // one distinct from StoredFilePath. + FingerprintKey string + // RequireFreshSource asks the provider to verify the source against current + // provider metadata before returning it. A stale hint may still be used to + // find the current source, but if the provider cannot prove the requested + // session exists now it should return found=false rather than a tombstone. 
RequireFreshSource bool + // PreferStoredSource asks the provider to return a valid StoredFilePath + // source as-is rather than re-resolving it to a different but equivalent + // source (for example a duplicate of the same session in another on-disk + // layout). Source-lookup callers set it so an explicitly stored or pinned + // source path is preserved; sync processing leaves it false so duplicate + // sources still canonicalize to a single location. PreferStoredSource bool } -// SourceFingerprint is the provider-normalized source freshness identity. +// SourceFingerprint is the provider-normalized source freshness identity. The +// engine uses Key plus size/mtime/hash fields for skip-cache, data-version, and +// source metadata compatibility, including PostgreSQL push/read parity. Key +// should normally match SourceRef.FingerprintKey or ParsedSession.File.Path. type SourceFingerprint struct { Key string Size int64 @@ -221,7 +274,12 @@ type ParseRequest struct { } // ParseOutcome is the full-parse provider output. It is meaningful only when -// Provider.Parse returns a nil error. +// Provider.Parse returns a nil error. Providers own persisted source identity: +// each ParseResult.Result.Session.File.Path must be the same provider identity +// used for source metadata lookups and PostgreSQL/session metadata compatibility +// (usually SourceRef.FingerprintKey). For multi-session sources, every returned +// result must use a session-scoped path when the backing source can produce more +// than one logical session. type ParseOutcome struct { Results []ParseResultOutcome ExcludedSessionIDs []string @@ -251,7 +309,9 @@ type SourceError struct { } // DataVersionState describes whether a parsed result is current for this parser -// data version. +// data version. 
Data-version freshness is per result; clean skip-cache +// persistence is still source-scoped and must be suppressed by callers when any +// result needs retry, any source error exists, or the result set is incomplete. type DataVersionState uint8 const ( @@ -260,7 +320,10 @@ const ( DataVersionNeedsRetry ) -// SkipReason explains provider-level intentional skips. +// SkipReason explains provider-level intentional skips. A provider skip is an +// explicit source-level outcome, not a nil parse result. Callers may record a +// clean skip-cache entry only when the skip is complete, non-erroring, and keyed +// by the provider fingerprint/source identity. type SkipReason uint8 const ( @@ -311,35 +374,6 @@ const ( IncrementalNeedsFullParse ) -type legacyProviderFactory struct { - def AgentDef -} - -func (f legacyProviderFactory) Definition() AgentDef { - return cloneAgentDef(f.def) -} - -func (f legacyProviderFactory) Capabilities() Capabilities { - return Capabilities{} -} - -func (f legacyProviderFactory) NewProvider(cfg ProviderConfig) Provider { - return &legacyProvider{ - ProviderBase: ProviderBase{ - Def: cloneAgentDef(f.def), - Config: cfg.Clone(), - }, - } -} - -type legacyProvider struct { - ProviderBase -} - -func (p *legacyProvider) Parse(context.Context, ParseRequest) (ParseOutcome, error) { - return ParseOutcome{}, p.unsupported(ProviderFeatureParse) -} - // ProviderFactories returns one provider factory for every registered agent. 
func ProviderFactories() []ProviderFactory { factories := make([]ProviderFactory, 0, len(Registry)) @@ -356,10 +390,14 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newAntigravityProviderFactory(def) case AgentAntigravityCLI: return newAntigravityCLIProviderFactory(def) + case AgentAider: + return newAiderProviderFactory(def) case AgentAmp: return newAmpProviderFactory(def) case AgentClaude: return newClaudeProviderFactory(def) + case AgentClaudeAI: + return newImportOnlyProviderFactory(def) case AgentCommandCode: return newCommandCodeProviderFactory(def) case AgentCodex: @@ -372,6 +410,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newCortexProviderFactory(def) case AgentCursor: return newCursorProviderFactory(def) + case AgentChatGPT: + return newImportOnlyProviderFactory(def) case AgentDeepSeekTUI: return newDeepSeekTUIProviderFactory(def) case AgentForge: @@ -398,10 +438,10 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newOpenHandsProviderFactory(def) case AgentOpenCode: return newOpenCodeProviderFactory(def) - case AgentOpenClaw: - return newOpenClawProviderFactory(def) case AgentOMP: return newPiProviderFactory(def) + case AgentOpenClaw: + return newOpenClawProviderFactory(def) case AgentPiebald: return newPiebaldProviderFactory(def) case AgentPi: @@ -414,6 +454,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newQwenProviderFactory(def) case AgentQwenPaw: return newQwenPawProviderFactory(def) + case AgentReasonix: + return newReasonixProviderFactory(def) case AgentShelley: return newShelleyProviderFactory(def) case AgentVSCopilot: @@ -431,7 +473,7 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { case AgentZed: return newZedProviderFactory(def) default: - return legacyProviderFactory{def: def} + panic("missing provider factory for " + string(def.Type)) } } @@ -445,6 +487,17 @@ func ProviderFactoryByType(t AgentType) (ProviderFactory, bool) { return nil, 
false } +// ProviderSupportsSourceDiscovery reports whether the registered provider can +// enumerate source references for report-only or sync discovery surfaces. +func ProviderSupportsSourceDiscovery(t AgentType) bool { + factory, ok := ProviderFactoryByType(t) + if !ok { + return false + } + return factory.Capabilities().Source.DiscoverSources == + CapabilitySupported +} + // NewProvider constructs a config-bound provider for an agent type. func NewProvider(t AgentType, cfg ProviderConfig) (Provider, bool) { factory, ok := ProviderFactoryByType(t) diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index ae49d29ed..c03b87b89 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -10,7 +10,6 @@ import ( type ProviderMigrationMode string const ( - ProviderMigrationLegacyOnly ProviderMigrationMode = "legacy-only" ProviderMigrationShadowCompare ProviderMigrationMode = "shadow-compare" ProviderMigrationProviderAuthoritative ProviderMigrationMode = "provider-authoritative" ProviderMigrationImportOnly ProviderMigrationMode = "import-only" @@ -39,8 +38,8 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentOpenClaw: ProviderMigrationProviderAuthoritative, AgentQClaw: ProviderMigrationProviderAuthoritative, AgentKimi: ProviderMigrationProviderAuthoritative, - AgentClaudeAI: ProviderMigrationLegacyOnly, - AgentChatGPT: ProviderMigrationLegacyOnly, + AgentClaudeAI: ProviderMigrationImportOnly, + AgentChatGPT: ProviderMigrationImportOnly, AgentKiro: ProviderMigrationProviderAuthoritative, AgentKiroIDE: ProviderMigrationProviderAuthoritative, AgentCortex: ProviderMigrationProviderAuthoritative, @@ -57,9 +56,9 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentQwenPaw: ProviderMigrationProviderAuthoritative, AgentGptme: ProviderMigrationProviderAuthoritative, AgentShelley: ProviderMigrationProviderAuthoritative, - AgentAider: 
ProviderMigrationLegacyOnly, + AgentAider: ProviderMigrationProviderAuthoritative, AgentOMP: ProviderMigrationProviderAuthoritative, - AgentReasonix: ProviderMigrationLegacyOnly, + AgentReasonix: ProviderMigrationProviderAuthoritative, } // ProviderMigrationModes returns the current provider migration manifest. @@ -102,24 +101,19 @@ func validateProviderMigrationMode( mode ProviderMigrationMode, ) error { def := factory.Definition() - legacy := isLegacyProviderFactory(factory) switch mode { - case ProviderMigrationLegacyOnly: - if !legacy { + case ProviderMigrationShadowCompare, ProviderMigrationProviderAuthoritative: + caps := factory.Capabilities().Source + if caps.DiscoverSources != CapabilitySupported { return fmt.Errorf( - "%s: concrete provider must opt into %s before leaving %s", - def.Type, - ProviderMigrationShadowCompare, - ProviderMigrationLegacyOnly, + "%s: %s requires provider source discovery", + def.Type, mode, ) } - case ProviderMigrationShadowCompare, ProviderMigrationProviderAuthoritative: - if legacy { + if caps.FindSource != CapabilitySupported { return fmt.Errorf( - "%s: %s requires a concrete provider; keep %s while using the legacy adapter", - def.Type, - mode, - ProviderMigrationLegacyOnly, + "%s: %s requires provider source lookup", + def.Type, mode, ) } case ProviderMigrationImportOnly: @@ -130,25 +124,12 @@ func validateProviderMigrationMode( ProviderMigrationImportOnly, ) } - if legacy { - return fmt.Errorf( - "%s: %s requires a concrete import-only provider; keep %s while using the legacy adapter", - def.Type, - ProviderMigrationImportOnly, - ProviderMigrationLegacyOnly, - ) - } default: return fmt.Errorf("%s: invalid provider migration mode %q", def.Type, mode) } return nil } -func isLegacyProviderFactory(factory ProviderFactory) bool { - _, ok := factory.(legacyProviderFactory) - return ok -} - func isImportOnlyAgentType(agent AgentType) bool { switch agent { case AgentClaudeAI, AgentChatGPT: diff --git 
a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go deleted file mode 100644 index 86bbdfb0e..000000000 --- a/internal/parser/provider_shim_scan_test.go +++ /dev/null @@ -1,192 +0,0 @@ -package parser - -import ( - "go/ast" - "go/parser" - "go/token" - "path/filepath" - "regexp" - "sort" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// legacyEntrypointVerb matches the provider-specific legacy parser entrypoint -// naming convention this facade migration deletes: package-level -// Discover*/Find*/Parse*/Process*/Classify* free functions that encode one -// provider's source shape. A migrated provider owns that behavior on receiver -// methods or provider-neutral source-set helpers; it must not reach back into a -// legacy free function as a compatibility shim. -var legacyEntrypointVerb = regexp.MustCompile(`^(Discover|Find|Parse|Process|Classify)[A-Z]`) - -// providerNeutralEntrypoints are package-level helpers whose names match the -// legacy verb pattern but are genuinely provider-neutral shared utilities. -// Provider files may reference these; they are not provider-specific legacy -// entrypoints. Keep this list small and add to it only when a new shared, -// provider-agnostic helper is introduced. -var providerNeutralEntrypoints = map[string]bool{ - "ParseVirtualSourcePath": true, - "ParseVirtualSourcePathForBase": true, - // ParseCursorTranscriptRelPath is a pure rel-path shape validator with no - // filesystem or provider state. It is shared by the engine's path - // classification/enrichment and the Cursor provider's source set, so it - // stays a free helper rather than moving onto the provider. - "ParseCursorTranscriptRelPath": true, -} - -// pendingShimProviderFiles are provider files whose behavior has not yet been -// folded onto the provider. 
They still reference legacy free functions and are -// temporarily exempt from the anti-shim gate so intermediate branches in the -// facade migration stay green while providers are folded one branch at a time. -// -// Each entry is a standing migration TODO: when a provider's behavior moves -// onto receiver methods or a provider-owned source set, delete its legacy free -// functions and remove the file from this list on the same branch. The stack -// tip (the zero-legacy gate) asserts this list is empty, so a provider cannot -// remain a permanent shim. -var pendingShimProviderFiles = map[string]bool{ - "claude_provider.go": true, - "codex_provider.go": true, - "copilot_provider.go": true, - "cowork_provider.go": true, - "gemini_provider.go": true, - "hermes_provider.go": true, - "opencode_provider.go": true, - "positron_provider.go": true, - "vibe_provider.go": true, - "visualstudio_copilot_provider.go": true, - "vscode_copilot_provider.go": true, -} - -// collectLegacyFreeFuncs returns the set of package-level free functions in the -// parser package whose names match the legacy entrypoint pattern, excluding the -// provider-neutral helpers. Tying detection to functions that actually exist -// (rather than to the name pattern alone) avoids false positives on types and -// values such as ParseResult or ParseRequest, and naturally shrinks as legacy -// functions are deleted. 
-func collectLegacyFreeFuncs(t *testing.T) (map[string]bool, *token.FileSet) { - t.Helper() - fset := token.NewFileSet() - goFiles, err := filepath.Glob("*.go") - require.NoError(t, err) - - legacy := make(map[string]bool) - for _, file := range goFiles { - if isTestGoFile(file) { - continue - } - parsed, err := parser.ParseFile(fset, file, nil, 0) - require.NoErrorf(t, err, "parse %s", file) - for _, decl := range parsed.Decls { - fn, ok := decl.(*ast.FuncDecl) - if !ok || fn.Recv != nil { - continue // methods are provider-owned, not free entrypoints - } - name := fn.Name.Name - if legacyEntrypointVerb.MatchString(name) && - !providerNeutralEntrypoints[name] { - legacy[name] = true - } - } - } - return legacy, fset -} - -func isTestGoFile(name string) bool { - return len(name) > len("_test.go") && - name[len(name)-len("_test.go"):] == "_test.go" -} - -// TestProviderFilesDoNotReferenceLegacyEntrypoints is the migration anti-shim -// gate. A *_provider.go that references a provider-specific legacy free -// function (whether by calling it or passing it as a value) is a shim, not a -// migration, so this scan fails for it unless the file is an explicitly tracked -// pending shim. The test is vacuous at the root (no provider files yet) and -// keeps the migrated providers honest as the stack folds each one. 
-func TestProviderFilesDoNotReferenceLegacyEntrypoints(t *testing.T) { - legacy, fset := collectLegacyFreeFuncs(t) - - providerFiles, err := filepath.Glob("*_provider.go") - require.NoError(t, err) - - for _, file := range providerFiles { - t.Run(file, func(t *testing.T) { - parsed, err := parser.ParseFile(fset, file, nil, 0) - require.NoErrorf(t, err, "parse %s", file) - - offenders := legacyReferencesInProviderFile(parsed, legacy) - if pendingShimProviderFiles[file] { - assert.NotEmptyf( - t, - offenders, - "%s is listed in pendingShimProviderFiles but no "+ - "longer references provider-specific legacy "+ - "entrypoints; remove it from the pending list", - file, - ) - return - } - assert.Emptyf( - t, - offenders, - "%s references provider-specific legacy entrypoints %v; "+ - "fold that behavior onto the provider or a "+ - "provider-neutral source-set helper instead of shimming", - file, - offenders, - ) - }) - } -} - -func legacyReferencesInProviderFile( - parsed *ast.File, - legacy map[string]bool, -) []string { - // A package cannot redeclare a free function name, so any direct ident in - // a provider file that matches a legacy free function is a reference to it. - // Method declarations and selector method names are provider-owned receiver - // surface, so they are not legacy free-function references. 
- declNames := make(map[*ast.Ident]struct{}) - selectorNames := make(map[*ast.Ident]struct{}) - for _, decl := range parsed.Decls { - if fn, ok := decl.(*ast.FuncDecl); ok { - declNames[fn.Name] = struct{}{} - } - } - ast.Inspect(parsed, func(n ast.Node) bool { - if selector, ok := n.(*ast.SelectorExpr); ok { - selectorNames[selector.Sel] = struct{}{} - } - return true - }) - - seen := make(map[string]struct{}) - var offenders []string - ast.Inspect(parsed, func(n ast.Node) bool { - ident, ok := n.(*ast.Ident) - if !ok { - return true - } - if _, isDecl := declNames[ident]; isDecl { - return true - } - if _, isSelector := selectorNames[ident]; isSelector { - return true - } - if !legacy[ident.Name] { - return true - } - if _, dup := seen[ident.Name]; dup { - return true - } - seen[ident.Name] = struct{}{} - offenders = append(offenders, ident.Name) - return true - }) - - sort.Strings(offenders) - return offenders -} diff --git a/internal/parser/provider_test.go b/internal/parser/provider_test.go index 65b689c5e..4702f697a 100644 --- a/internal/parser/provider_test.go +++ b/internal/parser/provider_test.go @@ -5,7 +5,6 @@ import ( "encoding/json" "errors" "testing" - "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -148,124 +147,27 @@ func TestProviderRegistryMirrorsAgentRegistry(t *testing.T) { } } -func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { - legacyAgent := legacyProviderTestAgent(t) - def, ok := AgentByType(legacyAgent) - require.True(t, ok) - provider, ok := NewProvider(legacyAgent, ProviderConfig{ - Roots: []string{t.TempDir()}, - Machine: "devbox", - }) - require.True(t, ok) - require.NotNil(t, provider) - - assert.Equal(t, Capabilities{}, provider.Capabilities()) +func TestProviderFactoryLookupRejectsMissingAgent(t *testing.T) { + require.NotEmpty(t, Registry) + agent := Registry[0].Type - ctx := context.Background() - discovered, err := provider.Discover(ctx) - require.NoError(t, err) - 
assert.Empty(t, discovered) - - plan, err := provider.WatchPlan(ctx) - require.NoError(t, err) - assert.Empty(t, plan.Roots) - - changed, err := provider.SourcesForChangedPath(ctx, ChangedPathRequest{ - Path: "/tmp/session.jsonl", - EventKind: "write", - WatchRoot: "/tmp", - }) - require.NoError(t, err) - assert.Empty(t, changed) - - source, found, err := provider.FindSource(ctx, FindSourceRequest{ - RawSessionID: "session", - FullSessionID: def.IDPrefix + "session", - StoredFilePath: "/tmp/session.jsonl", - FingerprintKey: "/tmp/session.jsonl", - }) - require.NoError(t, err) - assert.False(t, found) - assert.Empty(t, source) - - _, err = provider.Fingerprint(ctx, SourceRef{ - Provider: legacyAgent, - Key: "session", - DisplayPath: "/tmp/session.jsonl", - FingerprintKey: "/tmp/session.jsonl", - }) - require.Error(t, err) - assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) - - incremental, status, err := provider.ParseIncremental(ctx, IncrementalRequest{ - Source: SourceRef{Provider: legacyAgent, Key: "session"}, - Fingerprint: SourceFingerprint{Key: "/tmp/session.jsonl"}, - SessionID: def.IDPrefix + "session", - StartOrdinal: 1, - Machine: "devbox", - }) - require.NoError(t, err) - assert.Equal(t, IncrementalUnsupported, status) - assert.Empty(t, incremental) -} + factory, ok := ProviderFactoryByType(agent) + require.True(t, ok) + assert.Equal(t, agent, factory.Definition().Type) -func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { - cfg := ProviderConfig{ + provider, ok := NewProvider(agent, ProviderConfig{ Roots: []string{"/tmp/one", "/tmp/two"}, Machine: "devbox", - } - legacyAgent := legacyProviderTestAgent(t) - - factory, ok := ProviderFactoryByType(legacyAgent) - require.True(t, ok) - assert.Equal(t, legacyAgent, factory.Definition().Type) - - provider, ok := NewProvider(legacyAgent, cfg) + }) require.True(t, ok) require.NotNil(t, provider) - cfg.Roots[0] = "/tmp/mutated" - legacy, ok := provider.(*legacyProvider) - require.True(t, ok) 
- assert.Equal(t, []string{"/tmp/one", "/tmp/two"}, legacy.Config.Roots) - assert.Equal(t, "devbox", legacy.Config.Machine) - _, ok = ProviderFactoryByType("missing") assert.False(t, ok) - _, ok = NewProvider("missing", cfg) + _, ok = NewProvider("missing", ProviderConfig{}) assert.False(t, ok) } -func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { - legacyAgent := legacyProviderTestAgent(t) - provider, ok := NewProvider(legacyAgent, ProviderConfig{ - Roots: []string{t.TempDir()}, - Machine: "devbox", - }) - require.True(t, ok) - - outcome, err := provider.Parse(context.Background(), ParseRequest{ - Source: SourceRef{ - Provider: legacyAgent, - Key: "source", - DisplayPath: "/tmp/source.jsonl", - FingerprintKey: "/tmp/source.jsonl", - }, - Fingerprint: SourceFingerprint{ - Key: "/tmp/source.jsonl", - MTimeNS: time.Now().UnixNano(), - }, - Machine: "devbox", - }) - require.Error(t, err) - assert.Empty(t, outcome) - assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) - var unsupported UnsupportedProviderFeatureError - require.ErrorAs(t, err, &unsupported) - assert.Equal(t, legacyAgent, unsupported.Provider) - assert.Equal(t, ProviderFeatureParse, unsupported.Feature) -} - func TestProviderMigrationModesCoverRegistry(t *testing.T) { err := ValidateProviderMigrationModes( ProviderFactories(), @@ -274,38 +176,20 @@ func TestProviderMigrationModesCoverRegistry(t *testing.T) { require.NoError(t, err) } -func TestProviderMigrationModesRejectConcreteProviderLeftLegacyOnly(t *testing.T) { - factory := testProviderFactory{ - def: AgentDef{ - Type: AgentCodex, - DisplayName: "Codex", - }, - } - modes := map[AgentType]ProviderMigrationMode{ - AgentCodex: ProviderMigrationLegacyOnly, - } - - err := ValidateProviderMigrationModes([]ProviderFactory{factory}, modes) - require.Error(t, err) - assert.Contains(t, err.Error(), string(AgentCodex)) - assert.Contains(t, err.Error(), string(ProviderMigrationShadowCompare)) -} - -func 
TestProviderMigrationModesRejectConcreteModeForLegacyFactory(t *testing.T) { - factory := legacyProviderFactory{ - def: AgentDef{ - Type: AgentCodex, - DisplayName: "Codex", - }, - } - modes := map[AgentType]ProviderMigrationMode{ - AgentCodex: ProviderMigrationShadowCompare, +func TestProviderMigrationModesUseOnlyFinalModes(t *testing.T) { + for agent, mode := range ProviderMigrationModes() { + switch mode { + case ProviderMigrationProviderAuthoritative, ProviderMigrationImportOnly: + default: + assert.Failf( + t, + "unexpected migration mode", + "%s uses non-final provider migration mode %q", + agent, + mode, + ) + } } - - err := ValidateProviderMigrationModes([]ProviderFactory{factory}, modes) - require.Error(t, err) - assert.Contains(t, err.Error(), string(AgentCodex)) - assert.Contains(t, err.Error(), string(ProviderMigrationLegacyOnly)) } func TestProviderMigrationModesRestrictImportOnlyMode(t *testing.T) { @@ -354,18 +238,6 @@ func (p *testProvider) Parse(context.Context, ParseRequest) (ParseOutcome, error return ParseOutcome{}, nil } -func legacyProviderTestAgent(t *testing.T) AgentType { - t.Helper() - for _, def := range Registry { - factory := providerFactoryForDef(def) - if _, ok := factory.(legacyProviderFactory); ok { - return def.Type - } - } - t.Fatal("expected at least one legacy provider for fallback tests") - return "" -} - func assertAgentDefMetadataEqual(t *testing.T, want, got AgentDef) { t.Helper() @@ -378,8 +250,6 @@ func assertAgentDefMetadataEqual(t *testing.T, want, got AgentDef) { assert.Equal(t, want.WatchSubdirs, got.WatchSubdirs) assert.Equal(t, want.ShallowWatch, got.ShallowWatch) assert.Equal(t, want.FileBased, got.FileBased) - assert.Equal(t, want.DiscoverFunc == nil, got.DiscoverFunc == nil) - assert.Equal(t, want.FindSourceFunc == nil, got.FindSourceFunc == nil) assert.Equal(t, want.WatchRootsFunc == nil, got.WatchRootsFunc == nil) assert.Equal(t, want.ShallowWatchRootsFunc == nil, got.ShallowWatchRootsFunc == nil) } diff --git 
a/internal/parser/reasonix.go b/internal/parser/reasonix.go index 4302768ca..d2124ab67 100644 --- a/internal/parser/reasonix.go +++ b/internal/parser/reasonix.go @@ -166,10 +166,10 @@ func (b *reasonixSessionBuilder) processToolResult( return nil } -// ParseReasonixSession parses a Reasonix JSONL session file. +// parseReasonixSession parses a Reasonix JSONL session file. // Returns (nil, nil, nil, nil) if the file doesn't exist or // contains no user/assistant messages. -func ParseReasonixSession( +func parseReasonixSession( path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) @@ -339,10 +339,10 @@ func loadReasonixMetadata(transcriptPath string) (*reasonixMetadata, error) { return &meta, nil } -// DiscoverReasonixSessions discovers Reasonix sessions across +// discoverReasonixSessions discovers Reasonix sessions across // four layouts: project sessions, global sessions, global subagents, // and archive sessions. -func DiscoverReasonixSessions(reasonixDir string) []DiscoveredFile { +func discoverReasonixSessions(reasonixDir string) []DiscoveredFile { if reasonixDir == "" { return nil } @@ -461,9 +461,9 @@ func DiscoverReasonixSessions(reasonixDir string) []DiscoveredFile { return files } -// FindReasonixSourceFile locates a Reasonix session file by +// findReasonixSourceFile locates a Reasonix session file by // session ID. Searches project, global, subagent, and archive layouts. 
-func FindReasonixSourceFile(reasonixDir, rawID string) string { +func findReasonixSourceFile(reasonixDir, rawID string) string { if reasonixDir == "" || rawID == "" { return "" } diff --git a/internal/parser/reasonix_provider.go b/internal/parser/reasonix_provider.go new file mode 100644 index 000000000..08a8165a1 --- /dev/null +++ b/internal/parser/reasonix_provider.go @@ -0,0 +1,280 @@ +package parser + +import ( + "crypto/sha256" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +// Reasonix stores each session as one .jsonl transcript with a sibling +// .jsonl.meta sidecar. It is a single-file provider: one file parses into one +// session, with a composite fingerprint that folds the sidecar in so a +// metadata-only write still re-parses. All behavior is wired into the shared +// single-file base via options. +func newReasonixProviderFactory(def AgentDef) ProviderFactory { + watchSubdirs := append([]string(nil), def.WatchSubdirs...) + return newSingleFileProviderFactory( + def, + reasonixProviderCapabilities(), + func(cfg ProviderConfig) singleFileSourceSet { + return newSingleFileSourceSet( + AgentReasonix, + cfg.Roots, + withFileDiscovery(reasonixDiscoverFiles), + withFileWatchRoots( + func(roots []string) []WatchRoot { + return reasonixWatchRoots(roots, watchSubdirs) + }, + ), + withFileChangedPathClassifier(reasonixClassifyPath), + withFileLookup(reasonixFindFile), + withFileFingerprint(reasonixFingerprintSource), + withFileParse(reasonixParseFile), + ) + }, + ) +} + +func reasonixDiscoverFiles(root string) []singleFileMatch { + sessions := discoverReasonixSessions(root) + out := make([]singleFileMatch, 0, len(sessions)) + for _, df := range sessions { + out = append(out, singleFileMatch{ + Path: filepath.Clean(df.Path), + ProjectHint: df.Project, + }) + } + return out +} + +func reasonixWatchRoots(roots, watchSubdirs []string) []WatchRoot { + subdirs := watchSubdirs + if len(subdirs) == 0 { + subdirs = []string{""} + } + out := make([]WatchRoot, 
0, len(roots)*len(subdirs)) + for _, root := range roots { + for _, sub := range subdirs { + watchPath := root + if sub != "" { + watchPath = filepath.Join(root, sub) + } + out = append(out, WatchRoot{ + Path: watchPath, + Recursive: true, + IncludeGlobs: []string{"*.jsonl", "*.jsonl.meta"}, + DebounceKey: string(AgentReasonix) + ":" + sub + ":" + watchPath, + }) + } + } + return out +} + +// reasonixClassifyPath classifies a stored or changed path under root into a +// Reasonix session source. It mirrors the legacy classifyReasonixPath: a +// .jsonl.meta sidecar event maps to its sibling transcript, and only the four +// recognized layouts (project sessions, global sessions, archive, subagents) +// qualify. A sidecar event is dropped when its sibling transcript is missing; +// transcript paths themselves are not stat-checked, so allowMissing is unused. +func reasonixClassifyPath( + root, path string, _ bool, +) (singleFileMatch, bool) { + rel, ok := relUnder(filepath.Clean(root), filepath.Clean(path)) + if !ok { + return singleFileMatch{}, false + } + if strings.HasSuffix(path, ".jsonl.meta") { + jsonlPath := strings.TrimSuffix(path, ".meta") + if _, err := os.Stat(jsonlPath); err != nil { + return singleFileMatch{}, false + } + path = jsonlPath + rel = strings.TrimSuffix(rel, ".meta") + } + if !strings.HasSuffix(path, ".jsonl") { + return singleFileMatch{}, false + } + project, ok := reasonixLayoutProject( + strings.Split(rel, string(filepath.Separator)), + ) + if !ok { + return singleFileMatch{}, false + } + return singleFileMatch{ + Path: filepath.Clean(path), + ProjectHint: project, + }, true +} + +func reasonixFindFile(root, rawID string) (singleFileMatch, bool) { + path := findReasonixSourceFile(root, rawID) + if path == "" { + return singleFileMatch{}, false + } + return reasonixClassifyPath(root, path, false) +} + +func reasonixFingerprintSource( + src singleFileSource, +) (SourceFingerprint, error) { + info, err := os.Stat(src.Path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", src.Path, err) + 
} + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf( + "stat %s: source is a directory", src.Path, + ) + } + // Composite identity: fold the sibling .jsonl.meta sidecar into size and + // mtime so a metadata-only write (timestamps, topic title) re-parses the + // transcript, mirroring the legacy reasonixEffectiveInfo. + size := info.Size() + mtime := info.ModTime().UnixNano() + metaPath := src.Path + ".meta" + if metaInfo, err := os.Stat(metaPath); err == nil { + size += metaInfo.Size() + if metaMTime := metaInfo.ModTime().UnixNano(); metaMTime > mtime { + mtime = metaMTime + } + } + hash, err := hashReasonixSourceFile(src.Path) + if err != nil { + return SourceFingerprint{}, err + } + return SourceFingerprint{ + Size: size, + MTimeNS: mtime, + Hash: hash, + }, nil +} + +func reasonixParseFile( + src singleFileSource, req ParseRequest, +) ([]ParseResult, []string, error) { + sess, msgs, usageEvents, err := parseReasonixSession(src.Path, req.Machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + // Use the discovered project only when metadata did not supply one via + // workspace_root, matching the legacy processReasonix behavior. + if req.Source.ProjectHint != "" && sess.Project == "" { + sess.Project = req.Source.ProjectHint + } + // Reasonix uses a composite fingerprint (transcript plus .jsonl.meta + // sidecar); honor it so freshness state stays in lockstep with the skip + // cache while keeping the transcript content hash. 
+ if req.Fingerprint.Size > 0 { + sess.File.Size = req.Fingerprint.Size + } + if req.Fingerprint.MTimeNS > 0 { + sess.File.Mtime = req.Fingerprint.MTimeNS + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return []ParseResult{{ + Session: *sess, + Messages: msgs, + UsageEvents: usageEvents, + }}, nil, nil +} + +func hashReasonixSourceFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return "", fmt.Errorf("hash %s: %w", path, err) + } + + metaPath := path + ".meta" + meta, err := os.Open(metaPath) + if err != nil { + if os.IsNotExist(err) { + return fmt.Sprintf("%x", h.Sum(nil)), nil + } + return "", fmt.Errorf("open %s: %w", metaPath, err) + } + defer meta.Close() + + if _, err := io.WriteString(h, "\x00reasonix-meta\x00"); err != nil { + return "", fmt.Errorf("hash %s: %w", metaPath, err) + } + if _, err := io.Copy(h, meta); err != nil { + return "", fmt.Errorf("hash %s: %w", metaPath, err) + } + return fmt.Sprintf("%x", h.Sum(nil)), nil +} + +// reasonixLayoutProject validates a root-relative transcript path against the +// recognized Reasonix layouts and returns the owning project (empty for global, +// archive, and subagent sessions). 
+func reasonixLayoutProject(parts []string) (string, bool) { + // Project sessions: projects/{project}/sessions/{id}.jsonl + if len(parts) == 4 && parts[0] == "projects" && parts[2] == "sessions" && + strings.HasSuffix(parts[3], ".jsonl") { + return parts[1], true + } + // Project sessions: projects/{project}/sessions/{id}/{id}.jsonl + if len(parts) == 5 && parts[0] == "projects" && parts[2] == "sessions" { + base := strings.TrimSuffix(parts[4], ".jsonl") + if base != "" && parts[3] == base { + return parts[1], true + } + } + // Global or archive sessions: sessions/{id}.jsonl or archive/{id}.jsonl + if len(parts) == 2 { + if (parts[0] == "sessions" || parts[0] == "archive") && + strings.HasSuffix(parts[1], ".jsonl") { + return "", true + } + } + // Nested global or subagent: sessions/{id}/{id}.jsonl or + // sessions/subagents/{id}.jsonl + if len(parts) == 3 { + base := strings.TrimSuffix(parts[2], ".jsonl") + if parts[0] == "sessions" && (parts[1] == "subagents" || parts[1] == base) { + if base != "" { + return "", true + } + } + } + return "", false +} + +func reasonixProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/reasonix_test.go b/internal/parser/reasonix_test.go index e4865e62c..6dd9c1237 
100644 --- a/internal/parser/reasonix_test.go +++ b/internal/parser/reasonix_test.go @@ -44,7 +44,7 @@ func TestParseReasonixSession_Basic(t *testing.T) { `{"role":"assistant","content":"Here's a function","reasoning_content":"I need to write a function"}`, ) - sess, msgs, _, err := ParseReasonixSession(path, "test-machine") + sess, msgs, _, err := parseReasonixSession(path, "test-machine") require.NoError(t, err) require.NotNil(t, sess, "expected non-nil session") require.Len(t, msgs, 2) @@ -69,7 +69,7 @@ func TestParseReasonixSession_ToolCalls(t *testing.T) { `{"role":"assistant","content":"I'll read it","tool_calls":[{"id":"call_1","name":"read_file","arguments":"{\"path\":\"config.json\"}"}]}`, ) - _, msgs, _, err := ParseReasonixSession(path, "m") + _, msgs, _, err := parseReasonixSession(path, "m") require.NoError(t, err) require.Len(t, msgs, 2) @@ -89,7 +89,7 @@ func TestParseReasonixSession_ToolResults(t *testing.T) { `{"role":"tool","content":"file contents here","tool_call_id":"call_1"}`, ) - _, msgs, _, err := ParseReasonixSession(path, "m") + _, msgs, _, err := parseReasonixSession(path, "m") require.NoError(t, err) require.Len(t, msgs, 3) @@ -112,7 +112,7 @@ func TestParseReasonixSession_TimestampSessionID(t *testing.T) { timestampPath := filepath.Join(dir, "20260617-081849.643965200-deepseek-v4-pro.jsonl") require.NoError(t, os.Rename(path, timestampPath)) - sess, _, _, err := ParseReasonixSession(timestampPath, "m") + sess, _, _, err := parseReasonixSession(timestampPath, "m") require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "reasonix:20260617-081849.643965200-deepseek-v4-pro", sess.ID) @@ -128,7 +128,7 @@ func TestParseReasonixSession_SubagentID(t *testing.T) { subagentPath := filepath.Join(dir, "sa_20260612_105316_000000000_6b991b514f0a.jsonl") require.NoError(t, os.Rename(path, subagentPath)) - sess, _, _, err := ParseReasonixSession(subagentPath, "m") + sess, _, _, err := parseReasonixSession(subagentPath, "m") require.NoError(t, 
err) require.NotNil(t, sess) assert.Equal(t, "reasonix:sa_20260612_105316_000000000_6b991b514f0a", sess.ID) @@ -144,7 +144,7 @@ func TestParseReasonixSession_SpaceInSessionDir(t *testing.T) { content := `{"role":"user","content":"Test message"}` require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - sess, _, _, err := ParseReasonixSession(path, "m") + sess, _, _, err := parseReasonixSession(path, "m") require.NoError(t, err) require.NotNil(t, sess) // Should extract just the filename, not the directory @@ -168,7 +168,7 @@ func TestParseReasonixSession_MetadataFallback(t *testing.T) { } writeReasonixMetadata(t, path, meta) - sess, _, _, err := ParseReasonixSession(path, "m") + sess, _, _, err := parseReasonixSession(path, "m") require.NoError(t, err) require.NotNil(t, sess) @@ -199,7 +199,7 @@ func TestParseReasonixSession_MetadataFields(t *testing.T) { } writeReasonixMetadata(t, path, meta) - sess, _, _, err := ParseReasonixSession(path, "m") + sess, _, _, err := parseReasonixSession(path, "m") require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "Metadata title", sess.SessionName) @@ -239,7 +239,7 @@ func TestParseReasonixSession_PartialMetadataFallsBackToFileMtime(t *testing.T) require.NoError(t, err) writeReasonixMetadata(t, path, tt.meta) - sess, _, _, err := ParseReasonixSession(path, "m") + sess, _, _, err := parseReasonixSession(path, "m") require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, info.ModTime(), sess.StartedAt) @@ -256,7 +256,7 @@ func TestParseReasonixSession_ArchiveWithoutMeta(t *testing.T) { // Don't write metadata sidecar - archive files often lack .meta - sess, msgs, _, err := ParseReasonixSession(path, "m") + sess, msgs, _, err := parseReasonixSession(path, "m") require.NoError(t, err) require.NotNil(t, sess) require.Len(t, msgs, 2) @@ -277,7 +277,7 @@ func TestDiscoverReasonixSessions_ProjectSessions(t *testing.T) { sessionFile := filepath.Join(sessionDir, "session-123.jsonl") require.NoError(t, 
os.WriteFile(sessionFile, []byte(`{"role":"user","content":"test"}`), 0o644)) - files := DiscoverReasonixSessions(baseDir) + files := discoverReasonixSessions(baseDir) require.Len(t, files, 1) assert.Equal(t, sessionFile, files[0].Path) assert.Equal(t, "my-project", files[0].Project) @@ -293,7 +293,7 @@ func TestDiscoverReasonixSessions_ProjectBareSession(t *testing.T) { sessionFile := filepath.Join(sessionsDir, "session-123.jsonl") require.NoError(t, os.WriteFile(sessionFile, []byte(`{"role":"user","content":"test"}`), 0o644)) - files := DiscoverReasonixSessions(baseDir) + files := discoverReasonixSessions(baseDir) require.Len(t, files, 1) assert.Equal(t, sessionFile, files[0].Path) assert.Equal(t, "my-project", files[0].Project) @@ -310,7 +310,7 @@ func TestDiscoverReasonixSessions_GlobalSessions(t *testing.T) { sessionFile := filepath.Join(sessionsDir, "global-session.jsonl") require.NoError(t, os.WriteFile(sessionFile, []byte(`{"role":"user","content":"test"}`), 0o644)) - files := DiscoverReasonixSessions(baseDir) + files := discoverReasonixSessions(baseDir) require.Len(t, files, 1) assert.Equal(t, sessionFile, files[0].Path) assert.Equal(t, AgentReasonix, files[0].Agent) @@ -326,7 +326,7 @@ func TestDiscoverReasonixSessions_Subagents(t *testing.T) { subagentFile := filepath.Join(subagentsDir, "sa_20260612_105316_000000000_hash.jsonl") require.NoError(t, os.WriteFile(subagentFile, []byte(`{"role":"user","content":"test"}`), 0o644)) - files := DiscoverReasonixSessions(baseDir) + files := discoverReasonixSessions(baseDir) require.Len(t, files, 1) assert.Equal(t, subagentFile, files[0].Path) } @@ -341,7 +341,7 @@ func TestDiscoverReasonixSessions_Archive(t *testing.T) { archiveFile := filepath.Join(archiveDir, "20260612-104235.267202400.jsonl") require.NoError(t, os.WriteFile(archiveFile, []byte(`{"role":"user","content":"test"}`), 0o644)) - files := DiscoverReasonixSessions(baseDir) + files := discoverReasonixSessions(baseDir) require.Len(t, files, 1) 
assert.Equal(t, archiveFile, files[0].Path) } @@ -357,7 +357,7 @@ func TestFindReasonixSourceFile_ProjectSession(t *testing.T) { sessionFile := filepath.Join(sessionDir, "test-id.jsonl") require.NoError(t, os.WriteFile(sessionFile, []byte(""), 0o644)) - found := FindReasonixSourceFile(baseDir, "test-id") + found := findReasonixSourceFile(baseDir, "test-id") assert.Equal(t, sessionFile, found) } @@ -370,7 +370,7 @@ func TestFindReasonixSourceFile_ProjectBareSession(t *testing.T) { sessionFile := filepath.Join(sessionsDir, "test-id.jsonl") require.NoError(t, os.WriteFile(sessionFile, []byte(""), 0o644)) - found := FindReasonixSourceFile(baseDir, "test-id") + found := findReasonixSourceFile(baseDir, "test-id") assert.Equal(t, sessionFile, found) } @@ -384,7 +384,7 @@ func TestFindReasonixSourceFile_GlobalSession(t *testing.T) { sessionFile := filepath.Join(sessionsDir, "global-id.jsonl") require.NoError(t, os.WriteFile(sessionFile, []byte(""), 0o644)) - found := FindReasonixSourceFile(baseDir, "global-id") + found := findReasonixSourceFile(baseDir, "global-id") assert.Equal(t, sessionFile, found) } @@ -398,6 +398,6 @@ func TestFindReasonixSourceFile_Archive(t *testing.T) { archiveFile := filepath.Join(archiveDir, "archive-id.jsonl") require.NoError(t, os.WriteFile(archiveFile, []byte(""), 0o644)) - found := FindReasonixSourceFile(baseDir, "archive-id") + found := findReasonixSourceFile(baseDir, "archive-id") assert.Equal(t, archiveFile, found) } diff --git a/internal/parser/sibling_metadata_source_set_test.go b/internal/parser/sibling_metadata_source_set_test.go new file mode 100644 index 000000000..96f207fcf --- /dev/null +++ b/internal/parser/sibling_metadata_source_set_test.go @@ -0,0 +1,96 @@ +package parser + +import ( + "context" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSiblingMetadataSourceSetMapsSiblingEventsToPrimarySource(t *testing.T) { + root := t.TempDir() + sessionDir := 
filepath.Join(root, "session_123") + messagesPath := filepath.Join(sessionDir, "messages.jsonl") + metaPath := filepath.Join(sessionDir, "meta.json") + writeSourceFile(t, messagesPath, "{\"role\":\"user\"}\n") + writeSourceFile(t, metaPath, "{\"title\":\"Session\"}\n") + + sources := NewSiblingMetadataSourceSet( + AgentVibe, + []string{root}, + JSONLSourceSetOptions{ + Recursive: true, + Extensions: []string{".jsonl"}, + IncludePath: func(root, path string) bool { + return filepath.Base(path) == "messages.jsonl" + }, + }, + SiblingMetadataSourceSetOptions{ + SiblingGlobs: []string{"meta.json"}, + SiblingPaths: func(root, sourcePath string) []string { + return []string{filepath.Join(filepath.Dir(sourcePath), "meta.json")} + }, + SourcePathForSibling: func(root, siblingPath string) (string, bool) { + if filepath.Base(siblingPath) != "meta.json" { + return "", false + } + return filepath.Join(filepath.Dir(siblingPath), "messages.jsonl"), true + }, + }, + ) + + plan, err := sources.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.ElementsMatch(t, []string{"*.jsonl", "meta.json"}, plan.Roots[0].IncludeGlobs) + + changed, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: metaPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, messagesPath, changed[0].DisplayPath) + + fingerprint, err := sources.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + assert.Equal(t, messagesPath, fingerprint.Key) + assert.NotZero(t, fingerprint.Size) + assert.NotZero(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) +} + +func TestSiblingMetadataSourceSetFingerprintsSourceWithoutOpaque(t *testing.T) { + root := t.TempDir() + sessionDir := filepath.Join(root, "session_123") + messagesPath := filepath.Join(sessionDir, "messages.jsonl") + metaPath := filepath.Join(sessionDir, "meta.json") + 
writeSourceFile(t, messagesPath, "{\"role\":\"user\"}\n") + writeSourceFile(t, metaPath, "{\"title\":\"Session\"}\n") + + sources := NewSiblingMetadataSourceSet( + AgentVibe, + []string{root}, + JSONLSourceSetOptions{Recursive: true}, + SiblingMetadataSourceSetOptions{ + SiblingPaths: func(root, sourcePath string) []string { + return []string{filepath.Join(filepath.Dir(sourcePath), "meta.json")} + }, + }, + ) + source, ok, err := sources.FindSource( + context.Background(), + FindSourceRequest{StoredFilePath: messagesPath}, + ) + require.NoError(t, err) + require.True(t, ok) + source.Opaque = nil + + fingerprint, err := sources.Fingerprint(context.Background(), source) + + require.NoError(t, err) + assert.Equal(t, messagesPath, fingerprint.Key) + assert.NotEmpty(t, fingerprint.Hash) +} diff --git a/internal/parser/types.go b/internal/parser/types.go index 439c48b10..25a36b57c 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -68,15 +68,6 @@ type AgentDef struct { ShallowWatch bool // true = watch root only, rely on periodic sync for subdirs FileBased bool // false for DB-backed agents - // DiscoverFunc finds session files under a root directory. - // Nil for non-file-based agents. - DiscoverFunc func(string) []DiscoveredFile - - // FindSourceFunc locates a single session's source file - // given a root directory and the raw session ID (prefix - // already stripped). Nil for non-file-based agents. - FindSourceFunc func(string, string) string - // WatchRootsFunc resolves the directories to watch for live // updates under a configured root, for agents whose watch // targets depend on the on-disk layout rather than a static @@ -555,35 +546,34 @@ var Registry = []AgentDef{ // Aider has no central session store. It writes one Markdown // chat log per repo at {repo}/.aider.chat.history.md. There is // no safe canonical root: an always-on $HOME walk is prone to - // macOS privacy prompts and surprising background work. 
Users - // must opt in by setting AIDER_DIR or the aider_dirs config key - // to a code root they want scanned. + // macOS privacy prompts (Documents/Downloads/Music/Photos) during + // passive background refreshes, and to surprising background work. Users must + // opt in by setting AIDER_DIR or the aider_dirs config key to a + // code root they want scanned. A configured broad root such as + // $HOME still gets the bounded, symlink-safe, depth-capped, + // time-budgeted walk with protected-folder pruning. // // ShallowWatch is true because users can still opt into broad - // roots. Watch those roots shallowly and rely on the 15-minute - // periodic sync to pick up new repos' history files; aider history + // roots; watch those roots shallowly and rely on the 15-minute + // periodic sync to pick up new repos' history files. Aider history // is append-mostly, so this is an acceptable latency tradeoff. - Type: AgentAider, - DisplayName: "Aider", - EnvVar: "AIDER_DIR", - ConfigKey: "aider_dirs", - IDPrefix: "aider:", - FileBased: true, - ShallowWatch: true, - DiscoverFunc: DiscoverAiderSessions, - FindSourceFunc: FindAiderSourceFile, - }, - { - Type: AgentReasonix, - DisplayName: "Reasonix", - EnvVar: "REASONIX_DIR", - ConfigKey: "reasonix_dirs", - DefaultDirs: []string{".reasonix", "AppData/Roaming/reasonix"}, - IDPrefix: "reasonix:", - WatchSubdirs: []string{"sessions", "archive", "projects"}, - FileBased: true, - DiscoverFunc: DiscoverReasonixSessions, - FindSourceFunc: FindReasonixSourceFile, + Type: AgentAider, + DisplayName: "Aider", + EnvVar: "AIDER_DIR", + ConfigKey: "aider_dirs", + IDPrefix: "aider:", + FileBased: true, + ShallowWatch: true, + }, + { + Type: AgentReasonix, + DisplayName: "Reasonix", + EnvVar: "REASONIX_DIR", + ConfigKey: "reasonix_dirs", + DefaultDirs: []string{".reasonix", "AppData/Roaming/reasonix"}, + IDPrefix: "reasonix:", + WatchSubdirs: []string{"sessions", "archive", "projects"}, + FileBased: true, }, } diff --git a/internal/parser/types_test.go 
b/internal/parser/types_test.go index d1a99a00d..89f463118 100644 --- a/internal/parser/types_test.go +++ b/internal/parser/types_test.go @@ -485,10 +485,6 @@ func TestZedRegistryEntry(t *testing.T) { assert.Equal(t, "ZED_DIR", def.EnvVar) assert.Equal(t, "zed_dirs", def.ConfigKey) assert.Equal(t, "zed:", def.IDPrefix) - // Zed is a migrated, provider-authoritative agent: source discovery and - // lookup live on the concrete provider, not on legacy AgentDef hooks. - require.Nil(t, def.DiscoverFunc, "Zed DiscoverFunc") - require.Nil(t, def.FindSourceFunc, "Zed FindSourceFunc") } func TestShelleyRegistryEntry(t *testing.T) { @@ -498,21 +494,12 @@ func TestShelleyRegistryEntry(t *testing.T) { assert.Equal(t, "SHELLEY_DIR", def.EnvVar) assert.Equal(t, "shelley_dirs", def.ConfigKey) assert.Equal(t, "shelley:", def.IDPrefix) - // Shelley is a migrated, provider-authoritative agent: source discovery - // and lookup live on the concrete provider, not on legacy AgentDef hooks. - require.Nil(t, def.DiscoverFunc, "Shelley DiscoverFunc") - require.Nil(t, def.FindSourceFunc, "Shelley FindSourceFunc") } func TestOpenCodeRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentOpenCode) require.True(t, ok, "AgentOpenCode missing from Registry") require.True(t, def.FileBased, "OpenCode FileBased") - // OpenCode is a migrated, provider-authoritative agent: source - // discovery and lookup live on the concrete provider, not on legacy - // AgentDef hooks. 
- require.Nil(t, def.DiscoverFunc, "OpenCode DiscoverFunc") - require.Nil(t, def.FindSourceFunc, "OpenCode FindSourceFunc") want := []string{ "storage/session", "storage/message", @@ -526,10 +513,6 @@ func TestCoworkRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentCowork) require.True(t, ok, "AgentCowork missing from Registry") require.True(t, def.FileBased, "Cowork FileBased") - // Cowork is a migrated, provider-authoritative agent: source discovery - // and lookup live on the concrete provider, not on legacy AgentDef hooks. - require.Nil(t, def.DiscoverFunc, "Cowork DiscoverFunc") - require.Nil(t, def.FindSourceFunc, "Cowork FindSourceFunc") assert.Equal(t, "COWORK_DIR", def.EnvVar) assert.Equal(t, "cowork_dirs", def.ConfigKey) assert.Equal(t, "cowork:", def.IDPrefix) @@ -548,11 +531,6 @@ func TestMiMoCodeRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentMiMoCode) require.True(t, ok, "AgentMiMoCode missing from Registry") require.True(t, def.FileBased, "MiMoCode FileBased") - // MiMoCode is a migrated, provider-authoritative agent: source - // discovery and lookup live on the concrete provider, not on legacy - // AgentDef hooks. - require.Nil(t, def.DiscoverFunc, "MiMoCode DiscoverFunc") - require.Nil(t, def.FindSourceFunc, "MiMoCode FindSourceFunc") assert.Equal(t, "MIMOCODE_DIR", def.EnvVar) assert.Equal(t, "mimocode_dirs", def.ConfigKey) assert.Equal(t, []string{".local/share/mimocode"}, def.DefaultDirs) @@ -570,11 +548,6 @@ func TestCommandCodeRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentCommandCode) require.True(t, ok, "AgentCommandCode missing from Registry") require.True(t, def.FileBased, "Command Code FileBased") - // Command Code is a migrated, provider-authoritative agent: source - // discovery and lookup live on the concrete provider, not on legacy - // AgentDef hooks. 
- require.Nil(t, def.DiscoverFunc, "Command Code DiscoverFunc") - require.Nil(t, def.FindSourceFunc, "Command Code FindSourceFunc") assert.Equal(t, []string{".commandcode/projects"}, def.DefaultDirs) assert.Equal(t, "commandcode:", def.IDPrefix) } @@ -583,8 +556,6 @@ func TestDeepSeekTUIRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentDeepSeekTUI) require.True(t, ok, "AgentDeepSeekTUI missing from Registry") require.True(t, def.FileBased, "DeepSeek TUI FileBased") - assert.Nil(t, def.DiscoverFunc, "DeepSeek TUI DiscoverFunc") - assert.Nil(t, def.FindSourceFunc, "DeepSeek TUI FindSourceFunc") assert.Equal(t, "DeepSeek TUI", def.DisplayName) assert.Equal(t, "DEEPSEEK_TUI_SESSIONS_DIR", def.EnvVar) assert.Equal(t, "deepseek_tui_sessions_dirs", def.ConfigKey) @@ -1123,10 +1094,6 @@ func TestReasonixRegistryEntry(t *testing.T) { assert.Contains(t, reasonixDef.WatchSubdirs, "sessions") assert.Contains(t, reasonixDef.WatchSubdirs, "archive") - // Verify function pointers are set - assert.NotNil(t, reasonixDef.DiscoverFunc, "DiscoverFunc must be set") - assert.NotNil(t, reasonixDef.FindSourceFunc, "FindSourceFunc must be set") - // Verify default dirs contain .reasonix and Windows path assert.True(t, len(reasonixDef.DefaultDirs) > 0) hasUnix := false diff --git a/internal/parser/vibe_test.go b/internal/parser/vibe_test.go index 89b3b6085..41e4fa10b 100644 --- a/internal/parser/vibe_test.go +++ b/internal/parser/vibe_test.go @@ -625,10 +625,6 @@ func TestVibeAgentByType(t *testing.T) { assert.Equal(t, "vibe_session_dirs", def.ConfigKey) assert.Equal(t, "vibe:", def.IDPrefix) assert.True(t, def.FileBased) - // Vibe is provider-authoritative: discovery and source lookup live on the - // vibeProvider, not on legacy AgentDef hooks. 
- assert.Nil(t, def.DiscoverFunc) - assert.Nil(t, def.FindSourceFunc) } func TestVibeAgentByPrefix(t *testing.T) { diff --git a/internal/parser/virtual_source_path_test.go b/internal/parser/virtual_source_path_test.go new file mode 100644 index 000000000..0669c9ae2 --- /dev/null +++ b/internal/parser/virtual_source_path_test.go @@ -0,0 +1,54 @@ +package parser + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestVirtualSourcePathRoundTripAllowsHashInContainerPath(t *testing.T) { + container := filepath.Join("/tmp", "work#1", "sessions.db") + virtual := VirtualSourcePath(container, "session-1") + + gotContainer, gotSourceID, ok := ParseVirtualSourcePath(virtual) + + require.True(t, ok, "expected virtual source path to parse") + assert.Equal(t, container, gotContainer) + assert.Equal(t, "session-1", gotSourceID) +} + +func TestParseVirtualSourcePathRejectsMalformedPaths(t *testing.T) { + tests := []string{ + "", + "/tmp/sessions.db", + "/tmp/sessions.db#", + "#session-1", + } + + for _, path := range tests { + t.Run(path, func(t *testing.T) { + container, sourceID, ok := ParseVirtualSourcePath(path) + + assert.False(t, ok) + assert.Empty(t, container) + assert.Empty(t, sourceID) + }) + } +} + +func TestParseVirtualSourcePathForBase(t *testing.T) { + path := VirtualSourcePath(filepath.Join("/tmp", "sessions.db"), "session-1") + + container, sourceID, ok := ParseVirtualSourcePathForBase(path, "sessions.db") + + require.True(t, ok, "expected base name to match") + assert.Equal(t, filepath.Join("/tmp", "sessions.db"), container) + assert.Equal(t, "session-1", sourceID) + + container, sourceID, ok = ParseVirtualSourcePathForBase(path, "other.db") + assert.False(t, ok) + assert.Empty(t, container) + assert.Empty(t, sourceID) +} diff --git a/internal/ssh/resolve.go b/internal/ssh/resolve.go index e02254547..89d518f63 100644 --- a/internal/ssh/resolve.go +++ b/internal/ssh/resolve.go @@ 
-67,22 +67,28 @@ func buildAiderResolveSnippet(envVar string) string { // "agentType:path\n" per agent target, plus "@file:path\n" lines for sibling // metadata files such as Codex's session_index.jsonl. // -// Only includes file-based agents that have on-disk sources to -// resolve: either a legacy DiscoverFunc or a provider facade that has -// left legacy-only mode. For each agent with an EnvVar, the script -// checks the env var first and falls back to the default dir. Dirs (and -// files) that don't exist on the remote are skipped. +// Only includes file-based agents that have on-disk sources to resolve via +// their provider facade. For each agent with an EnvVar, the script checks the +// env var first and falls back to the default dir. Dirs (and files) that don't +// exist on the remote are skipped. func buildResolveScript() string { var b strings.Builder for _, def := range parser.Registry { if !resolveAgentHasOnDiskSource(def) { continue } + // Aider has no central store and no safe default root: it writes + // one .aider.chat.history.md per repository, so after the opt-in + // change it carries no DefaultDirs and the DefaultDirs loop below + // never runs for it. Handle it independently so an explicitly + // configured remote AIDER_DIR still resolves history files. Remote + // sync emits only discovered .aider.chat.history.md files as tar + // targets, never the configured code root or the remote $HOME. The + // shell guard in buildAiderResolveSnippet also drops AIDER_DIR set + // to literal "$HOME" (or "$HOME/"), so an unscoped override cannot + // reintroduce a whole-home scan or tar. Local sync is unaffected: + // it discovers via its provider facade, not this script. if def.Type == parser.AgentAider { - // Aider has no safe default root: it writes one history file per - // repository. 
Remote sync still supports an explicit AIDER_DIR by - // emitting only discovered history files as tar targets instead of - // the configured code root or the remote home directory. if def.EnvVar != "" { b.WriteString(buildAiderResolveSnippet(def.EnvVar)) } @@ -122,20 +128,15 @@ func buildResolveScript() string { } // resolveAgentHasOnDiskSource reports whether a file-based agent has -// on-disk sources the resolve script should probe: either a legacy -// DiscoverFunc or a provider facade that has left legacy-only mode. -// Provider-migrated agents drop their DiscoverFunc but still have a -// configurable directory, so they must stay in the remote resolve set. +// on-disk sources the resolve script should probe via its provider facade. +// Provider-authoritative agents have a configurable directory, so they must +// stay in the remote resolve set. func resolveAgentHasOnDiskSource(def parser.AgentDef) bool { if !def.FileBased { return false } - if def.DiscoverFunc != nil { - return true - } switch parser.ProviderMigrationModes()[def.Type] { - case parser.ProviderMigrationShadowCompare, - parser.ProviderMigrationProviderAuthoritative: + case parser.ProviderMigrationProviderAuthoritative: _, ok := parser.ProviderFactoryByType(def.Type) return ok default: diff --git a/internal/ssh/resolve_test.go b/internal/ssh/resolve_test.go index bd2da1c7c..e61d66c10 100644 --- a/internal/ssh/resolve_test.go +++ b/internal/ssh/resolve_test.go @@ -21,7 +21,7 @@ func TestBuildResolveScript(t *testing.T) { // Non-file-based agents must not appear. for _, def := range parser.Registry { - if def.FileBased || def.DiscoverFunc != nil { + if def.FileBased { continue } marker := "\"" + string(def.Type) + ":" @@ -29,14 +29,17 @@ func TestBuildResolveScript(t *testing.T) { "non-file-based agent %s in script", def.Type) } - // Every file-based agent with DiscoverFunc must appear. 
+ // Every file-based, provider-authoritative agent has on-disk source + // roots that SSH sync must transfer, so it must appear in the script. for _, def := range parser.Registry { - if !def.FileBased || def.DiscoverFunc == nil { + if !def.FileBased || + parser.ProviderMigrationModes()[def.Type] != + parser.ProviderMigrationProviderAuthoritative { continue } marker := "\"" + string(def.Type) + ":" assert.Contains(t, script, marker, - "file-based agent %s missing from script", def.Type) + "provider-authoritative agent %s missing from script", def.Type) } } diff --git a/internal/sync/engine.go b/internal/sync/engine.go index f1582207c..9d895683b 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -74,15 +74,11 @@ type EngineConfig struct { // that wrote data. Safe to leave nil (e.g., in PG serve mode // where the engine is not run). Emitter Emitter - // ProviderFactories and ProviderMigrationModes let stacked provider - // migration branches opt concrete providers into side-effect-free - // caller-level shadow observation before provider writes become - // authoritative. Nil uses the parser package registry/manifest. + // ProviderFactories and ProviderMigrationModes select which concrete + // providers own discovery and parsing for their agents. Nil uses the + // parser package registry/manifest. ProviderFactories []parser.ProviderFactory ProviderMigrationModes map[parser.AgentType]parser.ProviderMigrationMode - // ProviderShadowRecorder receives serialized shadow observations. - // Nil logs only provider errors or mismatches. - ProviderShadowRecorder func(ProviderShadowComparison) } // Engine orchestrates session file discovery and sync. 
@@ -113,8 +109,6 @@ type Engine struct { emitter Emitter providerFactories map[parser.AgentType]parser.ProviderFactory providerMigrationModes map[parser.AgentType]parser.ProviderMigrationMode - providerShadowMu gosync.Mutex - providerShadowRecorder func(ProviderShadowComparison) // forceParse disables every stored-state skip (skip cache, // size/mtime/data_version checks, incremental JSONL deltas) so @@ -214,7 +208,6 @@ func NewEngine( emitter: cfg.Emitter, providerFactories: providerFactoryMap(providerFactories), providerMigrationModes: providerModes, - providerShadowRecorder: cfg.ProviderShadowRecorder, } } @@ -572,8 +565,7 @@ func (e *Engine) classifyProviderChangedPath( for _, agentType := range agents { mode := e.providerMigrationModes[agentType] switch mode { - case parser.ProviderMigrationShadowCompare, - parser.ProviderMigrationProviderAuthoritative: + case parser.ProviderMigrationProviderAuthoritative: default: continue } @@ -961,63 +953,12 @@ func (e *Engine) classifyOnePath( if df, ok := e.classifyContainerPath(path, pathExists); ok { return df, true } - // Reasonix sidecar delete events arrive after .jsonl.meta no longer - // exists; classify them against the sibling transcript before the - // generic missing-path guard. - if strings.HasSuffix(path, ".jsonl.meta") { - if df, ok := e.classifyReasonixPath(path); ok { - return df, true - } - } - if !pathExists { - return parser.DiscoveredFile{}, false - } - if df, ok := e.classifyReasonixPath(path); ok { - return df, true - } - - // Claude change-path classification is provider-authoritative; the - // Claude provider's SourcesForChangedPath reproduces the - // //.jsonl and - // ///subagents/**/agent-.jsonl - // shapes, so the legacy block was removed when Claude was folded - // onto its provider. 
- - if df, ok := e.classifyAiderPath(path); ok { - return df, true - } - - // Antigravity IDE and CLI source/sidecar paths (conversations/.db, - // conversations|implicit/.pb, their WAL/SHM and trajectory.json - // sidecars, and annotations/brain/history.jsonl) are owned by the - // provider-authoritative SourcesForChangedPath path in - // classifyProviderChangedPath. - - return parser.DiscoveredFile{}, false -} - -// classifyAiderPath handles Aider's rootless chat-history layout: -// -// /.../.aider.chat.history.md -// -// extracted from classifyOnePath to stay within nilaway CFG limits. -func (e *Engine) classifyAiderPath( - path string, -) (parser.DiscoveredFile, bool) { - if filepath.Base(path) != parser.AiderHistoryFileName() { - return parser.DiscoveredFile{}, false - } - for _, aiderDir := range e.agentDirs[parser.AgentAider] { - if aiderDir == "" { - continue - } - if _, ok := isUnder(aiderDir, path); ok { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentAider, - }, true - } - } + // All file-backed agents are provider-authoritative: changed-path + // classification (including Reasonix's .jsonl.meta sidecar mapping, + // Aider's rootless history files, Claude's project transcripts, and + // Antigravity's sidecar fan-out) is owned by each provider's + // SourcesForChangedPath via classifyProviderChangedPath, so no legacy + // classifier remains here. return parser.DiscoveredFile{}, false } @@ -1802,19 +1743,6 @@ func (e *Engine) syncAllLocked( var all []parser.DiscoveredFile counts := make(map[parser.AgentType]int) - for _, def := range parser.Registry { - if !def.FileBased || def.DiscoverFunc == nil { - continue - } - for _, d := range e.agentDirs[def.Type] { - if !scope.includes(d) { - continue - } - found := def.DiscoverFunc(d) - counts[def.Type] += len(found) - all = append(all, found...) 
- } - } providerFound, providerFailures := e.discoverProviderSources(ctx, scope) for _, file := range providerFound { counts[file.Agent]++ @@ -2012,11 +1940,8 @@ func (e *Engine) syncAllLocked( } // discoverProviderSources runs full-sync discovery through the provider facade -// for every concrete provider that is authoritative. It is the provider-shape -// counterpart to the legacy AgentDef.DiscoverFunc loop, so a provider can drop -// its DiscoverFunc and still be discovered once it owns live processing. Shadow -// mode remains observational and never appends provider-only work to the live -// sync list. +// for every concrete provider that is authoritative. It is the sole on-disk +// discovery path: every file-based agent owns discovery through its provider. func (e *Engine) discoverProviderSources( ctx context.Context, scope *rootSyncScope, @@ -2269,10 +2194,9 @@ func (e *Engine) discoveredFileEffectiveMtime( if file.Agent == parser.AgentCodex { return discoveredFileMtime(file) } - // Only provider-authoritative sources resolve freshness through the - // provider Fingerprint. Shadow-compare files keep the legacy mtime path so - // agent-specific incremental-sync behavior (for example the Codex index - // refresh below) is unchanged while a provider is still shadowed. + // Provider-authoritative sources resolve freshness through the provider + // Fingerprint so composite provider-owned source state participates in + // incremental-sync cutoff checks. if file.ProviderSource != nil && file.ProviderProcess { if mtime, ok, err := e.providerSourceMtime(ctx, file); err != nil { return 0, err @@ -3013,6 +2937,15 @@ func (e *Engine) collectAndBatch( forceReplace: r.forceReplace, }) } + // A Kiro SQLite store is discovered as one container source + // but fans out into one session per row, so `total` counted it + // as a single file. 
Add the extra sessions it produced to keep + // TotalSessions a session count, matching the per-session tally + // the legacy syncKiroSQLite phase reported. + if len(r.results) > 1 && + filepath.Base(r.path) == kiroSQLiteDBName { + stats.TotalSessions += len(r.results) - 1 + } } if len(pending) >= batchSize { @@ -3149,92 +3082,12 @@ func (e *Engine) processFile( return res } - statPath := file.Path - if dbPath, _, ok := parseKiroSQLiteVirtualPath(file.Path); ok { - statPath = dbPath - } else if dbPath, _, ok := parser.ParseVirtualSourcePathForBase(file.Path, "threads.db"); ok { - statPath = dbPath - } else if dbPath, _, ok := parser.ParseVirtualSourcePathForBase(file.Path, shelleyDBFile); ok { - statPath = dbPath - } else if historyPath, _, ok := parser.ParseAiderVirtualPath(file.Path); ok { - // aider stores "#"; stat the physical file - // so SyncSingleSession (live watcher / on-demand re-sync) works. - statPath = historyPath - } - info, err := os.Stat(statPath) - if err != nil { - if os.IsNotExist(err) && - file.ForceParse && - providerDeletedPhysicalSQLiteSource(file.Agent, file.Path) { - return processResult{forceReplace: true} - } - return processResult{ - err: fmt.Errorf("stat %s: %w", file.Path, err), - } - } - - // Capture mtime once from the initial stat so all - // downstream cache operations use a consistent value. - mtime := info.ModTime().UnixNano() - if file.Agent == parser.AgentCowork { - mtime = parser.CoworkSessionMtime(file.Path, mtime) - } - if file.Agent == parser.AgentVibe { - // Vibe metadata (title, model, usage, canonical ID) lives in the - // sibling meta.json, so the skip-cache key must move when either file - // changes. Match vibeEffectiveInfo (max of messages.jsonl and - // meta.json) so a fixed meta.json retries a cached parse error instead - // of staying skipped on the unchanged transcript mtime. 
- mtime = vibeEffectiveInfo(file.Path, info).ModTime().UnixNano() - } - if file.Agent == parser.AgentReasonix { - mtime = reasonixEffectiveInfo(file.Path, info).ModTime().UnixNano() - } - cacheSkip := e.shouldCacheSkip(file) - - // Skip files cached from a previous sync (parse errors - // or non-interactive sessions) whose mtime is unchanged. - // Legacy codex_exec entries from pre-bulk-sync builds are - // scrubbed once at engine construction by - // migrateLegacyCodexExecSkips, so this check can treat - // the skip cache as authoritative without per-file - // re-validation. - if cacheSkip && !e.forceParse && !file.ForceParse { // parse-diff: ignore the skip cache - e.skipMu.RLock() - cachedMtime, cached := e.skipCache[file.Path] - e.skipMu.RUnlock() - if cached && cachedMtime == mtime { - if e.pathNeedsCachedSkipBypass(file.Path) { - e.clearSkip(file.Path) - } else { - res := processResult{ - skip: true, - mtime: mtime, - cacheSkip: true, - } - e.observeProviderShadow(ctx, file, res) - return res - } - } - } - - var res processResult - switch file.Agent { - case parser.AgentReasonix: - res = e.processReasonix(file, info) - case parser.AgentAider: - res = e.processAider(file, info) - default: - res = processResult{ - err: fmt.Errorf( - "unknown agent type: %s", file.Agent, - ), - } + // Every registered agent is provider-authoritative, so processProviderFile + // owns all file processing. Anything reaching here is an unrecognized agent + // type rather than a legacy parser fall-through. 
+ return processResult{ + err: fmt.Errorf("unknown agent type: %s", file.Agent), } - res.cacheSkip = cacheSkip - res.mtime = mtime - e.observeProviderShadow(ctx, file, res) - return res } func (e *Engine) pathNeedsProjectReparse(path string) bool { @@ -3288,8 +3141,9 @@ func (e *Engine) processProviderFile( }, true } provider := factory.NewProvider(parser.ProviderConfig{ - Roots: e.agentDirs[file.Agent], - Machine: e.machine, + Roots: e.agentDirs[file.Agent], + Machine: e.machine, + PathRewriter: e.pathRewriter, }) source, found, err := e.providerSourceForDiscoveredFile(ctx, provider, file) @@ -3531,11 +3385,13 @@ func (e *Engine) processProviderFile( } // dropUnchangedSharedSQLiteResults reproduces the legacy per-session skip the -// folded processZed/processShelley loops performed. Zed and Shelley keep every -// session in one shared SQLite database, so the provider re-parses every -// session on any database change. Without a per-session filter the engine would -// rewrite and recount unchanged sessions. This drops results whose stored -// file_mtime (and, for Shelley's second-precision timestamps, the content +// folded processZed/processShelley loops and the aiderFileUnchanged check +// performed. Zed and Shelley keep every session in one shared SQLite database, +// and Aider fans every run out of one shared history file, so the provider +// re-parses every session on any change to that shared source. Without a +// per-session filter the engine would rewrite and recount unchanged sessions. +// This drops results whose stored file_mtime (and, for Shelley's +// second-precision timestamps and Aider's whole-file content hash, the // fingerprint stored in file_hash) and data_version already match, using the // path rewriter so remote stored paths resolve. Force-parse runs (parse-diff, // single-session resync) keep every result so they always re-emit. 
@@ -3550,6 +3406,10 @@ func (e *Engine) dropUnchangedSharedSQLiteResults( switch file.Agent { case parser.AgentShelley: compareHash = true + case parser.AgentAider: + // Every aider run in a history file shares the file's content hash, so + // a same-mtime append/truncate is caught by the hash compare. + compareHash = true case parser.AgentZed: default: return results @@ -3794,119 +3654,6 @@ func providerProcessCacheKey( return file.Path } -func (e *Engine) observeProviderShadow( - ctx context.Context, - file parser.DiscoveredFile, - legacy processResult, -) { - mode := e.providerMigrationModes[file.Agent] - if mode != parser.ProviderMigrationShadowCompare { - return - } - comparison := ProviderShadowComparison{File: file, Mode: mode} - if reason := providerShadowNotComparableReason(legacy); reason != "" { - comparison.NotComparableReason = reason - e.recordProviderShadowComparison(comparison) - return - } - factory, ok := e.providerFactories[file.Agent] - if !ok { - return - } - provider := factory.NewProvider(parser.ProviderConfig{ - Roots: e.agentDirs[file.Agent], - Machine: e.machine, - }) - source, found, err := e.providerSourceForDiscoveredFile(ctx, provider, file) - comparison.Err = err - if err == nil && found { - comparison.Source = source - comparison.Observation, comparison.Err = ObserveProviderSource( - ctx, - provider, - ProviderObserveRequest{ - Source: source, - Machine: e.machine, - ForceParse: e.forceParse || file.ForceParse, - }, - ) - if comparison.Err == nil { - comparison.Mismatches = compareProviderObservationToProcessResult( - comparison.Observation, - legacy, - file, - ) - } - } - if err == nil && !found { - comparison.Err = fmt.Errorf( - "%s provider shadow source not found for %s", - file.Agent, - file.Path, - ) - } - e.recordProviderShadowComparison(comparison) -} - -func providerShadowNotComparableReason(legacy processResult) string { - switch { - case legacy.err != nil: - return "legacy error" - case legacy.incremental != nil: - return 
"legacy incremental" - case legacy.skip: - return "legacy skip" - default: - return "" - } -} - -func (e *Engine) recordProviderShadowComparison( - comparison ProviderShadowComparison, -) { - if e.providerShadowRecorder != nil { - e.providerShadowMu.Lock() - defer e.providerShadowMu.Unlock() - e.providerShadowRecorder(comparison) - return - } - if comparison.NotComparableReason != "" { - return - } - sourceKey := comparison.Source.Key - if sourceKey == "" { - sourceKey = comparison.Source.FingerprintKey - } - fingerprintKey := comparison.Observation.Fingerprint.Key - if fingerprintKey == "" { - fingerprintKey = comparison.Source.FingerprintKey - } - if comparison.Err != nil { - log.Printf( - "%s provider shadow %s mode=%s source=%q fingerprint=%q: %v", - comparison.File.Agent, - comparison.File.Path, - comparison.Mode, - sourceKey, - fingerprintKey, - comparison.Err, - ) - return - } - if len(comparison.Mismatches) == 0 { - return - } - log.Printf( - "%s provider shadow %s mode=%s source=%q fingerprint=%q mismatches: %s", - comparison.File.Agent, - comparison.File.Path, - comparison.Mode, - sourceKey, - fingerprintKey, - strings.Join(comparison.Mismatches, "; "), - ) -} - func processFileUsesProvider(agent parser.AgentType) bool { switch agent { case parser.AgentForge, parser.AgentPiebald, parser.AgentWarp: @@ -4018,14 +3765,12 @@ func (e *Engine) shouldCacheSkip( } } if file.Agent == parser.AgentAider { - // A virtual aider path ("#") resolves to one - // run inside a shared physical file; let processAider own it so the - // generic per-file mtime cache cannot stand in for the per-run parse. - // The physical history file itself keeps the generic mtime skip: any - // write bumps the file mtime and re-parses every run. - if _, _, ok := parser.ParseAiderVirtualPath(file.Path); ok { - return false - } + // Aider fans one physical history file out into per-run virtual + // sessions. 
A mtime-only skip can hide same-mtime content changes, + // missing run rows, or stale per-run data versions before the + // provider fingerprint and dropUnchangedSharedSQLiteResults hash + // checks run, so all Aider freshness stays on that provider-aware path. + return false } if !isOpenCodeFormatStorageAgent(file.Agent) { return true @@ -5057,45 +4802,6 @@ func (e *Engine) classifyReasonixPath( return parser.DiscoveredFile{}, false } -func (e *Engine) processReasonix( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - effectiveInfo := reasonixEffectiveInfo(file.Path, info) - if e.shouldSkipByPath(file.Path, effectiveInfo) { - return processResult{skip: true} - } - - sess, msgs, _, err := parser.ParseReasonixSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - // Use the discovered project only when metadata did not supply a - // project via workspace_root. - if file.Project != "" && sess.Project == "" { - sess.Project = file.Project - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - sess.File.Size = effectiveInfo.Size() - sess.File.Mtime = effectiveInfo.ModTime().UnixNano() - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func reasonixEffectiveInfo(path string, info os.FileInfo) os.FileInfo { size := info.Size() mtime := info.ModTime().UnixNano() @@ -5126,126 +4832,6 @@ func vibeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -// aiderFileUnchanged reports whether a physical aider history file is -// unchanged since the last sync. Aider sessions are stored under virtual -// "#" paths, so the generic shouldSkipByPath (which looks the -// physical path up in the DB) never matches and would re-parse, re-hash, and -// re-write every run on every full/periodic sync. 
Mirror the per-virtual-path -// skip the other multi-session agents use (cf. kiroSQLitePendingSessionIDs). -// -// The whole file is skipped only when EVERY expected run row is known -// current: each run meta's virtual path must have a stored row whose size and -// mtime match this file's and whose data version is current. Size is checked -// alongside mtime so a same-mtime append/truncate is not wrongly skipped. If -// any run row is missing (e.g. a previous batch wrote only some runs, or a new run was -// appended whose row does not exist yet) or stale (an older data version, or -// resynced after a data-version bump while siblings were not), the file is -// re-parsed so the remaining sessions are repaired. Skipping on the first -// matching row would strand those runs forever. A run-less or unreadable -// file is treated as changed (never skipped) so it is retried. -func (e *Engine) aiderFileUnchanged(path string, info os.FileInfo) bool { - metas, err := parser.ListAiderRunMetas(path) - if err != nil || len(metas) == 0 { - return false - } - mtime := info.ModTime().UnixNano() - size := info.Size() - current := db.CurrentDataVersion() - expected := 0 - for _, m := range metas { - // Header-only runs produce no session row, so the fan-out never - // writes one for them; do not expect a stored row. - if !m.HasMessages { - continue - } - expected++ - lookupPath := m.VirtualPath - if e.pathRewriter != nil { - lookupPath = e.pathRewriter(lookupPath) - } - storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) - if !ok || storedSize != size || storedMtime != mtime || - e.db.GetDataVersionByPath(lookupPath) < current { - // This run is missing or stale: do not skip the file, so the - // fan-out re-parses and repairs every run. The size is compared - // alongside mtime so a same-mtime append/truncate (which leaves - // new or removed runs unsynced) is never wrongly skipped. 
- return false - } - } - // Skip only when at least one run was expected and all expected run rows - // are current. A file whose runs all lack turns produces no sessions, so - // there is nothing to skip-and-strand; re-parse it (cheap, capped read). - return expected > 0 -} - -// aiderIdentityPath returns the canonical history-file path used to derive -// stable aider session IDs. During remote SSH sync the file is read from a -// random temp extraction dir, so hashing the on-disk path would re-key the -// run on every sync; rewriting it to its canonical remote path keeps the ID -// stable. Returns "" for local sync (no pathRewriter), which makes the -// parser fall back to the on-disk path -- the original local behavior. -func (e *Engine) aiderIdentityPath(historyPath string) string { - if e.pathRewriter == nil { - return "" - } - return e.pathRewriter(historyPath) -} - -func (e *Engine) processAider( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Virtual path "#": parse that one run only. Used - // when re-syncing a single session by its source path. - if historyPath, idx, ok := parser.ParseAiderVirtualPath(file.Path); ok { - sess, msgs, err := parser.ParseAiderRunWithID( - historyPath, e.aiderIdentityPath(historyPath), idx, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - if hash, err := ComputeFileHash(historyPath); err == nil { - sess.File.Hash = hash - } - return processResult{ - results: []parser.ParseResult{{Session: *sess, Messages: msgs}}, - forceReplace: true, - } - } - - // parse-diff: !e.forceParse disables the stored-state skip so a forced - // reparse re-reads already-synced aider files instead of skipping them. - if !e.forceParse && e.aiderFileUnchanged(file.Path, info) { - return processResult{skip: true} - } - - // Physical history file: fan it out into one session per run. The file - // is read and split once. 
The whole file shares one content hash, so - // any write re-parses every run (acceptable: aider history is - // append-mostly and a single capped read). - results, err := parser.ParseAiderRunsWithID( - file.Path, e.aiderIdentityPath(file.Path), e.machine, - ) - if err != nil { - return processResult{err: err} - } - if len(results) == 0 { - return processResult{} - } - if hash, err := ComputeFileHash(file.Path); err == nil { - for i := range results { - results[i].Session.File.Hash = hash - } - } - return processResult{ - results: results, - forceReplace: true, - } -} - func commandCodeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { size := info.Size() mtime := info.ModTime().UnixNano() @@ -7013,8 +6599,9 @@ func (e *Engine) FindSourceFile(sessionID string) string { if !e.isProviderAuthoritative(def.Type) { return "" } + storedPath := e.db.GetSessionFilePath(sessionID) if f := e.findProviderSourceFile( - context.Background(), def, sessionID, rawSessionID, + context.Background(), def, sessionID, rawSessionID, storedPath, ); f != "" { return f } @@ -7036,6 +6623,13 @@ func (e *Engine) FindSourceFile(sessionID string) string { } bareID := strings.TrimPrefix(rawID, def.IDPrefix) + storedPath := e.db.GetSessionFilePath(sessionID) + + if f := e.findProviderSourceFile( + context.Background(), def, sessionID, bareID, storedPath, + ); f != "" { + return f + } // Prefer stored file_path — it's authoritative and handles // cases where the session ID doesn't match the filename. @@ -7043,13 +6637,14 @@ func (e *Engine) FindSourceFile(sessionID string) string { // #) for the existence check, but // return the stored path so downstream parsing stays scoped to // the requested conversation rather than the whole trace file. 
- if fp := e.db.GetSessionFilePath(sessionID); fp != "" { + if fp := storedPath; fp != "" { if historyPath, idx, ok := parser.ParseAiderVirtualPath(fp); ok { // aider's stored "#" is positional: an // inserted or removed earlier run shifts the index onto a // different session. Only trust the stored path when run idx // still recomputes to the requested raw ID; otherwise fall - // through to FindSourceFunc, which re-resolves by raw ID. + // through. The provider facade, tried first above, owns raw-ID + // re-resolution. if got, ok := parser.AiderRawIDAt(historyPath, idx); ok && got == bareID { return fp } @@ -7058,18 +6653,6 @@ func (e *Engine) FindSourceFile(sessionID string) string { } } - if def.FindSourceFunc != nil { - for _, d := range e.agentDirs[def.Type] { - if f := def.FindSourceFunc(d, bareID); f != "" { - return f - } - } - } - if f := e.findProviderSourceFile( - context.Background(), def, sessionID, bareID, - ); f != "" { - return f - } return "" } @@ -7081,16 +6664,15 @@ func (e *Engine) isProviderAuthoritative(agent parser.AgentType) bool { } // findProviderSourceFile resolves a single session's source file through the -// provider facade for authoritative concrete providers. It is the -// provider-shape counterpart to AgentDef.FindSourceFunc, so a provider can drop -// its FindSourceFunc hook and stay locatable for diagnostics, export, and -// parse-diff lookups once it owns live processing. Shadow mode remains -// observational and must not satisfy lookups that legacy lookup would miss. +// provider facade for authoritative concrete providers. It is the sole +// source-lookup path, keeping sessions locatable for diagnostics, export, and +// parse-diff lookups. 
func (e *Engine) findProviderSourceFile( ctx context.Context, def parser.AgentDef, sessionID string, rawSessionID string, + storedPath string, ) string { mode := e.providerMigrationModes[def.Type] if mode != parser.ProviderMigrationProviderAuthoritative { @@ -7101,12 +6683,17 @@ func (e *Engine) findProviderSourceFile( return "" } provider := factory.NewProvider(parser.ProviderConfig{ - Roots: e.agentDirs[def.Type], - Machine: e.machine, + Roots: e.agentDirs[def.Type], + Machine: e.machine, + PathRewriter: e.pathRewriter, }) source, found, err := provider.FindSource(ctx, parser.FindSourceRequest{ - RawSessionID: rawSessionID, - FullSessionID: sessionID, + RawSessionID: rawSessionID, + FullSessionID: sessionID, + StoredFilePath: storedPath, + FingerprintKey: storedPath, + RequireFreshSource: true, + PreferStoredSource: true, }) if err != nil { log.Printf("%s provider source lookup: %v", def.Type, err) diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 5fa1a7389..3744c23c4 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -8307,7 +8307,7 @@ func TestIncrementalSync_CodexStoresEffectiveMtime(t *testing.T) { "effective mtime exceeds the plain rollout mtime") } -func TestIncrementalSync_CodexHashMatchesConsumedPrefix(t *testing.T) { +func TestIncrementalSync_CodexAppendFullReparseStoresRawFileSize(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") } @@ -8349,13 +8349,17 @@ func TestIncrementalSync_CodexHashMatchesConsumedPrefix(t *testing.T) { live, err := os.ReadFile(path) require.NoError(t, err, "read live transcript") - require.Less(t, *sess.FileSize, int64(len(live)), - "partial trailing JSON should remain outside the consumed prefix") - prefix := live[:*sess.FileSize] - sum := sha256.Sum256(prefix) + // Codex does not advertise incremental append, so re-syncing the appended + // transcript is a full re-parse that stores the raw file size 
and hash + // (including the ignored partial trailing line). The parsed-snapshot vs + // partial-tail distinction is enforced at parse-diff time via + // CodexTranscriptConsumedSize, not in the stored fingerprint. + require.Equal(t, int64(len(live)), *sess.FileSize, + "full Codex re-parse stores the raw file size") + sum := sha256.Sum256(live) wantHash := fmt.Sprintf("%x", sum[:]) assert.Equal(t, wantHash, *sess.FileHash, - "incremental Codex hash must match the consumed file_size prefix") + "stored Codex hash matches the whole-file fingerprint") } func TestIncrementalSync_CodexExecAppendRetainsEvents(t *testing.T) { diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index da25df69d..b223e6368 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -1923,8 +1923,7 @@ func TestShouldSkipByPathWithRewriter(t *testing.T) { // writeAiderHistory writes a two-content-run plus one header-only-run // history file under a fresh repo dir and returns its path. The header-only -// trailing run produces no session, exercising the HasMessages path of -// aiderFileUnchanged. +// trailing run produces no session, so a fan-out parse yields two sessions. func writeAiderHistory(t *testing.T) string { t.Helper() repo := filepath.Join(t.TempDir(), "myrepo") @@ -1939,183 +1938,242 @@ func writeAiderHistory(t *testing.T) string { return path } -// insertAiderRunRow stores a session row for one aider virtual run path at -// the given size, mtime, and data version, mirroring what a real fan-out write -// produces. data_version is stamped separately because UpsertSession does -// not persist it. The stored size must match the history file's reported size -// for aiderFileUnchanged to treat the run as current. 
-func insertAiderRunRow( - t *testing.T, database *db.DB, - virtualPath string, size, mtime int64, dataVersion int, +func newAiderProviderTestEngine( + database *db.DB, + path string, + forceParse bool, +) *Engine { + root := filepath.Dir(filepath.Dir(path)) + return &Engine{ + db: database, + machine: "local", + forceParse: forceParse, + skipCache: make(map[string]int64), + agentDirs: map[parser.AgentType][]string{ + parser.AgentAider: {root}, + }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentAider: parser.ProviderMigrationProviderAuthoritative, + }, + } +} + +func persistAiderProviderResults( + t *testing.T, + database *db.DB, + results []parser.ParseResult, + mutate func(index int, session *db.Session, dataVersion *int), ) { t.Helper() - id := "aider:" + virtualPath - require.NoError(t, database.UpsertSession(db.Session{ - ID: id, - Project: "myrepo", - Machine: "local", - Agent: string(parser.AgentAider), - FilePath: strPtr(virtualPath), - FileSize: int64Ptr(size), - FileMtime: int64Ptr(mtime), - })) - require.NoError(t, database.SetSessionDataVersion(id, dataVersion)) + for i, r := range results { + dataVersion := db.CurrentDataVersion() + row := db.Session{ + ID: r.Session.ID, + Project: r.Session.Project, + Machine: "local", + Agent: string(parser.AgentAider), + FilePath: strPtr(r.Session.File.Path), + FileSize: int64Ptr(r.Session.File.Size), + FileMtime: int64Ptr(r.Session.File.Mtime), + FileHash: strPtr(r.Session.File.Hash), + } + if mutate != nil { + mutate(i, &row, &dataVersion) + } + require.NoError(t, database.UpsertSession(row)) + require.NoError(t, database.SetSessionDataVersion(row.ID, dataVersion)) + } } -// TestAiderFileUnchangedRequiresAllRuns is the MEDIUM-2 regression test: -// aiderFileUnchanged must skip a file only when EVERY content-bearing run -// row is current. 
Skipping on the first matching row (the old behavior) -// would strand runs that a partial batch never wrote or that went stale -// after a data-version bump, so they would never be repaired. -func TestAiderFileUnchangedRequiresAllRuns(t *testing.T) { - const mtime = int64(1_700_000_000_000_000_000) - const size = int64(4096) - cur := db.CurrentDataVersion() - - metasFor := func(t *testing.T, path string) []parser.AiderRunMeta { - t.Helper() - metas, err := parser.ListAiderRunMetas(path) - require.NoError(t, err) - // Two content-bearing runs plus one header-only run. - require.Len(t, metas, 3) - require.True(t, metas[0].HasMessages) - require.True(t, metas[1].HasMessages) - require.False(t, metas[2].HasMessages) - return metas +// TestProcessFileAiderProviderFanOut verifies the migrated Aider provider, run +// through processFile, fans one history file out into one session per +// content-bearing run under stable "#" virtual paths and +// force-replaces on parse. An unchanged re-sync drops every already-current run +// (the per-run skip that the legacy aiderFileUnchanged provided, now handled by +// dropUnchangedSharedSQLiteResults), while a forced parse re-emits them all. +func TestProcessFileAiderProviderFanOut(t *testing.T) { + database := openTestDB(t) + path := writeAiderHistory(t) + file := parser.DiscoveredFile{Agent: parser.AgentAider, Path: path} + + res := newAiderProviderTestEngine(database, path, false). 
+ processFile(context.Background(), file) + require.NoError(t, res.err) + require.True(t, res.forceReplace, + "aider fan-out must force-replace stored runs") + require.Len(t, res.results, 2, + "two content-bearing runs must each produce a session") + for _, r := range res.results { + historyPath, _, ok := parser.ParseAiderVirtualPath(r.Session.File.Path) + require.True(t, ok, "each run is stored under a virtual run path") + assert.Equal(t, path, historyPath) + assert.Equal(t, parser.AgentAider, r.Session.Agent) } - t.Run("all runs current -> skip", func(t *testing.T) { - database := openTestDB(t) - path := writeAiderHistory(t) - metas := metasFor(t, path) - // Both content runs have a current row. The header-only run has none, - // and must not block the skip. - insertAiderRunRow(t, database, metas[0].VirtualPath, size, mtime, cur) - insertAiderRunRow(t, database, metas[1].VirtualPath, size, mtime, cur) - - e := &Engine{db: database} - got := e.aiderFileUnchanged(path, fakeFileInfo{size: size, mtime: mtime}) - assert.True(t, got, "file with all run rows current must be skipped") - }) + // Persist the parsed runs as current so the unchanged re-sync can drop them. + persistAiderProviderResults(t, database, res.results, nil) - t.Run("rewritten remote run rows current -> skip", func(t *testing.T) { - database := openTestDB(t) - path := writeAiderHistory(t) - metas := metasFor(t, path) - rewriter := func(p string) string { - return "host:" + p - } - // Remote sync stores the rewritten virtual run path, not the temp - // extraction path returned by ListAiderRunMetas. 
- insertAiderRunRow(t, database, - rewriter(metas[0].VirtualPath), size, mtime, cur) - insertAiderRunRow(t, database, - rewriter(metas[1].VirtualPath), size, mtime, cur) - - e := &Engine{db: database, pathRewriter: rewriter} - got := e.aiderFileUnchanged(path, fakeFileInfo{size: size, mtime: mtime}) - assert.True(t, got, - "remote file with all rewritten run rows current must be skipped") - }) + again := newAiderProviderTestEngine(database, path, false). + processFile(context.Background(), file) + require.NoError(t, again.err) + assert.Empty(t, again.results, + "an unchanged aider history must drop every already-current run") - t.Run("one run row missing -> do not skip", func(t *testing.T) { - database := openTestDB(t) - path := writeAiderHistory(t) - metas := metasFor(t, path) - // Only the FIRST content run was written (a partial batch). Under the - // old any-match logic this stranded the second run forever. - insertAiderRunRow(t, database, metas[0].VirtualPath, size, mtime, cur) - - e := &Engine{db: database} - got := e.aiderFileUnchanged(path, fakeFileInfo{size: size, mtime: mtime}) - assert.False(t, got, - "a missing run row must force a re-parse to repair it") - }) + forced := newAiderProviderTestEngine(database, path, true). + processFile(context.Background(), file) + require.NoError(t, forced.err) + assert.Len(t, forced.results, 2, + "a forced parse must re-emit every content-bearing run") +} - t.Run("one run row stale data version -> do not skip", func(t *testing.T) { - database := openTestDB(t) - path := writeAiderHistory(t) - metas := metasFor(t, path) - insertAiderRunRow(t, database, metas[0].VirtualPath, size, mtime, cur) - // The second run was resynced under an OLDER data version while the - // first is current. The file must still re-parse. 
- insertAiderRunRow(t, database, metas[1].VirtualPath, size, mtime, cur-1) - - e := &Engine{db: database} - got := e.aiderFileUnchanged(path, fakeFileInfo{size: size, mtime: mtime}) - assert.False(t, got, - "a stale data-version run row must force a re-parse") - }) +func TestProcessFileAiderProviderSameMtimeContentChangeIgnoresSkipCache(t *testing.T) { + database := openTestDB(t) + path := writeAiderHistory(t) + file := parser.DiscoveredFile{Agent: parser.AgentAider, Path: path} - t.Run("one run row stale mtime -> do not skip", func(t *testing.T) { - database := openTestDB(t) - path := writeAiderHistory(t) - metas := metasFor(t, path) - insertAiderRunRow(t, database, metas[0].VirtualPath, size, mtime, cur) - insertAiderRunRow(t, database, metas[1].VirtualPath, size, mtime-1, cur) - - e := &Engine{db: database} - got := e.aiderFileUnchanged(path, fakeFileInfo{size: size, mtime: mtime}) - assert.False(t, got, - "a run row with a different mtime must force a re-parse") - }) + initial := newAiderProviderTestEngine(database, path, false). + processFile(context.Background(), file) + require.NoError(t, initial.err) + require.Len(t, initial.results, 2) + persistAiderProviderResults(t, database, initial.results, nil) - t.Run("one run row stale size -> do not skip", func(t *testing.T) { - database := openTestDB(t) - path := writeAiderHistory(t) - metas := metasFor(t, path) - insertAiderRunRow(t, database, metas[0].VirtualPath, size, mtime, cur) - // The second run row has the SAME mtime but a different stored size, - // modeling a same-mtime append/truncate. Ignoring size would wrongly - // skip the file and strand the appended/removed runs. 
- insertAiderRunRow(t, database, metas[1].VirtualPath, size-1, mtime, cur) - - e := &Engine{db: database} - got := e.aiderFileUnchanged(path, fakeFileInfo{size: size, mtime: mtime}) - assert.False(t, got, - "a run row with a different size must force a re-parse") - }) + mtime := time.Unix(0, initial.mtime) + updated := "# aider chat started at 2026-06-09 14:01:00\n" + + "#### first prompt\nanswer one changed\n" + + "# aider chat started at 2026-06-09 15:30:00\n" + + "#### second prompt\nanswer two changed\n" + require.NoError(t, os.WriteFile(path, []byte(updated), 0o644)) + require.NoError(t, os.Chtimes(path, mtime, mtime)) + + engine := newAiderProviderTestEngine(database, path, false) + engine.cacheSkip(path, initial.mtime) + after := engine.processFile(context.Background(), file) + require.NoError(t, after.err) + assert.False(t, after.skip, + "a stale mtime-only skip cache entry must not bypass Aider hashing") + assert.Len(t, after.results, 2, + "same-mtime content changes must re-emit every Aider run") +} + +func TestProcessFileAiderProviderSkipCacheDoesNotHidePartialOrStaleRows(t *testing.T) { + tests := []struct { + name string + mutate func(index int, session *db.Session, dataVersion *int) + storeRows func([]parser.ParseResult) []parser.ParseResult + }{ + { + name: "missing run row", + storeRows: func(results []parser.ParseResult) []parser.ParseResult { + require.Len(t, results, 2) + return results[:1] + }, + }, + { + name: "stale data version", + mutate: func(_ int, _ *db.Session, dataVersion *int) { + *dataVersion = db.CurrentDataVersion() - 1 + }, + }, + { + name: "stale hash", + mutate: func(_ int, session *db.Session, _ *int) { + session.FileHash = strPtr("stale-hash") + }, + }, + { + name: "stale mtime", + mutate: func(_ int, session *db.Session, _ *int) { + session.FileMtime = int64Ptr(1) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + database := openTestDB(t) + path := writeAiderHistory(t) + file := 
parser.DiscoveredFile{Agent: parser.AgentAider, Path: path} + initial := newAiderProviderTestEngine(database, path, false). + processFile(context.Background(), file) + require.NoError(t, initial.err) + require.Len(t, initial.results, 2) + + rows := initial.results + if tt.storeRows != nil { + rows = tt.storeRows(rows) + } + persistAiderProviderResults(t, database, rows, tt.mutate) + + engine := newAiderProviderTestEngine(database, path, false) + engine.cacheSkip(path, initial.mtime) + after := engine.processFile(context.Background(), file) + require.NoError(t, after.err) + assert.False(t, after.skip, + "a generic skip cache entry must not hide %s", tt.name) + assert.NotEmpty(t, after.results) + }) + } } -// TestProcessAiderForceParseReparsesUnchangedFile is the forced-reparse -// regression test: under forceParse (parse-diff), processAider must NOT take -// the aiderFileUnchanged skip even when every run row is current, so a forced -// run re-reads already-synced aider files instead of stranding them. -func TestProcessAiderForceParseReparsesUnchangedFile(t *testing.T) { +func TestFindSourceFileProviderAuthoritativePrefersProviderOverStoredPath(t *testing.T) { database := openTestDB(t) - path := writeAiderHistory(t) - info, err := os.Stat(path) - require.NoError(t, err) - // processAider stats the real file, so the stored rows must carry the - // file's actual size and mtime for the unchanged-skip to fire. 
- size := info.Size() - mtime := info.ModTime().UnixNano() - cur := db.CurrentDataVersion() + root := t.TempDir() + stalePath := filepath.Join(root, "stale.jsonl") + currentPath := filepath.Join(root, "current.jsonl") + require.NoError(t, os.WriteFile(stalePath, []byte("{}\n"), 0o644)) + require.NoError(t, os.WriteFile(currentPath, []byte("{}\n"), 0o644)) + require.NoError(t, database.UpsertSession(db.Session{ + ID: "cowork:lookup", + Project: "project", + Machine: "local", + Agent: string(parser.AgentCowork), + FilePath: strPtr(stalePath), + })) - metas, err := parser.ListAiderRunMetas(path) - require.NoError(t, err) - require.Len(t, metas, 3) - // Mark every content-bearing run as current so the non-forced path skips. - insertAiderRunRow(t, database, metas[0].VirtualPath, size, mtime, cur) - insertAiderRunRow(t, database, metas[1].VirtualPath, size, mtime, cur) - - file := parser.DiscoveredFile{Path: path, Agent: parser.AgentAider} - - // Sanity: without forceParse the unchanged file is skipped. - normal := &Engine{db: database, machine: "local"} - skipRes := normal.processAider(file, info) - require.True(t, skipRes.skip, - "without forceParse an unchanged aider file must be skipped") - require.Empty(t, skipRes.results) - - // With forceParse the file must be reparsed, not skipped. 
- forced := &Engine{db: database, machine: "local", forceParse: true} - forcedRes := forced.processAider(file, info) - require.NoError(t, forcedRes.err) - assert.False(t, forcedRes.skip, - "forceParse must reparse an unchanged aider file, not skip it") - assert.Len(t, forcedRes.results, 2, - "forced reparse must fan out one result per content-bearing run") + provider := &lookupSourceProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCowork, + DisplayName: "Cowork", + IDPrefix: "cowork:", + FileBased: true, + }, + Caps: parser.Capabilities{ + Source: parser.SourceCapabilities{ + FindSource: parser.CapabilitySupported, + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCowork, + Key: currentPath, + DisplayPath: currentPath, + FingerprintKey: currentPath, + }, + } + engine := &Engine{ + db: database, + agentDirs: map[parser.AgentType][]string{ + parser.AgentCowork: {root}, + }, + providerFactories: providerFactoryMap([]parser.ProviderFactory{ + lookupSourceFactory{provider: provider}, + }), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCowork: parser.ProviderMigrationProviderAuthoritative, + }, + } + + got := engine.FindSourceFile("cowork:lookup") + + assert.Equal(t, currentPath, got) + require.Len(t, provider.findRequests, 1) + assert.Equal(t, "lookup", provider.findRequests[0].RawSessionID) + assert.Equal(t, "cowork:lookup", provider.findRequests[0].FullSessionID) + assert.Equal(t, stalePath, provider.findRequests[0].StoredFilePath) + assert.Equal(t, stalePath, provider.findRequests[0].FingerprintKey) + assert.True(t, provider.findRequests[0].RequireFreshSource) } // TestStripVirtualSourceSuffixAider verifies that an aider @@ -2129,6 +2187,43 @@ func TestStripVirtualSourceSuffixAider(t *testing.T) { "the run-index suffix must strip to the physical history path") } +type lookupSourceFactory struct { + provider *lookupSourceProvider +} + +func (f lookupSourceFactory) 
Definition() parser.AgentDef { + return f.provider.Definition() +} + +func (f lookupSourceFactory) Capabilities() parser.Capabilities { + return f.provider.Capabilities() +} + +func (f lookupSourceFactory) NewProvider(parser.ProviderConfig) parser.Provider { + return f.provider +} + +type lookupSourceProvider struct { + parser.ProviderBase + source parser.SourceRef + findRequests []parser.FindSourceRequest +} + +func (p *lookupSourceProvider) FindSource( + _ context.Context, + req parser.FindSourceRequest, +) (parser.SourceRef, bool, error) { + p.findRequests = append(p.findRequests, req) + return p.source, true, nil +} + +func (p *lookupSourceProvider) Parse( + context.Context, + parser.ParseRequest, +) (parser.ParseOutcome, error) { + return parser.ParseOutcome{}, nil +} + func TestToDBSessionStoresSessionName(t *testing.T) { pw := pendingWrite{sess: parser.ParsedSession{ ID: "commandcode:test", @@ -3647,11 +3742,11 @@ func TestEngine_ClassifyOnePathReasonixProjectBareMeta(t *testing.T) { dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) dbtest.WriteTestFile(t, metaPath, []byte(`{"model":"claude"}`)) - got, ok := engine.classifyOnePath(metaPath) - require.True(t, ok, "expected Reasonix sidecar to classify") - assert.Equal(t, sessionPath, got.Path) - assert.Equal(t, "proj", got.Project) - assert.Equal(t, parser.AgentReasonix, got.Agent) + files := engine.classifyPaths([]string{metaPath}) + require.Len(t, files, 1, "expected Reasonix sidecar to classify") + assert.Equal(t, sessionPath, files[0].Path) + assert.Equal(t, "proj", files[0].Project) + assert.Equal(t, parser.AgentReasonix, files[0].Agent) } func TestEngine_ClassifyOnePathReasonixDeletedMeta(t *testing.T) { @@ -3670,11 +3765,11 @@ func TestEngine_ClassifyOnePathReasonixDeletedMeta(t *testing.T) { metaPath := sessionPath + ".meta" dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) - got, ok := engine.classifyOnePath(metaPath) - require.True(t, ok, 
"expected deleted Reasonix sidecar to classify") - assert.Equal(t, sessionPath, got.Path) - assert.Equal(t, "proj", got.Project) - assert.Equal(t, parser.AgentReasonix, got.Agent) + files := engine.classifyPaths([]string{metaPath}) + require.Len(t, files, 1, "expected deleted Reasonix sidecar to classify") + assert.Equal(t, sessionPath, files[0].Path) + assert.Equal(t, "proj", files[0].Project) + assert.Equal(t, parser.AgentReasonix, files[0].Agent) } func TestEngine_ClassifyOnePathReasonixDeletedTranscriptIgnored(t *testing.T) { @@ -3691,8 +3786,8 @@ func TestEngine_ClassifyOnePathReasonixDeletedTranscriptIgnored(t *testing.T) { reasonixDir, "projects", "proj", "sessions", "session-123.jsonl", ) - _, ok := engine.classifyOnePath(sessionPath) - assert.False(t, ok, "expected deleted Reasonix transcript to be ignored") + files := engine.classifyPaths([]string{sessionPath}) + assert.Empty(t, files, "expected deleted Reasonix transcript to be ignored") } func TestEngine_SyncPathsReasonixMetadataOnlySessionFieldUpdate(t *testing.T) { diff --git a/internal/sync/parsediff.go b/internal/sync/parsediff.go index ab9548773..6e6707b32 100644 --- a/internal/sync/parsediff.go +++ b/internal/sync/parsediff.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "os" - "path/filepath" "sort" "strings" "time" @@ -16,8 +15,9 @@ import ( // ParseDiffOptions configures a report-only re-parse comparison. type ParseDiffOptions struct { // Agents restricts the run; empty means every file-based agent with - // a DiscoverFunc. Agents without an on-disk source to re-parse - // (database-backed or import-only) are rejected with an error. + // a provider-discoverable on-disk source. Agents without an on-disk + // source to re-parse (database-backed or import-only) are rejected + // with an error. Agents []parser.AgentType // Limit caps the number of source files parsed, newest mtime first // across all agents. 0 means no limit. 
@@ -67,30 +67,19 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi report.Agents = append(report.Agents, string(def.Type)) } - // Discovery mirrors syncAllLocked's file phase: per-agent - // DiscoverFunc over the configured dirs, or provider discovery for - // agents that have dropped legacy discovery, then dedupe and the - // legacy-Kiro shadow filter. + // Discovery mirrors syncAllLocked's file phase: provider discovery over + // the configured dirs per agent, then dedupe and the legacy-Kiro shadow + // filter. Provider discovery already enumerates shared-SQLite sources + // (Kiro's data.sqlite3, db-mode OpenCode's opencode.db) per session, so + // no separate db-source synthesis is needed. var files []parser.DiscoveredFile for _, def := range resolved { - if def.DiscoverFunc != nil { - for _, d := range e.agentDirs[def.Type] { - files = append(files, def.DiscoverFunc(d)...) - } - continue - } providerFiles, err := e.parseDiffProviderSources(ctx, def.Type) if err != nil { return nil, err } files = append(files, providerFiles...) } - // DiscoverFunc does not emit the shared-SQLite source for Kiro - // (data.sqlite3) or db-mode OpenCode (opencode.db) — normal sync - // reaches those through dedicated phases. Synthesize them here so - // their sessions are actually re-parsed; processKiro/processOpenCode - // fan one db path out to every contained session under forceParse. - files = append(files, e.parseDiffDatabaseSources(resolved)...) files = dedupeDiscoveredFiles(files) files = e.filterShadowedLegacyKiroFiles(files) @@ -214,7 +203,8 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi } // parseDiffProviderSources discovers an agent's on-disk sources through -// the provider facade for agents that have dropped their DiscoverFunc. +// the provider facade. It is scoped to a single agent type so parse-diff +// respects the requested agent set. 
func (e *Engine) parseDiffProviderSources( ctx context.Context, agentType parser.AgentType, @@ -265,9 +255,6 @@ func (e *Engine) parseDiffAgentDiscoverable(def parser.AgentDef) bool { if !def.FileBased { return false } - if def.DiscoverFunc != nil { - return true - } switch e.providerMigrationModes[def.Type] { case parser.ProviderMigrationProviderAuthoritative: factory, ok := e.providerFactories[def.Type] @@ -323,59 +310,6 @@ func (e *Engine) resolveParseDiffAgents( return out, nil } -// parseDiffDatabaseSources synthesizes DiscoveredFile entries for the -// shared-SQLite agent stores that DiscoverFunc does not emit: Kiro's -// data.sqlite3, OpenCode's opencode.db, and Kilo's kilo.db. The -// corresponding process functions recognize those base filenames and fan -// one db path out to every contained session, so routing them through the -// normal worker loop re-parses every CLI Kiro / DB-backed OpenCode / -// DB-backed Kilo session. -// Without this, those sessions fall to the "not discovered" sweep and -// an --agent kiro / --agent opencode run would pass while comparing -// nothing. -// -// The OpenCode db is added whenever it exists, regardless of which -// source mode ResolveOpenCodeSource picks: normal sync reads -// opencode.db in storage-mode roots too (openCodePendingSessionIDs), -// because a migrated root can still hold DB-only legacy sessions. Kilo -// uses the same hybrid storage model. The storage-ID filtering in each -// process function keeps file-backed sessions from being compared twice. -func (e *Engine) parseDiffDatabaseSources( - resolved []parser.AgentDef, -) []parser.DiscoveredFile { - var extra []parser.DiscoveredFile - for _, def := range resolved { - // Provider-authoritative agents (no DiscoverFunc) already have - // their shared-SQLite sessions enumerated by - // parseDiffProviderSources, which applies the provider's - // storage-ID filter so a file-backed storage session is not also - // re-parsed from its stale db row. 
Synthesizing the raw db here - // would re-add those sessions through the legacy fan-out, double - // counting and bypassing the filter. - if def.DiscoverFunc == nil { - continue - } - switch def.Type { - case parser.AgentOpenCode, parser.AgentKilo, parser.AgentMiMoCode: - for _, dir := range e.agentDirs[def.Type] { - if dir == "" { - continue - } - dbPath := filepath.Join( - dir, openCodeFormatDBName(def.Type), - ) - if info, err := os.Stat(dbPath); err == nil && - !info.IsDir() { - extra = append(extra, parser.DiscoveredFile{ - Path: dbPath, Agent: def.Type, - }) - } - } - } - } - return extra -} - // sortAndLimitParseDiffFiles orders files newest-first by source // mtime (tie-break: path ascending) and applies the file cap. It // returns the kept files and the base paths of files cut by the @@ -492,7 +426,7 @@ func stripVirtualSourceSuffix(path string) string { // Detecting either makes the source unreliable, so the caller skips the raced // guard entirely. This never masks genuine drift for those agents, while plain // file-based agents reading a literal file still get the real race protection. -func parseDiffSourceReliableForRaced( +func (e *Engine) parseDiffSourceReliableForRaced( agent parser.AgentType, sourcePath string, ) bool { // A virtual path carries a recognized "#..." suffix; stripping changes @@ -501,15 +435,16 @@ func parseDiffSourceReliableForRaced( if stripVirtualSourceSuffix(sourcePath) != sourcePath { return false } - // Only plain file-based agents (FileBased with a DiscoverFunc, the same - // on-disk-source condition resolveParseDiffAgents uses) read a literal - // file whose mtime populated file_mtime. An unknown or DB-backed agent has - // no such basis. + // Only agents with a literal on-disk source -- the same discoverability + // condition resolveParseDiffAgents uses -- read a file whose mtime + // populated file_mtime. parseDiffAgentDiscoverable gates out DB-backed + // (FileBased == false, e.g. 
Forge) and non-authoritative agents, so an + // unknown or DB-backed agent has no such basis. def, ok := parser.AgentByType(agent) if !ok { return false } - return def.FileBased && def.DiscoverFunc != nil + return e.parseDiffAgentDiscoverable(def) } // parseDiffLiveMtime resolves a session's live source mtime for the raced @@ -716,7 +651,7 @@ func (e *Engine) parseDiffCollectFile( raced := false if realDiffs > 0 && compare && sourceSessionCount[pw.sess.File.Path] == 1 && - parseDiffSourceReliableForRaced(pw.sess.Agent, pw.sess.File.Path) { + e.parseDiffSourceReliableForRaced(pw.sess.Agent, pw.sess.File.Path) { var storedMtime *int64 if stored != nil { storedMtime = stored.FileMtime diff --git a/internal/sync/parsediff_compare_test.go b/internal/sync/parsediff_compare_test.go index abe9cc22e..4148bb065 100644 --- a/internal/sync/parsediff_compare_test.go +++ b/internal/sync/parsediff_compare_test.go @@ -1734,9 +1734,10 @@ func TestParseDiffSourceReliableForRaced(t *testing.T) { want: false, }, } + engine := NewDiffEngine(dbtest.OpenTestDB(t), EngineConfig{}) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := parseDiffSourceReliableForRaced(tt.agent, tt.path) + got := engine.parseDiffSourceReliableForRaced(tt.agent, tt.path) assert.Equal(t, tt.want, got) }) } diff --git a/internal/sync/parsediff_integration_test.go b/internal/sync/parsediff_integration_test.go index faef85da3..341cc64e7 100644 --- a/internal/sync/parsediff_integration_test.go +++ b/internal/sync/parsediff_integration_test.go @@ -837,8 +837,8 @@ func TestParseDiffCoversProviderAuthoritativePiFamily(t *testing.T) { } // TestParseDiffCoversKiroSQLite proves that Kiro's shared data.sqlite3 -// store — which DiscoverFunc never emits and which normal sync reaches -// through a dedicated phase — is actually re-parsed by parse-diff. A +// store, which the provider discovers and fans out to one session per +// row, is actually re-parsed by parse-diff. 
A // regressed force-parse guard or missing synthesized discovery would // surface here as the session being skipped/"not discovered" with // Examined 0 rather than compared. @@ -995,8 +995,8 @@ func TestParseDiffCoversMixedKiloRoot(t *testing.T) { } // TestParseDiffCoversShelley proves Shelley's shared shelley.db — which -// DiscoverFunc emits as a single file and which normal sync fans out to -// one session per conversation — is re-parsed and compared by parse-diff. +// the provider discovers as a single source and which normal sync fans +// out to one session per conversation — is re-parsed and compared by parse-diff. // Examined:1/Identical:1 means the stored conversation was matched and // vetted, not bucketed as skipped/"not discovered". func TestParseDiffCoversShelley(t *testing.T) { diff --git a/internal/sync/parsediff_provider_test.go b/internal/sync/parsediff_provider_test.go new file mode 100644 index 000000000..030d4ab2b --- /dev/null +++ b/internal/sync/parsediff_provider_test.go @@ -0,0 +1,222 @@ +package sync + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" +) + +func TestParseDiffDiscoversProviderSources(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "provider-only.jsonl") + require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + + provider := parseDiffProvider{ + sourcePath: sourcePath, + mtime: info.ModTime(), + size: info.Size(), + } + engine := NewDiffEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + parseDiffProviderFactory{provider: provider}, + }, + ProviderMigrationModes: 
map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + report, err := engine.ParseDiff(context.Background(), ParseDiffOptions{ + Agents: []parser.AgentType{parser.AgentClaude}, + }) + + require.NoError(t, err) + require.NotNil(t, report) + assert.Equal(t, 1, report.FilesExamined) + assert.Equal(t, ParseDiffTotals{NewOnDisk: 1, Examined: 0}, report.Totals) + if assert.Len(t, report.Sessions, 1) { + assert.Equal(t, DiffNewOnDisk, report.Sessions[0].Class) + assert.Equal(t, "provider-discovered", report.Sessions[0].SessionID) + assert.Equal(t, sourcePath, report.Sessions[0].FilePath) + } +} + +func TestParseDiffProviderAuthoritativeAgentsAreDiscoverable(t *testing.T) { + engine := NewDiffEngine(dbtest.OpenTestDB(t), EngineConfig{}) + for _, agent := range []parser.AgentType{ + parser.AgentGptme, + parser.AgentPi, + parser.AgentOMP, + parser.AgentWorkBuddy, + parser.AgentCortex, + parser.AgentKimi, + parser.AgentQwenPaw, + parser.AgentOpenHands, + parser.AgentCursor, + parser.AgentVibe, + parser.AgentClaude, + parser.AgentCowork, + parser.AgentHermes, + } { + def, ok := parser.AgentByType(agent) + require.True(t, ok, "agent %s", agent) + assert.True(t, engine.parseDiffAgentDiscoverable(def), + "parse-diff engine must include provider-authoritative %s", agent) + } +} + +func TestSyncAllDiscoversProviderSources(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "provider-only-sync.jsonl") + require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + + provider := parseDiffProvider{ + sourcePath: sourcePath, + mtime: info.ModTime(), + size: info.Size(), + } + database := dbtest.OpenTestDB(t) + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + 
parseDiffProviderFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + stats := engine.SyncAll(context.Background(), nil) + + assert.Equal(t, 1, stats.TotalSessions) + assert.Equal(t, 1, stats.Synced) + session, err := database.GetSession(context.Background(), "provider-discovered") + require.NoError(t, err) + require.NotNil(t, session) + assert.Equal(t, sourcePath, database.GetSessionFilePath("provider-discovered")) +} + +type parseDiffProviderFactory struct { + provider parseDiffProvider +} + +func (f parseDiffProviderFactory) Definition() parser.AgentDef { + return parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + FileBased: true, + } +} + +func (f parseDiffProviderFactory) Capabilities() parser.Capabilities { + return parser.Capabilities{ + Source: parser.SourceCapabilities{ + DiscoverSources: parser.CapabilitySupported, + FindSource: parser.CapabilitySupported, + }, + } +} + +func (f parseDiffProviderFactory) NewProvider( + parser.ProviderConfig, +) parser.Provider { + p := f.provider + p.ProviderBase = parser.ProviderBase{ + Def: f.Definition(), + Caps: f.Capabilities(), + } + return p +} + +type parseDiffProvider struct { + parser.ProviderBase + sourcePath string + mtime time.Time + size int64 +} + +func (p parseDiffProvider) Discover(context.Context) ([]parser.SourceRef, error) { + return []parser.SourceRef{p.source()}, nil +} + +func (p parseDiffProvider) FindSource( + context.Context, + parser.FindSourceRequest, +) (parser.SourceRef, bool, error) { + return p.source(), true, nil +} + +func (p parseDiffProvider) Fingerprint( + context.Context, + parser.SourceRef, +) (parser.SourceFingerprint, error) { + return parser.SourceFingerprint{ + Key: p.sourcePath, + Size: p.size, + MTimeNS: p.mtime.UnixNano(), + Hash: "provider-hash", + }, nil +} + +func (p parseDiffProvider) Parse( + 
context.Context, + parser.ParseRequest, +) (parser.ParseOutcome, error) { + return parser.ParseOutcome{ + Results: []parser.ParseResultOutcome{{ + Result: parser.ParseResult{ + Session: parser.ParsedSession{ + ID: "provider-discovered", + Agent: parser.AgentClaude, + Machine: "devbox", + Project: "provider", + StartedAt: p.mtime, + EndedAt: p.mtime, + MessageCount: 1, + File: parser.FileInfo{ + Path: p.sourcePath, + Size: p.size, + Mtime: p.mtime.UnixNano(), + Hash: "provider-hash", + }, + }, + Messages: []parser.ParsedMessage{{ + Role: parser.RoleUser, + Content: "provider discovered", + Timestamp: p.mtime, + Ordinal: 0, + }}, + }, + DataVersion: parser.DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +func (p parseDiffProvider) source() parser.SourceRef { + return parser.SourceRef{ + Provider: parser.AgentClaude, + Key: p.sourcePath, + DisplayPath: p.sourcePath, + FingerprintKey: p.sourcePath, + ProjectHint: "provider", + } +} diff --git a/internal/sync/provider_effects.go b/internal/sync/provider_effects.go new file mode 100644 index 000000000..1aa8897f7 --- /dev/null +++ b/internal/sync/provider_effects.go @@ -0,0 +1,174 @@ +package sync + +import ( + "fmt" + "strings" + + "go.kenn.io/agentsview/internal/parser" +) + +// validateProviderOutcome rejects a provider parse outcome whose sessions, +// excluded IDs, or diagnostics do not belong to the source's agent. The engine +// runs this before applying a provider outcome so a misrouted or cross-provider +// result cannot corrupt the archive. 
+func validateProviderOutcome( + def parser.AgentDef, + source parser.SourceRef, + fingerprint parser.SourceFingerprint, + outcome parser.ParseOutcome, +) error { + for _, result := range outcome.Results { + session := result.Result.Session + if session.Agent != def.Type { + return fmt.Errorf( + "%s: provider result session agent mismatch for %q: got %s", + def.Type, + session.ID, + session.Agent, + ) + } + if err := validateProviderParseResultSessionIDs(def, result.Result); err != nil { + return err + } + } + for _, sessionID := range outcome.ExcludedSessionIDs { + if err := validateProviderSessionID(def, sessionID, "excluded session id"); err != nil { + return err + } + } + for _, sourceErr := range outcome.SourceErrors { + if err := validateProviderSessionID(def, sourceErr.SessionID, "diagnostic session id"); err != nil { + return err + } + if sourceErr.SourceKey == "" { + return fmt.Errorf( + "%s: provider diagnostic source key is required for source %q", + def.Type, + source.Key, + ) + } + if !providerSourceKeyMatches(source, fingerprint, sourceErr.SourceKey) { + return fmt.Errorf( + "%s: provider diagnostic source key %q is unrelated to source %q", + def.Type, + sourceErr.SourceKey, + source.Key, + ) + } + } + return nil +} + +func validateProviderParseResultSessionIDs(def parser.AgentDef, result parser.ParseResult) error { + sessionIDs := []struct { + field string + id string + }{ + {field: "result session id", id: result.Session.ID}, + {field: "parent session id", id: result.Session.ParentSessionID}, + } + for _, sessionID := range sessionIDs { + if err := validateProviderSessionID(def, sessionID.id, sessionID.field); err != nil { + return err + } + } + for _, usage := range result.Session.UsageEvents { + if err := validateProviderSessionID(def, usage.SessionID, "session usage event session id"); err != nil { + return err + } + } + for _, usage := range result.UsageEvents { + if err := validateProviderSessionID(def, usage.SessionID, "usage event session 
id"); err != nil { + return err + } + } + for _, message := range result.Messages { + for _, toolCall := range message.ToolCalls { + if err := validateProviderSessionID(def, toolCall.SubagentSessionID, "tool call subagent session id"); err != nil { + return err + } + for _, event := range toolCall.ResultEvents { + if err := validateProviderSessionID(def, event.SubagentSessionID, "tool result event subagent session id"); err != nil { + return err + } + } + } + } + return nil +} + +func validateProviderSessionID(def parser.AgentDef, sessionID, field string) error { + if sessionID == "" || def.IDPrefix == "" { + return nil + } + if strings.HasPrefix(sessionID, def.IDPrefix) { + return nil + } + return fmt.Errorf( + "%s: provider %s %q must use prefix %q", + def.Type, + field, + sessionID, + def.IDPrefix, + ) +} + +func providerSourceKeyMatches( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, + sourceKey string, +) bool { + if sourceKey == "" { + return true + } + for _, candidate := range []string{fingerprint.Key, source.FingerprintKey, source.Key} { + if candidate == "" { + continue + } + if sourceKey == candidate || strings.HasPrefix(sourceKey, candidate+"#") || + strings.HasPrefix(sourceKey, candidate+"::") || + strings.HasPrefix(sourceKey, candidate+"|") { + return true + } + } + return false +} + +// parseOutcomeResults flattens a provider parse outcome's per-result wrappers +// into the bare ParseResults the engine writes. +func parseOutcomeResults(outcomes []parser.ParseResultOutcome) []parser.ParseResult { + results := make([]parser.ParseResult, 0, len(outcomes)) + for _, outcome := range outcomes { + results = append(results, outcome.Result) + } + return results +} + +// plannedSourceKey is the stable identity the engine uses for a provider source +// when recording source-level state. It prefers the fingerprint key, then the +// source's own keys. 
+func plannedSourceKey( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, +) string { + if fingerprint.Key != "" { + return fingerprint.Key + } + if source.FingerprintKey != "" { + return source.FingerprintKey + } + return source.Key +} + +// plannedSkipKey is the skip-cache key the engine stores for a provider source +// that parsed to no work. It prefers the source fingerprint key so the skip +// entry keys off the same identity used elsewhere. +func plannedSkipKey( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, +) string { + if source.FingerprintKey != "" { + return source.FingerprintKey + } + return plannedSourceKey(source, fingerprint) +} diff --git a/internal/sync/provider_process_test.go b/internal/sync/provider_process_test.go index 14bec6d8f..b3274f780 100644 --- a/internal/sync/provider_process_test.go +++ b/internal/sync/provider_process_test.go @@ -3,8 +3,10 @@ package sync import ( "context" "database/sql" + "os" "path/filepath" "testing" + "time" _ "github.com/mattn/go-sqlite3" "github.com/stretchr/testify/assert" @@ -14,7 +16,7 @@ import ( "go.kenn.io/agentsview/internal/parser" ) -func TestProcessFileProviderShadowCompareForgeVirtualSource(t *testing.T) { +func TestProcessFileProviderForgeVirtualSource(t *testing.T) { root := t.TempDir() dbPath := writeProcessProviderForgeDB(t, root) engine := NewEngine(openTestDB(t), EngineConfig{ @@ -42,7 +44,7 @@ func TestProcessFileProviderShadowCompareForgeVirtualSource(t *testing.T) { assert.Len(t, res.results[0].Messages, 2) } -func TestProcessFileProviderShadowCompareSkipsStoredFreshSource(t *testing.T) { +func TestProcessFileProviderSkipsStoredFreshSource(t *testing.T) { root := t.TempDir() dbPath := writeProcessProviderForgeDB(t, root) virtualPath := dbPath + "#conv-001" @@ -86,7 +88,7 @@ func TestProcessFileProviderShadowCompareSkipsStoredFreshSource(t *testing.T) { assert.Empty(t, second.results) } -func TestProcessFileProviderShadowComparePiebaldVirtualSource(t 
*testing.T) { +func TestProcessFileProviderPiebaldVirtualSource(t *testing.T) { root := t.TempDir() dbPath := filepath.Join(root, "app.db") piebaldDB := openProcessProviderPiebaldDB(t, dbPath) @@ -113,7 +115,7 @@ func TestProcessFileProviderShadowComparePiebaldVirtualSource(t *testing.T) { assert.Len(t, res.results[0].Messages, 2) } -// TestProcessFileProviderShadowComparePiebaldSkipsStoredFreshSource verifies +// TestProcessFileProviderPiebaldSkipsStoredFreshSource verifies // that a provider-authoritative Piebald chat whose stored fingerprint already // matches is not reparsed on a repeat processFile. Piebald keeps every chat in // one app.db, but the provider fingerprint's mtime is the chat's own updated_at @@ -121,7 +123,7 @@ func TestProcessFileProviderShadowComparePiebaldVirtualSource(t *testing.T) { // per-session signal and skips on the DB-stored-fingerprint check. This mirrors // the legacy syncPiebald/piebaldPendingSessionIDs skip and the Forge // SkipsStoredFreshSource behavior; the in-memory skip cache stays empty. 
-func TestProcessFileProviderShadowComparePiebaldSkipsStoredFreshSource(t *testing.T) { +func TestProcessFileProviderPiebaldSkipsStoredFreshSource(t *testing.T) { root := t.TempDir() dbPath := filepath.Join(root, "app.db") piebaldDB := openProcessProviderPiebaldDB(t, dbPath) @@ -166,7 +168,7 @@ func TestProcessFileProviderShadowComparePiebaldSkipsStoredFreshSource(t *testin assert.Empty(t, second.results) } -func TestProcessFileProviderShadowCompareWarpVirtualSource(t *testing.T) { +func TestProcessFileProviderWarpVirtualSource(t *testing.T) { root := t.TempDir() dbPath := filepath.Join(root, "warp.sqlite") warpDB := openProcessProviderWarpDB(t, dbPath) @@ -204,6 +206,265 @@ func TestProcessFileUsesProviderDBBackedFamily(t *testing.T) { assert.False(t, processFileUsesProvider(parser.AgentClaude)) } +func TestProcessFileProviderAuthoritativeUsesInjectedProvider(t *testing.T) { + root := t.TempDir() + sourcePath, fingerprint := writeProcessProviderSource(t, root, "owned.jsonl") + provider := newProcessFixtureProvider( + parser.SourceRef{ + Provider: parser.AgentCowork, + Key: "source-owned", + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + ProjectHint: "fixture-project", + }, + fingerprint, + parser.ParseOutcome{ + Results: []parser.ParseResultOutcome{{ + Result: processFixtureResult( + "cowork:owned", + parser.AgentCowork, + "fixture-project", + sourcePath, + fingerprint, + ), + DataVersion: parser.DataVersionCurrent, + }}, + ResultSetComplete: true, + ForceReplace: true, + }, + ) + engine := newProcessFixtureEngine(t, root, provider) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + }) + + require.NoError(t, res.err) + require.Len(t, res.results, 1) + assert.False(t, res.skip) + assert.True(t, res.forceReplace) + assert.Equal(t, fingerprint.MTimeNS, res.mtime) + assert.Equal(t, []string{"find-source", "fingerprint", "parse"}, provider.calls) + require.Len(t, provider.findRequests, 
1) + assert.True(t, provider.findRequests[0].RequireFreshSource) + assert.Equal(t, sourcePath, provider.findRequests[0].StoredFilePath) + assert.Equal(t, parser.AgentCowork, res.results[0].Session.Agent) + assert.Equal(t, "cowork:owned", res.results[0].Session.ID) + assert.Equal(t, "devbox", res.results[0].Session.Machine) + assert.Equal(t, "fixture-project", res.results[0].Session.Project) +} + +func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { + root := t.TempDir() + sourcePath, fingerprint := writeProcessProviderSource(t, root, "retry.jsonl") + provider := newProcessFixtureProvider( + processFixtureSource(sourcePath), + fingerprint, + parser.ParseOutcome{ + Results: []parser.ParseResultOutcome{ + { + Result: processFixtureResult( + "cowork:current", + parser.AgentCowork, + "fixture-project", + sourcePath, + fingerprint, + ), + DataVersion: parser.DataVersionCurrent, + }, + { + Result: processFixtureResult( + "cowork:retry", + parser.AgentCowork, + "fixture-project", + sourcePath, + fingerprint, + ), + DataVersion: parser.DataVersionNeedsRetry, + }, + }, + ResultSetComplete: true, + }, + ) + engine := newProcessFixtureEngine(t, root, provider) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + }) + + require.NoError(t, res.err) + require.Len(t, res.results, 2) + assert.False(t, res.needsRetryForSession("cowork:current")) + assert.True(t, res.needsRetryForSession("cowork:retry")) + assert.False(t, res.suppressesPresenceSweepForRetry()) +} + +func TestProcessFileProviderAuthoritativeSuppressesUncleanSkipCache(t *testing.T) { + root := t.TempDir() + sourcePath, fingerprint := writeProcessProviderSource(t, root, "unclean.jsonl") + provider := newProcessFixtureProvider( + processFixtureSource(sourcePath), + fingerprint, + parser.ParseOutcome{ + Results: []parser.ParseResultOutcome{{ + Result: processFixtureResult( + "cowork:unclean", + parser.AgentCowork, + 
"fixture-project", + sourcePath, + fingerprint, + ), + DataVersion: parser.DataVersionCurrent, + }}, + ResultSetComplete: false, + }, + ) + engine := newProcessFixtureEngine(t, root, provider) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + }) + + require.NoError(t, res.err) + assert.True(t, res.cacheSkip) + assert.True(t, res.noCacheSkip) + assert.True(t, res.suppressPresenceSweep) +} + +func TestProcessFileProviderAuthoritativeUsesSkipReasonCacheKey(t *testing.T) { + root := t.TempDir() + sourcePath, fingerprint := writeProcessProviderSource(t, root, "skip.jsonl") + source := processFixtureSource(sourcePath) + source.FingerprintKey = sourcePath + "#provider-key" + provider := newProcessFixtureProvider( + source, + fingerprint, + parser.ParseOutcome{ + SkipReason: parser.SkipNonInteractive, + ResultSetComplete: true, + }, + ) + engine := newProcessFixtureEngine(t, root, provider) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + }) + + require.NoError(t, res.err) + assert.True(t, res.skip) + assert.True(t, res.cacheSkip) + assert.False(t, res.noCacheSkip) + assert.Equal(t, source.FingerprintKey, res.skipCacheKey(sourcePath)) +} + +func TestProcessFileProviderAuthoritativeForceParseAllowsStaleSourceLookup(t *testing.T) { + root := t.TempDir() + sourcePath, fingerprint := writeProcessProviderSource(t, root, "force.jsonl") + provider := newProcessFixtureProvider( + processFixtureSource(sourcePath), + fingerprint, + parser.ParseOutcome{ + Results: []parser.ParseResultOutcome{{ + Result: processFixtureResult( + "cowork:force", + parser.AgentCowork, + "fixture-project", + sourcePath, + fingerprint, + ), + DataVersion: parser.DataVersionCurrent, + }}, + ResultSetComplete: true, + }, + ) + engine := newProcessFixtureEngine(t, root, provider) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + 
Path: sourcePath, + Agent: parser.AgentCowork, + ForceParse: true, + }) + + require.NoError(t, res.err) + require.Len(t, provider.findRequests, 1) + assert.False(t, provider.findRequests[0].RequireFreshSource) + require.Len(t, provider.parseRequests, 1) + assert.True(t, provider.parseRequests[0].ForceParse) +} + +func TestProcessFileProviderAuthoritativeNotFoundFails(t *testing.T) { + root := t.TempDir() + sourcePath, fingerprint := writeProcessProviderSource(t, root, "missing.jsonl") + provider := newProcessFixtureProvider( + processFixtureSource(sourcePath), + fingerprint, + parser.ParseOutcome{ResultSetComplete: true}, + ) + provider.findFound = false + engine := newProcessFixtureEngine(t, root, provider) + + res := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCowork, + }) + + require.Error(t, res.err) + assert.ErrorContains(t, res.err, "provider source not found") + assert.Equal(t, []string{"find-source"}, provider.calls) +} + +func TestSyncSingleSessionProviderAuthoritativeBypassesProviderSkipCache(t *testing.T) { + root := t.TempDir() + sourcePath, fingerprint := writeProcessProviderSource(t, root, "single.jsonl") + source := processFixtureSource(sourcePath) + source.FingerprintKey = sourcePath + "#provider-key" + provider := newProcessFixtureProvider( + source, + fingerprint, + parser.ParseOutcome{ + Results: []parser.ParseResultOutcome{{ + Result: processFixtureResult( + "cowork:single", + parser.AgentCowork, + "fixture-project", + sourcePath, + fingerprint, + ), + DataVersion: parser.DataVersionCurrent, + }}, + ResultSetComplete: true, + }, + ) + engine := newProcessFixtureEngine(t, root, provider) + engine.cacheSkip(source.FingerprintKey, fingerprint.MTimeNS) + + require.NoError(t, engine.SyncSingleSession("cowork:single")) + + assert.Equal( + t, + []string{ + "find-source", + "find-source", + "fingerprint", + "parse", + }, + provider.calls, + ) + require.Len(t, provider.findRequests, 2) + 
assert.Equal(t, "single", provider.findRequests[0].RawSessionID) + assert.False(t, provider.findRequests[1].RequireFreshSource) + require.Len(t, provider.parseRequests, 1) + assert.True(t, provider.parseRequests[0].ForceParse) + engine.skipMu.RLock() + _, cached := engine.skipCache[source.FingerprintKey] + engine.skipMu.RUnlock() + assert.False(t, cached) +} + func writeProcessProviderForgeDB(t *testing.T, root string) string { t.Helper() dbPath := filepath.Join(root, ".forge.db") @@ -241,6 +502,170 @@ func writeProcessProviderForgeDB(t *testing.T, root string) string { return dbPath } +func newProcessFixtureEngine( + t *testing.T, + root string, + provider *processFixtureProvider, +) *Engine { + t.Helper() + return NewEngine(openTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCowork: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{processFixtureFactory{provider: provider}}, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCowork: parser.ProviderMigrationProviderAuthoritative, + }, + }) +} + +func writeProcessProviderSource( + t *testing.T, + root string, + name string, +) (string, parser.SourceFingerprint) { + t.Helper() + path := filepath.Join(root, name) + require.NoError(t, os.WriteFile(path, []byte(`{"session":"fixture"}`+"\n"), 0o644)) + info, err := os.Stat(path) + require.NoError(t, err) + return path, parser.SourceFingerprint{ + Key: path, + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } +} + +func processFixtureSource(path string) parser.SourceRef { + return parser.SourceRef{ + Provider: parser.AgentCowork, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: "fixture-project", + } +} + +func processFixtureResult( + id string, + agent parser.AgentType, + project string, + path string, + fingerprint parser.SourceFingerprint, +) parser.ParseResult { + started := time.Unix(1_800_000_000, 0).UTC() + ended := 
started.Add(time.Second) + return parser.ParseResult{ + Session: parser.ParsedSession{ + ID: id, + Project: project, + Machine: "devbox", + Agent: agent, + StartedAt: started, + EndedAt: ended, + FirstMessage: "fixture prompt", + MessageCount: 1, + UserMessageCount: 1, + File: parser.FileInfo{ + Path: path, + Size: fingerprint.Size, + Mtime: fingerprint.MTimeNS, + }, + }, + Messages: []parser.ParsedMessage{{ + Ordinal: 0, + Role: parser.RoleUser, + Content: "fixture prompt", + Timestamp: started, + }}, + } +} + +func newProcessFixtureProvider( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, + outcome parser.ParseOutcome, +) *processFixtureProvider { + return &processFixtureProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCowork, + DisplayName: "Cowork", + IDPrefix: "cowork:", + FileBased: true, + }, + Caps: parser.Capabilities{ + Source: parser.SourceCapabilities{ + FindSource: parser.CapabilitySupported, + CompositeFingerprint: parser.CapabilitySupported, + }, + }, + }, + source: source, + findFound: true, + fingerprint: fingerprint, + outcome: outcome, + } +} + +type processFixtureFactory struct { + provider *processFixtureProvider +} + +func (f processFixtureFactory) Definition() parser.AgentDef { + return f.provider.Definition() +} + +func (f processFixtureFactory) Capabilities() parser.Capabilities { + return f.provider.Capabilities() +} + +func (f processFixtureFactory) NewProvider(parser.ProviderConfig) parser.Provider { + return f.provider +} + +type processFixtureProvider struct { + parser.ProviderBase + + source parser.SourceRef + findFound bool + fingerprint parser.SourceFingerprint + outcome parser.ParseOutcome + calls []string + findRequests []parser.FindSourceRequest + parseRequests []parser.ParseRequest +} + +func (p *processFixtureProvider) FindSource( + _ context.Context, + req parser.FindSourceRequest, +) (parser.SourceRef, bool, error) { + p.calls = append(p.calls, "find-source") + 
p.findRequests = append(p.findRequests, req) + if !p.findFound { + return parser.SourceRef{}, false, nil + } + return p.source, true, nil +} + +func (p *processFixtureProvider) Fingerprint( + context.Context, + parser.SourceRef, +) (parser.SourceFingerprint, error) { + p.calls = append(p.calls, "fingerprint") + return p.fingerprint, nil +} + +func (p *processFixtureProvider) Parse( + _ context.Context, + req parser.ParseRequest, +) (parser.ParseOutcome, error) { + p.calls = append(p.calls, "parse") + p.parseRequests = append(p.parseRequests, req) + return p.outcome, nil +} + func openProcessProviderPiebaldDB(t *testing.T, path string) *sql.DB { t.Helper() database, err := sql.Open("sqlite3", path) diff --git a/internal/sync/provider_shadow.go b/internal/sync/provider_shadow.go deleted file mode 100644 index 73ba1fac2..000000000 --- a/internal/sync/provider_shadow.go +++ /dev/null @@ -1,583 +0,0 @@ -package sync - -import ( - "context" - "fmt" - "reflect" - "slices" - "strings" - - "go.kenn.io/agentsview/internal/parser" -) - -// ProviderObserveRequest is the source-level shadow-parse input used while the -// legacy sync path remains authoritative. -type ProviderObserveRequest struct { - Source parser.SourceRef - Machine string - ForceParse bool -} - -// ProviderObservation is the normalized, side-effect-free provider outcome for -// one source. -type ProviderObservation struct { - Fingerprint parser.SourceFingerprint - Results []parser.ParseResult - ExcludedSessionIDs []string - SourceErrors []parser.SourceError - SkipReason parser.SkipReason - ForceReplace bool - Planned ProviderPlannedEffects -} - -// ProviderShadowComparison is one caller-level shadow result. Legacy sync -// remains authoritative; this value records the side-effect-free provider -// observation and any differences from the legacy processResult. 
-type ProviderShadowComparison struct { - File parser.DiscoveredFile - Mode parser.ProviderMigrationMode - Source parser.SourceRef - Observation ProviderObservation - Mismatches []string - NotComparableReason string - Err error -} - -// ProviderPlannedEffects describes writes the provider path would have made. -// Shadow mode compares these in memory; it does not receive live DB, skip-cache, -// or diagnostic writers. SSE scopes are carried for later caller work but are -// not part of the root processResult comparison. -type ProviderPlannedEffects struct { - SourceKeys []string - DataVersions []ProviderPlannedDataVersion - SkipCacheKeys []string - Diagnostics []ProviderPlannedDiagnostic - SSEScopes []string -} - -// ProviderPlannedDataVersion is an in-memory data-version write candidate. -type ProviderPlannedDataVersion struct { - SessionID string - State parser.DataVersionState - RetryReason string -} - -// ProviderPlannedDiagnostic is an in-memory parse diagnostic candidate. -type ProviderPlannedDiagnostic struct { - SourceKey string - DisplayPath string - SessionID string - Err error - Retryable bool -} - -// DataVersionSessionIDs returns the planned data-version session IDs in parse -// result order. -func (p ProviderPlannedEffects) DataVersionSessionIDs() []string { - ids := make([]string, 0, len(p.DataVersions)) - for _, dataVersion := range p.DataVersions { - ids = append(ids, dataVersion.SessionID) - } - return ids -} - -// RetrySessionIDs returns sessions that need a future parse retry. -func (p ProviderPlannedEffects) RetrySessionIDs() []string { - var ids []string - for _, dataVersion := range p.DataVersions { - if dataVersion.State == parser.DataVersionNeedsRetry { - ids = append(ids, dataVersion.SessionID) - } - } - return ids -} - -// ObserveProviderSource fingerprints and parses a provider source without -// mutating persisted session state. 
It is the source-level comparison surface -// provider migration branches use before caller-level dual-run wiring exists. -func ObserveProviderSource( - ctx context.Context, - provider parser.Provider, - req ProviderObserveRequest, -) (ProviderObservation, error) { - def := provider.Definition() - if req.Source.Provider != def.Type { - return ProviderObservation{}, fmt.Errorf( - "provider source mismatch: source is %s, provider is %s", - req.Source.Provider, - def.Type, - ) - } - - fingerprint, err := provider.Fingerprint(ctx, req.Source) - if err != nil { - return ProviderObservation{}, err - } - outcome, err := provider.Parse(ctx, parser.ParseRequest{ - Source: req.Source, - Fingerprint: fingerprint, - Machine: req.Machine, - ForceParse: req.ForceParse, - }) - if err != nil { - return ProviderObservation{}, err - } - if err := validateProviderOutcome(def, req.Source, fingerprint, outcome); err != nil { - return ProviderObservation{}, err - } - - observation := ProviderObservation{ - Fingerprint: fingerprint, - Results: parseOutcomeResults(outcome.Results), - ExcludedSessionIDs: append([]string(nil), outcome.ExcludedSessionIDs...), - SourceErrors: append([]parser.SourceError(nil), outcome.SourceErrors...), - SkipReason: outcome.SkipReason, - ForceReplace: outcome.ForceReplace, - } - observation.Planned = planProviderEffects(req.Source, fingerprint, outcome) - return observation, nil -} - -func compareProviderObservationToProcessResult( - observation ProviderObservation, - legacy processResult, - file parser.DiscoveredFile, -) []string { - var mismatches []string - if len(observation.Results) != len(legacy.results) { - mismatches = append(mismatches, fmt.Sprintf( - "result count: provider=%d legacy=%d", - len(observation.Results), len(legacy.results), - )) - } - for i := 0; i < len(observation.Results) && i < len(legacy.results); i++ { - providerResult := observation.Results[i] - legacyResult := legacy.results[i] - if !reflect.DeepEqual(providerResult.Session, 
legacyResult.Session) { - mismatches = append(mismatches, fmt.Sprintf( - "result[%d] session differs: provider=%+v legacy=%+v", - i, providerResult.Session, legacyResult.Session, - )) - } - if !reflect.DeepEqual(providerResult.Messages, legacyResult.Messages) { - mismatches = append(mismatches, fmt.Sprintf( - "result[%d] messages differ", - i, - )) - } - if !reflect.DeepEqual(providerResult.UsageEvents, legacyResult.UsageEvents) { - mismatches = append(mismatches, fmt.Sprintf( - "result[%d] usage events differ", - i, - )) - } - } - if !slices.Equal(observation.ExcludedSessionIDs, legacy.excludedSessionIDs) { - mismatches = append(mismatches, fmt.Sprintf( - "excluded_session_ids: provider=%v legacy=%v", - observation.ExcludedSessionIDs, legacy.excludedSessionIDs, - )) - } - providerSourceErrors := comparableProviderSourceErrors(observation.SourceErrors) - legacySourceErrors := comparableLegacySourceErrors(file.Agent, legacy.sessionErrs) - if !reflect.DeepEqual(providerSourceErrors, legacySourceErrors) { - mismatches = append(mismatches, fmt.Sprintf( - "source_errors differ: provider=%v legacy=%v", - providerSourceErrors, legacySourceErrors, - )) - } - providerPlanned := comparablePlannedEffects(observation.Planned) - legacyPlanned := comparablePlannedEffects( - legacyPlannedEffectsFromProcessResult(file, legacy), - ) - if !slices.Equal(providerPlanned.SourceKeys, legacyPlanned.SourceKeys) { - mismatches = append(mismatches, fmt.Sprintf( - "planned.source_keys: provider=%v legacy=%v", - providerPlanned.SourceKeys, legacyPlanned.SourceKeys, - )) - } - if !reflect.DeepEqual(providerPlanned.DataVersions, legacyPlanned.DataVersions) { - mismatches = append(mismatches, fmt.Sprintf( - "planned.data_versions: provider=%v legacy=%v", - providerPlanned.DataVersions, legacyPlanned.DataVersions, - )) - } - if !slices.Equal(providerPlanned.SkipCacheKeys, legacyPlanned.SkipCacheKeys) { - mismatches = append(mismatches, fmt.Sprintf( - "planned.skip_cache_keys: provider=%v 
legacy=%v", - providerPlanned.SkipCacheKeys, legacyPlanned.SkipCacheKeys, - )) - } - if !reflect.DeepEqual(providerPlanned.Diagnostics, legacyPlanned.Diagnostics) { - mismatches = append(mismatches, fmt.Sprintf( - "planned.diagnostics: provider=%v legacy=%v", - providerPlanned.Diagnostics, legacyPlanned.Diagnostics, - )) - } - if observation.ForceReplace != legacy.forceReplace { - mismatches = append(mismatches, fmt.Sprintf( - "force_replace: provider=%t legacy=%t", - observation.ForceReplace, legacy.forceReplace, - )) - } - return mismatches -} - -func legacyPlannedEffectsFromProcessResult( - file parser.DiscoveredFile, - legacy processResult, -) ProviderPlannedEffects { - planned := ProviderPlannedEffects{} - for _, result := range legacy.results { - if result.Session.File.Path != "" && - !slices.Contains(planned.SourceKeys, result.Session.File.Path) { - planned.SourceKeys = append(planned.SourceKeys, result.Session.File.Path) - } - if result.Session.ID == "" { - continue - } - state := parser.DataVersionCurrent - if legacy.needsRetry { - state = parser.DataVersionNeedsRetry - } - planned.DataVersions = append(planned.DataVersions, ProviderPlannedDataVersion{ - SessionID: result.Session.ID, - State: state, - }) - } - if legacy.cacheSkip && legacy.mtime != 0 && !legacy.noCacheSkip && - legacy.incremental == nil && legacy.err == nil && len(legacy.results) == 0 && - file.Path != "" { - planned.SkipCacheKeys = append(planned.SkipCacheKeys, file.Path) - } - for _, sessionErr := range legacy.sessionErrs { - sessionID := normalizeLegacySessionID(file.Agent, sessionErr.sessionID) - planned.Diagnostics = append(planned.Diagnostics, ProviderPlannedDiagnostic{ - SourceKey: sessionErr.virtualPath, - DisplayPath: sessionErr.virtualPath, - SessionID: sessionID, - Err: sessionErr.err, - Retryable: true, - }) - } - return planned -} - -type comparableSourceError struct { - SessionID string - SourceKey string - Path string - Err string - Retryable bool -} - -func 
comparableProviderSourceErrors(sourceErrors []parser.SourceError) []comparableSourceError { - comparable := make([]comparableSourceError, 0, len(sourceErrors)) - for _, sourceErr := range sourceErrors { - path := sourceErr.DisplayPath - if path == "" { - path = sourceErr.SourceKey - } - comparable = append(comparable, comparableSourceError{ - SessionID: sourceErr.SessionID, - SourceKey: sourceErr.SourceKey, - Path: path, - Err: errString(sourceErr.Err), - Retryable: sourceErr.Retryable, - }) - } - return comparable -} - -func comparableLegacySourceErrors( - agent parser.AgentType, - sessionErrs []sessionParseError, -) []comparableSourceError { - comparable := make([]comparableSourceError, 0, len(sessionErrs)) - for _, sessionErr := range sessionErrs { - comparable = append(comparable, comparableSourceError{ - SessionID: normalizeLegacySessionID(agent, sessionErr.sessionID), - SourceKey: sessionErr.virtualPath, - Path: sessionErr.virtualPath, - Err: errString(sessionErr.err), - Retryable: true, - }) - } - return comparable -} - -func normalizeLegacySessionID(agent parser.AgentType, sessionID string) string { - if sessionID == "" { - return "" - } - def, ok := parser.AgentByType(agent) - if !ok || def.IDPrefix == "" { - return sessionID - } - host, rawID := parser.StripHostPrefix(sessionID) - if strings.HasPrefix(rawID, def.IDPrefix) { - return sessionID - } - normalized := def.IDPrefix + rawID - if host != "" { - return host + "~" + normalized - } - return normalized -} - -func errString(err error) string { - if err == nil { - return "" - } - return err.Error() -} - -type comparablePlanned struct { - SourceKeys []string - DataVersions []comparablePlannedDataVersion - SkipCacheKeys []string - Diagnostics []comparablePlannedDiagnostic -} - -type comparablePlannedDataVersion struct { - SessionID string - State parser.DataVersionState -} - -type comparablePlannedDiagnostic struct { - SourceKey string - DisplayPath string - SessionID string - Err string - Retryable bool 
-} - -func comparablePlannedEffects(planned ProviderPlannedEffects) comparablePlanned { - comparable := comparablePlanned{ - SourceKeys: slices.Clone(planned.SourceKeys), - SkipCacheKeys: slices.Clone(planned.SkipCacheKeys), - } - comparable.DataVersions = make( - []comparablePlannedDataVersion, - 0, - len(planned.DataVersions), - ) - for _, dataVersion := range planned.DataVersions { - comparable.DataVersions = append( - comparable.DataVersions, - comparablePlannedDataVersion{ - SessionID: dataVersion.SessionID, - State: dataVersion.State, - }, - ) - } - comparable.Diagnostics = make( - []comparablePlannedDiagnostic, - 0, - len(planned.Diagnostics), - ) - for _, diagnostic := range planned.Diagnostics { - comparable.Diagnostics = append( - comparable.Diagnostics, - comparablePlannedDiagnostic{ - SourceKey: diagnostic.SourceKey, - DisplayPath: diagnostic.DisplayPath, - SessionID: diagnostic.SessionID, - Err: errString(diagnostic.Err), - Retryable: diagnostic.Retryable, - }, - ) - } - return comparable -} - -func validateProviderOutcome( - def parser.AgentDef, - source parser.SourceRef, - fingerprint parser.SourceFingerprint, - outcome parser.ParseOutcome, -) error { - for _, result := range outcome.Results { - session := result.Result.Session - if session.Agent != def.Type { - return fmt.Errorf( - "%s: provider result session agent mismatch for %q: got %s", - def.Type, - session.ID, - session.Agent, - ) - } - if err := validateProviderParseResultSessionIDs(def, result.Result); err != nil { - return err - } - } - for _, sessionID := range outcome.ExcludedSessionIDs { - if err := validateProviderSessionID(def, sessionID, "excluded session id"); err != nil { - return err - } - } - for _, sourceErr := range outcome.SourceErrors { - if err := validateProviderSessionID(def, sourceErr.SessionID, "diagnostic session id"); err != nil { - return err - } - if sourceErr.SourceKey == "" { - return fmt.Errorf( - "%s: provider diagnostic source key is required for source %q", - 
def.Type, - source.Key, - ) - } - if !providerSourceKeyMatches(source, fingerprint, sourceErr.SourceKey) { - return fmt.Errorf( - "%s: provider diagnostic source key %q is unrelated to source %q", - def.Type, - sourceErr.SourceKey, - source.Key, - ) - } - } - return nil -} - -func validateProviderParseResultSessionIDs(def parser.AgentDef, result parser.ParseResult) error { - sessionIDs := []struct { - field string - id string - }{ - {field: "result session id", id: result.Session.ID}, - {field: "parent session id", id: result.Session.ParentSessionID}, - } - for _, sessionID := range sessionIDs { - if err := validateProviderSessionID(def, sessionID.id, sessionID.field); err != nil { - return err - } - } - for _, usage := range result.Session.UsageEvents { - if err := validateProviderSessionID(def, usage.SessionID, "session usage event session id"); err != nil { - return err - } - } - for _, usage := range result.UsageEvents { - if err := validateProviderSessionID(def, usage.SessionID, "usage event session id"); err != nil { - return err - } - } - for _, message := range result.Messages { - for _, toolCall := range message.ToolCalls { - if err := validateProviderSessionID(def, toolCall.SubagentSessionID, "tool call subagent session id"); err != nil { - return err - } - for _, event := range toolCall.ResultEvents { - if err := validateProviderSessionID(def, event.SubagentSessionID, "tool result event subagent session id"); err != nil { - return err - } - } - } - } - return nil -} - -func validateProviderSessionID(def parser.AgentDef, sessionID, field string) error { - if sessionID == "" || def.IDPrefix == "" { - return nil - } - if strings.HasPrefix(sessionID, def.IDPrefix) { - return nil - } - return fmt.Errorf( - "%s: provider %s %q must use prefix %q", - def.Type, - field, - sessionID, - def.IDPrefix, - ) -} - -func providerSourceKeyMatches( - source parser.SourceRef, - fingerprint parser.SourceFingerprint, - sourceKey string, -) bool { - if sourceKey == "" { - 
return true - } - for _, candidate := range []string{fingerprint.Key, source.FingerprintKey, source.Key} { - if candidate == "" { - continue - } - if sourceKey == candidate || strings.HasPrefix(sourceKey, candidate+"#") || - strings.HasPrefix(sourceKey, candidate+"::") || - strings.HasPrefix(sourceKey, candidate+"|") { - return true - } - } - return false -} - -func parseOutcomeResults(outcomes []parser.ParseResultOutcome) []parser.ParseResult { - results := make([]parser.ParseResult, 0, len(outcomes)) - for _, outcome := range outcomes { - results = append(results, outcome.Result) - } - return results -} - -func planProviderEffects( - source parser.SourceRef, - fingerprint parser.SourceFingerprint, - outcome parser.ParseOutcome, -) ProviderPlannedEffects { - planned := ProviderPlannedEffects{} - if sourceKey := plannedSourceKey(source, fingerprint); sourceKey != "" { - planned.SourceKeys = append(planned.SourceKeys, sourceKey) - } - if outcome.SkipReason != parser.SkipNone { - if skipKey := plannedSkipKey(source, fingerprint); skipKey != "" { - planned.SkipCacheKeys = append(planned.SkipCacheKeys, skipKey) - } - } - for _, result := range outcome.Results { - if result.Result.Session.ID == "" || - result.DataVersion == parser.DataVersionUnspecified { - continue - } - planned.DataVersions = append(planned.DataVersions, ProviderPlannedDataVersion{ - SessionID: result.Result.Session.ID, - State: result.DataVersion, - RetryReason: result.RetryReason, - }) - } - for _, sourceErr := range outcome.SourceErrors { - planned.Diagnostics = append(planned.Diagnostics, ProviderPlannedDiagnostic{ - SourceKey: sourceErr.SourceKey, - DisplayPath: sourceErr.DisplayPath, - SessionID: sourceErr.SessionID, - Err: sourceErr.Err, - Retryable: sourceErr.Retryable, - }) - } - return planned -} - -func plannedSourceKey( - source parser.SourceRef, - fingerprint parser.SourceFingerprint, -) string { - if fingerprint.Key != "" { - return fingerprint.Key - } - if source.FingerprintKey != "" { 
- return source.FingerprintKey - } - return source.Key -} - -func plannedSkipKey( - source parser.SourceRef, - fingerprint parser.SourceFingerprint, -) string { - if source.FingerprintKey != "" { - return source.FingerprintKey - } - return plannedSourceKey(source, fingerprint) -} diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go deleted file mode 100644 index ac1d7070f..000000000 --- a/internal/sync/provider_shadow_caller_test.go +++ /dev/null @@ -1,1443 +0,0 @@ -package sync - -import ( - "context" - "errors" - "os" - "path/filepath" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "go.kenn.io/agentsview/internal/db" - "go.kenn.io/agentsview/internal/dbtest" - "go.kenn.io/agentsview/internal/parser" - "go.kenn.io/agentsview/internal/testjsonl" -) - -func TestClassifyProviderChangedPathPassesStoredHintsToShadowProvider( - t *testing.T, -) { - root := t.TempDir() - eventPath := filepath.Join(root, "state.sqlite3-wal") - storedPath := filepath.Join(root, "state.sqlite3") + "#session-a" - database := dbtest.OpenTestDB(t) - require.NoError(t, database.UpsertSession(db.Session{ - ID: "claude:session-a", - Project: "demo", - Machine: "devbox", - Agent: string(parser.AgentClaude), - FilePath: &storedPath, - })) - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - }, - watchPlan: parser.WatchPlan{Roots: []parser.WatchRoot{{ - Path: root, - }}}, - changedSources: []parser.SourceRef{{ - Provider: parser.AgentClaude, - Key: storedPath, - DisplayPath: storedPath, - FingerprintKey: storedPath, - ProjectHint: "demo", - }}, - } - engine := NewEngine(database, EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - ProviderFactories: 
[]parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationShadowCompare, - }, - }) - - files := engine.classifyPaths([]string{eventPath}) - - require.Len(t, provider.changedRequests, 1) - assert.Equal(t, eventPath, provider.changedRequests[0].Path) - assert.Equal(t, root, provider.changedRequests[0].WatchRoot) - assert.Equal(t, []string{storedPath}, provider.changedRequests[0].StoredSourcePaths) - require.Len(t, files, 1) - assert.Equal(t, storedPath, files[0].Path) - assert.Equal(t, "demo", files[0].Project) - assert.True(t, files[0].ForceParse) - assert.False(t, files[0].ProviderProcess) - require.NotNil(t, files[0].ProviderSource) - assert.Equal(t, storedPath, files[0].ProviderSource.DisplayPath) -} - -func TestClassifyProviderChangedPathRunsAlongsideLegacyClassifier( - t *testing.T, -) { - root := t.TempDir() - sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-recognized.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile( - sourcePath, - []byte(testjsonl.JoinJSONL( - testjsonl.ClaudeUserJSON( - "legacy already recognizes this", - "2026-06-01T10:00:00Z", - "/Users/dev/code/demo", - ), - )), - 0o644, - )) - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - }, - watchPlan: parser.WatchPlan{Roots: []parser.WatchRoot{{ - Path: root, - }}}, - changedSources: []parser.SourceRef{{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - ProjectHint: "provider-project", - }}, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - 
ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationShadowCompare, - }, - }) - - files := engine.classifyPaths([]string{sourcePath}) - - require.Len(t, provider.changedRequests, 1) - assert.Equal(t, sourcePath, provider.changedRequests[0].Path) - require.Len(t, files, 1) - assert.Equal(t, sourcePath, files[0].Path) - assert.True(t, files[0].ForceParse) - assert.False(t, files[0].ProviderProcess) - require.NotNil(t, files[0].ProviderSource) - assert.Equal(t, sourcePath, files[0].ProviderSource.DisplayPath) -} - -func TestClassifyProviderChangedPathMarksAuthoritativeProviderProcess( - t *testing.T, -) { - root := t.TempDir() - sourcePath := filepath.Join(root, "-Users-dev-code-demo", "auth-recognized.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile( - sourcePath, - []byte(testjsonl.JoinJSONL( - testjsonl.ClaudeUserJSON( - "authoritative provider owns this", - "2026-06-01T10:00:00Z", - "/Users/dev/code/demo", - ), - )), - 0o644, - )) - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - }, - watchPlan: parser.WatchPlan{Roots: []parser.WatchRoot{{ - Path: root, - }}}, - changedSources: []parser.SourceRef{{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - }}, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: 
parser.ProviderMigrationProviderAuthoritative, - }, - }) - - files := engine.classifyPaths([]string{sourcePath}) - - require.Len(t, provider.changedRequests, 1) - assert.Equal(t, sourcePath, provider.changedRequests[0].Path) - require.Len(t, files, 1) - assert.Equal(t, sourcePath, files[0].Path) - assert.True(t, files[0].ProviderProcess) - assert.False(t, files[0].ForceParse) - require.NotNil(t, files[0].ProviderSource) - assert.Equal(t, sourcePath, files[0].ProviderSource.DisplayPath) -} - -func TestDiscoverProviderSourcesOnlyRunsAuthoritativeProviders(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "provider-only.jsonl") - source := parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - ProjectHint: "provider-project", - } - makeEngine := func(mode parser.ProviderMigrationMode) (*Engine, *shadowCallerProvider) { - t.Helper() - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - }, - discoverSources: []parser.SourceRef{source}, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: mode, - }, - }) - return engine, provider - } - - shadowEngine, shadowProvider := makeEngine(parser.ProviderMigrationShadowCompare) - files, failures := shadowEngine.discoverProviderSources(context.Background(), nil) - assert.Empty(t, files) - assert.Zero(t, failures) - assert.Empty(t, shadowProvider.calls) - - authoritativeEngine, authoritativeProvider := makeEngine( - parser.ProviderMigrationProviderAuthoritative, - ) - files, failures = 
authoritativeEngine.discoverProviderSources(context.Background(), nil) - require.Len(t, files, 1) - assert.Zero(t, failures) - assert.Equal(t, []string{"discover"}, authoritativeProvider.calls) - assert.Equal(t, sourcePath, files[0].Path) - assert.Equal(t, "provider-project", files[0].Project) - assert.True(t, files[0].ProviderProcess) - require.NotNil(t, files[0].ProviderSource) - assert.Equal(t, source, *files[0].ProviderSource) -} - -func TestSyncAllProviderDiscoveryFailureSkipsFinishedWatermark(t *testing.T) { - root := t.TempDir() - discoverErr := errors.New("provider discovery failed") - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - }, - discoverErr: discoverErr, - } - database := dbtest.OpenTestDB(t) - engine := NewEngine(database, EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - stats := engine.SyncAll(context.Background(), nil) - - assert.Equal(t, []string{"discover"}, provider.calls) - assert.Equal(t, 1, stats.Failed) - started, err := database.GetSyncState(syncStateStartedAt) - require.NoError(t, err) - assert.NotEmpty(t, started) - finished, err := database.GetSyncState(syncStateFinishedAt) - require.NoError(t, err) - assert.Empty(t, finished) -} - -func TestFindSourceFileFallsBackToAuthoritativeNonFileProvider(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "forge.db") + "#session-a" - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentForge, - 
DisplayName: "Forge", - }, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentForge, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - }, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentForge: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentForge: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - found := engine.FindSourceFile("forge:session-a") - - assert.Equal(t, sourcePath, found) - assert.Equal(t, "session-a", provider.findRequest.RawSessionID) - assert.Equal(t, "forge:session-a", provider.findRequest.FullSessionID) -} - -func TestProviderVirtualSourceBackedByEventPreservesHashInDBPath(t *testing.T) { - dbPath := filepath.Join(t.TempDir(), "state#prod", "sessions.db") - sourcePath := dbPath + "#session-a" - - assert.True(t, providerVirtualSourceBackedByEvent(sourcePath, dbPath)) - assert.True(t, providerVirtualSourceBackedByEvent(sourcePath, dbPath+"-wal")) - assert.True(t, providerVirtualSourceBackedByEvent(sourcePath, dbPath+"-shm")) - assert.False(t, providerVirtualSourceBackedByEvent(sourcePath, filepath.Dir(dbPath))) -} - -func TestParseDiffProviderDiscoveryErrorFails(t *testing.T) { - root := t.TempDir() - discoverErr := errors.New("discover failed") - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - }, - }, - }, - discoverErr: discoverErr, - } - engine := NewDiffEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentCodex: {root}, - }, - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: 
map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentCodex: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - report, err := engine.ParseDiff(context.Background(), ParseDiffOptions{ - Agents: []parser.AgentType{parser.AgentCodex}, - }) - - require.Error(t, err) - assert.Nil(t, report) - assert.ErrorContains(t, err, "parse-diff codex provider discovery") - assert.ErrorIs(t, err, discoverErr) -} - -func TestProcessFileShadowRecordsCachedSkipAsNotComparable(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-skip.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile( - sourcePath, - []byte(testjsonl.JoinJSONL( - testjsonl.ClaudeUserJSON( - "already cached", - "2026-06-01T10:00:00Z", - "/Users/dev/code/demo", - ), - )), - 0o644, - )) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCowork, - DisplayName: "Claude Cowork", - }, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentCowork, - Key: sourcePath, - }, - } - var comparisons []ProviderShadowComparison - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentCowork: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentCowork: parser.ProviderMigrationShadowCompare, - }, - ProviderShadowRecorder: func(comparison ProviderShadowComparison) { - comparisons = append(comparisons, comparison) - }, - }) - engine.InjectSkipCache(map[string]int64{ - sourcePath: info.ModTime().UnixNano(), - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - 
Agent: parser.AgentCowork, - }) - - require.True(t, result.skip) - require.Len(t, comparisons, 1) - assert.Equal(t, "legacy skip", comparisons[0].NotComparableReason) - assert.Empty(t, comparisons[0].Mismatches) - assert.Empty(t, provider.calls) -} - -func TestProcessFileProviderAuthoritativeUsesInjectedProvider(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "provider-owned.jsonl") - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - - source := parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - ProjectHint: "provider-project", - } - providerResult := parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "provider-owned", - Project: "provider-project", - Agent: parser.AgentClaude, - Machine: "devbox", - File: parser.FileInfo{ - Path: sourcePath, - Mtime: info.ModTime().UnixNano(), - }, - }, - Messages: []parser.ParsedMessage{{ - Role: parser.RoleUser, - Content: "parsed through provider", - Timestamp: info.ModTime(), - Ordinal: 0, - }}, - } - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath + "#fingerprint", - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{{ - Result: providerResult, - DataVersion: parser.DataVersionCurrent, - }}, - ResultSetComplete: true, - ForceReplace: true, - }, - }, - source: source, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: 
map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - }) - - require.NoError(t, result.err) - require.Len(t, result.results, 1) - assert.Equal(t, "provider-owned", result.results[0].Session.ID) - assert.Equal(t, "provider-project", result.results[0].Session.Project) - assert.Equal(t, info.ModTime().UnixNano(), result.mtime) - assert.True(t, result.forceReplace) - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) -} - -func TestProcessFileProviderAuthoritativeSkipsFreshClaudeBeforeFingerprint(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "-Users-dev-code-demo", "fresh.jsonl") - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - - source := parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - ProjectHint: "demo", - } - - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - Caps: parser.Capabilities{ - Source: parser.SourceCapabilities{ - IncrementalAppend: parser.CapabilitySupported, - }, - }, - }, - }, - source: source, - } - database := dbtest.OpenTestDB(t) - filePath := sourcePath - fileSize := info.Size() - fileMtime := info.ModTime().UnixNano() - require.NoError(t, database.UpsertSession(db.Session{ - ID: "fresh", - Project: "demo", - Machine: "devbox", - Agent: string(parser.AgentClaude), - FilePath: &filePath, - FileSize: &fileSize, - FileMtime: &fileMtime, - })) - require.NoError(t, database.SetSessionDataVersion("fresh", db.CurrentDataVersion())) - 
engine := NewEngine(database, EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentClaude: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - }) - - require.NoError(t, result.err) - assert.True(t, result.skip) - assert.Equal(t, fileMtime, result.mtime) - assert.Empty(t, provider.calls) - assert.Equal(t, sourcePath, provider.findRequest.StoredFilePath) -} - -func TestProcessFileProviderAuthoritativeSkipsFreshCoworkBeforeFingerprint(t *testing.T) { - root := t.TempDir() - database := dbtest.OpenTestDB(t) - sourcePath, sourceMtime := writeFreshCoworkProviderSource( - t, root, database, "fresh-session", - ) - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCowork, - DisplayName: "Claude Cowork", - }, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentCowork, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - }, - } - engine := NewEngine(database, EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentCowork: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentCowork: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentCowork, - }) - - require.NoError(t, result.err) - assert.True(t, result.skip) - assert.Equal(t, sourceMtime, result.mtime) - 
assert.Empty(t, provider.calls) -} - -func TestProcessFileProviderAuthoritativeSkipsFreshGeminiBeforeFingerprint(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join( - root, "tmp", "alias", "chats", "session-001.json", - ) - sourceMtime := writeFreshProviderDBSession( - t, sourcePath, nil, - ) - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentGemini, - DisplayName: "Gemini CLI", - }, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentGemini, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - }, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentGemini: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentGemini: parser.ProviderMigrationProviderAuthoritative, - }, - }) - requireFreshProviderSession(t, engine.db, parser.AgentGemini, sourcePath, sourceMtime) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentGemini, - }) - - require.NoError(t, result.err) - assert.True(t, result.skip) - assert.Equal(t, sourceMtime, result.mtime) - assert.Empty(t, provider.calls) -} - -func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *testing.T) { - root := t.TempDir() - database := dbtest.OpenTestDB(t) - sourcePath, sourceMtime := writeFreshCoworkProviderSource( - t, root, database, "force-session", - ) - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCowork, - DisplayName: "Claude Cowork", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath, - MTimeNS: sourceMtime, - }, - 
outcome: parser.ParseOutcome{ - ResultSetComplete: true, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentCowork, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - }, - } - engine := NewEngine(database, EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentCowork: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentCowork: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentCowork, - ForceParse: true, - }) - - require.NoError(t, result.err) - assert.False(t, result.skip) - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) - assert.True(t, provider.parseRequest.ForceParse) -} - -func TestProcessFileProviderAuthoritativeSkipsFreshCopilotBeforeFingerprint(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join( - root, "session-state", "copilot-fresh", "events.jsonl", - ) - workspacePath := filepath.Join(filepath.Dir(sourcePath), "workspace.yaml") - sourceMtime := writeFreshProviderDBSession( - t, sourcePath, &workspacePath, - ) - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCopilot, - DisplayName: "Copilot CLI", - }, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentCopilot, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - }, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{ - parser.AgentCopilot: {root}, - }, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: 
map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentCopilot: parser.ProviderMigrationProviderAuthoritative, - }, - }) - requireFreshProviderSession(t, engine.db, parser.AgentCopilot, sourcePath, sourceMtime) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentCopilot, - }) - - require.NoError(t, result.err) - assert.True(t, result.skip) - assert.Equal(t, sourceMtime, result.mtime) - assert.Empty(t, provider.calls) -} - -func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "multi-provider-owned.jsonl") - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - - source := parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath, - } - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath, - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{ - { - Result: parser.ParseResult{Session: parser.ParsedSession{ - ID: "provider-current", Agent: parser.AgentClaude, - }}, - DataVersion: parser.DataVersionCurrent, - }, - { - Result: parser.ParseResult{Session: parser.ParsedSession{ - ID: "provider-retry", Agent: parser.AgentClaude, - }}, - DataVersion: parser.DataVersionNeedsRetry, - }, - }, - ResultSetComplete: true, - }, - }, - source: source, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}}, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, 
- }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - }) - - require.NoError(t, result.err) - require.Len(t, result.results, 2) - assert.False(t, result.needsRetryForSession("provider-current")) - assert.True(t, result.needsRetryForSession("provider-retry")) -} - -func TestProcessFileProviderAuthoritativeSuppressesUncleanSkipCache(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "unclean-provider-owned.jsonl") - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - - source := parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath + "#source-key", - } - makeEngine := func(outcome parser.ParseOutcome, parseErr error) *Engine { - t.Helper() - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath + "#fingerprint", - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: outcome, - parseErr: parseErr, - }, - source: source, - } - return NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}}, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - } - - tests := []struct { - name string - outcome parser.ParseOutcome - parseErr error - wantErr bool - }{ - { - name: "whole 
source parse error", - wantErr: true, - parseErr: errors.New( - "provider source failed", - ), - }, - { - name: "incomplete empty result set", - outcome: parser.ParseOutcome{ - ResultSetComplete: false, - }, - }, - { - name: "source error", - outcome: parser.ParseOutcome{ - ResultSetComplete: true, - SourceErrors: []parser.SourceError{{ - SourceKey: sourcePath, - Err: errors.New("session failed"), - }}, - }, - }, - { - name: "retry result", - outcome: parser.ParseOutcome{ - ResultSetComplete: true, - Results: []parser.ParseResultOutcome{{ - Result: parser.ParseResult{Session: parser.ParsedSession{ - ID: "provider-retry", Agent: parser.AgentClaude, - }}, - DataVersion: parser.DataVersionNeedsRetry, - }}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - engine := makeEngine(tt.outcome, tt.parseErr) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - }) - - if tt.wantErr { - require.Error(t, result.err) - } else { - require.NoError(t, result.err) - } - assert.True(t, result.cacheSkip) - assert.True(t, result.noCacheSkip) - - stats := engine.collectAndBatch( - context.Background(), - singleSyncJob(syncJob{processResult: result, path: sourcePath}), - 1, - 1, - nil, - syncWriteDefault, - ) - if tt.wantErr { - assert.Equal(t, 1, stats.Failed) - } - cache := engine.SnapshotSkipCache() - assert.NotContains(t, cache, sourcePath+"#source-key") - assert.NotContains(t, cache, sourcePath) - }) - } -} - -func TestSyncSingleSessionProviderAuthoritativeBypassesProviderSkipCache(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "single-provider-owned.jsonl") - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - - sourceKey := sourcePath + "#source-key" - providerResult := parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "provider-owned", - Project: 
"provider-project", - Agent: parser.AgentClaude, - Machine: "devbox", - StartedAt: info.ModTime(), - EndedAt: info.ModTime(), - MessageCount: 1, - File: parser.FileInfo{ - Path: sourcePath, - Mtime: info.ModTime().UnixNano(), - }, - }, - Messages: []parser.ParsedMessage{{ - Role: parser.RoleUser, - Content: "explicit provider resync", - Timestamp: info.ModTime(), - Ordinal: 0, - }}, - } - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath + "#fingerprint", - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{{ - Result: providerResult, - DataVersion: parser.DataVersionCurrent, - }}, - ResultSetComplete: true, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourceKey, - ProjectHint: "provider-project", - }, - } - database := dbtest.OpenTestDB(t) - filePath := sourcePath - fileSize := info.Size() - fileMtime := info.ModTime().UnixNano() - require.NoError(t, database.UpsertSession(db.Session{ - ID: "provider-owned", - Project: "old-project", - Machine: "devbox", - Agent: string(parser.AgentClaude), - FilePath: &filePath, - FileSize: &fileSize, - FileMtime: &fileMtime, - })) - engine := NewEngine(database, EngineConfig{ - AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}}, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - engine.InjectSkipCache(map[string]int64{ - sourceKey: info.ModTime().UnixNano(), - }) - - require.NoError(t, 
engine.SyncSingleSession("provider-owned")) - - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) - assert.True(t, provider.parseRequest.ForceParse) - cache := engine.SnapshotSkipCache() - assert.NotContains(t, cache, sourceKey) -} - -func singleSyncJob(job syncJob) <-chan syncJob { - results := make(chan syncJob, 1) - results <- job - close(results) - return results -} - -func TestProcessFileProviderAuthoritativeForceParseAllowsStaleSourceLookup(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "force-provider-owned.jsonl") - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath, - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ResultSetComplete: true}, - }, - source: parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, - FingerprintKey: sourcePath + "#source-key", - }, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}}, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - engine.forceParse = true - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - }) - - require.NoError(t, result.err) - assert.False(t, provider.findRequest.RequireFreshSource) - assert.True(t, provider.parseRequest.ForceParse) -} - -func 
TestProcessFileProviderAuthoritativeNotFoundFails(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "missing-provider-owned.jsonl") - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - found := false - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - }, - findFound: &found, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}}, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - }) - - require.Error(t, result.err) - assert.Contains(t, result.err.Error(), "provider source not found") - assert.Empty(t, provider.calls) -} - -func TestProcessFileProviderAuthoritativeTranslatesSkipReason(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join(root, "skip-provider-owned.jsonl") - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - - provider := &shadowCallerProvider{ - shadowTestProvider: shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentClaude, - DisplayName: "Claude Code", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: sourcePath, - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - }, - outcome: parser.ParseOutcome{ - ResultSetComplete: true, - SkipReason: parser.SkipNoSession, - }, - }, - source: parser.SourceRef{ - Provider: parser.AgentClaude, - Key: sourcePath, - DisplayPath: sourcePath, 
- FingerprintKey: sourcePath + "#source-key", - }, - } - engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ - AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}}, - Machine: "devbox", - ProviderFactories: []parser.ProviderFactory{ - shadowCallerFactory{provider: provider}, - }, - ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ - parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, - }, - }) - - result := engine.processFile(context.Background(), parser.DiscoveredFile{ - Path: sourcePath, - Agent: parser.AgentClaude, - }) - - require.NoError(t, result.err) - assert.True(t, result.skip) - assert.True(t, result.cacheSkip) - assert.Equal(t, sourcePath+"#source-key", result.cacheKey) - assert.Equal(t, info.ModTime().UnixNano(), result.mtime) - assert.Empty(t, result.results) - - results := make(chan syncJob, 1) - results <- syncJob{ - processResult: result, - path: sourcePath, - } - close(results) - stats := engine.collectAndBatch(context.Background(), results, 1, 1, nil, syncWriteDefault) - - assert.Equal(t, 1, stats.Skipped) - cache := engine.SnapshotSkipCache() - assert.Equal(t, info.ModTime().UnixNano(), cache[sourcePath+"#source-key"]) - _, cachedByPath := cache[sourcePath] - assert.False(t, cachedByPath) - - cleanResult := processResult{ - results: []parser.ParseResult{{ - Session: parser.ParsedSession{ - ID: "provider-clean", - Project: "provider-project", - Agent: parser.AgentClaude, - Machine: "devbox", - StartedAt: info.ModTime(), - EndedAt: info.ModTime(), - File: parser.FileInfo{ - Path: sourcePath, - Mtime: info.ModTime().UnixNano(), - }, - }, - }}, - mtime: info.ModTime().UnixNano(), - cacheSkip: true, - cacheKey: sourcePath + "#source-key", - } - stats = engine.collectAndBatch( - context.Background(), - singleSyncJob(syncJob{processResult: cleanResult, path: sourcePath}), - 1, - 1, - nil, - syncWriteDefault, - ) - - assert.Equal(t, 1, stats.Synced) - cache = engine.SnapshotSkipCache() - 
assert.NotContains(t, cache, sourcePath+"#source-key") - assert.NotContains(t, cache, sourcePath) -} - -type shadowCallerProvider struct { - shadowTestProvider - source parser.SourceRef - findRequest parser.FindSourceRequest - findFound *bool - watchPlan parser.WatchPlan - changedSources []parser.SourceRef - changedRequests []parser.ChangedPathRequest - changedErr error - discoverSources []parser.SourceRef - discoverErr error -} - -func (p *shadowCallerProvider) Discover( - context.Context, -) ([]parser.SourceRef, error) { - p.calls = append(p.calls, "discover") - if p.discoverErr != nil { - return nil, p.discoverErr - } - return append([]parser.SourceRef(nil), p.discoverSources...), nil -} - -func (p *shadowCallerProvider) FindSource( - _ context.Context, - req parser.FindSourceRequest, -) (parser.SourceRef, bool, error) { - p.findRequest = req - if p.findFound != nil && !*p.findFound { - return parser.SourceRef{}, false, nil - } - return p.source, true, nil -} - -func (p *shadowCallerProvider) WatchPlan( - context.Context, -) (parser.WatchPlan, error) { - return p.watchPlan, nil -} - -func (p *shadowCallerProvider) SourcesForChangedPath( - _ context.Context, - req parser.ChangedPathRequest, -) ([]parser.SourceRef, error) { - p.changedRequests = append(p.changedRequests, req) - if p.changedErr != nil { - return nil, p.changedErr - } - return append([]parser.SourceRef(nil), p.changedSources...), nil -} - -type shadowCallerFactory struct { - provider *shadowCallerProvider -} - -func (f shadowCallerFactory) Definition() parser.AgentDef { - return f.provider.Definition() -} - -func (f shadowCallerFactory) Capabilities() parser.Capabilities { - return f.provider.Capabilities() -} - -func (f shadowCallerFactory) NewProvider(parser.ProviderConfig) parser.Provider { - return f.provider -} - -func writeFreshCoworkProviderSource( - t *testing.T, - root string, - database *db.DB, - rawSessionID string, -) (string, int64) { - t.Helper() - - sessionDir := filepath.Join(root, 
"org", "workspace", "local_fresh") - projectDir := filepath.Join(sessionDir, ".claude", "projects", "-demo") - require.NoError(t, os.MkdirAll(projectDir, 0o755)) - metaPath := sessionDir + ".json" - sourcePath := filepath.Join(projectDir, rawSessionID+".jsonl") - require.NoError(t, os.WriteFile(metaPath, []byte(`{"title":"Fresh"}`), 0o644)) - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - - transcriptTime := time.Unix(1_781_475_210, 0) - metaTime := transcriptTime.Add(time.Second) - require.NoError(t, os.Chtimes(sourcePath, transcriptTime, transcriptTime)) - require.NoError(t, os.Chtimes(metaPath, metaTime, metaTime)) - info, err := os.Stat(sourcePath) - require.NoError(t, err) - sourceSize := info.Size() - sourceMtime := parser.CoworkSessionMtime(sourcePath, info.ModTime().UnixNano()) - require.Equal(t, metaTime.UnixNano(), sourceMtime) - - fullSessionID := "cowork:" + rawSessionID - require.NoError(t, database.UpsertSession(db.Session{ - ID: fullSessionID, - Project: "cowork-project", - Machine: "devbox", - Agent: string(parser.AgentCowork), - FilePath: &sourcePath, - FileSize: &sourceSize, - FileMtime: &sourceMtime, - })) - require.NoError(t, database.SetSessionDataVersion( - fullSessionID, db.CurrentDataVersion(), - )) - - return sourcePath, sourceMtime -} - -func writeFreshProviderDBSession( - t *testing.T, - sourcePath string, - mtimeSidecar *string, -) int64 { - t.Helper() - - require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) - require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) - sourceTime := time.Unix(1_781_475_210, 0) - require.NoError(t, os.Chtimes(sourcePath, sourceTime, sourceTime)) - mtime := sourceTime.UnixNano() - if mtimeSidecar != nil { - sidecarTime := sourceTime.Add(time.Second) - require.NoError(t, os.WriteFile(*mtimeSidecar, []byte("name: Fresh\n"), 0o644)) - require.NoError(t, os.Chtimes(*mtimeSidecar, sidecarTime, sidecarTime)) - mtime = sidecarTime.UnixNano() - } - - return mtime -} - 
-func requireFreshProviderSession( - t *testing.T, - database *db.DB, - agent parser.AgentType, - sourcePath string, - sourceMtime int64, -) { - t.Helper() - - info, err := os.Stat(sourcePath) - require.NoError(t, err) - sourceSize := info.Size() - fullSessionID := string(agent) + ":fresh" - require.NoError(t, database.UpsertSession(db.Session{ - ID: fullSessionID, - Project: "provider-project", - Machine: "devbox", - Agent: string(agent), - FilePath: &sourcePath, - FileSize: &sourceSize, - FileMtime: &sourceMtime, - })) - require.NoError(t, database.SetSessionDataVersion( - fullSessionID, db.CurrentDataVersion(), - )) -} diff --git a/internal/sync/provider_shadow_codex_test.go b/internal/sync/provider_shadow_codex_test.go deleted file mode 100644 index 0ef157e65..000000000 --- a/internal/sync/provider_shadow_codex_test.go +++ /dev/null @@ -1,83 +0,0 @@ -package sync - -import ( - "context" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "go.kenn.io/agentsview/internal/parser" - "go.kenn.io/agentsview/internal/testjsonl" -) - -// TestObserveProviderSourceParsesCodexSourceWithIndexTitle exercises the folded -// Codex provider end to end through ObserveProviderSource. The legacy -// ParseCodexSession entrypoint was deleted in the fold, so this replaces the -// shadow-baseline comparison with provider-API coverage that pins the parsed -// session shape: discovery finds the dated transcript, the sibling -// session_index.jsonl supplies the thread title as session_name, and the -// observed parse output and data-version planning match the source. 
-func TestObserveProviderSourceParsesCodexSourceWithIndexTitle(t *testing.T) { - base := t.TempDir() - root := filepath.Join(base, "sessions") - uuid := "019eb791-cf7d-75c1-8439-9ed74c12abcd" - sourcePath := filepath.Join( - root, - "2026", - "06", - "11", - "rollout-2026-06-11T12-44-06-"+uuid+".jsonl", - ) - writeProviderShadowSourceFile( - t, - sourcePath, - testjsonl.JoinJSONL( - testjsonl.CodexSessionMetaJSON( - uuid, - "/home/user/code/api", - "codex_cli_rs", - "2026-06-11T12:44:06Z", - ), - testjsonl.CodexMsgJSON("user", "provider question", "2026-06-11T12:44:07Z"), - ), - ) - writeProviderShadowSourceFile( - t, - filepath.Join(base, parser.CodexSessionIndexFilename), - `{"id":"`+uuid+`","thread_name":"Provider title","updated_at":"2026-06-11T17:34:20Z"}`+"\n", - ) - - provider, ok := parser.NewProvider(parser.AgentCodex, parser.ProviderConfig{ - Roots: []string{root}, - Machine: "devbox", - }) - require.True(t, ok) - sources, err := provider.Discover(context.Background()) - require.NoError(t, err) - require.Len(t, sources, 1) - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: sources[0], - Machine: "devbox", - }) - require.NoError(t, err) - require.Len(t, observation.Results, 1) - - session := observation.Results[0].Session - assert.Equal(t, "codex:"+uuid, session.ID) - assert.Equal(t, parser.AgentCodex, session.Agent) - assert.Equal(t, "devbox", session.Machine) - assert.Equal(t, "/home/user/code/api", session.Cwd) - assert.Equal(t, "Provider title", session.SessionName) - assert.Equal(t, "provider question", session.FirstMessage) - assert.Equal(t, sourcePath, session.File.Path) - assert.Equal(t, observation.Fingerprint.Hash, session.File.Hash) - - require.Len(t, observation.Results[0].Messages, 1) - assert.Equal(t, parser.RoleUser, observation.Results[0].Messages[0].Role) - - assert.Equal(t, []string{session.ID}, observation.Planned.DataVersionSessionIDs()) - assert.Empty(t, 
observation.Planned.Diagnostics) -} diff --git a/internal/sync/provider_shadow_kiro_family_test.go b/internal/sync/provider_shadow_kiro_family_test.go deleted file mode 100644 index f75ce007c..000000000 --- a/internal/sync/provider_shadow_kiro_family_test.go +++ /dev/null @@ -1,111 +0,0 @@ -package sync_test - -import ( - "context" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "go.kenn.io/agentsview/internal/parser" - "go.kenn.io/agentsview/internal/sync" -) - -// TestKiroProviderAuthoritativeParsesSQLiteSource exercises the Kiro -// provider end to end now that Kiro is provider-authoritative and the legacy -// package-level entrypoints have been folded away. The provider discovers the -// current-store data.sqlite3 source, fans it out per conversation, and emits a -// force-replace result set so the archive cleanup semantics are preserved. -func TestKiroProviderAuthoritativeParsesSQLiteSource(t *testing.T) { - root := t.TempDir() - store := createKiroSQLiteDB(t, root) - sessionID := "sqlite-session" - store.addSession( - t, - "/home/user/code/kiro-app", - sessionID, - readKiroSQLiteFixture(t, "standard_payload.json"), - 1779012000000, - 1779012030000, - ) - - provider, ok := parser.NewProvider(parser.AgentKiro, parser.ProviderConfig{ - Roots: []string{root}, - Machine: "devbox", - }) - require.True(t, ok) - sources, err := provider.Discover(context.Background()) - require.NoError(t, err) - require.Len(t, sources, 1) - - observation, err := sync.ObserveProviderSource(context.Background(), provider, sync.ProviderObserveRequest{ - Source: sources[0], - Machine: "devbox", - }) - require.NoError(t, err) - require.Len(t, observation.Results, 1) - - assert.True(t, observation.ForceReplace) - session := observation.Results[0].Session - assert.Equal(t, "kiro:"+sessionID, session.ID) - assert.Equal(t, parser.AgentKiro, session.Agent) - assert.Equal(t, "devbox", session.Machine) - assert.Equal(t, 
store.path+"#"+sessionID, session.File.Path) - assert.NotEmpty(t, observation.Results[0].Messages) - assert.Equal(t, []string{session.ID}, observation.Planned.DataVersionSessionIDs()) - assert.Empty(t, observation.Planned.Diagnostics) -} - -// TestKiroIDEProviderAuthoritativeParsesWorkspaceSession exercises the Kiro -// IDE provider end to end after the fold: discovery and parse own the -// workspace-sessions JSON source without any legacy package-level entrypoint. -func TestKiroIDEProviderAuthoritativeParsesWorkspaceSession(t *testing.T) { - root := t.TempDir() - sourcePath := filepath.Join( - root, - "workspace-sessions", - "encoded-workspace", - "new-session.json", - ) - writeKiroIDEProviderSource(t, sourcePath, "New IDE question") - - provider, ok := parser.NewProvider(parser.AgentKiroIDE, parser.ProviderConfig{ - Roots: []string{root}, - Machine: "devbox", - }) - require.True(t, ok) - sources, err := provider.Discover(context.Background()) - require.NoError(t, err) - require.Len(t, sources, 1) - - observation, err := sync.ObserveProviderSource(context.Background(), provider, sync.ProviderObserveRequest{ - Source: sources[0], - Machine: "devbox", - }) - require.NoError(t, err) - require.Len(t, observation.Results, 1) - - session := observation.Results[0].Session - assert.Equal(t, parser.AgentKiroIDE, session.Agent) - assert.Equal(t, "devbox", session.Machine) - assert.Equal(t, observation.Fingerprint.Hash, session.File.Hash) - assert.NotEmpty(t, observation.Results[0].Messages) - assert.Equal(t, []string{session.ID}, observation.Planned.DataVersionSessionIDs()) - assert.Empty(t, observation.Planned.Diagnostics) -} - -func writeKiroIDEProviderSource(t *testing.T, path, question string) { - t.Helper() - require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) - require.NoError(t, os.WriteFile(path, []byte( - `{"sessionId":"new-session",`+ - `"title":"New title",`+ - `"workspaceDirectory":"/home/user/dev/new-app",`+ - `"history":[`+ - 
`{"message":{"role":"user","content":"`+question+`","id":"m1"}},`+ - `{"message":{"role":"assistant","content":"New IDE answer","id":"m2"}}`+ - `]}`+"\n", - ), 0o644)) -} diff --git a/internal/sync/provider_shadow_support_test.go b/internal/sync/provider_shadow_support_test.go deleted file mode 100644 index 03714305b..000000000 --- a/internal/sync/provider_shadow_support_test.go +++ /dev/null @@ -1,19 +0,0 @@ -package sync - -import ( - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/require" -) - -// writeProviderShadowSourceFile writes a provider source fixture, creating the -// parent directory. It is the shared helper for the per-provider shadow/parse -// tests (the Codex fold is the lowest caller; later provider folds reuse it). -func writeProviderShadowSourceFile(t *testing.T, path, content string) { - t.Helper() - - require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) - require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) -} diff --git a/internal/sync/provider_shadow_test.go b/internal/sync/provider_shadow_test.go deleted file mode 100644 index ec3a892f0..000000000 --- a/internal/sync/provider_shadow_test.go +++ /dev/null @@ -1,541 +0,0 @@ -package sync - -import ( - "context" - "errors" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "go.kenn.io/agentsview/internal/parser" -) - -func TestObserveProviderSourcePlansEffectsWithoutWriter(t *testing.T) { - sourceErr := errors.New("bad session") - provider := &shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: "source-key", - Size: 123, - MTimeNS: 456, - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{ - { - Result: parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "codex:one", - Agent: parser.AgentCodex, - }, - }, - DataVersion: parser.DataVersionCurrent, - 
}, - { - Result: parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "codex:two", - Agent: parser.AgentCodex, - }, - }, - DataVersion: parser.DataVersionNeedsRetry, - RetryReason: "fallback parser", - }, - }, - ExcludedSessionIDs: []string{"codex:excluded"}, - SourceErrors: []parser.SourceError{ - { - SourceKey: "source-key", - DisplayPath: "display-path", - SessionID: "codex:bad", - Err: sourceErr, - Retryable: true, - }, - }, - SkipReason: parser.SkipNonInteractive, - ForceReplace: true, - }, - } - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: parser.SourceRef{ - Provider: parser.AgentCodex, - Key: "source-key", - DisplayPath: "display-path", - FingerprintKey: "fingerprint-key", - }, - Machine: "devbox", - ForceParse: true, - }) - require.NoError(t, err) - - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) - assert.Equal(t, "devbox", provider.parseRequest.Machine) - assert.True(t, provider.parseRequest.ForceParse) - assert.Equal(t, int64(456), provider.parseRequest.Fingerprint.MTimeNS) - - require.Len(t, observation.Results, 2) - assert.Equal(t, "codex:one", observation.Results[0].Session.ID) - assert.Equal(t, []string{"codex:excluded"}, observation.ExcludedSessionIDs) - assert.Equal(t, parser.SkipNonInteractive, observation.SkipReason) - assert.True(t, observation.ForceReplace) - - assert.Equal(t, []string{"source-key"}, observation.Planned.SourceKeys) - assert.Equal(t, []string{"fingerprint-key"}, observation.Planned.SkipCacheKeys) - assert.Equal(t, []string{"codex:one", "codex:two"}, observation.Planned.DataVersionSessionIDs()) - assert.Equal(t, []string{"codex:two"}, observation.Planned.RetrySessionIDs()) - require.Len(t, observation.Planned.Diagnostics, 1) - assert.Equal(t, "codex:bad", observation.Planned.Diagnostics[0].SessionID) - assert.True(t, observation.Planned.Diagnostics[0].Retryable) - assert.ErrorIs(t, observation.Planned.Diagnostics[0].Err, sourceErr) - 
assert.Empty(t, observation.Planned.SSEScopes) -} - -func TestCompareProviderObservationDetectsSessionMetadataMismatch(t *testing.T) { - providerResult := parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "codex:one", - Agent: parser.AgentCodex, - Project: "proj", - Machine: "devbox", - ParentSessionID: "codex:provider-parent", - }, - } - legacyResult := providerResult - legacyResult.Session.ParentSessionID = "codex:legacy-parent" - - mismatches := compareProviderObservationToProcessResult( - ProviderObservation{ - Results: []parser.ParseResult{providerResult}, - }, - processResult{ - results: []parser.ParseResult{legacyResult}, - }, - parser.DiscoveredFile{}, - ) - - require.NotEmpty(t, mismatches) - assert.Contains(t, mismatches[0], "session") -} - -func TestCompareProviderObservationDetectsSourceErrorContentMismatch(t *testing.T) { - mismatches := compareProviderObservationToProcessResult( - ProviderObservation{ - SourceErrors: []parser.SourceError{{ - SourceKey: "source-key", - DisplayPath: "source.jsonl", - SessionID: "codex:bad", - Err: errors.New("provider parse failed"), - }}, - }, - processResult{ - sessionErrs: []sessionParseError{{ - sessionID: "codex:bad", - virtualPath: "source.jsonl", - err: errors.New("legacy parse failed"), - }}, - }, - parser.DiscoveredFile{}, - ) - - require.NotEmpty(t, mismatches) - assert.Contains(t, mismatches[0], "source_errors") -} - -func TestCompareProviderObservationNormalizesLegacySourceErrorSessionID(t *testing.T) { - mismatches := compareProviderObservationToProcessResult( - ProviderObservation{ - SourceErrors: []parser.SourceError{{ - SourceKey: "source.jsonl#bad", - DisplayPath: "source.jsonl#bad", - SessionID: "codex:bad", - Err: errors.New("parse failed"), - Retryable: true, - }}, - Planned: ProviderPlannedEffects{ - Diagnostics: []ProviderPlannedDiagnostic{{ - SourceKey: "source.jsonl#bad", - DisplayPath: "source.jsonl#bad", - SessionID: "codex:bad", - Err: errors.New("parse failed"), - Retryable: true, - 
}}, - }, - }, - processResult{ - sessionErrs: []sessionParseError{{ - sessionID: "bad", - virtualPath: "source.jsonl#bad", - err: errors.New("parse failed"), - }}, - }, - parser.DiscoveredFile{Agent: parser.AgentCodex}, - ) - - assert.Empty(t, mismatches) -} - -func TestCompareProviderObservationDetectsPlannedDataVersionMismatch(t *testing.T) { - result := parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "codex:one", - Agent: parser.AgentCodex, - File: parser.FileInfo{ - Path: "source.jsonl", - }, - }, - } - - mismatches := compareProviderObservationToProcessResult( - ProviderObservation{ - Results: []parser.ParseResult{result}, - Planned: ProviderPlannedEffects{ - SourceKeys: []string{"source.jsonl"}, - DataVersions: []ProviderPlannedDataVersion{{ - SessionID: "codex:one", - State: parser.DataVersionNeedsRetry, - RetryReason: "fallback parser", - }}, - }, - }, - processResult{ - results: []parser.ParseResult{result}, - }, - parser.DiscoveredFile{Path: "source.jsonl"}, - ) - - require.NotEmpty(t, mismatches) - assert.Contains(t, mismatches[0], "planned.data_versions") -} - -func TestCompareProviderObservationIgnoresProviderOnlyRetryReason(t *testing.T) { - result := parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "codex:one", - Agent: parser.AgentCodex, - File: parser.FileInfo{ - Path: "source.jsonl", - }, - }, - } - - mismatches := compareProviderObservationToProcessResult( - ProviderObservation{ - Results: []parser.ParseResult{result}, - Planned: ProviderPlannedEffects{ - SourceKeys: []string{"source.jsonl"}, - DataVersions: []ProviderPlannedDataVersion{{ - SessionID: "codex:one", - State: parser.DataVersionNeedsRetry, - RetryReason: "fallback parser", - }}, - }, - }, - processResult{ - results: []parser.ParseResult{result}, - needsRetry: true, - }, - parser.DiscoveredFile{Path: "source.jsonl"}, - ) - - assert.Empty(t, mismatches) -} - -func TestCompareProviderObservationIgnoresProviderOnlySSEScopes(t *testing.T) { - result := 
parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "codex:one", - Agent: parser.AgentCodex, - File: parser.FileInfo{ - Path: "source.jsonl", - }, - }, - } - - mismatches := compareProviderObservationToProcessResult( - ProviderObservation{ - Results: []parser.ParseResult{result}, - Planned: ProviderPlannedEffects{ - SourceKeys: []string{"source.jsonl"}, - DataVersions: []ProviderPlannedDataVersion{{ - SessionID: "codex:one", - State: parser.DataVersionCurrent, - }}, - SSEScopes: []string{"sessions"}, - }, - }, - processResult{ - results: []parser.ParseResult{result}, - }, - parser.DiscoveredFile{Path: "source.jsonl"}, - ) - - assert.Empty(t, mismatches) -} - -func TestObserveProviderSourceRejectsProviderMismatch(t *testing.T) { - provider := &shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - }, - }, - } - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: parser.SourceRef{ - Provider: parser.AgentClaude, - Key: "source-key", - }, - }) - require.Error(t, err) - assert.Contains(t, err.Error(), string(parser.AgentClaude)) - assert.Contains(t, err.Error(), string(parser.AgentCodex)) - assert.Empty(t, observation) - assert.Empty(t, provider.calls) -} - -func TestObserveProviderSourceRejectsCrossProviderResult(t *testing.T) { - provider := &shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - IDPrefix: "codex:", - }, - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{ - { - Result: parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "codex:one", - Agent: parser.AgentClaude, - }, - }, - }, - }, - }, - } - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: parser.SourceRef{ - Provider: parser.AgentCodex, - Key: "source-key", - }, - }) - require.Error(t, 
err) - assert.Contains(t, err.Error(), "session agent") - assert.Contains(t, err.Error(), string(parser.AgentClaude)) - assert.Contains(t, err.Error(), string(parser.AgentCodex)) - assert.Empty(t, observation) - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) -} - -func TestObserveProviderSourceRejectsForeignSessionID(t *testing.T) { - provider := &shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - IDPrefix: "codex:", - }, - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{ - { - Result: parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "claude:one", - Agent: parser.AgentCodex, - }, - }, - }, - }, - }, - } - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: parser.SourceRef{ - Provider: parser.AgentCodex, - Key: "source-key", - }, - }) - require.Error(t, err) - assert.Contains(t, err.Error(), "session id") - assert.Contains(t, err.Error(), "claude:one") - assert.Contains(t, err.Error(), "codex:") - assert.Empty(t, observation) - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) -} - -func TestObserveProviderSourceRejectsForeignNestedSessionID(t *testing.T) { - provider := &shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - IDPrefix: "codex:", - }, - }, - outcome: parser.ParseOutcome{ - Results: []parser.ParseResultOutcome{ - { - Result: parser.ParseResult{ - Session: parser.ParsedSession{ - ID: "codex:one", - Agent: parser.AgentCodex, - ParentSessionID: "claude:parent", - }, - }, - }, - }, - }, - } - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: parser.SourceRef{ - Provider: parser.AgentCodex, - Key: "source-key", - }, - }) - require.Error(t, err) - assert.Contains(t, err.Error(), "parent session id") - 
assert.Contains(t, err.Error(), "claude:parent") - assert.Contains(t, err.Error(), "codex:") - assert.Empty(t, observation) - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) -} - -func TestObserveProviderSourceRejectsEmptyDiagnosticSourceKey(t *testing.T) { - sourceErr := errors.New("bad source") - provider := &shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - IDPrefix: "codex:", - }, - }, - outcome: parser.ParseOutcome{ - SourceErrors: []parser.SourceError{ - { - SessionID: "codex:bad", - Err: sourceErr, - }, - }, - }, - } - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: parser.SourceRef{ - Provider: parser.AgentCodex, - Key: "source-key", - }, - }) - require.Error(t, err) - assert.Contains(t, err.Error(), "diagnostic source key") - assert.Contains(t, err.Error(), "required") - assert.Empty(t, observation) - assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) -} - -func TestObserveProviderSourceRejectsUnrelatedDiagnosticSourceKey(t *testing.T) { - sourceErr := errors.New("bad source") - provider := &shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - IDPrefix: "codex:", - }, - }, - fingerprint: parser.SourceFingerprint{ - Key: "fingerprint-key", - }, - outcome: parser.ParseOutcome{ - SourceErrors: []parser.SourceError{ - { - SourceKey: "other-source", - SessionID: "codex:bad", - Err: sourceErr, - }, - }, - }, - } - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: parser.SourceRef{ - Provider: parser.AgentCodex, - Key: "source-key", - FingerprintKey: "source-fingerprint-key", - }, - }) - require.Error(t, err) - assert.Contains(t, err.Error(), "diagnostic source key") - assert.Contains(t, err.Error(), "other-source") - assert.Empty(t, observation) - 
assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) -} - -func TestObserveProviderSourceStopsAfterFingerprintError(t *testing.T) { - fingerprintErr := errors.New("stat failed") - provider := &shadowTestProvider{ - ProviderBase: parser.ProviderBase{ - Def: parser.AgentDef{ - Type: parser.AgentCodex, - DisplayName: "Codex", - }, - }, - fingerprintErr: fingerprintErr, - } - - observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ - Source: parser.SourceRef{ - Provider: parser.AgentCodex, - Key: "source-key", - }, - }) - require.ErrorIs(t, err, fingerprintErr) - assert.Empty(t, observation) - assert.Equal(t, []string{"fingerprint"}, provider.calls) -} - -type shadowTestProvider struct { - parser.ProviderBase - calls []string - fingerprint parser.SourceFingerprint - fingerprintErr error - outcome parser.ParseOutcome - parseErr error - parseRequest parser.ParseRequest -} - -func (p *shadowTestProvider) Fingerprint( - context.Context, - parser.SourceRef, -) (parser.SourceFingerprint, error) { - p.calls = append(p.calls, "fingerprint") - if p.fingerprintErr != nil { - return parser.SourceFingerprint{}, p.fingerprintErr - } - return p.fingerprint, nil -} - -func (p *shadowTestProvider) Parse( - _ context.Context, - req parser.ParseRequest, -) (parser.ParseOutcome, error) { - p.calls = append(p.calls, "parse") - p.parseRequest = req - if p.parseErr != nil { - return parser.ParseOutcome{}, p.parseErr - } - return p.outcome, nil -}