From cb005c7cae76be635d13e1f39f0a293d5e88e1d4 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:35:23 -0400 Subject: [PATCH] feat(parser): add provider facade core Introduce the Provider interface, ProviderBase/ProviderFactory, and source-set helpers; own provider discovery and lookup at the root; and add the legacy-call shim scan that gates provider files. fix(parser): cover Aider, OMP, Reasonix in migration manifest These agents live in the registry but were absent from the provider migration manifest, so ValidateProviderMigrationModes failed once the registry began enforcing that every agent has a mode. They remain on the legacy path here; later stack commits migrate them to concrete providers and flip these entries to provider-authoritative. fix(sync): keep shadow provider discovery observational Shadow provider mode must not add provider-only work or satisfy source lookups that the legacy runtime would miss. Otherwise a migration comparison can change live sync behavior before the provider becomes authoritative.

Provider-authoritative discovery now reports discovery failures as sync failures and suppresses the provider completion watermark for that run, preserving the next incremental pass.
The shim scan also keeps pending exemptions honest by failing stale entries while ignoring provider-owned selector methods.

Validation: go test -tags "fts5" ./internal/parser -run 'TestProviderFilesDoNotReferenceLegacyEntrypoints' -count=1; go test -tags "fts5" ./internal/sync -run 'Test(DiscoverProviderSourcesOnlyRunsAuthoritativeProviders|SyncAllProviderDiscoveryFailureSkipsFinishedWatermark|FindSourceFileFallsBackToAuthoritativeNonFileProvider|ClassifyProviderChangedPath|ProcessFileShadow|ProcessFileProviderAuthoritative|ProviderVirtualSourceBackedByEvent)' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check docs(parser): clarify provider freshness contract The facade spec still described successful parses as eligible for a clean skip-cache entry, which conflicts with the no-schema-change data-version model and can leave unchanged sessions stale after parser upgrades.

Document stored changed-path hints explicitly and keep successful unchanged-source freshness tied to DB metadata plus parser data version, reserving skipped_files for retry, failure, and explicit skip cases.

Validation: go test -tags "fts5" ./internal/parser -run 'TestProviderFilesDoNotReferenceLegacyEntrypoints' -count=1; git diff --check. mdformat ran via commit hook. docs(parser): pin provider source identity semantics The facade contract needs to say exactly which provider source key is persisted because the migration intentionally avoids a schema change. Without that rule, providers could diverge between SourceRef, SourceFingerprint, and sessions.file_path identities.

Also define capability conformance by meaningful field presence so unsupported zero-value fields are treated consistently in provider tests.

Validation: git diff --check. mdformat is unavailable on PATH, but the commit hook ran.
style(docs): mdformat provider dual-run harness plan --- Makefile | 10 +- .../2026-06-20-provider-dual-run-harness.md | 308 ++++ .../2026-06-19-provider-facade-design.md | 303 ++-- go.mod | 2 + go.sum | 4 + internal/db/db.go | 3 + internal/db/sessions.go | 144 ++ internal/db/source_path_hints_test.go | 164 +++ internal/parser/capabilities.go | 61 + internal/parser/capabilitysupport_enumer.go | 112 ++ internal/parser/discovery.go | 9 +- internal/parser/provider.go | 371 +++++ internal/parser/provider_migration.go | 159 +++ internal/parser/provider_shim_scan_test.go | 199 +++ internal/parser/provider_test.go | 368 +++++ internal/parser/tools.go | 5 + internal/sync/engine.go | 771 +++++++++- internal/sync/parsediff.go | 11 +- internal/sync/parsediff_compare_test.go | 148 ++ internal/sync/provider_shadow.go | 583 ++++++++ internal/sync/provider_shadow_caller_test.go | 1240 +++++++++++++++++ internal/sync/provider_shadow_test.go | 541 +++++++ 22 files changed, 5389 insertions(+), 127 deletions(-) create mode 100644 docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md create mode 100644 internal/db/source_path_hints_test.go create mode 100644 internal/parser/capabilities.go create mode 100644 internal/parser/capabilitysupport_enumer.go create mode 100644 internal/parser/provider.go create mode 100644 internal/parser/provider_migration.go create mode 100644 internal/parser/provider_shim_scan_test.go create mode 100644 internal/parser/provider_test.go create mode 100644 internal/parser/tools.go create mode 100644 internal/sync/provider_shadow.go create mode 100644 internal/sync/provider_shadow_caller_test.go create mode 100644 internal/sync/provider_shadow_test.go diff --git a/Makefile b/Makefile index 9acfe0488..b91f89c3e 100644 --- a/Makefile +++ b/Makefile @@ -332,7 +332,15 @@ nilaway-golangci-build: # Run NilAway through the custom golangci-lint module plugin. 
nilaway: pricing-snapshot ensure-embed-dir nilaway-golangci-build - $(CUSTOM_GCL) run --config .golangci.nilaway.yml ./... + @set -e; \ + root=$$(pwd); \ + dirs=$$(go list -f '{{.Dir}}' ./...); \ + for dir in $$dirs; do \ + pkg="./$${dir#$$root/}"; \ + echo "$(CUSTOM_GCL) run --config .golangci.nilaway.yml $$pkg"; \ + GOMAXPROCS=$${GOMAXPROCS:-1} GOGC=$${GOGC:-10} GOMEMLIMIT=$${GOMEMLIMIT:-512MiB} \ + $(CUSTOM_GCL) run --config .golangci.nilaway.yml "$$pkg"; \ + done # Install pinned local lint tools. lint-tools: diff --git a/docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md b/docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md new file mode 100644 index 000000000..931c15484 --- /dev/null +++ b/docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md @@ -0,0 +1,308 @@ +# Provider Dual-Run Harness Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add the root-level provider migration harness so provider branches +must opt into shadow comparison instead of only adding parallel provider +implementations. + +**Architecture:** The parser package owns the per-`AgentType` migration manifest +because provider branches already change parser factories. The sync package owns +the source-level observation helper because it converts provider `Fingerprint` +and `Parse` calls into engine-shaped planned effects without touching the live +database. + +**Tech Stack:** Go 1.26, `testing`, `github.com/stretchr/testify`, git-spice +stacked branches. + +**Migration mode semantics:** + +- `legacy-only`: only the legacy parser/sync path runs and writes. This is the + default for legacy adapter providers and is allowed for concrete providers + only with an explicit rollback note and open follow-up task. 
+- `shadow-compare`: the legacy path remains authoritative for DB writes, + skip-cache persistence, data-version rows, source metadata, diagnostics, + SSE, and return values. The provider path runs through the shared provider + runner and produces normalized in-memory planned effects. Tests compare + those planned effects against the legacy outcome; runtime mismatches are + developer diagnostics only and must not create user-visible parse + diagnostics. +- `provider-authoritative`: the provider path owns writes and return values and + the old provider-specific legacy branch is gone. This mode is reserved for + the stack tip after every parse-capable provider has passed shadow + comparison. +- `import-only`: the provider is intentionally excluded from filesystem parse + comparison because it represents non-filesystem import/export metadata + rather than a parser replacement. + +Promotion requires fixture evidence for parsed sessions, exclusions, skip-cache +keys, data-version state, source metadata, diagnostics, retry state, and +source-key/session-ID compatibility. Rollback means moving the manifest entry +back to `legacy-only`, recording the reason in kata/review notes, and leaving +the legacy path authoritative until the mismatch is fixed. + +Provider observations must reject cross-provider output before planning effects +and before any remote machine prefix is applied. `ParseResult.Session.Agent` +must equal the provider `AgentType`. Persisted session IDs in the result graph +must use the provider's ID prefix when one exists; this includes result IDs, +parent IDs, usage-event session IDs, subagent links, exclusions, and diagnostic +session IDs. Diagnostic `SourceError.SourceKey` values are required and must be +the provider fingerprint key, `SourceRef.FingerprintKey`, `SourceRef.Key`, or a +virtual key derived from one of those candidates by appending `#`, `::`, or `|`. + +`ProviderPlannedEffects` is an engine-shaped comparison model, not a second +writer. 
Its source key is the fingerprint key when available, then +`SourceRef.FingerprintKey`, then `SourceRef.Key`. Its skip-cache key follows the +same engine order used for persisted skip decisions. Its data-version entries +match the rows the legacy engine would stamp after successful writes, including +retry state from `DataVersionNeedsRetry`. Its diagnostics mirror parse +diagnostics without inserting them into the live store. Provider retry-reason +text and SSE scopes are outside the root process-result comparison until a later +caller task exposes equivalent legacy data. + +Performance rule: shadow comparison may double-parse a source only while that +provider is actively migrating. Large roots and shared database providers need +fixture or benchmark coverage before promotion, and caller-level shadow wiring +must keep provider failures from blocking legacy writes unless a test is +explicitly asserting the mismatch. + +______________________________________________________________________ + +### Task 1: Provider Migration Manifest + +**Files:** + +- Create: `internal/parser/provider_migration.go` + +- Modify: `internal/parser/provider_test.go` + +- [ ] **Step 1: Write the failing manifest tests** + +Add tests that prove the manifest covers the registry and rejects a concrete +provider left in `legacy-only` mode: + +```go +func TestProviderMigrationModesCoverRegistry(t *testing.T) { + err := ValidateProviderMigrationModes(ProviderFactories(), ProviderMigrationModes()) + require.NoError(t, err) +} + +func TestProviderMigrationModesRejectConcreteProviderLeftLegacyOnly(t *testing.T) { + factory := testProviderFactory{def: AgentDef{Type: AgentCodex, DisplayName: "Codex"}} + modes := map[AgentType]ProviderMigrationMode{ + AgentCodex: ProviderMigrationLegacyOnly, + } + + err := ValidateProviderMigrationModes([]ProviderFactory{factory}, modes) + require.Error(t, err) + assert.Contains(t, err.Error(), "codex") + assert.Contains(t, err.Error(), "shadow-compare") +} +``` + +- 
[ ] **Step 2: Run the parser tests and verify RED** + +Run: + +```bash +go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1 +``` + +Expected: FAIL because `ProviderMigrationMode`, `ProviderMigrationModes`, and +`ValidateProviderMigrationModes` do not exist yet. + +- [ ] **Step 3: Implement the manifest types and validation** + +Create `internal/parser/provider_migration.go` with: + +```go +type ProviderMigrationMode string + +const ( + ProviderMigrationLegacyOnly ProviderMigrationMode = "legacy-only" + ProviderMigrationShadowCompare ProviderMigrationMode = "shadow-compare" + ProviderMigrationProviderAuthoritative ProviderMigrationMode = "provider-authoritative" + ProviderMigrationImportOnly ProviderMigrationMode = "import-only" +) +``` + +Add a registry-covering manifest initialized to `legacy-only`, return copies to +callers, and validate: + +- every provider factory has one mode; + +- no extra manifest entry points at an unknown agent; + +- concrete non-legacy factories cannot remain `legacy-only`; + +- `shadow-compare`, `provider-authoritative`, and `import-only` require a + concrete factory; + +- `import-only` is allowed only for Claude.ai and ChatGPT. + +- [ ] **Step 4: Run the parser tests and verify GREEN** + +Run: + +```bash +go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1 +``` + +Expected: PASS. 
+ +### Task 2: Source-Level Provider Observation + +**Files:** + +- Create: `internal/sync/provider_shadow.go` + +- Create: `internal/sync/provider_shadow_test.go` + +- [ ] **Step 1: Write failing observation tests** + +Add tests that use a fake provider to prove the helper: + +- calls `Fingerprint` before `Parse`; +- converts `ParseOutcome` into an observation; +- records planned data-version/source/diagnostic effects in memory; +- never accepts a mismatched `SourceRef.Provider`; +- rejects provider results, exclusions, and diagnostics whose agent or persisted + session-ID namespace belongs to another provider. + +The main test should assert: + +```go +assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) +assert.Equal(t, []string{"codex:one"}, observation.Planned.DataVersionSessionIDs()) +assert.Equal(t, []string{"codex:two"}, observation.Planned.RetrySessionIDs()) +assert.Equal(t, []string{"source-key"}, observation.Planned.SourceKeys) +assert.Len(t, observation.Planned.Diagnostics, 1) +``` + +- [ ] **Step 2: Run the sync tests and verify RED** + +Run: + +```bash +go test -tags "fts5" ./internal/sync -run TestObserveProviderSource -count=1 +``` + +Expected: FAIL because `ObserveProviderSource` and observation types do not +exist. + +- [ ] **Step 3: Implement the minimal observation helper** + +Create `internal/sync/provider_shadow.go` with: + +```go +type ProviderObserveRequest struct { + Source parser.SourceRef + Machine string + ForceParse bool +} + +type ProviderObservation struct { + Results []parser.ParseResult + ExcludedSessionIDs []string + SourceErrors []parser.SourceError + SkipReason parser.SkipReason + ForceReplace bool + Planned ProviderPlannedEffects +} +``` + +`ObserveProviderSource` checks the source/provider type match, calls +`Fingerprint`, calls `Parse`, validates provider output invariants, and builds +in-memory planned effects. It must not accept a `db.DB`, `Engine`, writer +callback, or mutable skip-cache reference. 
+ +- [ ] **Step 4: Run the sync tests and verify GREEN** + +Run: + +```bash +go test -tags "fts5" ./internal/sync -run TestObserveProviderSource -count=1 +``` + +Expected: PASS. + +### Follow-Up: Caller-Level Wiring + +**Files:** + +The root harness branch wires the shared `processFile` shadow comparison. The +remaining caller families below stay as later sync migration work so provider +branches can add caller-specific source selection, hint lookup, and acceptance +coverage one behavior group at a time. + +**Step 1: Wire remaining source-processing callers into shadow comparison** + +Move changed-path sync and `SyncSingleSession` semantics into the caller-level +dual-run wrapper without adding a duplicate `processFile` hook. These callers +reuse the shared `processFile` observation for parse comparison, then add +caller-specific source selection, stored-source hints, and acceptance assertions +around that observation. They must leave live DB/diagnostic/SSE state driven +only by the legacy result. + +**Step 2: Add lookup/watch/diagnostic caller coverage** + +Move session watch flows, export/source lookup, source mtime, token-usage raw +source probing, parse-diff, and parse diagnostics through the same provider +runner. Tests must cover source lookup freshness, virtual paths, source mtime, +raw probing behavior, report shape, and source-error behavior. + +**Step 3: Define runtime mismatch reporting** + +Mismatches are test failures in shared parity tests. Runtime mismatch reporting +is developer-only logging or debug diagnostics and must include provider, source +key, fingerprint key, mode, field path, legacy value summary, provider value +summary, and whether fingerprinting or parsing failed. It must not persist +user-visible parse diagnostics while `shadow-compare` is active. + +### Task 3: Validation And Commit + +**Files:** + +- Modify as needed from Tasks 1-2. + +- [ ] **Step 1: Format and verify** + +Run: + +```bash +go fmt ./... 
+go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1 +go test -tags "fts5" ./internal/sync -run TestObserveProviderSource -count=1 +go test -tags "fts5" ./internal/parser -count=1 +go test -tags "fts5" ./internal/sync -count=1 +go vet ./... +git diff --check +``` + +Expected: all commands pass. If `go fmt ./...` rewrites unrelated comments, +restore only unrelated user-owned changes before committing. + +- [ ] **Step 2: Commit on `provider-facade-core`** + +Commit the root harness slice with a conventional message: + +```bash +git add docs/superpowers/plans/2026-06-20-provider-dual-run-harness.md internal/parser/provider_migration.go internal/parser/provider_test.go internal/sync/provider_shadow.go internal/sync/provider_shadow_test.go +git commit -m "feat(parser): add provider migration harness" +``` + +- [ ] **Step 3: Restack locally when explicitly authorized** + +If the user has explicitly authorized branch changes and restacking for this +session, run: + +```bash +git-spice upstack restack +``` + +Expected: dependent provider branches are replayed on the harness branch and +conflicts are resolved provider by provider. Do not push, submit, or update PRs +unless the user has separately authorized that network operation. diff --git a/docs/superpowers/specs/2026-06-19-provider-facade-design.md b/docs/superpowers/specs/2026-06-19-provider-facade-design.md index 7cb113948..f9d4e64ba 100644 --- a/docs/superpowers/specs/2026-06-19-provider-facade-design.md +++ b/docs/superpowers/specs/2026-06-19-provider-facade-design.md @@ -21,8 +21,8 @@ fingerprints, and `ParseResult` values. stays authoritative while provider parsing is shadow-compared provider by provider. - Make every provider PR an actual migration step: adding a provider - implementation must also opt that provider into the shared migration manifest - and provider-vs-legacy parity tests. 
+ implementation must also opt that provider into the shared migration + manifest and provider-vs-legacy parity tests. - Keep `ParsedSession`, `ParsedMessage`, `ParsedToolCall`, `ParsedToolResult`, `ParsedUsageEvent`, and `ParseResult` as the normalized output contract. - Remove the provider-by-provider `sync.Engine.processFile` dispatch switch only @@ -51,8 +51,8 @@ fingerprints, and `ParseResult` values. The provider facade must respect these constraints: - Source shape belongs to the provider. The engine must not know whether a - source is a JSONL file, SQLite row, sidecar, trace folder, import archive, or - multiple files. + source is a JSONL file, SQLite row, sidecar, trace folder, import archive, + or multiple files. - Providers embed a base facade with zero-value no-op implementations for optional source behavior. - Providers must implement `Parse`; the base facade must not provide a fake @@ -65,9 +65,9 @@ The provider facade must respect these constraints: - All existing providers migrate to the new layer before the old sync dispatch is considered removed. - During the stacked migration, legacy dispatch remains the writer. Provider - dispatch is run through the same root-level harness for opted-in providers and - compared against legacy output, but it must not mutate persisted session state - until the stack tip switches authority. + dispatch is run through the same root-level harness for opted-in providers + and compared against legacy output, but it must not mutate persisted session + state until the stack tip switches authority. - A provider branch is incomplete if it only adds provider code. It must also move the provider's migration status out of legacy-only mode and include the dual-run test coverage that proves the new shape is exercised. @@ -373,7 +373,13 @@ Rules: - `Key` is stable within the provider and suitable for logs and dedupe. - `DisplayPath` is human-readable and may be a virtual path. 
-- `FingerprintKey` is the DB lookup key used for skip/data-version checks. +- `FingerprintKey` is the DB lookup key used for skip/data-version checks. With + the no-schema-change migration, this is the authoritative persisted source + identity and is written through `ParsedSession.File.Path` / + `sessions.file_path`. `SourceFingerprint.Key` must either equal the selected + persisted identity or be empty so the engine falls back to + `SourceRef.FingerprintKey` and `SourceRef.Key`; there is no separate + fingerprint-key column. - `ProjectHint` is advisory and can be empty. - `Opaque` is internal provider state. The engine treats it as an opaque token. - `Opaque` is never persisted or logged, must be immutable for engine callers, @@ -383,9 +389,9 @@ Rules: Backwards compatibility: - Migrated providers should keep `FingerprintKey` compatible with the source key - or stored `file_path` values already written by the legacy sync path whenever - practical. Existing fingerprint and data-version metadata should continue to - short-circuit unchanged sources after the facade migration. + or stored `file_path` values already written by the legacy sync path + whenever practical. Existing fingerprint and data-version metadata should + continue to short-circuit unchanged sources after the facade migration. - If a provider must change its lookup key or fingerprint identity, that provider migration must explicitly document the expected full resync or metadata transition. The facade migration itself must not silently force all @@ -411,9 +417,10 @@ type WatchRoot struct { } type ChangedPathRequest struct { - Path string - EventKind string - WatchRoot string + Path string + EventKind string + WatchRoot string + StoredSourcePaths []string } ``` @@ -423,7 +430,10 @@ provider filters that allow broad OS watch roots without parsing every changed file. `DebounceKey` groups related paths such as sibling metadata files and a transcript. 
`ChangedPathRequest.WatchRoot` is the matched watch root, so the provider can classify changes relative to the configured root that produced -them. +them. `StoredSourcePaths` is the persisted `sessions.file_path` hint set scoped +to the matched watch root and provider. Providers with virtual, database-backed, +or multi-file sources use it to classify deletion and tombstone events that no +longer have a regular source file on disk. The provider owns the final changed-path decision. The engine may use `IncludeGlobs` and `ExcludeGlobs` as coarse prefilters because the provider @@ -502,8 +512,8 @@ concrete pass criteria: - database fan-out fingerprints reuse database-level metadata plus row/session identifiers instead of scanning unrelated rows; - any provider that requires a full content hash documents why mtime, size, - inode/device, row metadata, or sidecar metadata are insufficient and includes - a benchmark budget for that provider. + inode/device, row metadata, or sidecar metadata are insufficient and + includes a benchmark budget for that provider. ## Parse Requests And Outcomes @@ -565,9 +575,9 @@ Runtime behavior: can still be ingested. - All session IDs in parse outcomes use the persisted normalized/full session ID namespace. That includes `ParseResultOutcome.Result.Session.ID`, - `ExcludedSessionIDs`, and `SourceError.SessionID`. Raw upstream IDs may appear - in provider internals, diagnostics, or lookup requests, but the engine - compares outcome IDs only against persisted full session IDs. + `ExcludedSessionIDs`, and `SourceError.SessionID`. Raw upstream IDs may + appear in provider internals, diagnostics, or lookup requests, but the + engine compares outcome IDs only against persisted full session IDs. - `SourceError.SessionID` is required for per-session failures from multi-session providers. `SourceKey` and `DisplayPath` are diagnostic source identifiers, not substitutes for persisted session identity. 
If the provider @@ -584,35 +594,47 @@ Runtime behavior: `RetryReason` records why, for example an Antigravity-style lower-resolution fallback. - `DataVersionUnspecified` is allowed only during migration adapters; provider - harness tests should require new providers to set either `DataVersionCurrent` - or `DataVersionNeedsRetry` for every returned result. + harness tests should require new providers to set either + `DataVersionCurrent` or `DataVersionNeedsRetry` for every returned result. - Mixed data-version states are valid for multi-session sources. One result can be current while another result from the same source needs retry, and a retryable `SourceError` affects only the failed session unless the provider reports a whole-source `error`. -- Data-version writes are per result, but clean skip-cache persistence remains - source/fingerprint scoped. `ResultSetComplete` means the provider has - accounted for the complete logical session set represented by the - `SourceRef`/`FingerprintKey`: returned results, explicit exclusions, and clean - replacements cover every retained session for that source. The engine may - write a clean skip-cache entry only when `ResultSetComplete` is true, every - returned result is `DataVersionCurrent`, there are no `SourceErrors`, and any - previously persisted rows for that `FingerprintKey` are either returned, - listed in `ExcludedSessionIDs`, or covered by a clean `ForceReplace`. +- Data-version writes are per result, and successful unchanged-source freshness + remains DB metadata driven through source identity, file size, effective + mtime, and parser data version. `skipped_files` must not become a clean + successful-parse cache unless it also stores data-version-aware freshness + state; with the current no-schema-change migration it remains a + retry/failure and explicit skip cache. 
+- `ResultSetComplete` means the provider has accounted for the complete logical + session set represented by the `SourceRef`/`FingerprintKey`: returned + results, explicit exclusions, and clean replacements cover every retained + session for that source. The engine may use that proof to avoid stale rows + and to clear retry/failure cache entries, but not to persist a + data-version-blind clean source skip. - Any `DataVersionNeedsRetry` result, retryable per-session error, non-retryable - per-session error, or incomplete result set suppresses the clean skip-cache - entry for the whole `FingerprintKey`. Non-retryable errors may be recorded as - diagnostics or failure-cache entries, but they do not prove the source is - clean because a future parser version or source change may still need to - revisit the same logical session set. + per-session error, or incomplete result set prevents the provider outcome + from being treated as a clean complete source. Non-retryable errors may be + recorded as diagnostics or failure-cache entries, but they do not prove the + source is clean because a future parser version or source change may still + need to revisit the same logical session set. - During a partial multi-session parse, existing persisted rows that are absent from `Results` are retained unless their IDs are listed in `ExcludedSessionIDs` or the provider completes a clean `ForceReplace` parse - for the owning logical source. A retryable `SourceError` leaves that session's - existing row stale and eligible for a future retry instead of deleting it or - marking it current. + for the owning logical source. A retryable `SourceError` leaves that + session's existing row stale and eligible for a future retry instead of + deleting it or marking it current. - `SkipReason` replaces implicit "nil session means skip" behavior. Skips are explicit outcomes and should not be conflated with retryable parse failures. 
+- Provider cache identity and `ResultSetComplete` semantics are root hook + invariants, not per-provider policy. Provider branches may add source-family + coverage, but they must not redefine cache keys, omission/deletion behavior, + or retry-state persistence. +- When a migrated provider's fingerprint key differs from the legacy + `file.Path`, the provider path reads, writes, and clears only the provider + fingerprint key. Old legacy skip-cache entries may remain in the persisted + archive as inert compatibility leftovers; they must not be consulted for a + provider-authoritative source once its provider key is known. - Providers do not write to the DB. - Providers do not mutate, delete, or repair source files. @@ -769,6 +791,15 @@ Capability semantics are intentionally strict: - `CapabilityNotApplicable` means the upstream source format cannot represent the feature. It is not a placeholder for unfinished implementation work. +Capability conformance uses meaningful presence predicates rather than raw Go +zero-value checks. Unsupported string fields such as cwd, git branch, model, +stop reason, and session name must remain empty; unsupported counts such as +malformed lines must remain zero; unsupported booleans such as truncation must +remain false; unsupported repeated data such as relationships, subagents, tool +calls, tool results, and usage events must remain empty. A zero value is +therefore absence unless the provider declares support for the corresponding +feature and fixtures prove a source can intentionally emit it. + Generated enum tooling should be reproducible: - keep a `tools.go` file with a `tools` build tag and a blank import for @@ -863,10 +894,18 @@ entries for related providers, but tests must expand the family to every concrete `AgentType`; a family-level entry is not enough to mark an individual parse-capable provider migrated. 
-- legacy-only: only the existing sync path runs; -- shadow-compare: legacy runs and writes, provider runs through the new generic - path, and tests compare normalized outcomes; -- provider-authoritative: provider dispatch writes and the old path is absent. +- legacy-only: only the existing sync path runs and writes. This is the normal + mode for legacy adapter providers. A concrete provider may move back to this + mode only as a documented rollback with an open follow-up task. +- shadow-compare: legacy runs and writes; provider runs through the new generic + path and produces normalized in-memory planned effects. Tests compare those + effects against the legacy outcome. Runtime mismatches are developer + diagnostics only; they must not persist user-visible parse diagnostics or + change SSE-visible state. +- provider-authoritative: provider dispatch writes, returns the caller result, + and the old provider-specific legacy path is absent. This is reserved for + the stack-tip cleanup after every parse-capable provider has passed shadow + comparison. - import-only: the provider exists for non-filesystem import/export metadata and is intentionally excluded from parse shadow comparison. @@ -901,8 +940,8 @@ Changed-path live sync becomes: 1. The watcher reports a changed path. 1. The engine finds providers whose `WatchPlan` roots match the changed path. 1. Each matched provider classifies it through `SourcesForChangedPath` with a - `ChangedPathRequest` that includes the changed path, event kind, and matched - watch root. + `ChangedPathRequest` that includes the changed path, event kind, and + matched watch root. 1. The engine processes the returned `SourceRef` values generically. Source lookup becomes: @@ -925,9 +964,9 @@ source errors, and exclusions still use persisted full session IDs. 
The root harness has two comparison surfaces: - source-level parity, available as soon as the root harness lands, compares one - legacy-discovered source or fixture set against the provider runner. Provider - implementation PRs must use this surface before they can move their manifest - entry to shadow-compare. + legacy-discovered source or fixture set against the provider runner. + Provider implementation PRs must use this surface before they can move their + manifest entry to shadow-compare. - caller-level dual-run, added by the later sync, lookup/watch, and diagnostics tasks, wraps production callers such as full sync, changed-path sync, `SyncSingleSession`, source lookup, source mtime, and parse-diff. @@ -944,24 +983,99 @@ helpers but keep side effects isolated: metadata, SSE emissions, and diagnostics. 1. Run the provider caller for the same agent through the generic provider helper. Depending on the caller, this may come from provider discovery, - `SourcesForChangedPath`, or `FindSource`; the comparison layer must not teach - the engine provider-specific path formats. -1. Normalize both outputs into the same comparison shape: full session IDs, - parsed message/tool/usage content, excluded IDs, skip reasons, retry state, - data-version state, source metadata, and per-session errors. + `SourcesForChangedPath`, or `FindSource`; the comparison layer must not + teach the engine provider-specific path formats. +1. Normalize both outputs into the same comparison shape for the surface being + exercised. The root `processFile` comparison covers full session IDs, + parsed message/tool/usage content, excluded IDs, retry state, data-version + state, source metadata, and per-session errors. Source-level provider tests + and later caller tasks own `SkipReason` parity until the legacy side + exposes a comparable skip-reason projection. 1. Represent provider-side effects as in-memory planned effects, not live DB mutations. 
Planned effects include source metadata writes, data-version - writes, skip-cache updates, diagnostics, and SSE topics. Integration tests - may additionally run against disposable stores, but shadow mode never - receives the live writer. + writes, skip-cache updates, and diagnostics. Integration tests may + additionally run against disposable stores, but shadow mode never receives + the live writer. SSE/event scope parity is deferred until the + provider-authoritative caller owns live event emission. 1. Report mismatches as test failures in the migration harness. Runtime diagnostics are opt-in developer diagnostics only; they must not create user-visible parse diagnostics or SSE-visible state. The provider side must not mutate persisted session state while in shadow-compare mode. +`ProviderPlannedEffects` must match the legacy engine's observable write model, +not an abstract parser-local model: + +- source metadata keys use the provider fingerprint key when present, then + `SourceRef.FingerprintKey`, then `SourceRef.Key`; +- skip-cache keys follow the same key order the legacy engine uses before a + persisted skip decision; +- data-version entries represent process-result-level write intent for the + concrete session rows the engine would stamp after successful writes, + including current versus `DataVersionNeedsRetry` state; retry-reason text is + provider-local until the legacy process result exposes equivalent detail; +- diagnostics mirror the legacy parse diagnostic fields, including display path, + source key, session ID, error, and retryability, but are never written to + the live diagnostics table in shadow mode; +- legacy skip, incremental, and whole-source error states are recorded as + non-comparable in the root `processFile` hook. Provider `SkipReason` parity + is handled by provider-local/source-level tests until a later caller task + defines a legacy skip-reason projection. 
+ +Provider output must be namespaced before it can produce planned effects and +before any remote machine prefix is applied. `ParseResult.Session.Agent` must +equal the provider `AgentType`. Persisted session IDs in the result graph must +use the provider's `AgentDef.IDPrefix` when one exists; this includes result +IDs, parent IDs, usage-event session IDs, subagent links, exclusions, and +diagnostic session IDs. `ParsedSession.SourceSessionID` is excluded from this +check because current parsers use it for upstream/raw source IDs, not persisted +session IDs. Diagnostic `SourceError.SourceKey` values are required and must be +one of the observed source identities or a derived virtual key: the provider +fingerprint key, `SourceRef.FingerprintKey`, `SourceRef.Key`, or one of those +values followed by `#`, `::`, or `|`. Cross-provider sessions are not legal in +shadow mode because they make parity false positives indistinguishable from real +legacy behavior. + +Fingerprint failures and parse failures are compared separately. A fingerprint +failure means no provider parse was attempted and the mismatch report records a +fingerprint failure. A parse failure after a successful fingerprint records the +fingerprint key and parse error. Neither failure may block the legacy write path +while the mode is `shadow-compare`. + +Mismatch reports must include provider, migration mode, source key, fingerprint +key, comparison field path, a bounded legacy summary, a bounded provider +summary, and whether the mismatch came from discovery, fingerprint, parse +output, planned effects, or runtime failure. Runtime reporting is developer-only +logging or debug output until a later task defines persistence. + +Shadow comparison can double-parse large sources while a provider migrates. +Provider PRs that touch large roots, shared SQLite sources, or composite sources +need fixture coverage or benchmarks that show fingerprinting and shadow parse +overhead are acceptable before promotion. 
The rollback rule is to move the +manifest entry back to `legacy-only`, leave the legacy path authoritative, and +keep the blocking kata/review item open. + Provider branches must exercise this transition with shared tests rather than only provider-local unit tests. The branch is considered migrated only when the -manifest entry and parity tests are present. +manifest entry and parity tests are present. Deferred parity items such as +provider-only retry-reason text, SSE scopes, and caller-specific skip-reason +mapping are promotion gates for provider-authoritative mode; they cannot remain +open at the stack tip where legacy dispatch is removed. + +Before any concrete provider changes to `provider-authoritative`, that branch +must prove the generic hook contract for its source shape: + +- `FindSource` honors global and file-scoped force-parse by allowing stale + stored source hints when requested. +- provider not-found in authoritative mode is an explicit error, not an implicit + legacy fallback. +- multi-result sources preserve `ParseResultOutcome.DataVersion` per session, so + retry-needed fallback rows do not mark unrelated current rows stale. +- skip-cache lookup and persistence use the provider fingerprint key selected + for the source, with tests for virtual or composite paths where `file.Path` + differs from `SourceRef.FingerprintKey`. +- `ResultSetComplete`, excluded IDs, diagnostics, and source errors have parity + tests for the provider's source family before the old legacy dispatch for + that provider is removed. If a provider that moved to shadow-compare proves flaky, its manifest entry can return to legacy-only with a reason and an open kata task or review note. The @@ -1002,30 +1116,32 @@ sequence: 1. Add the root-level dual-run migration harness before migrating provider branches. 
It must contain the shared provider execution helper, comparison normalizer, source-level parity surface, planned-effect comparison model, - explicit per-`AgentType` migration manifest, and tests that fail when a - concrete parse-capable provider exists without a migration-mode entry. + caller-level `processFile` shadow comparison for full sync, explicit + per-`AgentType` migration manifest, and tests that fail when a concrete + parse-capable provider exists without a migration-mode entry. 1. Add JSONL source-set helpers and tests for simple file-backed JSONL providers. 1. Use `git-spice` to restack the provider branches on the root harness branch - after that lower branch changes. The stack must be verified with - `gs log short`, conflicts resolved provider by provider, and updates - submitted with git-spice so every PR includes both provider implementation - and migration wiring. + after that lower branch changes when the user has explicitly authorized + branch changes for the session. The stack must be verified with + `gs log short` and conflicts resolved provider by provider. Pushing, + submitting, or updating PRs is a separate network operation and requires + separate explicit authorization. 1. Migrate simple JSONL providers with acceptance tests for discovery, fingerprint, parse output, skip-cache metadata, and data-version behavior. Each provider PR must move its affected concrete `AgentType` entries from legacy-only to shadow-compare and run the shared source-level provider-vs-legacy parity harness. Legacy sync remains authoritative. 1. Add and migrate sibling/composite source providers with acceptance tests for - watch planning, composite fingerprints, sidecar/title refreshes, and changed - path classification. Each PR must opt its provider into shadow-compare and - pass the shared source-level parity harness while legacy sync remains - authoritative. + watch planning, composite fingerprints, sidecar/title refreshes, and + changed path classification. 
Each PR must opt its provider into + shadow-compare and pass the shared source-level parity harness while legacy + sync remains authoritative. 1. Add and migrate virtual-path and SQLite fan-out providers with acceptance - tests for stored advisory paths, tombstone recovery via `StoredSourcePaths`, - logical session lookup, per-session errors, and source mtime behavior. Each - PR must opt its provider into shadow-compare and pass the shared parity - harness while legacy sync remains authoritative. + tests for stored advisory paths, tombstone recovery via + `StoredSourcePaths`, logical session lookup, per-session errors, and source + mtime behavior. Each PR must opt its provider into shadow-compare and pass + the shared parity harness while legacy sync remains authoritative. 1. Add and migrate non-file import/database providers with acceptance tests for `FindSource`, fingerprinting, and unsupported source mechanics. Import-only providers are explicitly marked import-only rather than shadow-compared. @@ -1033,21 +1149,24 @@ sequence: changed-path shadow comparison depends on providers. It must query by provider and watched root, use the `(agent, file_path)` index shape, return stable de-duplicated paths, and include tests for provider/root filtering, - path normalization, sibling-prefix false positives such as `/root/db` versus - `/root/db2`, dedupe, batching/no-truncation behavior, and large unrelated - session tables. + path normalization, sibling-prefix false positives such as `/root/db` + versus `/root/db2`, dedupe, batching/no-truncation behavior, and large + unrelated session tables. 1. Add provider compatibility tests for stored hint interpretation before - changed-path shadow comparison depends on providers. SQLite fan-out providers - must cover malformed or obsolete virtual paths, debug-only diagnostics, DB - row deletion, DB file deletion, and stale hints that belong to a different - physical DB under the same watch root. -1. 
Move source-processing callers into the caller-level dual-run harness: full - sync, changed-path sync, and `SyncSingleSession`. During migration these - callers compare provider output against legacy for parsed output, skip-cache, - data-version, source metadata, diagnostics, excluded IDs, and retry state; - only the legacy result writes. Changed-path comparison must populate - `StoredSourcePaths` from scoped persisted session metadata and include DB row - deletion and DB file deletion integration tests. + changed-path shadow comparison depends on providers. SQLite fan-out + providers must cover malformed or obsolete virtual paths, debug-only + diagnostics, DB row deletion, DB file deletion, and stale hints that belong + to a different physical DB under the same watch root. +1. Move the remaining source-processing caller semantics into the caller-level + dual-run harness: changed-path sync and `SyncSingleSession`. The root + harness already shadows the shared `processFile` path, so these tasks must + add caller-specific source selection, stored-source hints, and acceptance + coverage rather than adding a second shadow hook. During migration these + callers compare provider output against legacy for parsed output, + skip-cache, data-version, source metadata, diagnostics, excluded IDs, and + retry state; only the legacy result writes. Changed-path comparison must + populate `StoredSourcePaths` from scoped persisted session metadata and + include DB row deletion and DB file deletion integration tests. 1. Move lookup/watch callers into the caller-level dual-run harness: session watch flows, export/source lookup, source mtime, and token-usage raw source probing. During migration these callers compare lookup freshness, virtual @@ -1057,8 +1176,8 @@ sequence: compare report shape and source-error behavior against legacy. 1. 
At the tip of the stack only, switch all shadow-compared providers to provider-authoritative dispatch, remove the provider-by-provider - `processFile` switch, and remove or deprecate old `AgentDef` source callback - fields after all callers stop using them. + `processFile` switch, and remove or deprecate old `AgentDef` source + callback fields after all callers stop using them. Migration should keep existing parser unit tests. Provider-level tests become the required integration surface for future providers. @@ -1128,8 +1247,8 @@ Required tests: - Provider harness tests for discovery, fingerprint, parse, source lookup, and optional incremental parsing. - Incremental parsing tests for `IncrementalUnsupported`, - `IncrementalNoNewData`, `IncrementalApplied`, `IncrementalNeedsFullParse`, and - real incremental errors. + `IncrementalNoNewData`, `IncrementalApplied`, `IncrementalNeedsFullParse`, + and real incremental errors. - Parse diagnostic tests proving stable source fields are reported and opaque payloads are not serialized or logged. - Source-level migration parity tests comparing provider output to current @@ -1149,8 +1268,8 @@ Required tests: - Dual-run isolation tests proving provider shadow comparison cannot write sessions, messages, source metadata, data-version rows, skip-cache entries, diagnostics, or SSE-visible state while legacy remains authoritative. These - tests compare in-memory planned effects or disposable-store observations, not - live production mutations. + tests compare in-memory planned effects or disposable-store observations, + not live production mutations. - Sync integration tests for incremental Claude/Codex, multi-session sources, parse-diff, source mtime, source lookup, skip cache, usage events, sidecars, virtual paths, and title/metadata refreshes. 
@@ -1173,7 +1292,7 @@ decisions from those structures: - retryable failure: do not cache skip by unchanged mtime and do not mark the affected source/session current for the parser data version; - non-retryable per-session failure: eligible for failure-cache persistence, but - not for a clean source skip-cache entry; + not for a data-version-blind clean source skip; - full parse fallback from incremental: `IncrementalNeedsFullParse`; - unsupported optional provider feature: `ErrUnsupportedProviderFeature`; - successful lower-resolution fallback: per-result `DataVersionNeedsRetry` plus diff --git a/go.mod b/go.mod index 865160fdd..a927d68b0 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.26.3 require ( github.com/BurntSushi/toml v1.6.0 github.com/danielgtaylor/huma/v2 v2.38.0 + github.com/dmarkham/enumer v1.6.3 github.com/duckdb/duckdb-go/v2 v2.10504.0 github.com/fsnotify/fsnotify v1.10.1 github.com/gofrs/flock v0.13.0 @@ -81,6 +82,7 @@ require ( github.com/moby/term v0.5.2 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect + github.com/pascaldekloe/name v1.0.0 // indirect github.com/pierrec/lz4/v4 v4.1.25 // indirect github.com/posthog/posthog-go v1.12.6 // indirect github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect diff --git a/go.sum b/go.sum index 3ea04e754..a42f7cb7f 100644 --- a/go.sum +++ b/go.sum @@ -38,6 +38,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/dmarkham/enumer v1.6.3 h1:B4aV4OsfzbrS5rvjILt4mMjiWBA//cKxJUMsvHZ8mEI= +github.com/dmarkham/enumer v1.6.3/go.mod 
h1:DyjXaqCglj4GhELF73oWiparNkYkXvmOBLza/o4kO74= github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= @@ -152,6 +154,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= +github.com/pascaldekloe/name v1.0.0 h1:n7LKFgHixETzxpRv2R77YgPUFo85QHGZKrdaYm7eY5U= +github.com/pascaldekloe/name v1.0.0/go.mod h1:Z//MfYJnH4jVpQ9wkclwu2I2MkHmXTlT9wR5UZScttM= github.com/pierrec/lz4/v4 v4.1.25 h1:kocOqRffaIbU5djlIBr7Wh+cx82C0vtFb0fOurZHqD0= github.com/pierrec/lz4/v4 v4.1.25/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= diff --git a/internal/db/db.go b/internal/db/db.go index dcc9a18d7..ca1ba915e 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -1375,6 +1375,9 @@ func (db *DB) createPartialIndexesLocked(w *writerHandle) error { AND model != ''`, `CREATE INDEX IF NOT EXISTS idx_sessions_has_secret ON sessions(secret_leak_count) WHERE secret_leak_count > 0`, + `CREATE INDEX IF NOT EXISTS idx_sessions_agent_file_path_active + ON sessions(agent, file_path) + WHERE file_path IS NOT NULL AND deleted_at IS NULL`, } for _, ddl := range indexes { if _, err := w.Exec(ddl); err != nil { diff --git a/internal/db/sessions.go b/internal/db/sessions.go index f0bd3b757..80be65d9d 100644 --- a/internal/db/sessions.go +++ b/internal/db/sessions.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "path/filepath" + "sort" "strings" "time" ) @@ -1901,6 +1902,149 @@ func (db *DB) 
ListSessionIDsByFilePath(path, agent string) ([]string, error) { return ids, nil } +const storedSourcePathHintRootBatchSize = 100 + +// ListStoredSourcePathHints returns active source paths for agent whose stored +// file_path falls under any watched root. It is used by provider changed-path +// comparison to avoid losing sessions when the changed path is a sidecar or a +// root-scoped database event rather than the exact persisted source path. +func (db *DB) ListStoredSourcePathHints( + agent string, + roots []string, +) ([]string, error) { + if agent == "" { + return nil, nil + } + roots = normalizeStoredSourcePathHintRoots(roots) + if len(roots) == 0 { + return nil, nil + } + + seen := make(map[string]struct{}) + var hints []string + for start := 0; start < len(roots); start += storedSourcePathHintRootBatchSize { + end := min(start+storedSourcePathHintRootBatchSize, len(roots)) + batch := roots[start:end] + query, args := storedSourcePathHintQuery(agent, batch) + rows, err := db.getReader().Query(query, args...) 
+ if err != nil { + return nil, fmt.Errorf("listing stored source path hints: %w", err) + } + for rows.Next() { + var path string + if err := rows.Scan(&path); err != nil { + _ = rows.Close() + return nil, fmt.Errorf("scanning stored source path hint: %w", err) + } + path = cleanStoredSourcePathHint(path) + if !storedSourcePathHintInAnyRoot(path, batch) { + continue + } + if _, ok := seen[path]; ok { + continue + } + seen[path] = struct{}{} + hints = append(hints, path) + } + if err := rows.Close(); err != nil { + return nil, fmt.Errorf("closing stored source path hint rows: %w", err) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterating stored source path hints: %w", err) + } + } + sort.Strings(hints) + return hints, nil +} + +func normalizeStoredSourcePathHintRoots(roots []string) []string { + seen := make(map[string]struct{}, len(roots)) + out := make([]string, 0, len(roots)) + for _, root := range roots { + root = cleanStoredSourcePathHint(root) + if root == "" || root == "." { + continue + } + if _, ok := seen[root]; ok { + continue + } + seen[root] = struct{}{} + out = append(out, root) + } + sort.Strings(out) + return out +} + +func storedSourcePathHintQuery(agent string, roots []string) (string, []any) { + clauses := make([]string, 0, len(roots)) + args := []any{agent} + for _, root := range roots { + root = cleanStoredSourcePathHint(root) + if root == "" || root == "." { + continue + } + likeRoot := sqliteLikeEscape(root) + clauses = append(clauses, + `(file_path = ? OR + file_path LIKE ? ESCAPE '!' OR + file_path LIKE ? ESCAPE '!')`, + ) + args = append(args, + root, + likeRoot+string(filepath.Separator)+"%", + likeRoot+"#%", + ) + } + if len(clauses) == 0 { + return `SELECT file_path FROM sessions WHERE 0`, nil + } + query := `SELECT file_path + FROM sessions + WHERE agent = ? 
+ AND file_path IS NOT NULL + AND deleted_at IS NULL + AND (` + strings.Join(clauses, " OR ") + `) + ORDER BY file_path` + return query, args +} + +func cleanStoredSourcePathHint(path string) string { + return filepath.Clean(path) +} + +func storedSourcePathHintInAnyRoot(path string, roots []string) bool { + for _, root := range roots { + if storedSourcePathHintInRoot(path, root) { + return true + } + } + return false +} + +func storedSourcePathHintInRoot(path, root string) bool { + path = cleanStoredSourcePathHint(path) + root = cleanStoredSourcePathHint(root) + if path == root || strings.HasPrefix(path, root+string(filepath.Separator)) { + return true + } + suffix, ok := strings.CutPrefix(path, root+"#") + return ok && + storedSourcePathHintAllowsVirtualSuffix(root) && + suffix != "" && + !strings.ContainsAny(suffix, `/\`) +} + +func storedSourcePathHintAllowsVirtualSuffix(root string) bool { + return filepath.Ext(root) != "" +} + +func sqliteLikeEscape(value string) string { + value = strings.ReplaceAll(value, `!`, `!!`) + value = strings.ReplaceAll(value, `%`, `!%`) + value = strings.ReplaceAll(value, `_`, `!_`) + return value +} + // GetDataVersionByPath returns the minimum data_version for // sessions matching a file_path. Returns 0 when no session // exists for the path. 
diff --git a/internal/db/source_path_hints_test.go b/internal/db/source_path_hints_test.go new file mode 100644 index 000000000..fc903f29e --- /dev/null +++ b/internal/db/source_path_hints_test.go @@ -0,0 +1,164 @@ +package db + +import ( + "fmt" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestListStoredSourcePathHintsScopesByAgentAndRoot(t *testing.T) { + d := testDB(t) + root := t.TempDir() + watchRoot := filepath.Join(root, "db") + childPath := filepath.Join(watchRoot, "sessions", "a.jsonl") + virtualPath := filepath.Join(watchRoot, "state.sqlite3") + "#session-a" + uncleanPath := filepath.Join(watchRoot, "nested", "..", "nested", "b.jsonl") + cleanPath := filepath.Join(watchRoot, "nested", "b.jsonl") + siblingPath := filepath.Join(root, "db2", "sessions", "other.jsonl") + otherAgentPath := filepath.Join(watchRoot, "sessions", "other-agent.jsonl") + deletedPath := filepath.Join(watchRoot, "sessions", "deleted.jsonl") + + insertSessionWithSourcePath(t, d, "claude:child", "claude", childPath) + insertSessionWithSourcePath(t, d, "claude:child-dup", "claude", childPath) + insertSessionWithSourcePath(t, d, "claude:virtual", "claude", virtualPath) + insertSessionWithSourcePath(t, d, "claude:unclean", "claude", uncleanPath) + insertSessionWithSourcePath(t, d, "claude:sibling", "claude", siblingPath) + insertSessionWithSourcePath(t, d, "codex:other-agent", "codex", otherAgentPath) + insertSessionWithSourcePath(t, d, "claude:deleted", "claude", deletedPath) + require.NoError(t, d.SoftDeleteSession("claude:deleted")) + + got, err := d.ListStoredSourcePathHints("claude", []string{ + filepath.Join(watchRoot, "."), + filepath.Join(root, "db2", "..", "db"), + }) + + require.NoError(t, err) + assert.Equal(t, []string{ + cleanPath, + childPath, + virtualPath, + }, got) +} + +func TestListStoredSourcePathHintsHandlesHashPathsAndVirtualSuffixes(t *testing.T) { + d := testDB(t) + base := 
t.TempDir() + + hashRoot := filepath.Join(base, "db#prod") + hashChild := filepath.Join(hashRoot, "sessions", "a.jsonl") + insertSessionWithSourcePath(t, d, "claude:hash-child", "claude", hashChild) + + dbRoot := filepath.Join(base, "state.sqlite3") + virtualPath := dbRoot + "#session-a" + insertSessionWithSourcePath(t, d, "claude:virtual", "claude", virtualPath) + + plainRoot := filepath.Join(base, "db") + hashSibling := filepath.Join(base, "db#backup", "sessions", "b.jsonl") + hashVirtualSibling := plainRoot + "#session-b" + insertSessionWithSourcePath(t, d, "claude:hash-sibling", "claude", hashSibling) + insertSessionWithSourcePath( + t, d, "claude:hash-virtual-sibling", "claude", hashVirtualSibling, + ) + + got, err := d.ListStoredSourcePathHints("claude", []string{ + hashRoot, + dbRoot, + plainRoot, + }) + + require.NoError(t, err) + assert.Equal(t, []string{ + hashChild, + virtualPath, + }, got) +} + +func TestListStoredSourcePathHintsEscapesLikeWildcards(t *testing.T) { + d := testDB(t) + base := t.TempDir() + root := filepath.Join(base, "db%!_root") + childPath := filepath.Join(root, "session.jsonl") + insertSessionWithSourcePath(t, d, "claude:wildcard-child", "claude", childPath) + + siblingPath := filepath.Join(base, "dbX!Yroot", "session.jsonl") + insertSessionWithSourcePath(t, d, "claude:wildcard-sibling", "claude", siblingPath) + + got, err := d.ListStoredSourcePathHints("claude", []string{root}) + + require.NoError(t, err) + assert.Equal(t, []string{childPath}, got) +} + +func TestListStoredSourcePathHintsBatchesRootsWithoutTruncating(t *testing.T) { + d := testDB(t) + base := t.TempDir() + var roots []string + var want []string + for i := range storedSourcePathHintRootBatchSize + 17 { + root := filepath.Join(base, fmt.Sprintf("root-%03d", i)) + roots = append(roots, root) + if i == 0 || i == storedSourcePathHintRootBatchSize+16 { + path := filepath.Join(root, "session.jsonl") + insertSessionWithSourcePath( + t, d, fmt.Sprintf("claude:match-%03d", i), 
"claude", path, + ) + want = append(want, path) + } + } + for i := range 250 { + path := filepath.Join(base, "unrelated", fmt.Sprintf("%03d.jsonl", i)) + insertSessionWithSourcePath( + t, d, fmt.Sprintf("claude:unrelated-%03d", i), "claude", path, + ) + } + + got, err := d.ListStoredSourcePathHints("claude", roots) + + require.NoError(t, err) + assert.Equal(t, want, got) +} + +func TestStoredSourcePathHintsLookupUsesAgentFilePathIndex(t *testing.T) { + d := testDB(t) + root := t.TempDir() + explainSQL, args := storedSourcePathHintQuery("claude", []string{root}) + rows, err := d.getReader().Query("EXPLAIN QUERY PLAN "+explainSQL, args...) + require.NoError(t, err) + defer rows.Close() + + var details []string + for rows.Next() { + var id, parent, notused int + var detail string + require.NoError(t, rows.Scan(&id, &parent, &notused, &detail)) + details = append(details, detail) + } + require.NoError(t, rows.Err()) + + assert.Contains( + t, + strings.Join(details, "\n"), + "idx_sessions_agent_file_path_active", + ) +} + +func insertSessionWithSourcePath( + t *testing.T, + d *DB, + id string, + agent string, + path string, + opts ...func(*Session), +) { + t.Helper() + insertSession(t, d, id, "proj", append([]func(*Session){ + func(s *Session) { + s.Agent = agent + s.FilePath = &path + }, + }, opts...)...) +} diff --git a/internal/parser/capabilities.go b/internal/parser/capabilities.go new file mode 100644 index 000000000..3ca2c2fb9 --- /dev/null +++ b/internal/parser/capabilities.go @@ -0,0 +1,61 @@ +package parser + +//go:generate go run github.com/dmarkham/enumer -type=CapabilitySupport -json -text -transform=snake -trimprefix=Capability -output=capabilitysupport_enumer.go + +// CapabilitySupport describes whether a provider implements or can represent a +// source or content feature. The zero value is unsupported. 
+type CapabilitySupport uint8 + +const ( + CapabilityUnsupported CapabilitySupport = iota + CapabilitySupported + CapabilityNotApplicable +) + +// Capabilities groups provider source mechanics and parsed-content features. +// Capabilities are declarative: a concrete provider that reports Supported must +// implement the matching behavior rather than relying on ProviderBase defaults. +// Callers may still invoke optional methods and handle their no-op or typed +// unsupported results, but scheduling and validation should trust this +// declaration once a provider has migrated off the legacy adapter. +type Capabilities struct { + Source SourceCapabilities + Content ContentCapabilities +} + +// SourceCapabilities declares optional source mechanics implemented by a +// provider. +type SourceCapabilities struct { + DiscoverSources CapabilitySupport + WatchSources CapabilitySupport + ClassifyChangedPath CapabilitySupport + FindSource CapabilitySupport + CompositeFingerprint CapabilitySupport + IncrementalAppend CapabilitySupport + MultiSessionSource CapabilitySupport + PerSessionErrors CapabilitySupport + ExcludedSessions CapabilitySupport + ForceReplaceOnParse CapabilitySupport +} + +// ContentCapabilities declares optional normalized content fields a provider +// may emit. 
+type ContentCapabilities struct { + FirstMessage CapabilitySupport + SessionName CapabilitySupport + Cwd CapabilitySupport + GitBranch CapabilitySupport + Relationships CapabilitySupport + Subagents CapabilitySupport + Thinking CapabilitySupport + ToolCalls CapabilitySupport + ToolResults CapabilitySupport + ToolResultEvents CapabilitySupport + PerMessageTokenUsage CapabilitySupport + AggregateUsageEvents CapabilitySupport + TerminationStatus CapabilitySupport + MalformedLineCount CapabilitySupport + TruncationStatus CapabilitySupport + Model CapabilitySupport + StopReason CapabilitySupport +} diff --git a/internal/parser/capabilitysupport_enumer.go b/internal/parser/capabilitysupport_enumer.go new file mode 100644 index 000000000..6d5062d9b --- /dev/null +++ b/internal/parser/capabilitysupport_enumer.go @@ -0,0 +1,112 @@ +// Code generated by "enumer -type=CapabilitySupport -json -text -transform=snake -trimprefix=Capability -output=capabilitysupport_enumer.go"; DO NOT EDIT. + +package parser + +import ( + "encoding/json" + "fmt" + "strings" +) + +const _CapabilitySupportName = "unsupportedsupportednot_applicable" + +var _CapabilitySupportIndex = [...]uint8{0, 11, 20, 34} + +const _CapabilitySupportLowerName = "unsupportedsupportednot_applicable" + +func (i CapabilitySupport) String() string { + if i >= CapabilitySupport(len(_CapabilitySupportIndex)-1) { + return fmt.Sprintf("CapabilitySupport(%d)", i) + } + return _CapabilitySupportName[_CapabilitySupportIndex[i]:_CapabilitySupportIndex[i+1]] +} + +// An "invalid array index" compiler error signifies that the constant values have changed. +// Re-run the stringer command to generate them again. 
+func _CapabilitySupportNoOp() { + var x [1]struct{} + _ = x[CapabilityUnsupported-(0)] + _ = x[CapabilitySupported-(1)] + _ = x[CapabilityNotApplicable-(2)] +} + +var _CapabilitySupportValues = []CapabilitySupport{CapabilityUnsupported, CapabilitySupported, CapabilityNotApplicable} + +var _CapabilitySupportNameToValueMap = map[string]CapabilitySupport{ + _CapabilitySupportName[0:11]: CapabilityUnsupported, + _CapabilitySupportLowerName[0:11]: CapabilityUnsupported, + _CapabilitySupportName[11:20]: CapabilitySupported, + _CapabilitySupportLowerName[11:20]: CapabilitySupported, + _CapabilitySupportName[20:34]: CapabilityNotApplicable, + _CapabilitySupportLowerName[20:34]: CapabilityNotApplicable, +} + +var _CapabilitySupportNames = []string{ + _CapabilitySupportName[0:11], + _CapabilitySupportName[11:20], + _CapabilitySupportName[20:34], +} + +// CapabilitySupportString retrieves an enum value from the enum constants string name. +// Throws an error if the param is not part of the enum. +func CapabilitySupportString(s string) (CapabilitySupport, error) { + if val, ok := _CapabilitySupportNameToValueMap[s]; ok { + return val, nil + } + + if val, ok := _CapabilitySupportNameToValueMap[strings.ToLower(s)]; ok { + return val, nil + } + return 0, fmt.Errorf("%s does not belong to CapabilitySupport values", s) +} + +// CapabilitySupportValues returns all values of the enum +func CapabilitySupportValues() []CapabilitySupport { + return _CapabilitySupportValues +} + +// CapabilitySupportStrings returns a slice of all String values of the enum +func CapabilitySupportStrings() []string { + strs := make([]string, len(_CapabilitySupportNames)) + copy(strs, _CapabilitySupportNames) + return strs +} + +// IsACapabilitySupport returns "true" if the value is listed in the enum definition. 
"false" otherwise +func (i CapabilitySupport) IsACapabilitySupport() bool { + for _, v := range _CapabilitySupportValues { + if i == v { + return true + } + } + return false +} + +// MarshalJSON implements the json.Marshaler interface for CapabilitySupport +func (i CapabilitySupport) MarshalJSON() ([]byte, error) { + return json.Marshal(i.String()) +} + +// UnmarshalJSON implements the json.Unmarshaler interface for CapabilitySupport +func (i *CapabilitySupport) UnmarshalJSON(data []byte) error { + var s string + if err := json.Unmarshal(data, &s); err != nil { + return fmt.Errorf("CapabilitySupport should be a string, got %s", data) + } + + var err error + *i, err = CapabilitySupportString(s) + return err +} + +// MarshalText implements the encoding.TextMarshaler interface for CapabilitySupport +func (i CapabilitySupport) MarshalText() ([]byte, error) { + return []byte(i.String()), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface for CapabilitySupport +func (i *CapabilitySupport) UnmarshalText(text []byte) error { + var err error + *i, err = CapabilitySupportString(string(text)) + return err +} diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index f04786fd2..15b83aed1 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -46,9 +46,12 @@ func isDirOrSymlink( // DiscoveredFile holds a discovered session file. 
type DiscoveredFile struct { - Path string - Project string // pre-extracted project name - Agent AgentType // which agent this file belongs to + Path string + Project string // pre-extracted project name + Agent AgentType // which agent this file belongs to + ForceParse bool // caller requires a full source reparse + ProviderSource *SourceRef // provider-owned source identity, when known + ProviderProcess bool // true when this caller may parse via ProviderSource } // OpenCodeSourceMode identifies the usable OpenCode storage diff --git a/internal/parser/provider.go b/internal/parser/provider.go new file mode 100644 index 000000000..d8c992a79 --- /dev/null +++ b/internal/parser/provider.go @@ -0,0 +1,371 @@ +package parser + +import ( + "context" + "errors" + "time" +) + +const ( + ProviderFeatureFingerprint = "fingerprint" + ProviderFeatureParse = "parse" +) + +// ErrUnsupportedProviderFeature identifies optional provider behavior that is +// intentionally absent. Callers use errors.Is to distinguish this from I/O or +// parse failures. +var ErrUnsupportedProviderFeature = errors.New("unsupported provider feature") + +// UnsupportedProviderFeatureError wraps ErrUnsupportedProviderFeature with the +// provider and feature names that produced it. +type UnsupportedProviderFeatureError struct { + Provider AgentType + Feature string +} + +func (err UnsupportedProviderFeatureError) Error() string { + if err.Provider == "" { + return "unsupported provider feature " + err.Feature + } + return string(err.Provider) + ": unsupported provider feature " + err.Feature +} + +func (err UnsupportedProviderFeatureError) Unwrap() error { + return ErrUnsupportedProviderFeature +} + +// ProviderFactory is the registry surface for creating config-bound provider +// instances. +type ProviderFactory interface { + Definition() AgentDef + Capabilities() Capabilities + NewProvider(ProviderConfig) Provider +} + +// ProviderConfig is copied into a provider instance at construction time. 
type ProviderConfig struct {
	Roots   []string
	Machine string
}

// Clone produces a snapshot of cfg whose Roots slice does not alias the
// receiver's backing array, so later writes on either side stay isolated.
func (cfg ProviderConfig) Clone() ProviderConfig {
	snapshot := cfg
	snapshot.Roots = cfg.RootsCopy()
	return snapshot
}

// RootsCopy returns a freshly allocated copy of cfg.Roots. An empty or nil
// Roots yields nil.
func (cfg ProviderConfig) RootsCopy() []string {
	if len(cfg.Roots) == 0 {
		return nil
	}
	roots := make([]string, len(cfg.Roots))
	copy(roots, cfg.Roots)
	return roots
}
+type ProviderBase struct { + Def AgentDef + Caps Capabilities + Config ProviderConfig +} + +func (b ProviderBase) Definition() AgentDef { + return cloneAgentDef(b.Def) +} + +func (b ProviderBase) Capabilities() Capabilities { + return b.Caps +} + +func (b ProviderBase) Discover(context.Context) ([]SourceRef, error) { + return nil, nil +} + +func (b ProviderBase) WatchPlan(context.Context) (WatchPlan, error) { + return WatchPlan{}, nil +} + +func (b ProviderBase) SourcesForChangedPath( + context.Context, + ChangedPathRequest, +) ([]SourceRef, error) { + return nil, nil +} + +func (b ProviderBase) FindSource( + context.Context, + FindSourceRequest, +) (SourceRef, bool, error) { + return SourceRef{}, false, nil +} + +func (b ProviderBase) Fingerprint( + context.Context, + SourceRef, +) (SourceFingerprint, error) { + return SourceFingerprint{}, b.unsupported(ProviderFeatureFingerprint) +} + +func (b ProviderBase) ParseIncremental( + context.Context, + IncrementalRequest, +) (IncrementalOutcome, IncrementalStatus, error) { + return IncrementalOutcome{}, IncrementalUnsupported, nil +} + +func (b ProviderBase) unsupported(feature string) error { + return UnsupportedProviderFeatureError{ + Provider: b.Def.Type, + Feature: feature, + } +} + +// SourceRef is the engine-visible handle for provider-owned source data. +type SourceRef struct { + // Provider identifies the provider that created this source and must match + // the provider instance used for subsequent operations. + Provider AgentType + // Key is stable within the provider across process restarts. It is suitable + // for dedupe and diagnostics, but not necessarily for DB freshness checks. + Key string + // DisplayPath is human-readable and may be a virtual path. + DisplayPath string + // FingerprintKey is the persisted lookup key for skip-cache and parser data + // version checks. Migrated providers should keep it compatible with legacy + // file_path values whenever practical. 
// SourceFingerprint is the provider-normalized source freshness identity.
type SourceFingerprint struct {
	// Key identifies the fingerprinted source.
	// NOTE(review): presumably mirrors SourceRef.FingerprintKey — confirm
	// against the providers that populate it.
	Key string
	// Size is presumably the source size in bytes — TODO confirm.
	Size int64
	// MTimeNS is a modification time in Unix nanoseconds (the package tests
	// populate it from time.Now().UnixNano()).
	MTimeNS int64
	// Inode and Device presumably carry the filesystem identity of a
	// file-backed source — TODO confirm; zero when not applicable.
	Inode  uint64
	Device uint64
	// Hash is presumably a content digest; the algorithm is not visible
	// here — TODO confirm.
	Hash string
}
type ParseOutcome struct {
	// Results holds the successfully parsed sessions, each paired with its
	// data-version and retry state.
	Results []ParseResultOutcome
	// ExcludedSessionIDs lists session IDs the provider excluded.
	// NOTE(review): how the engine treats these is not visible here — confirm
	// against the sync engine.
	ExcludedSessionIDs []string
	// SourceErrors reports isolated per-session failures that accompany
	// otherwise successful Results (partial multi-session success).
	SourceErrors []SourceError
	// ResultSetComplete presumably signals that Results enumerates every
	// session of the source — TODO confirm.
	ResultSetComplete bool
	// ForceReplace presumably asks the engine to replace stored data rather
	// than merge — TODO confirm.
	ForceReplace bool
	// SkipReason explains a provider-level intentional skip; the zero value
	// is SkipNone.
	SkipReason SkipReason
}
+type IncrementalStatus uint8 + +const ( + IncrementalUnsupported IncrementalStatus = iota + IncrementalNoNewData + IncrementalApplied + IncrementalNeedsFullParse +) + +type legacyProviderFactory struct { + def AgentDef +} + +func (f legacyProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f legacyProviderFactory) Capabilities() Capabilities { + return Capabilities{} +} + +func (f legacyProviderFactory) NewProvider(cfg ProviderConfig) Provider { + return &legacyProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Config: cfg.Clone(), + }, + } +} + +type legacyProvider struct { + ProviderBase +} + +func (p *legacyProvider) Parse(context.Context, ParseRequest) (ParseOutcome, error) { + return ParseOutcome{}, p.unsupported(ProviderFeatureParse) +} + +// ProviderFactories returns one provider factory for every registered agent. +func ProviderFactories() []ProviderFactory { + factories := make([]ProviderFactory, 0, len(Registry)) + for _, def := range Registry { + factories = append(factories, legacyProviderFactory{ + def: cloneAgentDef(def), + }) + } + return factories +} + +// ProviderFactoryByType returns the factory for an agent type. +func ProviderFactoryByType(t AgentType) (ProviderFactory, bool) { + for _, factory := range ProviderFactories() { + if factory.Definition().Type == t { + return factory, true + } + } + return nil, false +} + +// NewProvider constructs a config-bound provider for an agent type. +func NewProvider(t AgentType, cfg ProviderConfig) (Provider, bool) { + factory, ok := ProviderFactoryByType(t) + if !ok { + return nil, false + } + return factory.NewProvider(cfg), true +} + +func cloneAgentDef(def AgentDef) AgentDef { + def.DefaultDirs = append([]string(nil), def.DefaultDirs...) + def.WatchSubdirs = append([]string(nil), def.WatchSubdirs...) 
const (
	// ProviderMigrationLegacyOnly keeps the agent on the legacy adapter.
	// Validation rejects this mode for any non-legacy provider factory.
	ProviderMigrationLegacyOnly ProviderMigrationMode = "legacy-only"
	// ProviderMigrationShadowCompare requires a concrete (non-legacy) provider
	// factory; validation rejects it for the legacy adapter.
	ProviderMigrationShadowCompare ProviderMigrationMode = "shadow-compare"
	// ProviderMigrationProviderAuthoritative requires a concrete (non-legacy)
	// provider factory; validation rejects it for the legacy adapter.
	ProviderMigrationProviderAuthoritative ProviderMigrationMode = "provider-authoritative"
	// ProviderMigrationImportOnly is accepted only for the agent types that
	// isImportOnlyAgentType reports, and only with a concrete provider factory.
	ProviderMigrationImportOnly ProviderMigrationMode = "import-only"
)
ProviderMigrationLegacyOnly, + AgentHermes: ProviderMigrationLegacyOnly, + AgentWorkBuddy: ProviderMigrationLegacyOnly, + AgentForge: ProviderMigrationLegacyOnly, + AgentPiebald: ProviderMigrationLegacyOnly, + AgentWarp: ProviderMigrationLegacyOnly, + AgentPositron: ProviderMigrationLegacyOnly, + AgentAntigravity: ProviderMigrationLegacyOnly, + AgentAntigravityCLI: ProviderMigrationLegacyOnly, + AgentVibe: ProviderMigrationLegacyOnly, + AgentZed: ProviderMigrationLegacyOnly, + AgentQwenPaw: ProviderMigrationLegacyOnly, + AgentGptme: ProviderMigrationLegacyOnly, + AgentShelley: ProviderMigrationLegacyOnly, + AgentAider: ProviderMigrationLegacyOnly, + AgentOMP: ProviderMigrationLegacyOnly, + AgentReasonix: ProviderMigrationLegacyOnly, +} + +// ProviderMigrationModes returns the current provider migration manifest. +func ProviderMigrationModes() map[AgentType]ProviderMigrationMode { + modes := make(map[AgentType]ProviderMigrationMode, len(providerMigrationModes)) + maps.Copy(modes, providerMigrationModes) + return modes +} + +// ValidateProviderMigrationModes checks that provider factories and the +// migration manifest move in lockstep during the staged facade migration. 
+func ValidateProviderMigrationModes( + factories []ProviderFactory, + modes map[AgentType]ProviderMigrationMode, +) error { + seen := make(map[AgentType]bool, len(factories)) + for _, factory := range factories { + def := factory.Definition() + seen[def.Type] = true + + mode, ok := modes[def.Type] + if !ok { + return fmt.Errorf("%s: missing provider migration mode", def.Type) + } + if err := validateProviderMigrationMode(factory, mode); err != nil { + return err + } + } + + for agent := range modes { + if !seen[agent] { + return fmt.Errorf("%s: provider migration mode has no factory", agent) + } + } + return nil +} + +func validateProviderMigrationMode( + factory ProviderFactory, + mode ProviderMigrationMode, +) error { + def := factory.Definition() + legacy := isLegacyProviderFactory(factory) + switch mode { + case ProviderMigrationLegacyOnly: + if !legacy { + return fmt.Errorf( + "%s: concrete provider must opt into %s before leaving %s", + def.Type, + ProviderMigrationShadowCompare, + ProviderMigrationLegacyOnly, + ) + } + case ProviderMigrationShadowCompare, ProviderMigrationProviderAuthoritative: + if legacy { + return fmt.Errorf( + "%s: %s requires a concrete provider; keep %s while using the legacy adapter", + def.Type, + mode, + ProviderMigrationLegacyOnly, + ) + } + case ProviderMigrationImportOnly: + if !isImportOnlyAgentType(def.Type) { + return fmt.Errorf( + "%s: %s is only valid for import-only providers", + def.Type, + ProviderMigrationImportOnly, + ) + } + if legacy { + return fmt.Errorf( + "%s: %s requires a concrete import-only provider; keep %s while using the legacy adapter", + def.Type, + ProviderMigrationImportOnly, + ProviderMigrationLegacyOnly, + ) + } + default: + return fmt.Errorf("%s: invalid provider migration mode %q", def.Type, mode) + } + return nil +} + +func isLegacyProviderFactory(factory ProviderFactory) bool { + _, ok := factory.(legacyProviderFactory) + return ok +} + +func isImportOnlyAgentType(agent AgentType) bool { + switch 
agent { + case AgentClaudeAI, AgentChatGPT: + return true + default: + return false + } +} diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go new file mode 100644 index 000000000..b0231d1b3 --- /dev/null +++ b/internal/parser/provider_shim_scan_test.go @@ -0,0 +1,199 @@ +package parser + +import ( + "go/ast" + "go/parser" + "go/token" + "path/filepath" + "regexp" + "sort" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// legacyEntrypointVerb matches the provider-specific legacy parser entrypoint +// naming convention this facade migration deletes: package-level +// Discover*/Find*/Parse*/Process*/Classify* free functions that encode one +// provider's source shape. A migrated provider owns that behavior on receiver +// methods or provider-neutral source-set helpers; it must not reach back into a +// legacy free function as a compatibility shim. +var legacyEntrypointVerb = regexp.MustCompile(`^(Discover|Find|Parse|Process|Classify)[A-Z]`) + +// providerNeutralEntrypoints are package-level helpers whose names match the +// legacy verb pattern but are genuinely provider-neutral shared utilities. +// Provider files may reference these; they are not provider-specific legacy +// entrypoints. Keep this list small and add to it only when a new shared, +// provider-agnostic helper is introduced. +var providerNeutralEntrypoints = map[string]bool{ + "ParseVirtualSourcePath": true, + "ParseVirtualSourcePathForBase": true, +} + +// pendingShimProviderFiles are provider files whose behavior has not yet been +// folded onto the provider. They still reference legacy free functions and are +// temporarily exempt from the anti-shim gate so intermediate branches in the +// facade migration stay green while providers are folded one branch at a time. 
+// +// Each entry is a standing migration TODO: when a provider's behavior moves +// onto receiver methods or a provider-owned source set, delete its legacy free +// functions and remove the file from this list on the same branch. The stack +// tip (the zero-legacy gate) asserts this list is empty, so a provider cannot +// remain a permanent shim. +var pendingShimProviderFiles = map[string]bool{ + "antigravity_cli_provider.go": true, + "antigravity_provider.go": true, + "claude_provider.go": true, + "codex_provider.go": true, + "commandcode_provider.go": true, + "copilot_provider.go": true, + "cowork_provider.go": true, + "cursor_provider.go": true, + "db_backed_provider.go": true, + "gemini_provider.go": true, + "hermes_provider.go": true, + "iflow_provider.go": true, + "kiro_ide_provider.go": true, + "kiro_provider.go": true, + "opencode_provider.go": true, + "openhands_provider.go": true, + "positron_provider.go": true, + "qwenpaw_provider.go": true, + "shelley_provider.go": true, + "vibe_provider.go": true, + "visualstudio_copilot_provider.go": true, + "vscode_copilot_provider.go": true, + "zed_provider.go": true, +} + +// collectLegacyFreeFuncs returns the set of package-level free functions in the +// parser package whose names match the legacy entrypoint pattern, excluding the +// provider-neutral helpers. Tying detection to functions that actually exist +// (rather than to the name pattern alone) avoids false positives on types and +// values such as ParseResult or ParseRequest, and naturally shrinks as legacy +// functions are deleted. 
// isTestGoFile reports whether name ends in "_test.go" and has at least one
// character before that suffix; the bare name "_test.go" is not counted.
func isTestGoFile(name string) bool {
	const suffix = "_test.go"
	if len(name) <= len(suffix) {
		return false
	}
	return name[len(name)-len(suffix):] == suffix
}
// legacyReferencesInProviderFile lists, sorted and de-duplicated, the names in
// legacy that parsed refers to by bare identifier. It returns nil when there
// are none.
//
// Two kinds of identifier are exempt because they are provider-owned surface
// rather than uses of a package-level function: the name of any function or
// method declared in the file, and the selected name of any selector
// expression (x.Name).
func legacyReferencesInProviderFile(
	parsed *ast.File,
	legacy map[string]bool,
) []string {
	exempt := make(map[*ast.Ident]struct{})
	for _, decl := range parsed.Decls {
		if fn, isFunc := decl.(*ast.FuncDecl); isFunc {
			exempt[fn.Name] = struct{}{}
		}
	}

	hits := make(map[string]struct{})
	ast.Inspect(parsed, func(n ast.Node) bool {
		switch node := n.(type) {
		case *ast.SelectorExpr:
			// ast.Inspect is pre-order, so the selector is seen before its
			// Sel identifier and the exemption is in place in time.
			exempt[node.Sel] = struct{}{}
		case *ast.Ident:
			if _, skip := exempt[node]; !skip && legacy[node.Name] {
				hits[node.Name] = struct{}{}
			}
		}
		return true
	})

	var offenders []string
	for name := range hits {
		offenders = append(offenders, name)
	}
	sort.Strings(offenders)
	return offenders
}
require.NoError(t, err) + assert.Empty(t, discovered) + + plan, err := base.WatchPlan(ctx) + require.NoError(t, err) + assert.Empty(t, plan.Roots) + + changed, err := base.SourcesForChangedPath(ctx, ChangedPathRequest{ + Path: "/tmp/session.jsonl", + EventKind: "write", + WatchRoot: "/tmp", + }) + require.NoError(t, err) + assert.Empty(t, changed) + + source, found, err := base.FindSource(ctx, FindSourceRequest{ + RawSessionID: "raw", + FullSessionID: "agent:raw", + StoredFilePath: "/tmp/session.jsonl", + FingerprintKey: "/tmp/session.jsonl", + RequireFreshSource: true, + }) + require.NoError(t, err) + assert.False(t, found) + assert.Empty(t, source) + + fingerprint, err := base.Fingerprint(ctx, SourceRef{ + Provider: AgentCodex, + Key: "source", + }) + require.Error(t, err) + assert.Empty(t, fingerprint) + assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) + var unsupported UnsupportedProviderFeatureError + require.ErrorAs(t, err, &unsupported) + assert.Equal(t, AgentType(""), unsupported.Provider) + assert.Equal(t, ProviderFeatureFingerprint, unsupported.Feature) + + incremental, status, err := base.ParseIncremental(ctx, IncrementalRequest{ + Source: SourceRef{Provider: AgentCodex, Key: "source"}, + Fingerprint: SourceFingerprint{Key: "source"}, + SessionID: "codex:session", + Offset: 1024, + StartOrdinal: 7, + Machine: "devbox", + }) + require.NoError(t, err) + assert.Equal(t, IncrementalUnsupported, status) + assert.Empty(t, incremental) + + _, ok := any(base).(Provider) + assert.False(t, ok, "ProviderBase must not satisfy Provider without Parse") +} + +func TestUnsupportedProviderFeatureErrorWrapsSentinel(t *testing.T) { + err := UnsupportedProviderFeatureError{ + Provider: AgentCodex, + Feature: ProviderFeatureFingerprint, + } + + assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) + assert.Contains(t, err.Error(), string(AgentCodex)) + assert.Contains(t, err.Error(), ProviderFeatureFingerprint) +} + +func 
TestCapabilitySupportTextAndJSON(t *testing.T) { + assert.Equal(t, "unsupported", CapabilityUnsupported.String()) + assert.Equal(t, "supported", CapabilitySupported.String()) + assert.Equal(t, "not_applicable", CapabilityNotApplicable.String()) + + marshaled, err := json.Marshal(CapabilitySupported) + require.NoError(t, err) + assert.JSONEq(t, `"supported"`, string(marshaled)) + + var decoded CapabilitySupport + require.NoError(t, json.Unmarshal([]byte(`"not_applicable"`), &decoded)) + assert.Equal(t, CapabilityNotApplicable, decoded) + + text, err := CapabilitySupported.MarshalText() + require.NoError(t, err) + assert.Equal(t, "supported", string(text)) + + require.NoError(t, decoded.UnmarshalText([]byte("unsupported"))) + assert.Equal(t, CapabilityUnsupported, decoded) + assert.Error(t, decoded.UnmarshalText([]byte("bogus"))) +} + +func TestProviderRegistryMirrorsAgentRegistry(t *testing.T) { + factories := ProviderFactories() + require.Len(t, factories, len(Registry)) + + seen := make(map[AgentType]bool, len(factories)) + for _, factory := range factories { + def := factory.Definition() + require.Falsef(t, seen[def.Type], "duplicate provider factory for %s", def.Type) + seen[def.Type] = true + + registryDef, ok := AgentByType(def.Type) + require.Truef(t, ok, "provider factory for unknown agent %s", def.Type) + assertAgentDefMetadataEqual(t, registryDef, def) + + provider := factory.NewProvider(ProviderConfig{ + Roots: []string{"/tmp/root"}, + Machine: "devbox", + }) + require.NotNil(t, provider) + assertAgentDefMetadataEqual(t, def, provider.Definition()) + } + + for _, def := range Registry { + assert.Truef(t, seen[def.Type], "missing provider factory for %s", def.Type) + } +} + +func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) + + assert.Equal(t, Capabilities{}, 
provider.Capabilities()) + + ctx := context.Background() + discovered, err := provider.Discover(ctx) + require.NoError(t, err) + assert.Empty(t, discovered) + + plan, err := provider.WatchPlan(ctx) + require.NoError(t, err) + assert.Empty(t, plan.Roots) + + changed, err := provider.SourcesForChangedPath(ctx, ChangedPathRequest{ + Path: "/tmp/session.jsonl", + EventKind: "write", + WatchRoot: "/tmp", + }) + require.NoError(t, err) + assert.Empty(t, changed) + + source, found, err := provider.FindSource(ctx, FindSourceRequest{ + RawSessionID: "session", + FullSessionID: "codex:session", + StoredFilePath: "/tmp/session.jsonl", + FingerprintKey: "/tmp/session.jsonl", + }) + require.NoError(t, err) + assert.False(t, found) + assert.Empty(t, source) + + _, err = provider.Fingerprint(ctx, SourceRef{ + Provider: AgentCodex, + Key: "session", + DisplayPath: "/tmp/session.jsonl", + FingerprintKey: "/tmp/session.jsonl", + }) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) + + incremental, status, err := provider.ParseIncremental(ctx, IncrementalRequest{ + Source: SourceRef{Provider: AgentCodex, Key: "session"}, + Fingerprint: SourceFingerprint{Key: "/tmp/session.jsonl"}, + SessionID: "codex:session", + StartOrdinal: 1, + Machine: "devbox", + }) + require.NoError(t, err) + assert.Equal(t, IncrementalUnsupported, status) + assert.Empty(t, incremental) +} + +func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { + cfg := ProviderConfig{ + Roots: []string{"/tmp/one", "/tmp/two"}, + Machine: "devbox", + } + + factory, ok := ProviderFactoryByType(AgentCodex) + require.True(t, ok) + assert.Equal(t, AgentCodex, factory.Definition().Type) + + provider, ok := NewProvider(AgentCodex, cfg) + require.True(t, ok) + require.NotNil(t, provider) + + cfg.Roots[0] = "/tmp/mutated" + legacy, ok := provider.(*legacyProvider) + require.True(t, ok) + assert.Equal(t, []string{"/tmp/one", "/tmp/two"}, legacy.Config.Roots) + assert.Equal(t, "devbox", 
legacy.Config.Machine) + + _, ok = ProviderFactoryByType("missing") + assert.False(t, ok) + _, ok = NewProvider("missing", cfg) + assert.False(t, ok) +} + +func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { + provider, ok := NewProvider(AgentCodex, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: SourceRef{ + Provider: AgentCodex, + Key: "source", + DisplayPath: "/tmp/source.jsonl", + FingerprintKey: "/tmp/source.jsonl", + }, + Fingerprint: SourceFingerprint{ + Key: "/tmp/source.jsonl", + MTimeNS: time.Now().UnixNano(), + }, + Machine: "devbox", + }) + require.Error(t, err) + assert.Empty(t, outcome) + assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) + var unsupported UnsupportedProviderFeatureError + require.ErrorAs(t, err, &unsupported) + assert.Equal(t, AgentCodex, unsupported.Provider) + assert.Equal(t, ProviderFeatureParse, unsupported.Feature) +} + +func TestProviderMigrationModesCoverRegistry(t *testing.T) { + err := ValidateProviderMigrationModes( + ProviderFactories(), + ProviderMigrationModes(), + ) + require.NoError(t, err) +} + +func TestProviderMigrationModesRejectConcreteProviderLeftLegacyOnly(t *testing.T) { + factory := testProviderFactory{ + def: AgentDef{ + Type: AgentCodex, + DisplayName: "Codex", + }, + } + modes := map[AgentType]ProviderMigrationMode{ + AgentCodex: ProviderMigrationLegacyOnly, + } + + err := ValidateProviderMigrationModes([]ProviderFactory{factory}, modes) + require.Error(t, err) + assert.Contains(t, err.Error(), string(AgentCodex)) + assert.Contains(t, err.Error(), string(ProviderMigrationShadowCompare)) +} + +func TestProviderMigrationModesRejectConcreteModeForLegacyFactory(t *testing.T) { + factory := legacyProviderFactory{ + def: AgentDef{ + Type: AgentCodex, + DisplayName: "Codex", + }, + } + modes := map[AgentType]ProviderMigrationMode{ + AgentCodex: 
ProviderMigrationShadowCompare, + } + + err := ValidateProviderMigrationModes([]ProviderFactory{factory}, modes) + require.Error(t, err) + assert.Contains(t, err.Error(), string(AgentCodex)) + assert.Contains(t, err.Error(), string(ProviderMigrationLegacyOnly)) +} + +func TestProviderMigrationModesRestrictImportOnlyMode(t *testing.T) { + factory := testProviderFactory{ + def: AgentDef{ + Type: AgentCodex, + DisplayName: "Codex", + }, + } + modes := map[AgentType]ProviderMigrationMode{ + AgentCodex: ProviderMigrationImportOnly, + } + + err := ValidateProviderMigrationModes([]ProviderFactory{factory}, modes) + require.Error(t, err) + assert.Contains(t, err.Error(), string(AgentCodex)) + assert.Contains(t, err.Error(), string(ProviderMigrationImportOnly)) +} + +type testProviderFactory struct { + def AgentDef +} + +func (f testProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f testProviderFactory) Capabilities() Capabilities { + return Capabilities{} +} + +func (f testProviderFactory) NewProvider(cfg ProviderConfig) Provider { + return &testProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Config: cfg.Clone(), + }, + } +} + +type testProvider struct { + ProviderBase +} + +func (p *testProvider) Parse(context.Context, ParseRequest) (ParseOutcome, error) { + return ParseOutcome{}, nil +} + +func assertAgentDefMetadataEqual(t *testing.T, want, got AgentDef) { + t.Helper() + + assert.Equal(t, want.Type, got.Type) + assert.Equal(t, want.DisplayName, got.DisplayName) + assert.Equal(t, want.EnvVar, got.EnvVar) + assert.Equal(t, want.ConfigKey, got.ConfigKey) + assert.Equal(t, want.DefaultDirs, got.DefaultDirs) + assert.Equal(t, want.IDPrefix, got.IDPrefix) + assert.Equal(t, want.WatchSubdirs, got.WatchSubdirs) + assert.Equal(t, want.ShallowWatch, got.ShallowWatch) + assert.Equal(t, want.FileBased, got.FileBased) + assert.Equal(t, want.DiscoverFunc == nil, got.DiscoverFunc == nil) + assert.Equal(t, want.FindSourceFunc == 
nil, got.FindSourceFunc == nil) + assert.Equal(t, want.WatchRootsFunc == nil, got.WatchRootsFunc == nil) + assert.Equal(t, want.ShallowWatchRootsFunc == nil, got.ShallowWatchRootsFunc == nil) +} diff --git a/internal/parser/tools.go b/internal/parser/tools.go new file mode 100644 index 000000000..c1117e7dd --- /dev/null +++ b/internal/parser/tools.go @@ -0,0 +1,5 @@ +//go:build tools + +package parser + +import _ "github.com/dmarkham/enumer" diff --git a/internal/sync/engine.go b/internal/sync/engine.go index c709821aa..eb43cc389 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -75,6 +75,15 @@ type EngineConfig struct { // that wrote data. Safe to leave nil (e.g., in PG serve mode // where the engine is not run). Emitter Emitter + // ProviderFactories and ProviderMigrationModes let stacked provider + // migration branches opt concrete providers into side-effect-free + // caller-level shadow observation before provider writes become + // authoritative. Nil uses the parser package registry/manifest. + ProviderFactories []parser.ProviderFactory + ProviderMigrationModes map[parser.AgentType]parser.ProviderMigrationMode + // ProviderShadowRecorder receives serialized shadow observations. + // Nil logs only provider errors or mismatches. + ProviderShadowRecorder func(ProviderShadowComparison) } // Engine orchestrates session file discovery and sync. @@ -99,10 +108,14 @@ type Engine struct { // idPrefix and pathRewriter support remote sync: // prefix all session IDs to avoid collisions, rewrite // temp paths to "host:/remote/path" form. 
- ephemeral bool - idPrefix string - pathRewriter func(string) string - emitter Emitter + ephemeral bool + idPrefix string + pathRewriter func(string) string + emitter Emitter + providerFactories map[parser.AgentType]parser.ProviderFactory + providerMigrationModes map[parser.AgentType]parser.ProviderMigrationMode + providerShadowMu gosync.Mutex + providerShadowRecorder func(ProviderShadowComparison) // forceParse disables every stored-state skip (skip cache, // size/mtime/data_version checks, incremental JSONL deltas) so @@ -181,6 +194,14 @@ func NewEngine( for k, v := range cfg.AgentDirs { dirs[k] = append([]string(nil), v...) } + providerFactories := parser.ProviderFactories() + if cfg.ProviderFactories != nil { + providerFactories = cfg.ProviderFactories + } + providerModes := parser.ProviderMigrationModes() + if cfg.ProviderMigrationModes != nil { + maps.Copy(providerModes, cfg.ProviderMigrationModes) + } return &Engine{ db: database, @@ -192,7 +213,21 @@ func NewEngine( idPrefix: cfg.IDPrefix, pathRewriter: cfg.PathRewriter, emitter: cfg.Emitter, + providerFactories: providerFactoryMap(providerFactories), + providerMigrationModes: providerModes, + providerShadowRecorder: cfg.ProviderShadowRecorder, + } +} + +func providerFactoryMap( + factories []parser.ProviderFactory, +) map[parser.AgentType]parser.ProviderFactory { + out := make(map[parser.AgentType]parser.ProviderFactory, len(factories)) + for _, factory := range factories { + def := factory.Definition() + out[def.Type] = factory } + return out } // migrateLegacyCodexExecSkips removes skip cache entries @@ -411,6 +446,17 @@ type syncJob struct { path string } +func (j syncJob) skipCacheKey() string { + return j.processResult.skipCacheKey(j.path) +} + +func (r processResult) skipCacheKey(path string) string { + if r.cacheKey != "" { + return r.cacheKey + } + return path +} + // SyncPaths syncs only the specified changed file paths // instead of discovering and hashing all session files. 
// Paths that don't match known session file patterns are @@ -464,8 +510,8 @@ func (e *Engine) classifyPaths( paths []string, ) []parser.DiscoveredFile { geminiProjectsByDir := make(map[string]map[string]string) - seen := make(map[string]struct{}, len(paths)) - var files []parser.DiscoveredFile + seen := make(map[string]int, len(paths)) + files := make([]parser.DiscoveredFile, 0, len(paths)) for _, p := range paths { // Antigravity sidecar events map to potentially several // session sources and must classify even when the event @@ -481,12 +527,14 @@ func (e *Engine) classifyPaths( dfs = []parser.DiscoveredFile{df} } } + dfs = append(dfs, e.classifyProviderChangedPath(p)...) for _, df := range dfs { key := string(df.Agent) + "\x00" + df.Path - if _, ok := seen[key]; ok { + if idx, ok := seen[key]; ok { + files[idx] = mergeChangedPathDiscoveredFile(files[idx], df) continue } - seen[key] = struct{}{} + seen[key] = len(files) files = append(files, df) } } @@ -495,6 +543,224 @@ func (e *Engine) classifyPaths( return e.dedupeClaudeDiscoveredFiles(files) } +func mergeChangedPathDiscoveredFile( + current parser.DiscoveredFile, + next parser.DiscoveredFile, +) parser.DiscoveredFile { + current.ForceParse = current.ForceParse || next.ForceParse + current.ProviderProcess = current.ProviderProcess || next.ProviderProcess + if current.Project == "" { + current.Project = next.Project + } + if current.ProviderSource == nil && next.ProviderSource != nil { + current.ProviderSource = next.ProviderSource + } + return current +} + +func (e *Engine) classifyProviderChangedPath( + path string, +) []parser.DiscoveredFile { + ctx := context.Background() + eventKind := providerChangedPathEventKind(path) + var files []parser.DiscoveredFile + seen := map[string]struct{}{} + + agents := make([]parser.AgentType, 0, len(e.providerFactories)) + for agent := range e.providerFactories { + agents = append(agents, agent) + } + slices.SortFunc(agents, func(a, b parser.AgentType) int { + return 
strings.Compare(string(a), string(b)) + }) + + for _, agentType := range agents { + mode := e.providerMigrationModes[agentType] + switch mode { + case parser.ProviderMigrationShadowCompare, + parser.ProviderMigrationProviderAuthoritative: + default: + continue + } + roots := e.agentDirs[agentType] + if len(roots) == 0 { + continue + } + factory, ok := e.providerFactories[agentType] + if !ok || factory == nil { + continue + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: roots, + Machine: e.machine, + }) + def := provider.Definition() + watchRoots := providerChangedPathWatchRoots(ctx, provider, roots) + for _, watchRoot := range watchRoots { + storedSourcePaths, err := e.db.ListStoredSourcePathHints( + string(def.Type), + []string{watchRoot}, + ) + if err != nil { + log.Printf( + "%s provider changed-path stored hints: %v", + def.Type, err, + ) + } + sources, err := provider.SourcesForChangedPath( + ctx, + parser.ChangedPathRequest{ + Path: path, + EventKind: eventKind, + WatchRoot: watchRoot, + StoredSourcePaths: storedSourcePaths, + }, + ) + if err != nil { + if !errors.Is(err, parser.ErrUnsupportedProviderFeature) { + log.Printf( + "%s provider changed-path classification: %v", + def.Type, err, + ) + } + continue + } + for _, source := range sources { + sourcePath := providerDiscoveredPath(source) + if sourcePath == "" { + continue + } + agent := source.Provider + if agent == "" { + agent = def.Type + } + key := string(agent) + "\x00" + sourcePath + if _, ok := seen[key]; ok { + continue + } + if eventKind == "remove" && + filepath.Clean(sourcePath) == filepath.Clean(path) && + !parser.IsRegularFile(sourcePath) && + !providerDeletedPhysicalSQLiteSource(agent, sourcePath) { + continue + } + seen[key] = struct{}{} + sourceCopy := source + files = append(files, parser.DiscoveredFile{ + Path: sourcePath, + Project: source.ProjectHint, + Agent: agent, + ForceParse: providerChangedPathForceParse(agent, sourcePath, path, eventKind, mode), + 
ProviderSource: &sourceCopy, + ProviderProcess: mode == parser.ProviderMigrationProviderAuthoritative, + }) + } + } + } + return files +} + +func providerChangedPathWatchRoots( + ctx context.Context, + provider parser.Provider, + roots []string, +) []string { + plan, err := provider.WatchPlan(ctx) + if err == nil && len(plan.Roots) > 0 { + watchRoots := make([]string, 0, len(plan.Roots)) + seen := make(map[string]struct{}, len(plan.Roots)) + for _, root := range plan.Roots { + path := filepath.Clean(root.Path) + if path == "" || path == "." { + continue + } + if _, ok := seen[path]; ok { + continue + } + seen[path] = struct{}{} + watchRoots = append(watchRoots, path) + } + if len(watchRoots) > 0 { + return watchRoots + } + } + watchRoots := make([]string, 0, len(roots)) + for _, root := range roots { + root = filepath.Clean(root) + if root == "" || root == "." { + continue + } + watchRoots = append(watchRoots, root) + } + return watchRoots +} + +func providerChangedPathForceParse( + agent parser.AgentType, + sourcePath string, + eventPath string, + eventKind string, + mode parser.ProviderMigrationMode, +) bool { + if mode != parser.ProviderMigrationProviderAuthoritative { + return true + } + if filepath.Clean(sourcePath) != filepath.Clean(eventPath) && + !providerVirtualSourceBackedByEvent(sourcePath, eventPath) { + return true + } + return eventKind == "remove" && + providerDeletedPhysicalSQLiteSource(agent, sourcePath) +} + +func providerVirtualSourceBackedByEvent(sourcePath, eventPath string) bool { + idx := strings.LastIndex(sourcePath, "#") + if idx < 0 { + return false + } + dbPath := filepath.Clean(sourcePath[:idx]) + eventPath = filepath.Clean(eventPath) + return eventPath == dbPath || + eventPath == dbPath+"-wal" || + eventPath == dbPath+"-shm" +} + +func providerChangedPathEventKind(path string) string { + if path == "" { + return "" + } + if _, err := os.Lstat(path); err != nil && os.IsNotExist(err) { + return "remove" + } + return "write" +} + +func 
providerDiscoveredPath(source parser.SourceRef) string { + for _, path := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + if path != "" { + return path + } + } + return "" +} + +func providerDeletedPhysicalSQLiteSource( + agent parser.AgentType, path string, +) bool { + switch agent { + case parser.AgentZed: + return filepath.Base(path) == "threads.db" + case parser.AgentShelley: + return filepath.Base(path) == shelleyDBFile + default: + return false + } +} + func dedupeDiscoveredFiles( files []parser.DiscoveredFile, ) []parser.DiscoveredFile { @@ -2769,6 +3035,11 @@ func (e *Engine) syncAllLocked( all = append(all, found...) } } + providerFound, providerFailures := e.discoverProviderSources(ctx, scope) + for _, file := range providerFound { + counts[file.Agent]++ + } + all = append(all, providerFound...) if !since.IsZero() { all = e.dedupeClaudeDiscoveredFiles(all) @@ -2816,6 +3087,9 @@ func (e *Engine) syncAllLocked( stats := e.collectAndBatch( ctx, results, len(all), progressTotal, onProgress, writeMode, ) + for range providerFailures { + stats.RecordFailed() + } if verbose { log.Printf( "file sync: %d synced, %d skipped in %s", @@ -3160,7 +3434,7 @@ func (e *Engine) syncAllLocked( e.lastSyncStats = stats e.mu.Unlock() - if recordSyncState { + if recordSyncState && providerFailures == 0 { e.recordSyncFinished() } // Emission happens in SyncAll / SyncAllSince after syncMu is @@ -3168,6 +3442,82 @@ func (e *Engine) syncAllLocked( return stats } +// discoverProviderSources runs full-sync discovery through the provider facade +// for every concrete provider that is authoritative. It is the provider-shape +// counterpart to the legacy AgentDef.DiscoverFunc loop, so a provider can drop +// its DiscoverFunc and still be discovered once it owns live processing. Shadow +// mode remains observational and never appends provider-only work to the live +// sync list. 
+func (e *Engine) discoverProviderSources(
+	ctx context.Context,
+	scope *rootSyncScope,
+) ([]parser.DiscoveredFile, int) {
+	var files []parser.DiscoveredFile
+	var failures int // number of providers whose Discover call returned an error
+
+	agents := make([]parser.AgentType, 0, len(e.providerFactories))
+	for agent := range e.providerFactories {
+		agents = append(agents, agent)
+	}
+	slices.SortFunc(agents, func(a, b parser.AgentType) int { // map order is random; sort for a stable result order
+		return strings.Compare(string(a), string(b))
+	})
+
+	for _, agentType := range agents {
+		mode := e.providerMigrationModes[agentType]
+		if mode != parser.ProviderMigrationProviderAuthoritative {
+			continue // every other mode contributes nothing to the live sync list
+		}
+		roots := e.agentDirs[agentType]
+		if len(roots) == 0 {
+			continue
+		}
+		filteredRoots := make([]string, 0, len(roots))
+		for _, root := range roots {
+			if scope.includes(root) { // keep only roots accepted by the sync scope
+				filteredRoots = append(filteredRoots, root)
+			}
+		}
+		if len(filteredRoots) == 0 {
+			continue
+		}
+		factory, ok := e.providerFactories[agentType]
+		if !ok || factory == nil {
+			continue
+		}
+		provider := factory.NewProvider(parser.ProviderConfig{
+			Roots:   filteredRoots,
+			Machine: e.machine,
+		})
+		sources, err := provider.Discover(ctx)
+		if err != nil {
+			log.Printf("%s provider discovery: %v", agentType, err)
+			failures++ // reported to the caller; remaining providers are still discovered
+			continue
+		}
+		def := provider.Definition()
+		for _, source := range sources {
+			sourcePath := providerDiscoveredPath(source)
+			if sourcePath == "" {
+				continue // source carries no display path, fingerprint key, or key
+			}
+			// Default an empty Provider on the stored ref itself, not only on Agent.
+			sourceCopy := source
+			if sourceCopy.Provider == "" {
+				sourceCopy.Provider = def.Type // providerSourceForDiscoveredFile rejects Provider != Agent
+			}
+			files = append(files, parser.DiscoveredFile{
+				Path:            sourcePath,
+				Project:         source.ProjectHint,
+				Agent:           sourceCopy.Provider,
+				ProviderSource:  &sourceCopy,
+				ProviderProcess: true,
+			})
+		}
+	}
+	return files, failures
+}
+
 // recordSyncStarted persists the start time of a sync run
 // into pg_sync_state. Callers use this to compute mtime
 // cutoffs for future quick incremental syncs.
@@ -3998,12 +4348,15 @@ func (e *Engine) collectAndBatch( } stats.RecordFailed() if r.cacheSkip && r.mtime != 0 && !r.noCacheSkip { - e.cacheSkip(r.path, r.mtime) + e.cacheSkip(r.skipCacheKey(), r.mtime) } log.Printf("sync error: %v", r.err) continue } if r.skip { + if r.cacheSkip && r.mtime != 0 && !r.noCacheSkip { + e.cacheSkip(r.skipCacheKey(), r.mtime) + } stats.RecordSkip() progress.SessionsDone++ e.reportProgress(onProgress, progress) @@ -4030,15 +4383,15 @@ func (e *Engine) collectAndBatch( stats.filesOK++ stats.parserExcludedFiles++ } - if r.cacheSkip { - e.cacheSkip(r.path, r.mtime) + if r.cacheSkip && !r.noCacheSkip { + e.cacheSkip(r.skipCacheKey(), r.mtime) } progress.SessionsDone++ e.reportProgress(onProgress, progress) continue } if r.cacheSkip { - e.clearSkip(r.path) + e.clearSkip(r.skipCacheKey()) } stats.filesOK++ @@ -4059,7 +4412,7 @@ func (e *Engine) collectAndBatch( sess: pr.Session, msgs: pr.Messages, usageEvents: pr.UsageEvents, - needsRetry: r.needsRetry, + needsRetry: r.needsRetryForSession(pr.Session.ID), forceReplace: r.forceReplace, }) } @@ -4171,12 +4524,33 @@ type processResult struct { // reuse the existing ordinals, so the default append-only // writeMessages would silently drop the rewrite. forceReplace bool + cacheKey string + // retrySessionIDs carries provider per-result data-version state. + // Legacy parsers use needsRetry as a source-wide fallback. + retrySessionIDs map[string]bool + // suppressPresenceSweep marks an incomplete source result where + // missing stored sessions are expected rather than parser drift. 
+ suppressPresenceSweep bool +} + +func (r processResult) needsRetryForSession(sessionID string) bool { + if r.retrySessionIDs != nil { + return r.retrySessionIDs[sessionID] + } + return r.needsRetry +} + +func (r processResult) suppressesPresenceSweepForRetry() bool { + return r.retrySessionIDs == nil && r.needsRetry } func (e *Engine) processFile( ctx context.Context, file parser.DiscoveredFile, ) processResult { + if res, ok := e.processProviderFile(ctx, file); ok { + return res + } var info os.FileInfo var err error @@ -4247,7 +4621,7 @@ func (e *Engine) processFile( // migrateLegacyCodexExecSkips, so this check can treat // the skip cache as authoritative without per-file // re-validation. - if cacheSkip && !e.forceParse { // parse-diff: ignore the skip cache + if cacheSkip && !e.forceParse && !file.ForceParse { // parse-diff: ignore the skip cache e.skipMu.RLock() cachedMtime, cached := e.skipCache[file.Path] e.skipMu.RUnlock() @@ -4255,11 +4629,13 @@ func (e *Engine) processFile( if e.pathNeedsProjectReparse(file.Path) { e.clearSkip(file.Path) } else { - return processResult{ + res := processResult{ skip: true, mtime: mtime, cacheSkip: true, } + e.observeProviderShadow(ctx, file, res) + return res } } } @@ -4345,6 +4721,7 @@ func (e *Engine) processFile( } res.cacheSkip = cacheSkip res.mtime = mtime + e.observeProviderShadow(ctx, file, res) return res } @@ -4360,6 +4737,296 @@ func (e *Engine) pathNeedsProjectReparse(path string) bool { return ok && parser.NeedsProjectReparse(project) } +func (e *Engine) processProviderFile( + ctx context.Context, + file parser.DiscoveredFile, +) (processResult, bool) { + mode := e.providerMigrationModes[file.Agent] + if mode != parser.ProviderMigrationProviderAuthoritative { + return processResult{}, false + } + if file.ProviderSource != nil && !file.ProviderProcess { + return processResult{}, false + } + + factory, ok := e.providerFactories[file.Agent] + if !ok { + return processResult{ + err: fmt.Errorf("provider not 
found for agent type: %s", file.Agent), + }, true + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: e.agentDirs[file.Agent], + Machine: e.machine, + }) + + source, found, err := e.providerSourceForDiscoveredFile(ctx, provider, file) + if err != nil { + return processResult{err: err}, true + } + if !found { + return processResult{ + err: fmt.Errorf( + "%s provider source not found for %s", + file.Agent, + file.Path, + ), + }, true + } + + fingerprint, err := provider.Fingerprint(ctx, source) + if err != nil { + return processResult{err: err}, true + } + cacheKey := providerProcessCacheKey(file, source, fingerprint) + cacheSkip := e.shouldCacheSkip(file) + if cacheSkip && !e.forceParse && !file.ForceParse { + e.skipMu.RLock() + cachedMtime, cached := e.skipCache[cacheKey] + e.skipMu.RUnlock() + if cached && cachedMtime == fingerprint.MTimeNS { + return processResult{ + skip: true, + mtime: fingerprint.MTimeNS, + cacheSkip: true, + cacheKey: cacheKey, + }, true + } + } + + outcome, err := provider.Parse(ctx, parser.ParseRequest{ + Source: source, + Fingerprint: fingerprint, + Machine: e.machine, + ForceParse: e.forceParse || file.ForceParse, + }) + if err != nil { + return processResult{ + err: err, + mtime: fingerprint.MTimeNS, + cacheSkip: cacheSkip, + cacheKey: cacheKey, + noCacheSkip: true, + }, true + } + if err := validateProviderOutcome( + provider.Definition(), + source, + fingerprint, + outcome, + ); err != nil { + return processResult{ + err: err, + mtime: fingerprint.MTimeNS, + cacheSkip: cacheSkip, + cacheKey: cacheKey, + noCacheSkip: true, + }, true + } + cleanCache := providerOutcomeAllowsCleanSkipCache(outcome) + if outcome.SkipReason != parser.SkipNone { + return processResult{ + skip: true, + mtime: fingerprint.MTimeNS, + cacheSkip: cacheSkip, + cacheKey: cacheKey, + noCacheSkip: !cleanCache, + }, true + } + + res := processResult{ + results: parseOutcomeResults(outcome.Results), + excludedSessionIDs: append([]string(nil), 
outcome.ExcludedSessionIDs...), + mtime: fingerprint.MTimeNS, + cacheSkip: cacheSkip, + cacheKey: cacheKey, + noCacheSkip: !cleanCache, + forceReplace: outcome.ForceReplace, + suppressPresenceSweep: !outcome.ResultSetComplete, + } + for _, result := range outcome.Results { + if result.DataVersion == parser.DataVersionNeedsRetry { + if res.retrySessionIDs == nil { + res.retrySessionIDs = make(map[string]bool) + } + res.retrySessionIDs[result.Result.Session.ID] = true + } + } + if e.forceParse || file.ForceParse { + for _, sourceErr := range outcome.SourceErrors { + res.sessionErrs = append(res.sessionErrs, sessionParseError{ + sessionID: sourceErr.SessionID, + virtualPath: sourceErr.SourceKey, + err: sourceErr.Err, + }) + } + } + return res, true +} + +func providerOutcomeAllowsCleanSkipCache(outcome parser.ParseOutcome) bool { + if !outcome.ResultSetComplete { + return false + } + if len(outcome.SourceErrors) > 0 { + return false + } + for _, result := range outcome.Results { + if result.DataVersion == parser.DataVersionNeedsRetry { + return false + } + } + return true +} + +func (e *Engine) providerSourceForDiscoveredFile( + ctx context.Context, + provider parser.Provider, + file parser.DiscoveredFile, +) (parser.SourceRef, bool, error) { + if file.ProviderSource != nil { + source := *file.ProviderSource + if source.Provider != file.Agent { + return parser.SourceRef{}, false, fmt.Errorf( + "provider source mismatch for %s: %s", + file.Agent, + source.Provider, + ) + } + return source, true, nil + } + + return provider.FindSource(ctx, parser.FindSourceRequest{ + StoredFilePath: file.Path, + FingerprintKey: file.Path, + RequireFreshSource: !e.forceParse && !file.ForceParse, + }) +} + +func providerProcessCacheKey( + file parser.DiscoveredFile, + source parser.SourceRef, + fingerprint parser.SourceFingerprint, +) string { + if key := plannedSkipKey(source, fingerprint); key != "" { + return key + } + return file.Path +} + +func (e *Engine) observeProviderShadow( + 
ctx context.Context, + file parser.DiscoveredFile, + legacy processResult, +) { + mode := e.providerMigrationModes[file.Agent] + if mode != parser.ProviderMigrationShadowCompare { + return + } + comparison := ProviderShadowComparison{File: file, Mode: mode} + if reason := providerShadowNotComparableReason(legacy); reason != "" { + comparison.NotComparableReason = reason + e.recordProviderShadowComparison(comparison) + return + } + factory, ok := e.providerFactories[file.Agent] + if !ok { + return + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: e.agentDirs[file.Agent], + Machine: e.machine, + }) + source, found, err := e.providerSourceForDiscoveredFile(ctx, provider, file) + comparison.Err = err + if err == nil && found { + comparison.Source = source + comparison.Observation, comparison.Err = ObserveProviderSource( + ctx, + provider, + ProviderObserveRequest{ + Source: source, + Machine: e.machine, + ForceParse: e.forceParse || file.ForceParse, + }, + ) + if comparison.Err == nil { + comparison.Mismatches = compareProviderObservationToProcessResult( + comparison.Observation, + legacy, + file, + ) + } + } + if err == nil && !found { + comparison.Err = fmt.Errorf( + "%s provider shadow source not found for %s", + file.Agent, + file.Path, + ) + } + e.recordProviderShadowComparison(comparison) +} + +func providerShadowNotComparableReason(legacy processResult) string { + switch { + case legacy.err != nil: + return "legacy error" + case legacy.incremental != nil: + return "legacy incremental" + case legacy.skip: + return "legacy skip" + default: + return "" + } +} + +func (e *Engine) recordProviderShadowComparison( + comparison ProviderShadowComparison, +) { + if e.providerShadowRecorder != nil { + e.providerShadowMu.Lock() + defer e.providerShadowMu.Unlock() + e.providerShadowRecorder(comparison) + return + } + if comparison.NotComparableReason != "" { + return + } + sourceKey := comparison.Source.Key + if sourceKey == "" { + sourceKey = 
comparison.Source.FingerprintKey + } + fingerprintKey := comparison.Observation.Fingerprint.Key + if fingerprintKey == "" { + fingerprintKey = comparison.Source.FingerprintKey + } + if comparison.Err != nil { + log.Printf( + "%s provider shadow %s mode=%s source=%q fingerprint=%q: %v", + comparison.File.Agent, + comparison.File.Path, + comparison.Mode, + sourceKey, + fingerprintKey, + comparison.Err, + ) + return + } + if len(comparison.Mismatches) == 0 { + return + } + log.Printf( + "%s provider shadow %s mode=%s source=%q fingerprint=%q mismatches: %s", + comparison.File.Agent, + comparison.File.Path, + comparison.Mode, + sourceKey, + fingerprintKey, + strings.Join(comparison.Mismatches, "; "), + ) +} + func (e *Engine) shouldCacheSkip( file parser.DiscoveredFile, ) bool { @@ -8744,12 +9411,13 @@ func (e *Engine) FindSourceFile(sessionID string) string { } } } + if f := e.findProviderSourceFile( + context.Background(), def, sessionID, rawSessionID, + ); f != "" { + return f + } return "" } - if def.FindSourceFunc == nil { - return "" - } - if def.Type == parser.AgentKiro { for _, dir := range e.agentDirs[parser.AgentKiro] { dbPath := parser.FindKiroSQLiteDBPath(dir) @@ -8788,14 +9456,59 @@ func (e *Engine) FindSourceFile(sessionID string) string { } } - for _, d := range e.agentDirs[def.Type] { - if f := def.FindSourceFunc(d, bareID); f != "" { - return f + if def.FindSourceFunc != nil { + for _, d := range e.agentDirs[def.Type] { + if f := def.FindSourceFunc(d, bareID); f != "" { + return f + } } } + if f := e.findProviderSourceFile( + context.Background(), def, sessionID, bareID, + ); f != "" { + return f + } return "" } +// findProviderSourceFile resolves a single session's source file through the +// provider facade for authoritative concrete providers. 
It is the +// provider-shape counterpart to AgentDef.FindSourceFunc, so a provider can drop +// its FindSourceFunc hook and stay locatable for diagnostics, export, and +// parse-diff lookups once it owns live processing. Shadow mode remains +// observational and must not satisfy lookups that legacy lookup would miss. +func (e *Engine) findProviderSourceFile( + ctx context.Context, + def parser.AgentDef, + sessionID string, + rawSessionID string, +) string { + mode := e.providerMigrationModes[def.Type] + if mode != parser.ProviderMigrationProviderAuthoritative { + return "" + } + factory, ok := e.providerFactories[def.Type] + if !ok || factory == nil { + return "" + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: e.agentDirs[def.Type], + Machine: e.machine, + }) + source, found, err := provider.FindSource(ctx, parser.FindSourceRequest{ + RawSessionID: rawSessionID, + FullSessionID: sessionID, + }) + if err != nil { + log.Printf("%s provider source lookup: %v", def.Type, err) + return "" + } + if !found { + return "" + } + return providerDiscoveredPath(source) +} + // SourceMtime returns the current source-backed mtime for a // session. Most file-based agents map directly to a single source // file, but OpenCode storage sessions derive their effective mtime @@ -9097,8 +9810,9 @@ func (e *Engine) SyncSingleSessionContext( // the file, even if it was cached as non-interactive // during a bulk SyncAll. 
file := parser.DiscoveredFile{ - Path: path, - Agent: agent, + Path: path, + Agent: agent, + ForceParse: true, } if e.shouldCacheSkip(file) { e.clearSkip(path) @@ -9245,13 +9959,16 @@ func (e *Engine) SyncSingleSessionContext( res := e.processFile(ctx, file) if res.err != nil { if res.cacheSkip && res.mtime != 0 && !res.noCacheSkip { - e.cacheSkip(path, res.mtime) + e.cacheSkip(res.skipCacheKey(path), res.mtime) } return res.err } if res.skip { return nil } + if res.cacheSkip { + e.clearSkip(res.skipCacheKey(path)) + } // Delete parser-excluded sessions before writing the parsed // results, mirroring collectAndBatch. Vibe promotes a session @@ -9290,7 +10007,7 @@ func (e *Engine) SyncSingleSessionContext( sess: pr.Session, msgs: pr.Messages, usageEvents: pr.UsageEvents, - needsRetry: res.needsRetry, + needsRetry: res.needsRetryForSession(pr.Session.ID), }, ); err != nil && !isIntentionalSessionSkip(err) && diff --git a/internal/sync/parsediff.go b/internal/sync/parsediff.go index e0b8ed194..f37410493 100644 --- a/internal/sync/parsediff.go +++ b/internal/sync/parsediff.go @@ -566,7 +566,7 @@ func (e *Engine) parseDiffCollectFile( sess: pr.Session, msgs: pr.Messages, usageEvents: pr.UsageEvents, - needsRetry: job.needsRetry, + needsRetry: job.needsRetryForSession(pr.Session.ID), } prepared, msgs, ok := e.prepareSessionWrite(pw, resolver) id := prepared.ID @@ -766,9 +766,12 @@ func (e *Engine) parseDiffCollectFile( report.Totals.ExcludedByParser++ } - // needsRetry output is transient and low fidelity; missing - // sessions there are expected, not parser drift. - if !job.needsRetry { + // Legacy source-wide needsRetry output and incomplete provider + // result sets are transient and low fidelity; missing sessions + // there are expected, not parser drift. Provider per-session + // retry state is handled above and should not hide unrelated + // missing sessions from the same complete source. 
+ if !job.suppressesPresenceSweepForRetry() && !job.suppressPresenceSweep { *presencePaths = append(*presencePaths, base) } return nil diff --git a/internal/sync/parsediff_compare_test.go b/internal/sync/parsediff_compare_test.go index f0dc39147..ccd9e61c5 100644 --- a/internal/sync/parsediff_compare_test.go +++ b/internal/sync/parsediff_compare_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "go.kenn.io/agentsview/internal/db" + "go.kenn.io/agentsview/internal/dbtest" "go.kenn.io/agentsview/internal/parser" ) @@ -1740,6 +1741,153 @@ func TestParseDiffSourceReliableForRaced(t *testing.T) { } } +func TestParseDiffPresenceSweepKeepsMixedProviderRetryCoverage(t *testing.T) { + sourcePath := "/tmp/provider-source.jsonl" + filePath := sourcePath + current := &db.Session{ + ID: "provider-current", + Agent: string(parser.AgentClaude), + Machine: "devbox", + Project: "provider-project", + FilePath: &filePath, + DataVersion: db.CurrentDataVersion(), + } + retry := &db.Session{ + ID: "provider-retry", + Agent: string(parser.AgentClaude), + Machine: "devbox", + Project: "provider-project", + FilePath: &filePath, + DataVersion: db.CurrentDataVersion(), + } + missing := &db.Session{ + ID: "provider-missing", + Agent: string(parser.AgentClaude), + Machine: "devbox", + Project: "provider-project", + FilePath: &filePath, + DataVersion: db.CurrentDataVersion(), + } + storedByID := map[string]*db.Session{ + current.ID: current, + retry.ID: retry, + missing.ID: missing, + } + storedByPath := map[string][]*db.Session{ + sourcePath: {current, retry, missing}, + } + job := syncJob{ + path: sourcePath, + processResult: processResult{ + results: []parser.ParseResult{ + {Session: parser.ParsedSession{ + ID: current.ID, + Agent: parser.AgentClaude, + Machine: "devbox", + Project: "provider-project", + File: parser.FileInfo{ + Path: sourcePath, + }, + }}, + {Session: parser.ParsedSession{ + ID: retry.ID, + Agent: parser.AgentClaude, + Machine: "devbox", + Project: 
"provider-project", + File: parser.FileInfo{ + Path: sourcePath, + }, + }}, + }, + retrySessionIDs: map[string]bool{ + retry.ID: true, + }, + }, + } + engine := &Engine{db: dbtest.OpenTestDB(t)} + report := &ParseDiffReport{FieldCounts: map[string]int{}} + visited := map[string]bool{} + var presencePaths []string + + err := engine.parseDiffCollectFile( + context.Background(), + report, + job, + map[string]parser.AgentType{sourcePath: parser.AgentClaude}, + storedByID, + storedByPath, + visited, + engine.loadWorktreeProjectResolver(), + &presencePaths, + ) + require.NoError(t, err) + engine.parseDiffPresenceSweep( + report, + presencePaths, + storedByPath, + visited, + ) + + assert.Equal(t, 1, report.Totals.NeedsRetry) + assert.Equal(t, 1, report.Totals.Changed) + byID := map[string]SessionDiff{} + for _, session := range report.Sessions { + byID[session.SessionID] = session + } + assert.Equal(t, DiffNeedsRetry, byID[retry.ID].Class) + assert.Equal(t, DiffChanged, byID[missing.ID].Class) + require.NotEmpty(t, byID[missing.ID].Fields) + assert.Equal(t, FieldPresence, byID[missing.ID].Fields[0].Field) +} + +func TestParseDiffPresenceSweepSkipsIncompleteProviderResults(t *testing.T) { + sourcePath := "/tmp/incomplete-provider-source.jsonl" + filePath := sourcePath + missing := &db.Session{ + ID: "provider-missing", + Agent: string(parser.AgentClaude), + Machine: "devbox", + Project: "provider-project", + FilePath: &filePath, + DataVersion: db.CurrentDataVersion(), + } + storedByPath := map[string][]*db.Session{ + sourcePath: {missing}, + } + job := syncJob{ + path: sourcePath, + processResult: processResult{ + suppressPresenceSweep: true, + }, + } + engine := &Engine{db: dbtest.OpenTestDB(t)} + report := &ParseDiffReport{FieldCounts: map[string]int{}} + visited := map[string]bool{} + var presencePaths []string + + err := engine.parseDiffCollectFile( + context.Background(), + report, + job, + map[string]parser.AgentType{sourcePath: parser.AgentClaude}, + 
map[string]*db.Session{missing.ID: missing}, + storedByPath, + visited, + engine.loadWorktreeProjectResolver(), + &presencePaths, + ) + require.NoError(t, err) + engine.parseDiffPresenceSweep( + report, + presencePaths, + storedByPath, + visited, + ) + + assert.Equal(t, 0, report.Totals.Changed) + assert.Empty(t, report.Sessions) +} + func TestParseDiffReportHasFailures(t *testing.T) { tests := []struct { name string diff --git a/internal/sync/provider_shadow.go b/internal/sync/provider_shadow.go new file mode 100644 index 000000000..73ba1fac2 --- /dev/null +++ b/internal/sync/provider_shadow.go @@ -0,0 +1,583 @@ +package sync + +import ( + "context" + "fmt" + "reflect" + "slices" + "strings" + + "go.kenn.io/agentsview/internal/parser" +) + +// ProviderObserveRequest is the source-level shadow-parse input used while the +// legacy sync path remains authoritative. +type ProviderObserveRequest struct { + Source parser.SourceRef + Machine string + ForceParse bool +} + +// ProviderObservation is the normalized, side-effect-free provider outcome for +// one source. +type ProviderObservation struct { + Fingerprint parser.SourceFingerprint + Results []parser.ParseResult + ExcludedSessionIDs []string + SourceErrors []parser.SourceError + SkipReason parser.SkipReason + ForceReplace bool + Planned ProviderPlannedEffects +} + +// ProviderShadowComparison is one caller-level shadow result. Legacy sync +// remains authoritative; this value records the side-effect-free provider +// observation and any differences from the legacy processResult. +type ProviderShadowComparison struct { + File parser.DiscoveredFile + Mode parser.ProviderMigrationMode + Source parser.SourceRef + Observation ProviderObservation + Mismatches []string + NotComparableReason string + Err error +} + +// ProviderPlannedEffects describes writes the provider path would have made. +// Shadow mode compares these in memory; it does not receive live DB, skip-cache, +// or diagnostic writers. 
SSE scopes are carried for later caller work but are +// not part of the root processResult comparison. +type ProviderPlannedEffects struct { + SourceKeys []string + DataVersions []ProviderPlannedDataVersion + SkipCacheKeys []string + Diagnostics []ProviderPlannedDiagnostic + SSEScopes []string +} + +// ProviderPlannedDataVersion is an in-memory data-version write candidate. +type ProviderPlannedDataVersion struct { + SessionID string + State parser.DataVersionState + RetryReason string +} + +// ProviderPlannedDiagnostic is an in-memory parse diagnostic candidate. +type ProviderPlannedDiagnostic struct { + SourceKey string + DisplayPath string + SessionID string + Err error + Retryable bool +} + +// DataVersionSessionIDs returns the planned data-version session IDs in parse +// result order. +func (p ProviderPlannedEffects) DataVersionSessionIDs() []string { + ids := make([]string, 0, len(p.DataVersions)) + for _, dataVersion := range p.DataVersions { + ids = append(ids, dataVersion.SessionID) + } + return ids +} + +// RetrySessionIDs returns sessions that need a future parse retry. +func (p ProviderPlannedEffects) RetrySessionIDs() []string { + var ids []string + for _, dataVersion := range p.DataVersions { + if dataVersion.State == parser.DataVersionNeedsRetry { + ids = append(ids, dataVersion.SessionID) + } + } + return ids +} + +// ObserveProviderSource fingerprints and parses a provider source without +// mutating persisted session state. It is the source-level comparison surface +// provider migration branches use before caller-level dual-run wiring exists. 
+func ObserveProviderSource( + ctx context.Context, + provider parser.Provider, + req ProviderObserveRequest, +) (ProviderObservation, error) { + def := provider.Definition() + if req.Source.Provider != def.Type { + return ProviderObservation{}, fmt.Errorf( + "provider source mismatch: source is %s, provider is %s", + req.Source.Provider, + def.Type, + ) + } + + fingerprint, err := provider.Fingerprint(ctx, req.Source) + if err != nil { + return ProviderObservation{}, err + } + outcome, err := provider.Parse(ctx, parser.ParseRequest{ + Source: req.Source, + Fingerprint: fingerprint, + Machine: req.Machine, + ForceParse: req.ForceParse, + }) + if err != nil { + return ProviderObservation{}, err + } + if err := validateProviderOutcome(def, req.Source, fingerprint, outcome); err != nil { + return ProviderObservation{}, err + } + + observation := ProviderObservation{ + Fingerprint: fingerprint, + Results: parseOutcomeResults(outcome.Results), + ExcludedSessionIDs: append([]string(nil), outcome.ExcludedSessionIDs...), + SourceErrors: append([]parser.SourceError(nil), outcome.SourceErrors...), + SkipReason: outcome.SkipReason, + ForceReplace: outcome.ForceReplace, + } + observation.Planned = planProviderEffects(req.Source, fingerprint, outcome) + return observation, nil +} + +func compareProviderObservationToProcessResult( + observation ProviderObservation, + legacy processResult, + file parser.DiscoveredFile, +) []string { + var mismatches []string + if len(observation.Results) != len(legacy.results) { + mismatches = append(mismatches, fmt.Sprintf( + "result count: provider=%d legacy=%d", + len(observation.Results), len(legacy.results), + )) + } + for i := 0; i < len(observation.Results) && i < len(legacy.results); i++ { + providerResult := observation.Results[i] + legacyResult := legacy.results[i] + if !reflect.DeepEqual(providerResult.Session, legacyResult.Session) { + mismatches = append(mismatches, fmt.Sprintf( + "result[%d] session differs: provider=%+v 
legacy=%+v", + i, providerResult.Session, legacyResult.Session, + )) + } + if !reflect.DeepEqual(providerResult.Messages, legacyResult.Messages) { + mismatches = append(mismatches, fmt.Sprintf( + "result[%d] messages differ", + i, + )) + } + if !reflect.DeepEqual(providerResult.UsageEvents, legacyResult.UsageEvents) { + mismatches = append(mismatches, fmt.Sprintf( + "result[%d] usage events differ", + i, + )) + } + } + if !slices.Equal(observation.ExcludedSessionIDs, legacy.excludedSessionIDs) { + mismatches = append(mismatches, fmt.Sprintf( + "excluded_session_ids: provider=%v legacy=%v", + observation.ExcludedSessionIDs, legacy.excludedSessionIDs, + )) + } + providerSourceErrors := comparableProviderSourceErrors(observation.SourceErrors) + legacySourceErrors := comparableLegacySourceErrors(file.Agent, legacy.sessionErrs) + if !reflect.DeepEqual(providerSourceErrors, legacySourceErrors) { + mismatches = append(mismatches, fmt.Sprintf( + "source_errors differ: provider=%v legacy=%v", + providerSourceErrors, legacySourceErrors, + )) + } + providerPlanned := comparablePlannedEffects(observation.Planned) + legacyPlanned := comparablePlannedEffects( + legacyPlannedEffectsFromProcessResult(file, legacy), + ) + if !slices.Equal(providerPlanned.SourceKeys, legacyPlanned.SourceKeys) { + mismatches = append(mismatches, fmt.Sprintf( + "planned.source_keys: provider=%v legacy=%v", + providerPlanned.SourceKeys, legacyPlanned.SourceKeys, + )) + } + if !reflect.DeepEqual(providerPlanned.DataVersions, legacyPlanned.DataVersions) { + mismatches = append(mismatches, fmt.Sprintf( + "planned.data_versions: provider=%v legacy=%v", + providerPlanned.DataVersions, legacyPlanned.DataVersions, + )) + } + if !slices.Equal(providerPlanned.SkipCacheKeys, legacyPlanned.SkipCacheKeys) { + mismatches = append(mismatches, fmt.Sprintf( + "planned.skip_cache_keys: provider=%v legacy=%v", + providerPlanned.SkipCacheKeys, legacyPlanned.SkipCacheKeys, + )) + } + if 
!reflect.DeepEqual(providerPlanned.Diagnostics, legacyPlanned.Diagnostics) { + mismatches = append(mismatches, fmt.Sprintf( + "planned.diagnostics: provider=%v legacy=%v", + providerPlanned.Diagnostics, legacyPlanned.Diagnostics, + )) + } + if observation.ForceReplace != legacy.forceReplace { + mismatches = append(mismatches, fmt.Sprintf( + "force_replace: provider=%t legacy=%t", + observation.ForceReplace, legacy.forceReplace, + )) + } + return mismatches +} + +func legacyPlannedEffectsFromProcessResult( + file parser.DiscoveredFile, + legacy processResult, +) ProviderPlannedEffects { + planned := ProviderPlannedEffects{} + for _, result := range legacy.results { + if result.Session.File.Path != "" && + !slices.Contains(planned.SourceKeys, result.Session.File.Path) { + planned.SourceKeys = append(planned.SourceKeys, result.Session.File.Path) + } + if result.Session.ID == "" { + continue + } + state := parser.DataVersionCurrent + if legacy.needsRetry { + state = parser.DataVersionNeedsRetry + } + planned.DataVersions = append(planned.DataVersions, ProviderPlannedDataVersion{ + SessionID: result.Session.ID, + State: state, + }) + } + if legacy.cacheSkip && legacy.mtime != 0 && !legacy.noCacheSkip && + legacy.incremental == nil && legacy.err == nil && len(legacy.results) == 0 && + file.Path != "" { + planned.SkipCacheKeys = append(planned.SkipCacheKeys, file.Path) + } + for _, sessionErr := range legacy.sessionErrs { + sessionID := normalizeLegacySessionID(file.Agent, sessionErr.sessionID) + planned.Diagnostics = append(planned.Diagnostics, ProviderPlannedDiagnostic{ + SourceKey: sessionErr.virtualPath, + DisplayPath: sessionErr.virtualPath, + SessionID: sessionID, + Err: sessionErr.err, + Retryable: true, + }) + } + return planned +} + +type comparableSourceError struct { + SessionID string + SourceKey string + Path string + Err string + Retryable bool +} + +func comparableProviderSourceErrors(sourceErrors []parser.SourceError) []comparableSourceError { + comparable 
:= make([]comparableSourceError, 0, len(sourceErrors)) + for _, sourceErr := range sourceErrors { + path := sourceErr.DisplayPath + if path == "" { + path = sourceErr.SourceKey + } + comparable = append(comparable, comparableSourceError{ + SessionID: sourceErr.SessionID, + SourceKey: sourceErr.SourceKey, + Path: path, + Err: errString(sourceErr.Err), + Retryable: sourceErr.Retryable, + }) + } + return comparable +} + +func comparableLegacySourceErrors( + agent parser.AgentType, + sessionErrs []sessionParseError, +) []comparableSourceError { + comparable := make([]comparableSourceError, 0, len(sessionErrs)) + for _, sessionErr := range sessionErrs { + comparable = append(comparable, comparableSourceError{ + SessionID: normalizeLegacySessionID(agent, sessionErr.sessionID), + SourceKey: sessionErr.virtualPath, + Path: sessionErr.virtualPath, + Err: errString(sessionErr.err), + Retryable: true, + }) + } + return comparable +} + +func normalizeLegacySessionID(agent parser.AgentType, sessionID string) string { + if sessionID == "" { + return "" + } + def, ok := parser.AgentByType(agent) + if !ok || def.IDPrefix == "" { + return sessionID + } + host, rawID := parser.StripHostPrefix(sessionID) + if strings.HasPrefix(rawID, def.IDPrefix) { + return sessionID + } + normalized := def.IDPrefix + rawID + if host != "" { + return host + "~" + normalized + } + return normalized +} + +func errString(err error) string { + if err == nil { + return "" + } + return err.Error() +} + +type comparablePlanned struct { + SourceKeys []string + DataVersions []comparablePlannedDataVersion + SkipCacheKeys []string + Diagnostics []comparablePlannedDiagnostic +} + +type comparablePlannedDataVersion struct { + SessionID string + State parser.DataVersionState +} + +type comparablePlannedDiagnostic struct { + SourceKey string + DisplayPath string + SessionID string + Err string + Retryable bool +} + +func comparablePlannedEffects(planned ProviderPlannedEffects) comparablePlanned { + comparable := 
comparablePlanned{ + SourceKeys: slices.Clone(planned.SourceKeys), + SkipCacheKeys: slices.Clone(planned.SkipCacheKeys), + } + comparable.DataVersions = make( + []comparablePlannedDataVersion, + 0, + len(planned.DataVersions), + ) + for _, dataVersion := range planned.DataVersions { + comparable.DataVersions = append( + comparable.DataVersions, + comparablePlannedDataVersion{ + SessionID: dataVersion.SessionID, + State: dataVersion.State, + }, + ) + } + comparable.Diagnostics = make( + []comparablePlannedDiagnostic, + 0, + len(planned.Diagnostics), + ) + for _, diagnostic := range planned.Diagnostics { + comparable.Diagnostics = append( + comparable.Diagnostics, + comparablePlannedDiagnostic{ + SourceKey: diagnostic.SourceKey, + DisplayPath: diagnostic.DisplayPath, + SessionID: diagnostic.SessionID, + Err: errString(diagnostic.Err), + Retryable: diagnostic.Retryable, + }, + ) + } + return comparable +} + +func validateProviderOutcome( + def parser.AgentDef, + source parser.SourceRef, + fingerprint parser.SourceFingerprint, + outcome parser.ParseOutcome, +) error { + for _, result := range outcome.Results { + session := result.Result.Session + if session.Agent != def.Type { + return fmt.Errorf( + "%s: provider result session agent mismatch for %q: got %s", + def.Type, + session.ID, + session.Agent, + ) + } + if err := validateProviderParseResultSessionIDs(def, result.Result); err != nil { + return err + } + } + for _, sessionID := range outcome.ExcludedSessionIDs { + if err := validateProviderSessionID(def, sessionID, "excluded session id"); err != nil { + return err + } + } + for _, sourceErr := range outcome.SourceErrors { + if err := validateProviderSessionID(def, sourceErr.SessionID, "diagnostic session id"); err != nil { + return err + } + if sourceErr.SourceKey == "" { + return fmt.Errorf( + "%s: provider diagnostic source key is required for source %q", + def.Type, + source.Key, + ) + } + if !providerSourceKeyMatches(source, fingerprint, sourceErr.SourceKey) 
{ + return fmt.Errorf( + "%s: provider diagnostic source key %q is unrelated to source %q", + def.Type, + sourceErr.SourceKey, + source.Key, + ) + } + } + return nil +} + +func validateProviderParseResultSessionIDs(def parser.AgentDef, result parser.ParseResult) error { + sessionIDs := []struct { + field string + id string + }{ + {field: "result session id", id: result.Session.ID}, + {field: "parent session id", id: result.Session.ParentSessionID}, + } + for _, sessionID := range sessionIDs { + if err := validateProviderSessionID(def, sessionID.id, sessionID.field); err != nil { + return err + } + } + for _, usage := range result.Session.UsageEvents { + if err := validateProviderSessionID(def, usage.SessionID, "session usage event session id"); err != nil { + return err + } + } + for _, usage := range result.UsageEvents { + if err := validateProviderSessionID(def, usage.SessionID, "usage event session id"); err != nil { + return err + } + } + for _, message := range result.Messages { + for _, toolCall := range message.ToolCalls { + if err := validateProviderSessionID(def, toolCall.SubagentSessionID, "tool call subagent session id"); err != nil { + return err + } + for _, event := range toolCall.ResultEvents { + if err := validateProviderSessionID(def, event.SubagentSessionID, "tool result event subagent session id"); err != nil { + return err + } + } + } + } + return nil +} + +func validateProviderSessionID(def parser.AgentDef, sessionID, field string) error { + if sessionID == "" || def.IDPrefix == "" { + return nil + } + if strings.HasPrefix(sessionID, def.IDPrefix) { + return nil + } + return fmt.Errorf( + "%s: provider %s %q must use prefix %q", + def.Type, + field, + sessionID, + def.IDPrefix, + ) +} + +func providerSourceKeyMatches( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, + sourceKey string, +) bool { + if sourceKey == "" { + return true + } + for _, candidate := range []string{fingerprint.Key, source.FingerprintKey, source.Key} { + 
if candidate == "" { + continue + } + if sourceKey == candidate || strings.HasPrefix(sourceKey, candidate+"#") || + strings.HasPrefix(sourceKey, candidate+"::") || + strings.HasPrefix(sourceKey, candidate+"|") { + return true + } + } + return false +} + +func parseOutcomeResults(outcomes []parser.ParseResultOutcome) []parser.ParseResult { + results := make([]parser.ParseResult, 0, len(outcomes)) + for _, outcome := range outcomes { + results = append(results, outcome.Result) + } + return results +} + +func planProviderEffects( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, + outcome parser.ParseOutcome, +) ProviderPlannedEffects { + planned := ProviderPlannedEffects{} + if sourceKey := plannedSourceKey(source, fingerprint); sourceKey != "" { + planned.SourceKeys = append(planned.SourceKeys, sourceKey) + } + if outcome.SkipReason != parser.SkipNone { + if skipKey := plannedSkipKey(source, fingerprint); skipKey != "" { + planned.SkipCacheKeys = append(planned.SkipCacheKeys, skipKey) + } + } + for _, result := range outcome.Results { + if result.Result.Session.ID == "" || + result.DataVersion == parser.DataVersionUnspecified { + continue + } + planned.DataVersions = append(planned.DataVersions, ProviderPlannedDataVersion{ + SessionID: result.Result.Session.ID, + State: result.DataVersion, + RetryReason: result.RetryReason, + }) + } + for _, sourceErr := range outcome.SourceErrors { + planned.Diagnostics = append(planned.Diagnostics, ProviderPlannedDiagnostic{ + SourceKey: sourceErr.SourceKey, + DisplayPath: sourceErr.DisplayPath, + SessionID: sourceErr.SessionID, + Err: sourceErr.Err, + Retryable: sourceErr.Retryable, + }) + } + return planned +} + +func plannedSourceKey( + source parser.SourceRef, + fingerprint parser.SourceFingerprint, +) string { + if fingerprint.Key != "" { + return fingerprint.Key + } + if source.FingerprintKey != "" { + return source.FingerprintKey + } + return source.Key +} + +func plannedSkipKey( + source parser.SourceRef, 
+ fingerprint parser.SourceFingerprint, +) string { + if source.FingerprintKey != "" { + return source.FingerprintKey + } + return plannedSourceKey(source, fingerprint) +} diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go new file mode 100644 index 000000000..2f9b81862 --- /dev/null +++ b/internal/sync/provider_shadow_caller_test.go @@ -0,0 +1,1240 @@ +package sync + +import ( + "context" + "errors" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/db" + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestProcessFileShadowObservesProviderWithoutReplacingLegacy(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-caller.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile( + sourcePath, + []byte(testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON( + "compare through the caller", + "2026-06-01T10:00:00Z", + "/Users/dev/code/demo", + ), + testjsonl.ClaudeAssistantJSON( + "provider stayed shadow-only", + "2026-06-01T10:01:00Z", + ), + )), + 0o644, + )) + + legacyResults, legacyExcluded, err := parser.ParseClaudeSessionWithExclusions( + sourcePath, "demo", "devbox", + ) + require.NoError(t, err) + require.Len(t, legacyResults, 1) + require.Empty(t, legacyExcluded) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + providerResult := legacyResults[0] + providerResult.Session.File.Inode, providerResult.Session.File.Device = getFileIdentity(info) + hash, err := ComputeFileHash(sourcePath) + require.NoError(t, err) + providerResult.Session.File.Hash = hash + + source := parser.SourceRef{ + Provider: parser.AgentClaude, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + ProjectHint: "demo", 
+ } + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + }, + }, + fingerprint: parser.SourceFingerprint{ + Key: sourcePath, + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + }, + outcome: parser.ParseOutcome{ + Results: []parser.ParseResultOutcome{{ + Result: providerResult, + DataVersion: parser.DataVersionCurrent, + }}, + ResultSetComplete: true, + }, + }, + source: source, + } + var comparisons []ProviderShadowComparison + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationShadowCompare, + }, + ProviderShadowRecorder: func(comparison ProviderShadowComparison) { + comparisons = append(comparisons, comparison) + }, + }) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentClaude, + }) + + require.NoError(t, result.err) + require.Len(t, result.results, 1) + assert.Equal(t, "shadow-caller", result.results[0].Session.ID) + assert.Equal(t, parser.AgentClaude, result.results[0].Session.Agent) + require.Len(t, comparisons, 1) + assert.NoError(t, comparisons[0].Err) + assert.Empty(t, comparisons[0].Mismatches) + assert.Equal(t, sourcePath, comparisons[0].File.Path) + assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) +} + +func TestClassifyProviderChangedPathPassesStoredHintsToShadowProvider( + t *testing.T, +) { + root := t.TempDir() + eventPath := filepath.Join(root, "state.sqlite3-wal") + storedPath := filepath.Join(root, "state.sqlite3") + "#session-a" + database := dbtest.OpenTestDB(t) + require.NoError(t, 
database.UpsertSession(db.Session{ + ID: "claude:session-a", + Project: "demo", + Machine: "devbox", + Agent: string(parser.AgentClaude), + FilePath: &storedPath, + })) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + }, + }, + }, + watchPlan: parser.WatchPlan{Roots: []parser.WatchRoot{{ + Path: root, + }}}, + changedSources: []parser.SourceRef{{ + Provider: parser.AgentClaude, + Key: storedPath, + DisplayPath: storedPath, + FingerprintKey: storedPath, + ProjectHint: "demo", + }}, + } + engine := NewEngine(database, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationShadowCompare, + }, + }) + + files := engine.classifyPaths([]string{eventPath}) + + require.Len(t, provider.changedRequests, 1) + assert.Equal(t, eventPath, provider.changedRequests[0].Path) + assert.Equal(t, root, provider.changedRequests[0].WatchRoot) + assert.Equal(t, []string{storedPath}, provider.changedRequests[0].StoredSourcePaths) + require.Len(t, files, 1) + assert.Equal(t, storedPath, files[0].Path) + assert.Equal(t, "demo", files[0].Project) + assert.True(t, files[0].ForceParse) + assert.False(t, files[0].ProviderProcess) + require.NotNil(t, files[0].ProviderSource) + assert.Equal(t, storedPath, files[0].ProviderSource.DisplayPath) +} + +func TestClassifyProviderChangedPathRunsAlongsideLegacyClassifier( + t *testing.T, +) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-recognized.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile( + sourcePath, + 
[]byte(testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON( + "legacy already recognizes this", + "2026-06-01T10:00:00Z", + "/Users/dev/code/demo", + ), + )), + 0o644, + )) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + }, + }, + }, + watchPlan: parser.WatchPlan{Roots: []parser.WatchRoot{{ + Path: root, + }}}, + changedSources: []parser.SourceRef{{ + Provider: parser.AgentClaude, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + ProjectHint: "provider-project", + }}, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationShadowCompare, + }, + }) + + files := engine.classifyPaths([]string{sourcePath}) + + require.Len(t, provider.changedRequests, 1) + assert.Equal(t, sourcePath, provider.changedRequests[0].Path) + require.Len(t, files, 1) + assert.Equal(t, sourcePath, files[0].Path) + assert.True(t, files[0].ForceParse) + assert.False(t, files[0].ProviderProcess) + require.NotNil(t, files[0].ProviderSource) + assert.Equal(t, sourcePath, files[0].ProviderSource.DisplayPath) +} + +func TestProcessFileShadowUsesChangedPathProviderSource(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-provider-source.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile( + sourcePath, + []byte(testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON( + "provider source should win", + "2026-06-01T10:00:00Z", + "/Users/dev/code/demo", + ), + testjsonl.ClaudeAssistantJSON( + "force parse should 
propagate", + "2026-06-01T10:01:00Z", + ), + )), + 0o644, + )) + + legacyResults, legacyExcluded, err := parser.ParseClaudeSessionWithExclusions( + sourcePath, "demo", "devbox", + ) + require.NoError(t, err) + require.Len(t, legacyResults, 1) + require.Empty(t, legacyExcluded) + info, err := os.Stat(sourcePath) + require.NoError(t, err) + providerResult := legacyResults[0] + providerResult.Session.File.Inode, providerResult.Session.File.Device = getFileIdentity(info) + hash, err := ComputeFileHash(sourcePath) + require.NoError(t, err) + providerResult.Session.File.Hash = hash + + changedSource := parser.SourceRef{ + Provider: parser.AgentClaude, + Key: "changed-path-source", + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + ProjectHint: "demo", + } + findFound := false + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + }, + }, + fingerprint: parser.SourceFingerprint{ + Key: sourcePath, + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + }, + outcome: parser.ParseOutcome{ + Results: []parser.ParseResultOutcome{{ + Result: providerResult, + DataVersion: parser.DataVersionCurrent, + }}, + ResultSetComplete: true, + }, + }, + findFound: &findFound, + } + var comparisons []ProviderShadowComparison + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationShadowCompare, + }, + ProviderShadowRecorder: func(comparison ProviderShadowComparison) { + comparisons = append(comparisons, comparison) + }, + }) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: 
sourcePath, + Agent: parser.AgentClaude, + ForceParse: true, + ProviderSource: &changedSource, + }) + + require.NoError(t, result.err) + require.Len(t, comparisons, 1) + assert.NoError(t, comparisons[0].Err) + assert.Empty(t, comparisons[0].Mismatches) + assert.Equal(t, changedSource, comparisons[0].Source) + assert.Equal(t, changedSource, provider.parseRequest.Source) + assert.True(t, provider.parseRequest.ForceParse) + assert.Empty(t, provider.findRequest) +} + +func TestClassifyProviderChangedPathMarksAuthoritativeProviderProcess( + t *testing.T, +) { + root := t.TempDir() + sourcePath := filepath.Join(root, "-Users-dev-code-demo", "auth-recognized.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, os.WriteFile( + sourcePath, + []byte(testjsonl.JoinJSONL( + testjsonl.ClaudeUserJSON( + "authoritative provider owns this", + "2026-06-01T10:00:00Z", + "/Users/dev/code/demo", + ), + )), + 0o644, + )) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + }, + }, + }, + watchPlan: parser.WatchPlan{Roots: []parser.WatchRoot{{ + Path: root, + }}}, + changedSources: []parser.SourceRef{{ + Provider: parser.AgentClaude, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }}, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative, + }, + }) + + files := engine.classifyPaths([]string{sourcePath}) + + require.Len(t, provider.changedRequests, 1) + assert.Equal(t, sourcePath, provider.changedRequests[0].Path) + require.Len(t, 
files, 1) + assert.Equal(t, sourcePath, files[0].Path) + assert.True(t, files[0].ProviderProcess) + assert.False(t, files[0].ForceParse) + require.NotNil(t, files[0].ProviderSource) + assert.Equal(t, sourcePath, files[0].ProviderSource.DisplayPath) +} + +func TestDiscoverProviderSourcesOnlyRunsAuthoritativeProviders(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "provider-only.jsonl") + source := parser.SourceRef{ + Provider: parser.AgentClaude, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + ProjectHint: "provider-project", + } + makeEngine := func(mode parser.ProviderMigrationMode) (*Engine, *shadowCallerProvider) { + t.Helper() + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentClaude, + DisplayName: "Claude Code", + }, + }, + }, + discoverSources: []parser.SourceRef{source}, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentClaude: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentClaude: mode, + }, + }) + return engine, provider + } + + shadowEngine, shadowProvider := makeEngine(parser.ProviderMigrationShadowCompare) + files, failures := shadowEngine.discoverProviderSources(context.Background(), nil) + assert.Empty(t, files) + assert.Zero(t, failures) + assert.Empty(t, shadowProvider.calls) + + authoritativeEngine, authoritativeProvider := makeEngine( + parser.ProviderMigrationProviderAuthoritative, + ) + files, failures = authoritativeEngine.discoverProviderSources(context.Background(), nil) + require.Len(t, files, 1) + assert.Zero(t, failures) + assert.Equal(t, []string{"discover"}, authoritativeProvider.calls) + assert.Equal(t, sourcePath, files[0].Path) + 
assert.Equal(t, "provider-project", files[0].Project)
+	assert.True(t, files[0].ProviderProcess)
+	require.NotNil(t, files[0].ProviderSource)
+	assert.Equal(t, source, *files[0].ProviderSource)
+}
+
+func TestSyncAllProviderDiscoveryFailureSkipsFinishedWatermark(t *testing.T) { // Verifies that a Discover error from an authoritative provider makes SyncAll report one failure, set the started-at sync state, and leave finished-at empty.
+	root := t.TempDir()
+	discoverErr := errors.New("provider discovery failed")
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:        parser.AgentClaude,
+					DisplayName: "Claude Code",
+				},
+			},
+		},
+		discoverErr: discoverErr,
+	}
+	database := dbtest.OpenTestDB(t)
+	engine := NewEngine(database, EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{
+			parser.AgentClaude: {root},
+		},
+		Machine: "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative,
+		},
+	})
+
+	stats := engine.SyncAll(context.Background(), nil)
+
+	assert.Equal(t, []string{"discover"}, provider.calls)
+	assert.Equal(t, 1, stats.Failed)
+	started, err := database.GetSyncState(syncStateStartedAt)
+	require.NoError(t, err)
+	assert.NotEmpty(t, started)
+	finished, err := database.GetSyncState(syncStateFinishedAt)
+	require.NoError(t, err)
+	assert.Empty(t, finished) // no finished watermark was written for the failed run
+}
+
+func TestFindSourceFileFallsBackToAuthoritativeNonFileProvider(t *testing.T) { // Verifies that FindSourceFile("forge:session-a") returns the provider's source key and that FindSource received both the raw and the full session ID.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "forge.db") + "#session-a" // source key is a DB path plus a "#session" suffix, not a plain file path
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:        parser.AgentForge,
+					DisplayName: "Forge",
+				},
+			},
+		},
+		source: parser.SourceRef{
+			Provider:       parser.AgentForge,
+			Key:            sourcePath,
+			DisplayPath:    sourcePath,
+			FingerprintKey: sourcePath,
+		},
+	}
+	engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{
+
AgentDirs: map[parser.AgentType][]string{
+			parser.AgentForge: {root},
+		},
+		Machine: "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentForge: parser.ProviderMigrationProviderAuthoritative,
+		},
+	})
+
+	found := engine.FindSourceFile("forge:session-a")
+
+	assert.Equal(t, sourcePath, found)
+	assert.Equal(t, "session-a", provider.findRequest.RawSessionID)
+	assert.Equal(t, "forge:session-a", provider.findRequest.FullSessionID)
+}
+
+func TestProviderVirtualSourceBackedByEventPreservesHashInDBPath(t *testing.T) { // Verifies that a virtual source whose DB path itself contains '#' is matched by events on the DB file and its -wal/-shm siblings, but not by its parent directory.
+	dbPath := filepath.Join(t.TempDir(), "state#prod", "sessions.db") // directory name deliberately contains '#'
+	sourcePath := dbPath + "#session-a"
+
+	assert.True(t, providerVirtualSourceBackedByEvent(sourcePath, dbPath))
+	assert.True(t, providerVirtualSourceBackedByEvent(sourcePath, dbPath+"-wal"))
+	assert.True(t, providerVirtualSourceBackedByEvent(sourcePath, dbPath+"-shm"))
+	assert.False(t, providerVirtualSourceBackedByEvent(sourcePath, filepath.Dir(dbPath)))
+}
+
+func TestProcessFileShadowRecordsCachedSkipAsNotComparable(t *testing.T) { // Verifies that in shadow-compare mode a skip-cached file is skipped, records one comparison with reason "legacy skip", and triggers no provider calls.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "-Users-dev-code-demo", "shadow-skip.jsonl")
+	require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755))
+	require.NoError(t, os.WriteFile(
+		sourcePath,
+		[]byte(testjsonl.JoinJSONL(
+			testjsonl.ClaudeUserJSON(
+				"already cached",
+				"2026-06-01T10:00:00Z",
+				"/Users/dev/code/demo",
+			),
+		)),
+		0o644,
+	))
+	info, err := os.Stat(sourcePath)
+	require.NoError(t, err)
+
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:        parser.AgentClaude,
+					DisplayName: "Claude Code",
+				},
+			},
+		},
+		source: parser.SourceRef{
+			Provider: parser.AgentClaude,
+			Key:      sourcePath,
+		},
+	}
+	var comparisons []ProviderShadowComparison // filled by the ProviderShadowRecorder callback configured on the engine
+	engine :=
NewEngine(dbtest.OpenTestDB(t), EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{
+			parser.AgentClaude: {root},
+		},
+		Machine: "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentClaude: parser.ProviderMigrationShadowCompare,
+		},
+		ProviderShadowRecorder: func(comparison ProviderShadowComparison) {
+			comparisons = append(comparisons, comparison)
+		},
+	})
+	engine.InjectSkipCache(map[string]int64{ // seed the cache with the file's current mtime
+		sourcePath: info.ModTime().UnixNano(),
+	})
+
+	result := engine.processFile(context.Background(), parser.DiscoveredFile{
+		Path:  sourcePath,
+		Agent: parser.AgentClaude,
+	})
+
+	require.True(t, result.skip)
+	require.Len(t, comparisons, 1)
+	assert.Equal(t, "legacy skip", comparisons[0].NotComparableReason)
+	assert.Empty(t, comparisons[0].Mismatches)
+	assert.Empty(t, provider.calls)
+}
+
+func TestProcessFileProviderAuthoritativeUsesInjectedProvider(t *testing.T) { // Verifies that in provider-authoritative mode processFile returns the injected provider's parse result, mtime and forceReplace flag after calling fingerprint then parse.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "provider-owned.jsonl")
+	require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644))
+	info, err := os.Stat(sourcePath)
+	require.NoError(t, err)
+
+	source := parser.SourceRef{
+		Provider:       parser.AgentClaude,
+		Key:            sourcePath,
+		DisplayPath:    sourcePath,
+		FingerprintKey: sourcePath,
+		ProjectHint:    "provider-project",
+	}
+	providerResult := parser.ParseResult{ // canned result; the session ID and project are asserted on after processFile
+		Session: parser.ParsedSession{
+			ID:      "provider-owned",
+			Project: "provider-project",
+			Agent:   parser.AgentClaude,
+			Machine: "devbox",
+			File: parser.FileInfo{
+				Path:  sourcePath,
+				Mtime: info.ModTime().UnixNano(),
+			},
+		},
+		Messages: []parser.ParsedMessage{{
+			Role:      parser.RoleUser,
+			Content:   "parsed through provider",
+			Timestamp: info.ModTime(),
+			Ordinal:   0,
+		}},
+	}
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:
parser.AgentClaude,
+					DisplayName: "Claude Code",
+				},
+			},
+			fingerprint: parser.SourceFingerprint{
+				Key:     sourcePath + "#fingerprint",
+				Size:    info.Size(),
+				MTimeNS: info.ModTime().UnixNano(),
+			},
+			outcome: parser.ParseOutcome{
+				Results: []parser.ParseResultOutcome{{
+					Result:      providerResult,
+					DataVersion: parser.DataVersionCurrent,
+				}},
+				ResultSetComplete: true,
+				ForceReplace:      true,
+			},
+		},
+		source: source,
+	}
+	engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{
+			parser.AgentClaude: {root},
+		},
+		Machine: "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative,
+		},
+	})
+
+	result := engine.processFile(context.Background(), parser.DiscoveredFile{
+		Path:  sourcePath,
+		Agent: parser.AgentClaude,
+	})
+
+	require.NoError(t, result.err)
+	require.Len(t, result.results, 1)
+	assert.Equal(t, "provider-owned", result.results[0].Session.ID)
+	assert.Equal(t, "provider-project", result.results[0].Session.Project)
+	assert.Equal(t, info.ModTime().UnixNano(), result.mtime)
+	assert.True(t, result.forceReplace)
+	assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) // fingerprint is taken before parsing
+}
+
+func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { // Verifies that when one source yields two sessions with different DataVersion states, needsRetryForSession is true only for the DataVersionNeedsRetry session.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "multi-provider-owned.jsonl")
+	require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644))
+	info, err := os.Stat(sourcePath)
+	require.NoError(t, err)
+
+	source := parser.SourceRef{
+		Provider:       parser.AgentClaude,
+		Key:            sourcePath,
+		DisplayPath:    sourcePath,
+		FingerprintKey: sourcePath,
+	}
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:        parser.AgentClaude,
+					DisplayName: "Claude Code",
+				},
+			},
+			fingerprint: parser.SourceFingerprint{
+				Key:     sourcePath,
+				Size:    info.Size(),
+				MTimeNS: info.ModTime().UnixNano(),
+			},
+			outcome: parser.ParseOutcome{
+				Results: []parser.ParseResultOutcome{
+					{ // session that is already at the current data version
+						Result: parser.ParseResult{Session: parser.ParsedSession{
+							ID: "provider-current", Agent: parser.AgentClaude,
+						}},
+						DataVersion: parser.DataVersionCurrent,
+					},
+					{ // session the provider flags for retry
+						Result: parser.ParseResult{Session: parser.ParsedSession{
+							ID: "provider-retry", Agent: parser.AgentClaude,
+						}},
+						DataVersion: parser.DataVersionNeedsRetry,
+					},
+				},
+				ResultSetComplete: true,
+			},
+		},
+		source: source,
+	}
+	engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}},
+		Machine:   "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative,
+		},
+	})
+
+	result := engine.processFile(context.Background(), parser.DiscoveredFile{
+		Path:  sourcePath,
+		Agent: parser.AgentClaude,
+	})
+
+	require.NoError(t, result.err)
+	require.Len(t, result.results, 2)
+	assert.False(t, result.needsRetryForSession("provider-current"))
+	assert.True(t, result.needsRetryForSession("provider-retry"))
+}
+
+func TestProcessFileProviderAuthoritativeSuppressesUncleanSkipCache(t *testing.T) { // Table test: for a parse error, an incomplete result set, a source error, and a retry result, processFile must set cacheSkip and noCacheSkip, and collectAndBatch must leave no skip-cache entry.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "unclean-provider-owned.jsonl")
+	require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644))
+	info, err := os.Stat(sourcePath)
+	require.NoError(t, err)
+
+	source := parser.SourceRef{
+		Provider:       parser.AgentClaude,
+		Key:            sourcePath,
+		DisplayPath:    sourcePath,
+		FingerprintKey: sourcePath + "#source-key", // differs from Key so the cache assertions can check both candidates
+	}
+	makeEngine := func(outcome parser.ParseOutcome, parseErr error) *Engine { // fresh engine per case, with the provider returning the given outcome or error from Parse
+		t.Helper()
+		provider := &shadowCallerProvider{
+			shadowTestProvider: shadowTestProvider{
+				ProviderBase:
parser.ProviderBase{
+					Def: parser.AgentDef{
+						Type:        parser.AgentClaude,
+						DisplayName: "Claude Code",
+					},
+				},
+				fingerprint: parser.SourceFingerprint{
+					Key:     sourcePath + "#fingerprint",
+					Size:    info.Size(),
+					MTimeNS: info.ModTime().UnixNano(),
+				},
+				outcome:  outcome,
+				parseErr: parseErr,
+			},
+			source: source,
+		}
+		return NewEngine(dbtest.OpenTestDB(t), EngineConfig{
+			AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}},
+			Machine:   "devbox",
+			ProviderFactories: []parser.ProviderFactory{
+				shadowCallerFactory{provider: provider},
+			},
+			ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+				parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative,
+			},
+		})
+	}
+
+	tests := []struct { // each case is one way a provider parse can end without a clean, complete result
+		name     string
+		outcome  parser.ParseOutcome
+		parseErr error
+		wantErr  bool // true only when processFile itself is expected to return an error
+	}{
+		{
+			name:    "whole source parse error",
+			wantErr: true,
+			parseErr: errors.New(
+				"provider source failed",
+			),
+		},
+		{
+			name: "incomplete empty result set",
+			outcome: parser.ParseOutcome{
+				ResultSetComplete: false,
+			},
+		},
+		{
+			name: "source error",
+			outcome: parser.ParseOutcome{
+				ResultSetComplete: true,
+				SourceErrors: []parser.SourceError{{
+					SourceKey: sourcePath,
+					Err:       errors.New("session failed"),
+				}},
+			},
+		},
+		{
+			name: "retry result",
+			outcome: parser.ParseOutcome{
+				ResultSetComplete: true,
+				Results: []parser.ParseResultOutcome{{
+					Result: parser.ParseResult{Session: parser.ParsedSession{
+						ID: "provider-retry", Agent: parser.AgentClaude,
+					}},
+					DataVersion: parser.DataVersionNeedsRetry,
+				}},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			engine := makeEngine(tt.outcome, tt.parseErr)
+
+			result := engine.processFile(context.Background(), parser.DiscoveredFile{
+				Path:  sourcePath,
+				Agent: parser.AgentClaude,
+			})
+
+			if tt.wantErr {
+				require.Error(t, result.err)
+			} else {
+				require.NoError(t, result.err)
+			}
+			assert.True(t, result.cacheSkip)
+			assert.True(t, result.noCacheSkip) // expected in every case, error or not
+
+
stats := engine.collectAndBatch(
+				context.Background(),
+				singleSyncJob(syncJob{processResult: result, path: sourcePath}),
+				1,
+				1,
+				nil,
+				syncWriteDefault,
+			)
+			if tt.wantErr {
+				assert.Equal(t, 1, stats.Failed)
+			}
+			cache := engine.SnapshotSkipCache()
+			assert.NotContains(t, cache, sourcePath+"#source-key") // neither the fingerprint key ...
+			assert.NotContains(t, cache, sourcePath) // ... nor the plain path may be cached
+		})
+	}
+}
+
+func TestSyncSingleSessionProviderAuthoritativeBypassesProviderSkipCache(t *testing.T) { // Verifies that SyncSingleSession runs fingerprint and parse with ForceParse despite a pre-seeded skip-cache entry, and that the entry is absent afterwards.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "single-provider-owned.jsonl")
+	require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644))
+	info, err := os.Stat(sourcePath)
+	require.NoError(t, err)
+
+	sourceKey := sourcePath + "#source-key" // used as the source's FingerprintKey and as the seeded skip-cache key
+	providerResult := parser.ParseResult{
+		Session: parser.ParsedSession{
+			ID:           "provider-owned",
+			Project:      "provider-project",
+			Agent:        parser.AgentClaude,
+			Machine:      "devbox",
+			StartedAt:    info.ModTime(),
+			EndedAt:      info.ModTime(),
+			MessageCount: 1,
+			File: parser.FileInfo{
+				Path:  sourcePath,
+				Mtime: info.ModTime().UnixNano(),
+			},
+		},
+		Messages: []parser.ParsedMessage{{
+			Role:      parser.RoleUser,
+			Content:   "explicit provider resync",
+			Timestamp: info.ModTime(),
+			Ordinal:   0,
+		}},
+	}
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:        parser.AgentClaude,
+					DisplayName: "Claude Code",
+				},
+			},
+			fingerprint: parser.SourceFingerprint{
+				Key:     sourcePath + "#fingerprint",
+				Size:    info.Size(),
+				MTimeNS: info.ModTime().UnixNano(),
+			},
+			outcome: parser.ParseOutcome{
+				Results: []parser.ParseResultOutcome{{
+					Result:      providerResult,
+					DataVersion: parser.DataVersionCurrent,
+				}},
+				ResultSetComplete: true,
+			},
+		},
+		source: parser.SourceRef{
+			Provider:       parser.AgentClaude,
+			Key:            sourcePath,
+			DisplayPath:    sourcePath,
+			FingerprintKey: sourceKey,
+			ProjectHint:    "provider-project",
+		},
+	}
+	database := dbtest.OpenTestDB(t)
+	filePath := sourcePath
+
fileSize := info.Size()
+	fileMtime := info.ModTime().UnixNano()
+	require.NoError(t, database.UpsertSession(db.Session{ // pre-existing row whose file size and mtime match the file on disk
+		ID:        "provider-owned",
+		Project:   "old-project",
+		Machine:   "devbox",
+		Agent:     string(parser.AgentClaude),
+		FilePath:  &filePath,
+		FileSize:  &fileSize,
+		FileMtime: &fileMtime,
+	}))
+	engine := NewEngine(database, EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}},
+		Machine:   "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative,
+		},
+	})
+	engine.InjectSkipCache(map[string]int64{ // seed the skip cache under the fingerprint key with the current mtime
+		sourceKey: info.ModTime().UnixNano(),
+	})
+
+	require.NoError(t, engine.SyncSingleSession("provider-owned"))
+
+	assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls)
+	assert.True(t, provider.parseRequest.ForceParse)
+	cache := engine.SnapshotSkipCache()
+	assert.NotContains(t, cache, sourceKey)
+}
+
+func singleSyncJob(job syncJob) <-chan syncJob { // singleSyncJob returns an already-closed, buffered channel holding exactly job, for feeding collectAndBatch one result.
+	results := make(chan syncJob, 1) // buffer of 1 so the send below cannot block
+	results <- job
+	close(results)
+	return results
+}
+
+func TestProcessFileProviderAuthoritativeForceParseAllowsStaleSourceLookup(t *testing.T) { // Verifies that with engine.forceParse set, FindSource is called with RequireFreshSource false and Parse receives ForceParse true.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "force-provider-owned.jsonl")
+	require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644))
+	info, err := os.Stat(sourcePath)
+	require.NoError(t, err)
+
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:        parser.AgentClaude,
+					DisplayName: "Claude Code",
+				},
+			},
+			fingerprint: parser.SourceFingerprint{
+				Key:     sourcePath,
+				Size:    info.Size(),
+				MTimeNS: info.ModTime().UnixNano(),
+			},
+			outcome: parser.ParseOutcome{ResultSetComplete: true},
+		},
+		source: parser.SourceRef{
+			Provider:    parser.AgentClaude,
+			Key:         sourcePath,
+			DisplayPath:
sourcePath,
+			FingerprintKey: sourcePath + "#source-key",
+		},
+	}
+	engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}},
+		Machine:   "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative,
+		},
+	})
+	engine.forceParse = true // set the unexported flag directly; EngineConfig as used here has no field for it
+
+	result := engine.processFile(context.Background(), parser.DiscoveredFile{
+		Path:  sourcePath,
+		Agent: parser.AgentClaude,
+	})
+
+	require.NoError(t, result.err)
+	assert.False(t, provider.findRequest.RequireFreshSource)
+	assert.True(t, provider.parseRequest.ForceParse)
+}
+
+func TestProcessFileProviderAuthoritativeNotFoundFails(t *testing.T) { // Verifies that when FindSource reports not found, processFile fails with "provider source not found" and neither fingerprint nor parse is called.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "missing-provider-owned.jsonl")
+	require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) // the file exists on disk; only the provider lookup misses
+	found := false
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:        parser.AgentClaude,
+					DisplayName: "Claude Code",
+				},
+			},
+		},
+		findFound: &found, // non-nil and false makes FindSource return ok=false
+	}
+	engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}},
+		Machine:   "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative,
+		},
+	})
+
+	result := engine.processFile(context.Background(), parser.DiscoveredFile{
+		Path:  sourcePath,
+		Agent: parser.AgentClaude,
+	})
+
+	require.Error(t, result.err)
+	assert.Contains(t, result.err.Error(), "provider source not found")
+	assert.Empty(t, provider.calls) // FindSource does not append to calls, so this checks fingerprint/parse only
+}
+
+func
TestProcessFileProviderAuthoritativeTranslatesSkipReason(t *testing.T) { // Verifies that a provider SkipReason becomes a skipped result cached under the source's FingerprintKey rather than its path, and that a later clean result for the same key removes the entry.
+	root := t.TempDir()
+	sourcePath := filepath.Join(root, "skip-provider-owned.jsonl")
+	require.NoError(t, os.WriteFile(sourcePath, []byte("{}\n"), 0o644))
+	info, err := os.Stat(sourcePath)
+	require.NoError(t, err)
+
+	provider := &shadowCallerProvider{
+		shadowTestProvider: shadowTestProvider{
+			ProviderBase: parser.ProviderBase{
+				Def: parser.AgentDef{
+					Type:        parser.AgentClaude,
+					DisplayName: "Claude Code",
+				},
+			},
+			fingerprint: parser.SourceFingerprint{
+				Key:     sourcePath,
+				Size:    info.Size(),
+				MTimeNS: info.ModTime().UnixNano(),
+			},
+			outcome: parser.ParseOutcome{
+				ResultSetComplete: true,
+				SkipReason:        parser.SkipNoSession,
+			},
+		},
+		source: parser.SourceRef{
+			Provider:       parser.AgentClaude,
+			Key:            sourcePath,
+			DisplayPath:    sourcePath,
+			FingerprintKey: sourcePath + "#source-key",
+		},
+	}
+	engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{
+		AgentDirs: map[parser.AgentType][]string{parser.AgentClaude: {root}},
+		Machine:   "devbox",
+		ProviderFactories: []parser.ProviderFactory{
+			shadowCallerFactory{provider: provider},
+		},
+		ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{
+			parser.AgentClaude: parser.ProviderMigrationProviderAuthoritative,
+		},
+	})
+
+	result := engine.processFile(context.Background(), parser.DiscoveredFile{
+		Path:  sourcePath,
+		Agent: parser.AgentClaude,
+	})
+
+	require.NoError(t, result.err)
+	assert.True(t, result.skip)
+	assert.True(t, result.cacheSkip)
+	assert.Equal(t, sourcePath+"#source-key", result.cacheKey)
+	assert.Equal(t, info.ModTime().UnixNano(), result.mtime)
+	assert.Empty(t, result.results)
+
+	results := make(chan syncJob, 1) // NOTE(review): builds the same one-job closed channel as the singleSyncJob helper used further down
+	results <- syncJob{
+		processResult: result,
+		path:          sourcePath,
+	}
+	close(results)
+	stats := engine.collectAndBatch(context.Background(), results, 1, 1, nil, syncWriteDefault)
+
+	assert.Equal(t, 1, stats.Skipped)
+	cache := engine.SnapshotSkipCache()
+	assert.Equal(t,
info.ModTime().UnixNano(), cache[sourcePath+"#source-key"])
+	_, cachedByPath := cache[sourcePath]
+	assert.False(t, cachedByPath)
+
+	cleanResult := processResult{ // hand-built successful result that reuses the cache key of the skip above
+		results: []parser.ParseResult{{
+			Session: parser.ParsedSession{
+				ID:        "provider-clean",
+				Project:   "provider-project",
+				Agent:     parser.AgentClaude,
+				Machine:   "devbox",
+				StartedAt: info.ModTime(),
+				EndedAt:   info.ModTime(),
+				File: parser.FileInfo{
+					Path:  sourcePath,
+					Mtime: info.ModTime().UnixNano(),
+				},
+			},
+		}},
+		mtime:     info.ModTime().UnixNano(),
+		cacheSkip: true,
+		cacheKey:  sourcePath + "#source-key",
+	}
+	stats = engine.collectAndBatch(
+		context.Background(),
+		singleSyncJob(syncJob{processResult: cleanResult, path: sourcePath}),
+		1,
+		1,
+		nil,
+		syncWriteDefault,
+	)
+
+	assert.Equal(t, 1, stats.Synced)
+	cache = engine.SnapshotSkipCache()
+	assert.NotContains(t, cache, sourcePath+"#source-key")
+	assert.NotContains(t, cache, sourcePath)
+}
+
+type shadowCallerProvider struct { // shadowCallerProvider is a test double that adds canned discovery, lookup, watch-plan and changed-path behavior on top of shadowTestProvider.
+	shadowTestProvider
+	source          parser.SourceRef // returned by FindSource on success
+	findRequest     parser.FindSourceRequest // last request passed to FindSource
+	findFound       *bool // when non-nil and false, FindSource reports not found
+	watchPlan       parser.WatchPlan // returned by WatchPlan
+	changedSources  []parser.SourceRef // copied out by SourcesForChangedPath
+	changedRequests []parser.ChangedPathRequest // every request passed to SourcesForChangedPath, in order
+	changedErr      error // when set, returned by SourcesForChangedPath
+	discoverSources []parser.SourceRef // copied out by Discover
+	discoverErr     error // when set, returned by Discover
+}
+
+func (p *shadowCallerProvider) Discover( // Discover logs "discover" in calls, then returns discoverErr if set, otherwise a copy of discoverSources.
+	context.Context,
+) ([]parser.SourceRef, error) {
+	p.calls = append(p.calls, "discover")
+	if p.discoverErr != nil {
+		return nil, p.discoverErr
+	}
+	return append([]parser.SourceRef(nil), p.discoverSources...), nil // copy so callers cannot alias the fixture slice
+}
+
+func (p *shadowCallerProvider) FindSource( // FindSource stores req in findRequest and returns source, unless findFound points at false; it does not append to calls.
+	_ context.Context,
+	req parser.FindSourceRequest,
+) (parser.SourceRef, bool, error) {
+	p.findRequest = req
+	if p.findFound != nil && !*p.findFound {
+		return parser.SourceRef{}, false, nil
+	}
+	return p.source, true, nil
+}
+
+func (p *shadowCallerProvider) WatchPlan( // WatchPlan returns the canned watchPlan and never fails.
+	context.Context,
+) (parser.WatchPlan, error) {
+	return p.watchPlan, nil
+}
+
+func (p *shadowCallerProvider) SourcesForChangedPath( // SourcesForChangedPath appends req to changedRequests, then returns changedErr if set, otherwise a copy of changedSources.
+	_
context.Context,
+	req parser.ChangedPathRequest,
+) ([]parser.SourceRef, error) {
+	p.changedRequests = append(p.changedRequests, req) // recorded before the error check, so failing calls are captured too
+	if p.changedErr != nil {
+		return nil, p.changedErr
+	}
+	return append([]parser.SourceRef(nil), p.changedSources...), nil
+}
+
+type shadowCallerFactory struct { // shadowCallerFactory is the factory the tests register in EngineConfig.ProviderFactories; it always hands back one prebuilt provider.
+	provider *shadowCallerProvider
+}
+
+func (f shadowCallerFactory) Definition() parser.AgentDef { // Definition delegates to the wrapped provider.
+	return f.provider.Definition()
+}
+
+func (f shadowCallerFactory) Capabilities() parser.Capabilities { // Capabilities delegates to the wrapped provider.
+	return f.provider.Capabilities()
+}
+
+func (f shadowCallerFactory) NewProvider(parser.ProviderConfig) parser.Provider { // NewProvider ignores the config and returns the same provider instance on every call.
+	return f.provider
+}
diff --git a/internal/sync/provider_shadow_test.go b/internal/sync/provider_shadow_test.go
new file mode 100644
index 000000000..ec3a892f0
--- /dev/null
+++ b/internal/sync/provider_shadow_test.go
@@ -0,0 +1,541 @@
+package sync
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"go.kenn.io/agentsview/internal/parser"
+)
+
+func TestObserveProviderSourcePlansEffectsWithoutWriter(t *testing.T) { // Verifies that ObserveProviderSource calls fingerprint then parse, forwards machine, ForceParse and fingerprint into the parse request, and reports results, exclusions, skip reason and planned effects.
+	sourceErr := errors.New("bad session")
+	provider := &shadowTestProvider{
+		ProviderBase: parser.ProviderBase{
+			Def: parser.AgentDef{
+				Type:        parser.AgentCodex,
+				DisplayName: "Codex",
+			},
+		},
+		fingerprint: parser.SourceFingerprint{
+			Key:     "source-key",
+			Size:    123,
+			MTimeNS: 456,
+		},
+		outcome: parser.ParseOutcome{
+			Results: []parser.ParseResultOutcome{
+				{
+					Result: parser.ParseResult{
+						Session: parser.ParsedSession{
+							ID:    "codex:one",
+							Agent: parser.AgentCodex,
+						},
+					},
+					DataVersion: parser.DataVersionCurrent,
+				},
+				{
+					Result: parser.ParseResult{
+						Session: parser.ParsedSession{
+							ID:    "codex:two",
+							Agent: parser.AgentCodex,
+						},
+					},
+					DataVersion: parser.DataVersionNeedsRetry,
+					RetryReason: "fallback parser",
+				},
+			},
+			ExcludedSessionIDs: []string{"codex:excluded"},
+			SourceErrors: []parser.SourceError{
+				{
+					SourceKey: "source-key",
+					DisplayPath: "display-path",
+					SessionID:   "codex:bad",
+					Err:         sourceErr,
+					Retryable:   true,
+				},
+			},
+			SkipReason:   parser.SkipNonInteractive,
+			ForceReplace: true,
+		},
+	}
+
+	observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{
+		Source: parser.SourceRef{
+			Provider:       parser.AgentCodex,
+			Key:            "source-key",
+			DisplayPath:    "display-path",
+			FingerprintKey: "fingerprint-key",
+		},
+		Machine:    "devbox",
+		ForceParse: true,
+	})
+	require.NoError(t, err)
+
+	assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls)
+	assert.Equal(t, "devbox", provider.parseRequest.Machine)
+	assert.True(t, provider.parseRequest.ForceParse)
+	assert.Equal(t, int64(456), provider.parseRequest.Fingerprint.MTimeNS) // the fingerprint result is passed on to Parse
+
+	require.Len(t, observation.Results, 2)
+	assert.Equal(t, "codex:one", observation.Results[0].Session.ID)
+	assert.Equal(t, []string{"codex:excluded"}, observation.ExcludedSessionIDs)
+	assert.Equal(t, parser.SkipNonInteractive, observation.SkipReason)
+	assert.True(t, observation.ForceReplace)
+
+	assert.Equal(t, []string{"source-key"}, observation.Planned.SourceKeys) // planned source key comes from SourceRef.Key
+	assert.Equal(t, []string{"fingerprint-key"}, observation.Planned.SkipCacheKeys) // planned skip-cache key comes from SourceRef.FingerprintKey
+	assert.Equal(t, []string{"codex:one", "codex:two"}, observation.Planned.DataVersionSessionIDs())
+	assert.Equal(t, []string{"codex:two"}, observation.Planned.RetrySessionIDs())
+	require.Len(t, observation.Planned.Diagnostics, 1)
+	assert.Equal(t, "codex:bad", observation.Planned.Diagnostics[0].SessionID)
+	assert.True(t, observation.Planned.Diagnostics[0].Retryable)
+	assert.ErrorIs(t, observation.Planned.Diagnostics[0].Err, sourceErr)
+	assert.Empty(t, observation.Planned.SSEScopes)
+}
+
+func TestCompareProviderObservationDetectsSessionMetadataMismatch(t *testing.T) { // Verifies that results differing only in ParentSessionID produce a mismatch whose first entry mentions "session".
+	providerResult := parser.ParseResult{
+		Session: parser.ParsedSession{
+			ID:              "codex:one",
+			Agent:           parser.AgentCodex,
+			Project:         "proj",
+			Machine:         "devbox",
+			ParentSessionID: "codex:provider-parent",
+
},
+	}
+	legacyResult := providerResult // struct copy; only the parent session ID is changed below
+	legacyResult.Session.ParentSessionID = "codex:legacy-parent"
+
+	mismatches := compareProviderObservationToProcessResult(
+		ProviderObservation{
+			Results: []parser.ParseResult{providerResult},
+		},
+		processResult{
+			results: []parser.ParseResult{legacyResult},
+		},
+		parser.DiscoveredFile{},
+	)
+
+	require.NotEmpty(t, mismatches)
+	assert.Contains(t, mismatches[0], "session")
+}
+
+func TestCompareProviderObservationDetectsSourceErrorContentMismatch(t *testing.T) { // Verifies that source errors with the same session ID and path but different error text are reported as a "source_errors" mismatch.
+	mismatches := compareProviderObservationToProcessResult(
+		ProviderObservation{
+			SourceErrors: []parser.SourceError{{
+				SourceKey:   "source-key",
+				DisplayPath: "source.jsonl",
+				SessionID:   "codex:bad",
+				Err:         errors.New("provider parse failed"),
+			}},
+		},
+		processResult{
+			sessionErrs: []sessionParseError{{
+				sessionID:   "codex:bad",
+				virtualPath: "source.jsonl",
+				err:         errors.New("legacy parse failed"),
+			}},
+		},
+		parser.DiscoveredFile{},
+	)
+
+	require.NotEmpty(t, mismatches)
+	assert.Contains(t, mismatches[0], "source_errors")
+}
+
+func TestCompareProviderObservationNormalizesLegacySourceErrorSessionID(t *testing.T) { // Verifies that a legacy error keyed by the raw ID "bad" matches a provider error keyed by "codex:bad" when the discovered file's agent is Codex.
+	mismatches := compareProviderObservationToProcessResult(
+		ProviderObservation{
+			SourceErrors: []parser.SourceError{{
+				SourceKey:   "source.jsonl#bad",
+				DisplayPath: "source.jsonl#bad",
+				SessionID:   "codex:bad",
+				Err:         errors.New("parse failed"),
+				Retryable:   true,
+			}},
+			Planned: ProviderPlannedEffects{
+				Diagnostics: []ProviderPlannedDiagnostic{{
+					SourceKey:   "source.jsonl#bad",
+					DisplayPath: "source.jsonl#bad",
+					SessionID:   "codex:bad",
+					Err:         errors.New("parse failed"),
+					Retryable:   true,
+				}},
+			},
+		},
+		processResult{
+			sessionErrs: []sessionParseError{{
+				sessionID:   "bad", // legacy side carries the unprefixed session ID
+				virtualPath: "source.jsonl#bad",
+				err:         errors.New("parse failed"),
+			}},
+		},
+		parser.DiscoveredFile{Agent: parser.AgentCodex},
+	)
+
+	assert.Empty(t, mismatches)
+}
+
+func TestCompareProviderObservationDetectsPlannedDataVersionMismatch(t
*testing.T) { // Verifies that a provider-planned NeedsRetry data version is reported as a "planned.data_versions" mismatch when the legacy result does not set needsRetry.
+	result := parser.ParseResult{
+		Session: parser.ParsedSession{
+			ID:    "codex:one",
+			Agent: parser.AgentCodex,
+			File: parser.FileInfo{
+				Path: "source.jsonl",
+			},
+		},
+	}
+
+	mismatches := compareProviderObservationToProcessResult(
+		ProviderObservation{
+			Results: []parser.ParseResult{result},
+			Planned: ProviderPlannedEffects{
+				SourceKeys: []string{"source.jsonl"},
+				DataVersions: []ProviderPlannedDataVersion{{
+					SessionID:   "codex:one",
+					State:       parser.DataVersionNeedsRetry,
+					RetryReason: "fallback parser",
+				}},
+			},
+		},
+		processResult{
+			results: []parser.ParseResult{result},
+		},
+		parser.DiscoveredFile{Path: "source.jsonl"},
+	)
+
+	require.NotEmpty(t, mismatches)
+	assert.Contains(t, mismatches[0], "planned.data_versions")
+}
+
+func TestCompareProviderObservationIgnoresProviderOnlyRetryReason(t *testing.T) { // Verifies that the same NeedsRetry plan compares clean once the legacy result sets needsRetry, even though only the provider side carries a RetryReason.
+	result := parser.ParseResult{
+		Session: parser.ParsedSession{
+			ID:    "codex:one",
+			Agent: parser.AgentCodex,
+			File: parser.FileInfo{
+				Path: "source.jsonl",
+			},
+		},
+	}
+
+	mismatches := compareProviderObservationToProcessResult(
+		ProviderObservation{
+			Results: []parser.ParseResult{result},
+			Planned: ProviderPlannedEffects{
+				SourceKeys: []string{"source.jsonl"},
+				DataVersions: []ProviderPlannedDataVersion{{
+					SessionID:   "codex:one",
+					State:       parser.DataVersionNeedsRetry,
+					RetryReason: "fallback parser",
+				}},
+			},
+		},
+		processResult{
+			results:    []parser.ParseResult{result},
+			needsRetry: true, // the only input that differs from the mismatch test above
+		},
+		parser.DiscoveredFile{Path: "source.jsonl"},
+	)
+
+	assert.Empty(t, mismatches)
+}
+
+func TestCompareProviderObservationIgnoresProviderOnlySSEScopes(t *testing.T) { // Verifies that planned SSE scopes, set only on the provider side, do not produce a mismatch.
+	result := parser.ParseResult{
+		Session: parser.ParsedSession{
+			ID:    "codex:one",
+			Agent: parser.AgentCodex,
+			File: parser.FileInfo{
+				Path: "source.jsonl",
+			},
+		},
+	}
+
+	mismatches := compareProviderObservationToProcessResult(
+		ProviderObservation{
+			Results: []parser.ParseResult{result},
+			Planned: ProviderPlannedEffects{
+				SourceKeys:
[]string{"source.jsonl"},
+				DataVersions: []ProviderPlannedDataVersion{{
+					SessionID: "codex:one",
+					State:     parser.DataVersionCurrent,
+				}},
+				SSEScopes: []string{"sessions"},
+			},
+		},
+		processResult{
+			results: []parser.ParseResult{result},
+		},
+		parser.DiscoveredFile{Path: "source.jsonl"},
+	)
+
+	assert.Empty(t, mismatches)
+}
+
+func TestObserveProviderSourceRejectsProviderMismatch(t *testing.T) { // Verifies that a source owned by a different provider is rejected with an error naming both agents, before any provider method is called.
+	provider := &shadowTestProvider{
+		ProviderBase: parser.ProviderBase{
+			Def: parser.AgentDef{
+				Type:        parser.AgentCodex,
+				DisplayName: "Codex",
+			},
+		},
+	}
+
+	observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{
+		Source: parser.SourceRef{
+			Provider: parser.AgentClaude, // deliberately not the provider's own agent type
+			Key:      "source-key",
+		},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), string(parser.AgentClaude))
+	assert.Contains(t, err.Error(), string(parser.AgentCodex))
+	assert.Empty(t, observation)
+	assert.Empty(t, provider.calls)
+}
+
+func TestObserveProviderSourceRejectsCrossProviderResult(t *testing.T) { // Verifies that a parsed session whose Agent differs from the provider's type is rejected after parsing with a "session agent" error.
+	provider := &shadowTestProvider{
+		ProviderBase: parser.ProviderBase{
+			Def: parser.AgentDef{
+				Type:        parser.AgentCodex,
+				DisplayName: "Codex",
+				IDPrefix:    "codex:",
+			},
+		},
+		outcome: parser.ParseOutcome{
+			Results: []parser.ParseResultOutcome{
+				{
+					Result: parser.ParseResult{
+						Session: parser.ParsedSession{
+							ID:    "codex:one",
+							Agent: parser.AgentClaude, // wrong agent for a Codex provider
+						},
+					},
+				},
+			},
+		},
+	}
+
+	observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{
+		Source: parser.SourceRef{
+			Provider: parser.AgentCodex,
+			Key:      "source-key",
+		},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "session agent")
+	assert.Contains(t, err.Error(), string(parser.AgentClaude))
+	assert.Contains(t, err.Error(), string(parser.AgentCodex))
+	assert.Empty(t, observation)
+	assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls)
+}
+
+func TestObserveProviderSourceRejectsForeignSessionID(t
*testing.T) { // Verifies that a session ID lacking the provider's IDPrefix is rejected after parsing with a "session id" error naming the ID and the prefix.
+	provider := &shadowTestProvider{
+		ProviderBase: parser.ProviderBase{
+			Def: parser.AgentDef{
+				Type:        parser.AgentCodex,
+				DisplayName: "Codex",
+				IDPrefix:    "codex:",
+			},
+		},
+		outcome: parser.ParseOutcome{
+			Results: []parser.ParseResultOutcome{
+				{
+					Result: parser.ParseResult{
+						Session: parser.ParsedSession{
+							ID:    "claude:one", // does not start with "codex:"
+							Agent: parser.AgentCodex,
+						},
+					},
+				},
+			},
+		},
+	}
+
+	observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{
+		Source: parser.SourceRef{
+			Provider: parser.AgentCodex,
+			Key:      "source-key",
+		},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "session id")
+	assert.Contains(t, err.Error(), "claude:one")
+	assert.Contains(t, err.Error(), "codex:")
+	assert.Empty(t, observation)
+	assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls)
+}
+
+func TestObserveProviderSourceRejectsForeignNestedSessionID(t *testing.T) { // Verifies that a ParentSessionID lacking the provider's IDPrefix is rejected with a "parent session id" error, even when the session's own ID is valid.
+	provider := &shadowTestProvider{
+		ProviderBase: parser.ProviderBase{
+			Def: parser.AgentDef{
+				Type:        parser.AgentCodex,
+				DisplayName: "Codex",
+				IDPrefix:    "codex:",
+			},
+		},
+		outcome: parser.ParseOutcome{
+			Results: []parser.ParseResultOutcome{
+				{
+					Result: parser.ParseResult{
+						Session: parser.ParsedSession{
+							ID:              "codex:one",
+							Agent:           parser.AgentCodex,
+							ParentSessionID: "claude:parent",
+						},
+					},
+				},
+			},
+		},
+	}
+
+	observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{
+		Source: parser.SourceRef{
+			Provider: parser.AgentCodex,
+			Key:      "source-key",
+		},
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "parent session id")
+	assert.Contains(t, err.Error(), "claude:parent")
+	assert.Contains(t, err.Error(), "codex:")
+	assert.Empty(t, observation)
+	assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls)
+}
+
+func TestObserveProviderSourceRejectsEmptyDiagnosticSourceKey(t *testing.T) { // Verifies that a SourceError without a SourceKey is rejected with a "diagnostic source key ... required" error.
+	sourceErr := errors.New("bad source")
+	provider := &shadowTestProvider{
+ ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCodex, + DisplayName: "Codex", + IDPrefix: "codex:", + }, + }, + outcome: parser.ParseOutcome{ + SourceErrors: []parser.SourceError{ + { + SessionID: "codex:bad", + Err: sourceErr, + }, + }, + }, + } + + observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ + Source: parser.SourceRef{ + Provider: parser.AgentCodex, + Key: "source-key", + }, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "diagnostic source key") + assert.Contains(t, err.Error(), "required") + assert.Empty(t, observation) + assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) +} + +func TestObserveProviderSourceRejectsUnrelatedDiagnosticSourceKey(t *testing.T) { + sourceErr := errors.New("bad source") + provider := &shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCodex, + DisplayName: "Codex", + IDPrefix: "codex:", + }, + }, + fingerprint: parser.SourceFingerprint{ + Key: "fingerprint-key", + }, + outcome: parser.ParseOutcome{ + SourceErrors: []parser.SourceError{ + { + SourceKey: "other-source", + SessionID: "codex:bad", + Err: sourceErr, + }, + }, + }, + } + + observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ + Source: parser.SourceRef{ + Provider: parser.AgentCodex, + Key: "source-key", + FingerprintKey: "source-fingerprint-key", + }, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "diagnostic source key") + assert.Contains(t, err.Error(), "other-source") + assert.Empty(t, observation) + assert.Equal(t, []string{"fingerprint", "parse"}, provider.calls) +} + +func TestObserveProviderSourceStopsAfterFingerprintError(t *testing.T) { + fingerprintErr := errors.New("stat failed") + provider := &shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCodex, + DisplayName: "Codex", + }, + }, + 
fingerprintErr: fingerprintErr, + } + + observation, err := ObserveProviderSource(context.Background(), provider, ProviderObserveRequest{ + Source: parser.SourceRef{ + Provider: parser.AgentCodex, + Key: "source-key", + }, + }) + require.ErrorIs(t, err, fingerprintErr) + assert.Empty(t, observation) + assert.Equal(t, []string{"fingerprint"}, provider.calls) +} + +type shadowTestProvider struct { + parser.ProviderBase + calls []string + fingerprint parser.SourceFingerprint + fingerprintErr error + outcome parser.ParseOutcome + parseErr error + parseRequest parser.ParseRequest +} + +func (p *shadowTestProvider) Fingerprint( + context.Context, + parser.SourceRef, +) (parser.SourceFingerprint, error) { + p.calls = append(p.calls, "fingerprint") + if p.fingerprintErr != nil { + return parser.SourceFingerprint{}, p.fingerprintErr + } + return p.fingerprint, nil +} + +func (p *shadowTestProvider) Parse( + _ context.Context, + req parser.ParseRequest, +) (parser.ParseOutcome, error) { + p.calls = append(p.calls, "parse") + p.parseRequest = req + if p.parseErr != nil { + return parser.ParseOutcome{}, p.parseErr + } + return p.outcome, nil +}