diff --git a/design/extension.md b/design/extension.md index af1479f8..fb59ffc8 100644 --- a/design/extension.md +++ b/design/extension.md @@ -1020,12 +1020,101 @@ no lockfile-retry-budget or SQLite-busy exhaustion at iteration end. ### W3+ inheritance -The lifecycle services' shape is W3-stable. The unified loader -(`KindAdapter`) work in W3 collapses the five per-loader +The lifecycle services' shape is W4-stable. The unified loader +(`KindAdapter`) work in W4 collapses the five per-loader `bundleAndIndexOne` methods to one dispatch, but the install/remove/ upgrade services keep their current public surface. CLI command files' direct service construction (in `extension_pull.ts`, -`extension_update.ts`, `extension_rm.ts`, etc.) persists past W3. +`extension_update.ts`, `extension_rm.ts`, etc.) persists past W4. + +### W3: ReconcileFromDisk + freshness as aggregate query + +W3 introduces `ReconcileFromDiskService` +(`src/libswamp/extensions/reconcile_from_disk_service.ts`) and rewrites +the freshness contract as a two-layer model. + +**Two-layer freshness model.** The freshness contract has two distinct +concerns: + +1. **Type resolution layer** (W3 makes this trivial): + `isFresh(state) = state === "Indexed"`. Constant-time aggregate + query. All other RowState tags are not visible to type resolution. + +2. **State maintenance layer** (split between two paths): + - **Cold-start / explicit reconcile:** `ReconcileFromDiskService`. + Full disk walk across all three origin types (locals, pulled, + source-mounted). Post-hoc state repair. Fires when + `anyKindNeedsInvalidation()` returns true (i.e. any kind's + `isPopulated` flag is false). + - **Warm-start / hot path:** `findStaleFiles` (preserved from + pre-W3). Incremental fingerprint comparison. Fires per-loader + `buildIndex` when the catalog is already populated. + +The original W3 plan targeted slimming `findStaleFiles` to a ~20 LOC +deletion-sweep shim. 
Ground truth showed that warm-start incremental +detection is load-bearing for the development workflow — 12 loader +tests exercise this path. `findStaleFiles` retains its fingerprint +comparison. The scope change is deliberate. + +**ReconcileFromDisk semantics.** The service: + +- Walks on-disk source trees for all origin types. +- Loads current aggregate state via `repository.loadAll()`. +- Diffs disk vs aggregate and emits RowState transitions using the + existing Extension aggregate methods. +- Delegates to per-loader `bundleAndIndexOne` for type extraction — + NOT `InstallExtensionService`. The source is already on disk and the + lockfile already exists; reconcile is post-hoc state repair. +- Saves via `repository.saveAll()` inside a single SQLite transaction. + +**Locals vs pulled reconcile matrix:** + +| Origin | Source on disk | Source in aggregate | Transition | +|--------|---------------|--------------------|-| +| Local | present | absent | `bundleAndIndexOne` → `Indexed` | +| Local | absent | present | `markSourceMissing` → `OrphanedBundleOnly` or `Tombstoned` | +| Pulled | present | absent | `bundleAndIndexOne` → `Indexed` | +| Pulled | absent | lockfile present | `recordEntryPointUnreadable` (re-fetch is W4) | +| Pulled | absent | lockfile absent | `Tombstoned` (orphan from failed rm) | +| Source-mounted | — | — | Follows local semantics | + +**Trigger points:** cold-start (when `anyKindNeedsInvalidation()` +returns true) + explicit `swamp doctor extensions` call. NOT on every +command — reconcile would dominate the hot-path performance. + +**dryRun mode:** `execute({ dryRun: true })` collects transitions +without calling `repository.saveAll()`. Returns structured +`ReconcileTransition` records (`{ source, fromState, toState, reason }`) +that W6's `swamp doctor extensions` will render directly. 
+ +**Transition-count guardrail:** if a reconcile run would transition +> 50% of existing rows (minimum 10 rows), the run aborts and returns +the transitions without applying them. Catches mass-tombstone bugs. + +**enforceI2 transform.** W3 replaces the `IntraExtensionDuplicateType` +throw in the Extension aggregate's I2 enforcement with a +deterministic-winner + tombstone-loser transform. The Source with the +lexicographically smaller `canonicalPath` wins; the loser is tombstoned +with reason `"renamed"`. Cross-aggregate uniqueness (I-Repo-1) still +throws `DuplicateTypeError` at the repository layer. + +**UNREADABLE_DEP_SENTINEL removal.** The sentinel constant was renamed +to `UNREADABLE_PLACEHOLDER` (internal to `computeSourceFingerprint`). +No external code compares against it. Broken transitive deps produce a +stable fingerprint; the failure surfaces at `bundleAndIndexOne` as +`BundleBuildFailed`. Existing catalog rows with the old sentinel value +are caught by the first reconcile run — no schema migration needed. + +**Forward-only revert posture.** Same as W1b/W2: revert means deleting +`_extension_catalog.db` and rebuilding from disk on the next cold-start. 
+ +**Out of scope (deferred):** + +- Bundle cache file eviction (W3 detects `OrphanedBundleOnly` but does + NOT delete bundle files) +- Loader unification / `KindAdapter` → W4 +- `legacyStore` escape hatch removal → W4 +- `swamp doctor extensions` aggregate-state rendering → W6 ## Lazy Per-Bundle Loading diff --git a/src/cli/mod.ts b/src/cli/mod.ts index f0e6b5e6..0f30d6d0 100644 --- a/src/cli/mod.ts +++ b/src/cli/mod.ts @@ -77,6 +77,7 @@ import "../domain/datastore/datastore_types.ts"; // Import builtin reports to trigger registration import "../domain/reports/builtin/mod.ts"; import { EmbeddedDenoRuntime } from "../infrastructure/runtime/embedded_deno_runtime.ts"; +import { ReconcileFromDiskService } from "../libswamp/mod.ts"; import { type RepoMarkerData, RepoMarkerRepository, @@ -274,6 +275,19 @@ export async function configureExtensionLoaders( repoRoot: repoDir, }); + // W3: cold-start reconcile. Runs once when any kind is not yet + // populated — repairs aggregate state from disk before the loaders + // fire. NOT on every command; only on cold-start. + if (repository.anyKindNeedsInvalidation()) { + const reconciler = new ReconcileFromDiskService({ + denoRuntime, + repository, + lockfileRepository, + repoDir, + }); + await reconciler.execute(); + } + modelRegistry.setLoader(() => loadUserModels( repoDir, diff --git a/src/domain/extensions/bundle_freshness.ts b/src/domain/extensions/bundle_freshness.ts index 2cdee1e9..5c658910 100644 --- a/src/domain/extensions/bundle_freshness.ts +++ b/src/domain/extensions/bundle_freshness.ts @@ -17,12 +17,9 @@ // You should have received a copy of the GNU Affero General Public License // along with Swamp. If not, see . -import { getLogger } from "@logtape/logtape"; import { relative, resolve } from "@std/path"; import { resolveLocalImports } from "../models/local_import_resolver.ts"; -const logger = getLogger(["swamp", "extensions", "bundle-freshness"]); - /** * The extension-catalog kinds this helper can query. 
Declared * domain-local so the freshness check does not import ExtensionKind @@ -102,15 +99,18 @@ export interface StaleFile { } /** - * Sentinel emitted in place of a real sha-256 hex hash when a transitive - * dep cannot be read (broken symlink, deleted file, FilesystemLoop). The - * fingerprint then encodes "this dep is currently unreadable" as part of - * the source state, so a stable broken state produces a stable - * fingerprint instead of marking the entry permanently stale (#208). - * Cannot collide with a real hash — "MISSING" contains non-hex - * characters. + * Placeholder emitted in place of a real sha-256 hex hash when a + * transitive dep cannot be read (broken symlink, deleted file, + * FilesystemLoop). Encodes "this dep is currently unreadable" into the + * fingerprint so a stable broken state produces a stable fingerprint + * and repairing the dep correctly invalidates it (#208). Cannot collide + * with a real hash — contains non-hex characters. + * + * Internal to computeSourceFingerprint. No external code compares + * against this value — ReconcileFromDisk handles broken-dep behavior + * via the BundleBuildFailed RowState transition. */ -const UNREADABLE_DEP_SENTINEL = "MISSING"; +const UNREADABLE_PLACEHOLDER = "MISSING"; /** * Computes a content-based fingerprint covering an entry point and every @@ -123,10 +123,8 @@ const UNREADABLE_DEP_SENTINEL = "MISSING"; * resolveLocalImports stops at the boundary dir, matching the bundler's * own dependency scope. * - * Unreadable deps (broken symlinks, deleted files, FilesystemLoop) - * produce an UNREADABLE_DEP_SENTINEL entry instead of throwing — so a - * stable broken state yields a stable fingerprint, and repairing the - * dep correctly invalidates it (#208). + * Unreadable deps produce a stable placeholder entry instead of + * throwing, so a stable broken state yields a stable fingerprint (#208). 
*/ export async function computeSourceFingerprint( absolutePath: string, @@ -142,7 +140,7 @@ export async function computeSourceFingerprint( try { fileHash = await hashFile(file, cache); } catch { - fileHash = UNREADABLE_DEP_SENTINEL; + fileHash = UNREADABLE_PLACEHOLDER; } entries.push(`${relPath}:${fileHash}`); } @@ -194,15 +192,6 @@ async function hashFile( return toHex(digest); } -async function bundleExists(bundlePath: string): Promise { - try { - await Deno.stat(bundlePath); - return true; - } catch { - return false; - } -} - function toHex(buffer: ArrayBuffer): string { const view = new Uint8Array(buffer); let out = ""; @@ -242,20 +231,23 @@ export interface FindStaleFilesParams { } /** - * Walks all source directories, compares each file's current fingerprint - * against the catalog-stored fingerprint, and returns the files that need - * rebundling. Also removes catalog entries whose source file has been - * deleted. - * - * A file is stale when — - * 1. It is new (no catalog entry), or - * 2. Its computed fingerprint differs from the catalog's, or - * 3. Fingerprint computation fails (e.g. dep disappeared mid-scan). + * W3 freshness query: a Source is fresh iff its RowState is `Indexed`. + * All other states are not visible to type resolution. An absent state + * (`undefined`) is NOT fresh — the source needs indexing. + */ +export function isFresh(state: string | undefined): boolean { + return state === "Indexed"; +} + +/** + * Warm-start incremental change detection. Walks source directories, + * compares each file's current fingerprint against the catalog, and + * returns files that need rebundling. Also removes catalog entries + * whose source file has been deleted. * - * Previously this was mtime-based. mtime is fragile — atomic-rename - * saves, rsync --times, and sub-millisecond edits can all leave the - * source mtime <= catalog mtime while the content has changed. Content - * fingerprint is strictly stronger. 
+ * Cold-start reconciliation is handled by ReconcileFromDisk (W3). + * This function handles the warm-start path: catalog is populated, + * a few files may have changed since the last run. */ export async function findStaleFiles( params: FindStaleFilesParams, @@ -303,35 +295,14 @@ export async function findStaleFiles( continue; } - // Source content is unchanged, but the cached bundle may have been - // deleted out from under us (manual rm, partial GC, failed previous - // bundle attempt). Without this check the catalog row stays "fresh" - // and a downstream importBundleByPath ENOENTs (swamp-club#212). - // - // ValidationFailed rows are skipped: rebundling them is a no-op - // cycle — bundle still fails schema validation, markCatalogValidationFailed - // re-pins the same fingerprint, every command spawns deno bundle. - // That is the inverse of the loop swamp-club#209 sealed. The W1a - // migration absorbed the legacy `validation_failed` boolean into - // the `state` column; this reader migrated together with the - // writer (markCatalogValidationFailed) so the W1a→W1b window - // never has a writer/reader schism on this guard. if ( catalogEntry.bundle_path && catalogEntry.state !== "ValidationFailed" && !(await bundleExists(catalogEntry.bundle_path)) ) { - logger - .warn`Rebundling ${relativePath}: cached bundle missing from disk`; stale.push({ absolutePath, relativePath, baseDir: dir }); } } catch { - // Defensive backstop only. computeSourceFingerprint is total - // since #208 — unreadable transitive deps produce a sentinel - // entry rather than throwing. Anything reaching this catch is - // an unforeseen failure (Deno API change, crypto.subtle panic, - // boundary-dir stat race). Force a rebundle so the error - // surfaces to the user. 
stale.push({ absolutePath, relativePath, baseDir: dir }); } } @@ -346,6 +317,15 @@ export async function findStaleFiles( return stale; } +async function bundleExists(bundlePath: string): Promise { + try { + await Deno.stat(bundlePath); + return true; + } catch { + return false; + } +} + /** * Minimal write-side catalog view the validation-failure helper needs. * Same one-way domain→infrastructure boundary as FreshnessCatalog. @@ -397,7 +377,7 @@ export interface MarkCatalogValidationFailedParams { * migrate together so the column is genuinely vestigial during the * W1a → W1b release window. The column itself drops in W1b. * - * Symmetric to the UNREADABLE_DEP_SENTINEL fix in #208: that one made + * Symmetric to the unreadable-dep fix in #208: that one made * computeSourceFingerprint total for unreadable transitive deps; this * one makes the catalog write total for schema-invalid sources. Both * encode "stable broken state" into the freshness contract. diff --git a/src/domain/extensions/bundle_freshness_test.ts b/src/domain/extensions/bundle_freshness_test.ts index d975f513..54ef02f6 100644 --- a/src/domain/extensions/bundle_freshness_test.ts +++ b/src/domain/extensions/bundle_freshness_test.ts @@ -310,8 +310,6 @@ Deno.test("findStaleFiles: catches mtime-preserving content change (#125)", asyn source_fingerprint: origFp, }); - // Swap content, restore the old mtime — this is exactly what - // atomic-rename saves and rsync --times do in the wild. await Deno.writeTextFile(file, "export const a = 2;"); await Deno.utime(file, origMtime, origMtime); @@ -721,7 +719,6 @@ Deno.test("findStaleFiles: broken transitive dep — stale once, then stable (#2 ); await Deno.writeTextFile(dep, "export const x = 1;"); - // Step 1: all readable. Compute fingerprint F1, store in catalog. 
const f1 = await computeSourceFingerprint(entry, dir); const catalog = new FakeCatalog(); catalog.add({ @@ -736,9 +733,6 @@ Deno.test("findStaleFiles: broken transitive dep — stale once, then stable (#2 source_fingerprint: f1, }); - // Step 2: break the transitive dep. findStaleFiles must mark the - // entry stale on this pass — the dep change is a real fingerprint - // change and the rebundle path needs to fire to refresh the row. await Deno.remove(dep); await Deno.symlink("/nonexistent/path/dep.ts", dep, { type: "file" }); @@ -755,8 +749,6 @@ Deno.test("findStaleFiles: broken transitive dep — stale once, then stable (#2 ); assertEquals(firstPass[0].relativePath, "entry.ts"); - // Step 3: simulate the rebundle path updating the catalog row to - // the new sentinel-bearing fingerprint F2. const f2 = await computeSourceFingerprint(entry, dir); assertNotEquals(f1, f2); catalog.removeBySourcePath(entry); @@ -772,11 +764,6 @@ Deno.test("findStaleFiles: broken transitive dep — stale once, then stable (#2 source_fingerprint: f2, }); - // Step 4: subsequent passes — the regression's load-bearing claim. - // With the row reflecting the broken state, findStaleFiles must - // NOT mark the entry stale. Pre-fix, fingerprint computation threw - // and the file was marked stale on every invocation, triggering - // bundle spawns and the 8s wall time reported in #208. const secondPass = await findStaleFiles({ modelsDir: dir, catalog, @@ -935,11 +922,6 @@ Deno.test("findStaleFiles + markCatalogValidationFailed: stable broken source co }); Deno.test("findStaleFiles + markCatalogValidationFailed: editing a broken source produces a new fingerprint and re-stales", async () => { - // Recovery path. After the broken-state row is in place, editing - // the source to ANY different content (broken or valid) produces a - // new fingerprint that does not match the stored value, so - // findStaleFiles correctly marks the file stale and the loader's - // rebundle pass fires. 
const dir = await Deno.makeTempDir({ prefix: "swamp_bf_209_recover_" }); try { const file = join(dir, "model.ts"); @@ -956,7 +938,6 @@ Deno.test("findStaleFiles + markCatalogValidationFailed: editing a broken source sourceFingerprint: brokenFp, }); - // Stable broken — not stale. let stale = await findStaleFiles({ modelsDir: dir, catalog, @@ -965,7 +946,6 @@ Deno.test("findStaleFiles + markCatalogValidationFailed: editing a broken source }); assertEquals(stale.length, 0); - // Edit to different content (the recovery path). await Deno.writeTextFile(file, "export const recovered = 42;\n"); stale = await findStaleFiles({ modelsDir: dir, diff --git a/src/domain/extensions/extension.ts b/src/domain/extensions/extension.ts index 2688a3aa..9434e491 100644 --- a/src/domain/extensions/extension.ts +++ b/src/domain/extensions/extension.ts @@ -76,11 +76,11 @@ export type CalVer = string; * - I1: every Source.id.extensionRoot === Extension.extensionRoot * (canonical-form equality). * - I2: within this Extension, no two Sources share `(kind, typeNormalized)` - * in any non-Tombstoned state. W1b throws `IntraExtensionDuplicateType` - * on violation. W3's `ReconcileFromDisk` will replace the throw with - * a deterministic-winner + tombstone-loser transform; W1b sees this - * as a corruption case because the repository only constructs - * aggregates from already-persisted data that previously satisfied I2. + * in any non-Tombstoned state. Enforced by a deterministic-winner + + * tombstone-loser transform: the Source with the lexicographically + * smaller `canonicalPath` wins; the loser is tombstoned with reason + * `"renamed"`. Cross-aggregate uniqueness is separately enforced by + * the repository's I-Repo-1 invariant. 
* * I3 (ValidationFailed retains fingerprint+bundle) and I4 (Tombstoned * excluded from registration but retained in-memory) are structural — @@ -101,10 +101,11 @@ export interface Extension { } /** - * Thrown when an Extension is constructed (or transitioned) with two - * non-Tombstoned Sources sharing the same `(kind, typeNormalized)`. - * W1b's enforcement is throw-on-violation; W3 will replace this with - * the deterministic-winner transform documented in the design. + * Thrown by the repository's I-Repo-1 invariant when two Sources in + * DIFFERENT Extensions share `(kind, typeNormalized)`. Within a single + * Extension, the aggregate resolves duplicates via deterministic-winner + * transform (see {@link enforceI2}); `IntraExtensionDuplicateType` is + * retained for cross-aggregate violations surfaced by the repository. */ export class IntraExtensionDuplicateType extends Error { readonly extensionName: string; @@ -192,7 +193,7 @@ export function makeExtension(args: { } sources.set(s.id, s); } - enforceI2(args.name, args.version, sources); + const resolved = enforceI2(sources); return { name: args.name, @@ -200,7 +201,7 @@ export function makeExtension(args: { origin: args.origin, extensionRoot: canonicalRoot, checksum: args.checksum, - sources, + sources: resolved, }; } @@ -244,7 +245,6 @@ export function tombstoneAll(extension: Extension): Extension { * doesn't match this Extension's (I1). * * @throws SourceExtensionRootMismatch if I1 is violated. - * @throws IntraExtensionDuplicateType if I2 is violated. 
*/ export function observeFreshSource( extension: Extension, @@ -287,8 +287,8 @@ export function observeFreshSource( }), ); } - enforceI2(extension.name, extension.version, next); - return { ...extension, sources: next }; + const resolved = enforceI2(next); + return { ...extension, sources: resolved }; } /** @@ -440,47 +440,59 @@ function updateSourceState( } const next = new Map(extension.sources); next.set(location, withState(existing, state)); - enforceI2(extension.name, extension.version, next); - return { ...extension, sources: next }; + const resolved = enforceI2(next); + return { ...extension, sources: resolved }; } /** - * Validates I2 (intra-extension `(kind, typeNormalized)` uniqueness in - * non-Tombstoned states). Throws on violation. + * Enforces I2 (intra-extension `(kind, typeNormalized)` uniqueness in + * non-Tombstoned states) via deterministic-winner + tombstone-loser + * transform. * - * W3's `ReconcileFromDisk` will replace this with the deterministic- - * winner + tombstone-loser transform (origin precedence reduces to - * lexicographic-on-canonicalPath within a single Extension since all - * Sources share the Extension's origin). For W1b a thrown invariant - * surfaces corruption that the repository should never produce given - * its diff-based saves; tests assert the throw, not a transformation. + * When two non-Tombstoned Sources share `(kind, type)`, the one with + * the lexicographically smaller `canonicalPath` wins; the loser is + * tombstoned with reason `"renamed"`. Within a single Extension all + * Sources share the same origin, so origin-precedence reduces to + * path ordering — deterministic across platforms because + * `canonicalPath` is already NFC-normalised and case-folded. + * + * Returns a NEW map with losers tombstoned. Callers replace their + * sources map with the result. 
*/ function enforceI2( - name: string, - version: CalVer, sources: ReadonlyMap, -): void { +): Map { const seen = new Map(); + const losers: Source[] = []; + for (const source of sources.values()) { if (source.state.tag === "Tombstoned") continue; const typeName = extractType(source.state); - if (typeName === null) continue; // states without a type don't conflict + if (typeName === null) continue; const key = `${source.kind}::${typeName}`; const prior = seen.get(key); if (prior) { - throw new IntraExtensionDuplicateType({ - extensionName: name, - extensionVersion: version, - kind: source.kind, - type: typeName, - canonicalPaths: [ - prior.id.canonicalPath, - source.id.canonicalPath, - ], - }); + if (source.id.canonicalPath < prior.id.canonicalPath) { + losers.push(prior); + seen.set(key, source); + } else { + losers.push(source); + } + } else { + seen.set(key, source); } - seen.set(key, source); } + + if (losers.length === 0) return new Map(sources); + + const result = new Map(sources); + for (const loser of losers) { + result.set( + loser.id, + withState(loser, { tag: "Tombstoned", reason: "renamed" }), + ); + } + return result; } /** diff --git a/src/domain/extensions/extension_test.ts b/src/domain/extensions/extension_test.ts index 9e7a9f54..bd1cfe85 100644 --- a/src/domain/extensions/extension_test.ts +++ b/src/domain/extensions/extension_test.ts @@ -20,7 +20,6 @@ import { assert, assertEquals, assertFalse, assertThrows } from "@std/assert"; import { makeBundleLocation } from "./bundle_location.ts"; import { - IntraExtensionDuplicateType, makeExtension, makeLocalExtension, markSourceMissing, @@ -85,20 +84,74 @@ Deno.test("makeExtension: I1 — Source with mismatched extensionRoot throws", ( ); }); -Deno.test("makeExtension: I2 — duplicate (kind, type) in non-Tombstoned states throws", () => { +Deno.test("makeExtension: I2 — duplicate (kind, type) resolved by deterministic-winner transform", () => { + // a.ts < b.ts lexicographically → a wins, b is tombstoned. 
const a = indexedSource("models/a.ts", "@scope/foo/instance"); const b = indexedSource("models/b.ts", "@scope/foo/instance"); - assertThrows( - () => - makeExtension({ - name: "@scope/foo", - version: "1.0.0", - origin: "pulled", - extensionRoot: EXT_ROOT, - sources: [a, b], - }), - IntraExtensionDuplicateType, - ); + const ext = makeExtension({ + name: "@scope/foo", + version: "1.0.0", + origin: "pulled", + extensionRoot: EXT_ROOT, + sources: [a, b], + }); + assertEquals(ext.sources.size, 2); + const aAfter = ext.sources.get(a.id); + const bAfter = ext.sources.get(b.id); + assert(aAfter); + assert(bAfter); + assertEquals(aAfter.state.tag, "Indexed"); + assertEquals(bAfter.state.tag, "Tombstoned"); + if (bAfter.state.tag === "Tombstoned") { + assertEquals(bAfter.state.reason, "renamed"); + } +}); + +Deno.test("makeExtension: I2 — three-way collision tombstones two losers", () => { + const a = indexedSource("models/a.ts", "@scope/foo/instance"); + const b = indexedSource("models/b.ts", "@scope/foo/instance"); + const c = indexedSource("models/c.ts", "@scope/foo/instance"); + const ext = makeExtension({ + name: "@scope/foo", + version: "1.0.0", + origin: "pulled", + extensionRoot: EXT_ROOT, + sources: [a, b, c], + }); + assertEquals(ext.sources.size, 3); + const aAfter = ext.sources.get(a.id); + const bAfter = ext.sources.get(b.id); + const cAfter = ext.sources.get(c.id); + assert(aAfter); + assert(bAfter); + assert(cAfter); + assertEquals(aAfter.state.tag, "Indexed"); + assertEquals(bAfter.state.tag, "Tombstoned"); + assertEquals(cAfter.state.tag, "Tombstoned"); +}); + +Deno.test("makeExtension: I2 — already-tombstoned loser is idempotent", () => { + const a = indexedSource("models/a.ts", "@scope/foo/instance"); + const bTombstoned = makeSource({ + id: makeSourceLocation(`${EXT_ROOT}/models/b.ts`, EXT_ROOT), + kind: "model", + fingerprint: FP, + state: { tag: "Tombstoned", reason: "renamed" }, + }); + const ext = makeExtension({ + name: "@scope/foo", + version: 
"1.0.0", + origin: "pulled", + extensionRoot: EXT_ROOT, + sources: [a, bTombstoned], + }); + assertEquals(ext.sources.size, 2); + const aAfter = ext.sources.get(a.id); + const bAfter = ext.sources.get(bTombstoned.id); + assert(aAfter); + assert(bAfter); + assertEquals(aAfter.state.tag, "Indexed"); + assertEquals(bAfter.state.tag, "Tombstoned"); }); Deno.test("makeExtension: I2 — same type across different kinds is allowed", () => { diff --git a/src/infrastructure/persistence/extension_repository.ts b/src/infrastructure/persistence/extension_repository.ts index 037a2f52..b0c48add 100644 --- a/src/infrastructure/persistence/extension_repository.ts +++ b/src/infrastructure/persistence/extension_repository.ts @@ -286,6 +286,35 @@ export class ExtensionRepository { } } + /** + * Whole-repo cold-start check for {@link ReconcileFromDiskService}. + * Returns `true` if ANY kind is not yet populated — reconcile then + * runs a full-tree reconcile across all origins. + * + * Checks only the `isPopulated` flag — the cheapest guard that + * catches the cold-start case (first run, after invalidateAll, after + * catalog deletion). Per-kind guard values (layout version, base path, + * source dirs fingerprint) require loader-computed inputs not available + * at the CLI layer; those guards continue to trigger per-loader + * rebuilds via {@link invalidationGuards}. + * + * Loaders keep their per-kind interface unchanged (W2/legacy path). + * W4 collapses both entry points when it unifies loaders. 
+ */ + anyKindNeedsInvalidation(): boolean { + const kinds: ExtensionKind[] = [ + "model", + "vault", + "driver", + "datastore", + "report", + ]; + for (const kind of kinds) { + if (!this.legacyStore.isPopulated(kind)) return true; + } + return false; + } + // ----- private helpers ----- /** diff --git a/src/infrastructure/persistence/extension_repository_test.ts b/src/infrastructure/persistence/extension_repository_test.ts index 296688e1..2e5cf66a 100644 --- a/src/infrastructure/persistence/extension_repository_test.ts +++ b/src/infrastructure/persistence/extension_repository_test.ts @@ -594,12 +594,12 @@ Deno.test("ExtensionRepository: invalidationGuards parity over all 5 kinds × 4 }); // ===== Test #13: W3-corruption boundary — two pulled versions on disk ===== -Deno.test("ExtensionRepository: two pulled rows for same name resolve to same version → DuplicateTypeError surfaces (W3 territory)", () => { +Deno.test("ExtensionRepository: two pulled rows for same name resolve to same version → deterministic-winner transform (W3)", () => { withRepository((repo, cat, repoRoot) => { - // Set up: two source files for the SAME logical extension on disk - // (interrupted upgrade). Each row has empty extension_version. - // Lockfile says the only version present is 2.0.0, so both rows - // resolve to the same (name, version), then I-Repo-1 fires. + // Two source files for the SAME logical extension on disk + // (interrupted upgrade). Both resolve to same (name, version). + // W3's deterministic-winner transform tombstones the loser + // instead of throwing. const sp1 = `${repoRoot}/.swamp/pulled-extensions/@scope/foo/models/instance.ts`; const sp2 = @@ -621,28 +621,22 @@ Deno.test("ExtensionRepository: two pulled rows for same name resolve to same ve }); } - // loadAll runs the empty-version fallback for both rows. Both - // resolve to (name=@scope/foo, version=2.0.0). 
When the test then - // tries to SAVE the resulting aggregate, the intra-extension I2 - // fires inside makeExtension. We catch the exception inside loadAll - // because the two rows fold into a single Extension whose two - // Sources occupy the same (kind, type). - let thrown: unknown; - try { - repo.loadAll(); - } catch (e) { - thrown = e; + // loadAll resolves the duplicate via deterministic-winner transform. + // The lexicographically smaller canonicalPath wins; the other is + // tombstoned with reason "renamed". + const extensions = repo.loadAll(); + assertEquals(extensions.length, 1); + const ext = extensions[0]; + assertEquals(ext.name, "@scope/foo"); + assertEquals(ext.sources.size, 2); + let indexed = 0; + let tombstoned = 0; + for (const s of ext.sources.values()) { + if (s.state.tag === "Indexed") indexed++; + if (s.state.tag === "Tombstoned") tombstoned++; } - // Either I2 (intra-extension) or I-Repo-1 fires — either way, the - // corruption surfaces as an error rather than silent first-wins. - // The repository's loadAll currently uses makeExtension which throws - // IntraExtensionDuplicateType. The post-condition is the same: - // corruption surfaces, fallback does NOT try to repair. - assert(thrown instanceof Error); - if (!(thrown instanceof Error)) return; - // Both source paths must appear in the error message. 
- assertStringIncludes(thrown.message, "models/instance.ts"); - assertStringIncludes(thrown.message, "models/extra/instance.ts"); + assertEquals(indexed, 1, "exactly one winner"); + assertEquals(tombstoned, 1, "exactly one loser tombstoned"); }, { lockedVersions: fixedLockedVersions({ "@scope/foo": "2.0.0" }) }); }); diff --git a/src/libswamp/extensions/reconcile_from_disk_bench.ts b/src/libswamp/extensions/reconcile_from_disk_bench.ts new file mode 100644 index 00000000..7a1e16a0 --- /dev/null +++ b/src/libswamp/extensions/reconcile_from_disk_bench.ts @@ -0,0 +1,189 @@ +// Swamp, an Automation Framework +// Copyright (C) 2026 System Initiative, Inc. +// +// This file is part of Swamp. +// +// Swamp is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation, with the Swamp +// Extension and Definition Exception (found in the "COPYING-EXCEPTION" +// file). +// +// Swamp is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with Swamp. If not, see . + +/** + * W3 cold-start performance benchmark. Generates a repo with 50 local + * extensions × 1 source each, runs ReconcileFromDisk, and measures + * wall time. Run with: + * + * deno bench --unstable-bundle --allow-all src/libswamp/extensions/reconcile_from_disk_bench.ts + * + * Pre-committed threshold: ≤ 1.2x of pre-W3 cold-start baseline. + * If blown, optimize (fingerprint caching, mtime fast-path) before shipping. 
+ */ + +import { join } from "@std/path"; +import { ensureDir } from "@std/fs"; +import { ReconcileFromDiskService } from "./reconcile_from_disk_service.ts"; +import { ExtensionCatalogStore } from "../../infrastructure/persistence/extension_catalog_store.ts"; +import { ExtensionRepository } from "../../infrastructure/persistence/extension_repository.ts"; +import { LockfileRepository } from "../../infrastructure/persistence/lockfile_repository.ts"; +import type { DenoRuntime } from "../../domain/runtime/deno_runtime.ts"; + +import "../../domain/models/models.ts"; + +const testDenoRuntime: DenoRuntime = { + ensureDeno: () => Promise.resolve(Deno.execPath()), +}; + +const MINIMAL_MODEL = (typeId: string) => ` +import { z } from "npm:zod@4"; +export const model = { + type: "${typeId}", + version: "2026.05.05.1", + globalArguments: z.object({}), + resources: { + "data": { + description: "x", + schema: z.object({}), + lifetime: "infinite", + garbageCollection: 1, + }, + }, + methods: { + noop: { + description: "noop", + arguments: z.object({}), + execute: async () => ({ dataHandles: [] }), + }, + }, +}; +`; + +const EXTENSION_COUNT = 50; + +async function setupBenchRepo(): Promise<{ + repoDir: string; + cleanup: () => Promise; +}> { + const repoDir = await Deno.makeTempDir({ prefix: "swamp_reconcile_bench_" }); + await ensureDir(join(repoDir, ".swamp")); + const modelsDir = join(repoDir, "extensions", "models"); + await ensureDir(modelsDir); + const lockfilePath = join(modelsDir, "upstream_extensions.json"); + await Deno.writeTextFile(lockfilePath, "{}"); + + for (let i = 0; i < EXTENSION_COUNT; i++) { + await Deno.writeTextFile( + join(modelsDir, `model_${i}.ts`), + MINIMAL_MODEL(`@bench/model-${i}`), + ); + } + + return { + repoDir, + cleanup: async () => { + if (Deno.build.os === "windows") { + await Deno.remove(repoDir, { recursive: true }).catch(() => {}); + } else { + await Deno.remove(repoDir, { recursive: true }); + } + }, + }; +} + +Deno.bench( + 
`ReconcileFromDisk cold-start: ${EXTENSION_COUNT} local models`, + { group: "reconcile-cold-start", baseline: true }, + async (b) => { + const { repoDir, cleanup } = await setupBenchRepo(); + try { + const dbPath = join(repoDir, ".swamp", "_extension_catalog.db"); + const lockfilePath = join( + repoDir, + "extensions", + "models", + "upstream_extensions.json", + ); + + b.start(); + const catalog = new ExtensionCatalogStore(dbPath); + const lockfileRepository = await LockfileRepository.create( + lockfilePath, + ); + const repository = new ExtensionRepository({ + catalog, + lockfileRepository, + repoRoot: repoDir, + }); + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + const result = await service.execute(); + b.end(); + + if (result.transitions.length === 0) { + throw new Error("Expected transitions from cold-start reconcile"); + } + catalog.close(); + } finally { + await cleanup(); + } + }, +); + +Deno.bench( + `ReconcileFromDisk warm-start (no-op): ${EXTENSION_COUNT} local models`, + { group: "reconcile-warm-start" }, + async (b) => { + const { repoDir, cleanup } = await setupBenchRepo(); + try { + const dbPath = join(repoDir, ".swamp", "_extension_catalog.db"); + const lockfilePath = join( + repoDir, + "extensions", + "models", + "upstream_extensions.json", + ); + + const catalog = new ExtensionCatalogStore(dbPath); + const lockfileRepository = await LockfileRepository.create( + lockfilePath, + ); + const repository = new ExtensionRepository({ + catalog, + lockfileRepository, + repoRoot: repoDir, + }); + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + + // Warm up: run reconcile once to populate catalog. 
+ await service.execute(); + + b.start(); + const result = await service.execute(); + b.end(); + + if (result.transitions.length !== 0) { + throw new Error("Warm-start should produce zero transitions"); + } + catalog.close(); + } finally { + await cleanup(); + } + }, +); diff --git a/src/libswamp/extensions/reconcile_from_disk_service.ts b/src/libswamp/extensions/reconcile_from_disk_service.ts new file mode 100644 index 00000000..49021849 --- /dev/null +++ b/src/libswamp/extensions/reconcile_from_disk_service.ts @@ -0,0 +1,631 @@ +// Swamp, an Automation Framework +// Copyright (C) 2026 System Initiative, Inc. +// +// This file is part of Swamp. +// +// Swamp is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation, with the Swamp +// Extension and Definition Exception (found in the "COPYING-EXCEPTION" +// file). +// +// Swamp is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with Swamp. If not, see . 
+ +import { getLogger } from "@logtape/logtape"; +import { basename as pathBasename, join, relative, resolve } from "@std/path"; +import { + type Extension, + makeExtension, + makeLocalExtension, + markSourceMissing, + observeFreshSource, + recordBundleBuildFailed, + recordBundled, + recordEntryPointUnreadable, +} from "../../domain/extensions/extension.ts"; +import { makeSource } from "../../domain/extensions/source.ts"; +import { makeSourceLocation } from "../../domain/extensions/source_location.ts"; +import { makeBundleLocation } from "../../domain/extensions/bundle_location.ts"; +import { + computeSourceFingerprint, + createFreshnessCache, + type FreshnessCache, +} from "../../domain/extensions/bundle_freshness.ts"; +import type { RowStateTag } from "../../domain/extensions/row_state.ts"; +import type { SourceLocation } from "../../domain/extensions/source_location.ts"; +import type { ExtensionRepository } from "../../infrastructure/persistence/extension_repository.ts"; +import type { + ExtensionKind, +} from "../../infrastructure/persistence/extension_catalog_store.ts"; +import { BUNDLE_LAYOUT_VERSION } from "../../infrastructure/persistence/extension_catalog_store.ts"; +import type { LockfileRepository } from "../../infrastructure/persistence/lockfile_repository.ts"; +import { swampPath } from "../../infrastructure/persistence/paths.ts"; +import { UserModelLoader } from "../../domain/models/user_model_loader.ts"; +import { UserDriverLoader } from "../../domain/drivers/user_driver_loader.ts"; +import { UserVaultLoader } from "../../domain/vaults/user_vault_loader.ts"; +import { UserDatastoreLoader } from "../../domain/datastore/user_datastore_loader.ts"; +import { UserReportLoader } from "../../domain/reports/user_report_loader.ts"; +import type { DenoRuntime } from "../../domain/runtime/deno_runtime.ts"; +import { + collectDirsForKind, + expandSourcePaths, + readSwampSources, + resolveSourceExtensionDirs, +} from 
"../../infrastructure/persistence/swamp_sources_repository.ts"; + +const logger = getLogger(["swamp", "extensions", "reconcile"]); + +/** Subdirectories of a per-extension subtree, paired with their kind. */ +const KIND_DIRS = [ + "models", + "vaults", + "drivers", + "datastores", + "reports", +] as const; + +type KindDir = typeof KIND_DIRS[number]; + +/** + * A single state transition produced by reconcile. Structured value — + * W6's `swamp doctor extensions` renders this directly. + */ +export interface ReconcileTransition { + readonly source: SourceLocation; + readonly fromState: RowStateTag | null; + readonly toState: RowStateTag; + readonly reason: string; +} + +/** + * Result of a reconcile run. + */ +export interface ReconcileResult { + readonly transitions: readonly ReconcileTransition[]; + readonly applied: boolean; +} + +/** + * W3 application service — reconciles on-disk extension state against + * the persisted catalog aggregate state. + * + * Walks the on-disk source tree across all three origin types (locals, + * pulled, source-mounted), loads the current aggregate state via + * {@link ExtensionRepository.loadAll}, diffs the two, and emits + * {@link ReconcileTransition} records. + * + * Delegates to per-loader `bundleAndIndexOne` for type extraction — + * NOT {@link InstallExtensionService}. The source is already on disk + * and the lockfile already exists; reconcile is post-hoc state repair, + * not a fresh install. + * + * **Trigger points:** cold-start (when `anyKindNeedsInvalidation()` + * returns true) + explicit `swamp doctor extensions` call. NOT on + * every command. + * + * **dryRun mode:** when `dryRun: true`, collects transitions without + * calling `repository.saveAll()`. Returns the same structured result + * either way. + * + * **Transition-count guardrail:** if any single run would transition + * > 50% of existing rows, the run aborts and returns the transitions + * without applying them. 
Catches mass-tombstone bugs before they + * destroy legitimate rows. + */ +export class ReconcileFromDiskService { + private readonly denoRuntime: DenoRuntime; + private readonly repository: ExtensionRepository; + private readonly lockfileRepository: LockfileRepository; + private readonly repoDir: string; + + constructor(args: { + denoRuntime: DenoRuntime; + repository: ExtensionRepository; + lockfileRepository: LockfileRepository; + repoDir: string; + }) { + this.denoRuntime = args.denoRuntime; + this.repository = args.repository; + this.lockfileRepository = args.lockfileRepository; + this.repoDir = resolve(args.repoDir); + } + + async execute( + options?: { dryRun?: boolean }, + ): Promise { + const dryRun = options?.dryRun ?? false; + const transitions: ReconcileTransition[] = []; + + const existingExtensions = this.repository.loadAll(); + const totalExistingRows = countSources(existingExtensions); + + const reconciledExtensions = await this.reconcileAll( + existingExtensions, + transitions, + ); + + const GUARDRAIL_MIN_ROWS = 10; + if ( + transitions.length > 0 && totalExistingRows >= GUARDRAIL_MIN_ROWS + ) { + const ratio = transitions.length / totalExistingRows; + if (ratio > 0.5) { + if (!dryRun) this.markAllKindsPopulated(); + logger + .warn`Skipped catalog repair: too many entries would change (${transitions.length}/${totalExistingRows}). Run ${"swamp doctor extensions"} to inspect.`; + return { transitions, applied: false }; + } + } + + if (!dryRun && transitions.length > 0) { + this.repository.saveAll(reconciledExtensions); + this.markAllKindsPopulated(); + logger + .info`Extension catalog updated: ${transitions.length} ${ + transitions.length === 1 ? 
"entry" : "entries" + } repaired`; + } else if (!dryRun && totalExistingRows === 0) { + this.markAllKindsPopulated(); + } else if (transitions.length === 0) { + logger.debug`Reconcile complete: no transitions`; + } + + return { transitions, applied: !dryRun && transitions.length > 0 }; + } + + private async reconcileAll( + existingExtensions: Extension[], + transitions: ReconcileTransition[], + ): Promise { + const cache = createFreshnessCache(); + const result: Extension[] = []; + + // Gather ALL local + source-mounted on-disk sources into one map, + // then reconcile the @local/ aggregate once. Prevents the + // duplicate-extension bug where reconcileLocals and + // reconcileSourceMounted each build separate @local/ + // aggregates that conflict on saveAll. + const localExt = await this.reconcileLocalAndSourceMounted( + existingExtensions, + transitions, + cache, + ); + if (localExt) result.push(localExt); + + const pulledExts = await this.reconcilePulled( + existingExtensions, + transitions, + cache, + ); + result.push(...pulledExts); + + return result; + } + + private async reconcileLocalAndSourceMounted( + existingExtensions: Extension[], + transitions: ReconcileTransition[], + cache: FreshnessCache, + ): Promise { + const basename = pathBasename(this.repoDir) || "unknown"; + const localName = `@local/${basename}`; + const existing = existingExtensions.find( + (e) => e.name === localName && e.origin === "local", + ); + + // Gather ALL local + source-mounted on-disk sources into one map. 
+    const onDiskSources = new Map<string, { kind: KindDir; baseDir: string }>();
+
+    // Local extensions under extensions/<kind>/
+    for (const kindDir of KIND_DIRS) {
+      const dir = join(this.repoDir, "extensions", kindDir);
+      const files = await collectTsFiles(dir);
+      for (const absolutePath of files) {
+        onDiskSources.set(absolutePath, { kind: kindDir, baseDir: dir });
+      }
+    }
+
+    // Source-mounted extensions from .swamp-sources.yaml
+    const config = await readSwampSources(this.repoDir);
+    if (config) {
+      const expanded = await expandSourcePaths(config, this.repoDir);
+      const resolved = await resolveSourceExtensionDirs(expanded);
+      for (const sourceDirs of resolved) {
+        for (const kindDir of KIND_DIRS) {
+          const dirs = collectDirsForKind([sourceDirs], kindDir);
+          for (const dir of dirs) {
+            const files = await collectTsFiles(dir);
+            for (const absolutePath of files) {
+              onDiskSources.set(absolutePath, { kind: kindDir, baseDir: dir });
+            }
+          }
+        }
+      }
+    }
+
+    let ext = existing ??
+      makeLocalExtension({ repoRoot: this.repoDir, basename });
+
+    ext = await this.reconcileExtension(
+      ext,
+      onDiskSources,
+      transitions,
+      cache,
+      "local",
+    );
+
+    if (ext.sources.size === 0 && !existing) return null;
+    return ext;
+  }
+
+  private async reconcilePulled(
+    existingExtensions: Extension[],
+    transitions: ReconcileTransition[],
+    cache: FreshnessCache,
+  ): Promise<Extension[]> {
+    const pulledRoot = swampPath(this.repoDir, "pulled-extensions");
+    const result: Extension[] = [];
+    const lockfileEntries = this.lockfileRepository.getAllEntries();
+
+    for (const extensionName of Object.keys(lockfileEntries)) {
+      const extRoot = join(pulledRoot, extensionName);
+      const version = this.lockfileRepository.getLockedVersion(extensionName) ??
+ ""; + const existing = existingExtensions.find( + (e) => e.name === extensionName && e.version === version, + ); + + const onDiskSources = new Map< + string, + { kind: KindDir; baseDir: string } + >(); + for (const kindDir of KIND_DIRS) { + const dir = join(extRoot, kindDir); + const files = await collectTsFiles(dir); + for (const absolutePath of files) { + onDiskSources.set(absolutePath, { kind: kindDir, baseDir: dir }); + } + } + + let ext = existing ?? makeExtension({ + name: extensionName, + version, + origin: "pulled", + extensionRoot: extRoot, + sources: [], + }); + + ext = await this.reconcileExtension( + ext, + onDiskSources, + transitions, + cache, + "pulled", + ); + + result.push(ext); + } + + // Handle orphaned pulled extensions: in catalog but not in lockfile. + for (const existing of existingExtensions) { + if (existing.origin !== "pulled") continue; + if (lockfileEntries[existing.name]) continue; + let ext = existing; + for (const [loc, source] of ext.sources) { + if (source.state.tag === "Tombstoned") continue; + transitions.push({ + source: loc, + fromState: source.state.tag, + toState: "Tombstoned", + reason: "orphan: no lockfile entry", + }); + ext = markSourceMissing(ext, { location: loc, bundleOnDisk: null }); + } + result.push(ext); + } + + return result; + } + + private async reconcileExtension( + extension: Extension, + onDiskSources: Map, + transitions: ReconcileTransition[], + cache: FreshnessCache, + originType: "local" | "pulled", + ): Promise { + let ext = extension; + + // Phase 1: Sources on disk — ensure they're in the aggregate. + for (const [absolutePath, { kind, baseDir }] of onDiskSources) { + const loc = makeSourceLocation(absolutePath, ext.extensionRoot); + const existingSource = findSourceByPath(ext, loc.canonicalPath); + // Use the existing source's id for aggregate operations (Map key + // equality is by reference). For new sources, use the fresh loc. + const effectiveLoc = existingSource?.id ?? 
loc; + + if (existingSource && existingSource.state.tag === "Indexed") { + try { + const fp = await computeSourceFingerprint( + absolutePath, + baseDir, + cache, + ); + if (fp === existingSource.fingerprint) continue; + } catch { + // Fingerprint failed — fall through to re-bundle. + } + } + + const loader = this.makeLoaderForKind(kind); + const relativePath = relative(baseDir, absolutePath); + try { + const out = await loader.bundleAndIndexOne({ + absolutePath, + relativePath, + baseDir, + }); + if (!out) continue; + + const fp = await computeSourceFingerprint( + absolutePath, + baseDir, + cache, + ); + const bundle = makeBundleLocation(out.bundlePath, fp); + const fromState = existingSource?.state.tag ?? null; + + ext = observeFreshSource(ext, { + location: effectiveLoc, + kind: out.kind, + fingerprint: fp, + type: out.typeNormalized, + bundle, + }); + + ext = recordBundled(ext, { + location: effectiveLoc, + type: out.typeNormalized, + bundle, + }); + + if (fromState !== "Indexed") { + transitions.push({ + source: effectiveLoc, + fromState, + toState: "Indexed", + reason: fromState === null + ? "new source discovered on disk" + : `re-indexed from ${fromState}`, + }); + } + } catch (error) { + const fromState = existingSource?.state.tag ?? null; + const errorMsg = error instanceof Error ? error.message : String(error); + + if (existingSource) { + ext = recordBundleBuildFailed(ext, { + location: effectiveLoc, + lastError: errorMsg, + }); + } else { + ext = makeExtensionWithNewSource(ext, effectiveLoc, kind, { + tag: "BundleBuildFailed", + lastError: errorMsg, + }); + } + + if (fromState !== "BundleBuildFailed") { + transitions.push({ + source: effectiveLoc, + fromState, + toState: "BundleBuildFailed", + reason: `bundle build failed: ${errorMsg}`, + }); + } + } + } + + // Phase 2: Sources in aggregate but NOT on disk → transition. 
+ for (const [loc, source] of ext.sources) { + if (source.state.tag === "Tombstoned") continue; + if (onDiskSources.has(loc.canonicalPath)) continue; + + const fromState = source.state.tag; + + if (originType === "pulled") { + // Pulled: lockfile is canonical. Source missing but lockfile + // entry present → EntryPointUnreadable (re-fetch is W4). + // This path only fires if the lockfile has the entry — + // orphan handling (no lockfile entry) is in reconcilePulled. + ext = recordEntryPointUnreadable(ext, { + location: loc, + lastError: "source file missing from disk", + }); + transitions.push({ + source: loc, + fromState, + toState: "EntryPointUnreadable", + reason: "pulled source missing from disk (lockfile entry present)", + }); + } else { + // Local / source-mounted: source is canonical → tombstone. + const bundleOnDisk = extractBundlePath(source.state); + ext = markSourceMissing(ext, { location: loc, bundleOnDisk }); + const newState = bundleOnDisk ? "OrphanedBundleOnly" : "Tombstoned"; + transitions.push({ + source: loc, + fromState, + toState: newState, + reason: "source file deleted from disk", + }); + } + } + + return ext; + } + + private makeLoaderForKind( + kindDir: KindDir, + ): { + bundleAndIndexOne: (args: { + absolutePath: string; + relativePath: string; + baseDir: string; + }) => Promise< + | { + kind: + | "model" + | "extension" + | "vault" + | "driver" + | "datastore" + | "report"; + typeNormalized: string; + bundlePath: string; + fingerprint: string; + } + | null + >; + } { + switch (kindDir) { + case "models": + return new UserModelLoader( + this.denoRuntime, + this.repoDir, + undefined, + this.repository, + ); + case "vaults": + return new UserVaultLoader( + this.denoRuntime, + this.repoDir, + undefined, + this.repository, + ); + case "drivers": + return new UserDriverLoader( + this.denoRuntime, + this.repoDir, + undefined, + this.repository, + ); + case "datastores": + return new UserDatastoreLoader( + this.denoRuntime, + this.repoDir, + 
undefined, // NOTE(review): restored to match sibling loaders — confirm UserDatastoreLoader signature
+          this.repository);
+      case "reports":
+        return new UserReportLoader(
+          this.denoRuntime,
+          this.repoDir,
+          undefined,
+          this.repository,
+        );
+    }
+  }
+
+  private markAllKindsPopulated(): void {
+    const catalog = this.repository.legacyStore;
+    const kinds: ExtensionKind[] = [
+      "model",
+      "vault",
+      "driver",
+      "datastore",
+      "report",
+    ];
+    catalog.setLayoutVersion(BUNDLE_LAYOUT_VERSION);
+    for (const kind of kinds) {
+      catalog.markPopulated(kind);
+    }
+  }
+}
+
+function findSourceByPath(
+  extension: Extension,
+  canonicalPath: string,
+): import("../../domain/extensions/source.ts").Source | undefined {
+  for (const [loc, source] of extension.sources) {
+    if (loc.canonicalPath === canonicalPath) return source;
+  }
+  return undefined;
+}
+
+function countSources(extensions: Extension[]): number {
+  let count = 0;
+  for (const ext of extensions) {
+    for (const source of ext.sources.values()) {
+      if (source.state.tag !== "Tombstoned") count++;
+    }
+  }
+  return count;
+}
+
+function extractBundlePath(
+  state: { tag: string; bundle?: { canonicalPath: string } },
+): ReturnType<typeof makeBundleLocation> | null {
+  if ("bundle" in state && state.bundle) {
+    return state.bundle as ReturnType<typeof makeBundleLocation>;
+  }
+  return null;
+}
+
+function makeExtensionWithNewSource(
+  extension: Extension,
+  location: SourceLocation,
+  kindDir: KindDir,
+  state: { tag: "BundleBuildFailed"; lastError: string },
+): Extension {
+  const kind = kindDirToExtensionKind(kindDir);
+  const source = makeSource({
+    id: location,
+    kind,
+    fingerprint: "",
+    state,
+  });
+  return makeExtension({
+    ...extension,
+    sources: [...extension.sources.values(), source],
+  });
+}
+
+function kindDirToExtensionKind(
+  kindDir: KindDir,
+): "model" | "vault" | "driver" | "datastore" | "report" {
+  switch (kindDir) {
+    case "models":
+      return "model";
+    case "vaults":
+      return "vault";
+    case "drivers":
+      return "driver";
+    case "datastores":
+      return "datastore";
+    case "reports":
+      return "report";
+  }
+}
+
+async function collectTsFiles(dir: string): Promise<string[]> {
+  const out: string[] = [];
+  try {
+    for await (const entry of Deno.readDir(dir)) {
+      const path = join(dir, entry.name);
+      if (entry.isFile && entry.name.endsWith(".ts")) {
+        out.push(path);
+      } else if (entry.isDirectory && !entry.name.startsWith("_")) {
+        out.push(...await collectTsFiles(path));
+      }
+    }
+  } catch (error) {
+    if (!(error instanceof Deno.errors.NotFound)) throw error;
+  }
+  return out;
+}
diff --git a/src/libswamp/extensions/reconcile_from_disk_service_test.ts b/src/libswamp/extensions/reconcile_from_disk_service_test.ts
new file mode 100644
index 00000000..0e068b64
--- /dev/null
+++ b/src/libswamp/extensions/reconcile_from_disk_service_test.ts
@@ -0,0 +1,681 @@
+// Swamp, an Automation Framework
+// Copyright (C) 2026 System Initiative, Inc.
+//
+// This file is part of Swamp.
+//
+// Swamp is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation, with the Swamp
+// Extension and Definition Exception (found in the "COPYING-EXCEPTION"
+// file).
+//
+// Swamp is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with Swamp. If not, see <https://www.gnu.org/licenses/>.
+ +import { assertEquals, assertGreater } from "@std/assert"; +import { basename as pathBasename, join } from "@std/path"; +import { ensureDir } from "@std/fs"; +import { swampPath } from "../../infrastructure/persistence/paths.ts"; +import { ReconcileFromDiskService } from "./reconcile_from_disk_service.ts"; +import { ExtensionCatalogStore } from "../../infrastructure/persistence/extension_catalog_store.ts"; +import { ExtensionRepository } from "../../infrastructure/persistence/extension_repository.ts"; +import { LockfileRepository } from "../../infrastructure/persistence/lockfile_repository.ts"; +import type { DenoRuntime } from "../../domain/runtime/deno_runtime.ts"; + +import "../../domain/models/models.ts"; + +const testDenoRuntime: DenoRuntime = { + ensureDeno: () => Promise.resolve(Deno.execPath()), +}; + +const MINIMAL_MODEL_CODE = (typeId: string) => ` +import { z } from "npm:zod@4"; + +export const model = { + type: "${typeId}", + version: "2026.05.05.1", + globalArguments: z.object({}), + resources: { + "data": { + description: "x", + schema: z.object({}), + lifetime: "infinite", + garbageCollection: 1, + }, + }, + methods: { + noop: { + description: "noop", + arguments: z.object({}), + execute: async () => ({ dataHandles: [] }), + }, + }, +}; +`; + +async function withFixtureRepo( + fn: (args: { + repoDir: string; + repository: ExtensionRepository; + catalog: ExtensionCatalogStore; + lockfileRepository: LockfileRepository; + }) => Promise, +): Promise { + const repoDir = await Deno.makeTempDir({ prefix: "swamp_reconcile_test_" }); + await ensureDir(join(repoDir, ".swamp")); + await ensureDir(join(repoDir, "extensions", "models")); + const dbPath = join(repoDir, ".swamp", "_extension_catalog.db"); + const lockfilePath = join( + repoDir, + "extensions", + "models", + "upstream_extensions.json", + ); + await Deno.writeTextFile(lockfilePath, "{}"); + + const catalog = new ExtensionCatalogStore(dbPath); + const lockfileRepository = await 
LockfileRepository.create(lockfilePath); + const repository = new ExtensionRepository({ + catalog, + lockfileRepository, + repoRoot: repoDir, + }); + + try { + await fn({ repoDir, repository, catalog, lockfileRepository }); + } finally { + catalog.close(); + if (Deno.build.os === "windows") { + await Deno.remove(repoDir, { recursive: true }).catch(() => {}); + } else { + await Deno.remove(repoDir, { recursive: true }); + } + } +} + +Deno.test( + "ReconcileFromDisk: empty repo produces zero transitions", + async () => { + await withFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + const result = await service.execute(); + assertEquals(result.transitions.length, 0); + assertEquals(result.applied, false); + }, + ); + }, +); + +Deno.test( + "ReconcileFromDisk: discovers new local model and indexes it", + async () => { + await withFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + const ts = Date.now(); + const typeId = `@test/reconcile-new-${ts}`; + const modelPath = join(repoDir, "extensions", "models", "test.ts"); + await Deno.writeTextFile(modelPath, MINIMAL_MODEL_CODE(typeId)); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + const result = await service.execute(); + + assertGreater( + result.transitions.length, + 0, + "must discover and index the new model", + ); + assertEquals(result.applied, true); + + const found = result.transitions.find( + (t) => t.toState === "Indexed", + ); + assertEquals( + found !== undefined, + true, + "must have an Indexed transition", + ); + }, + ); + }, +); + +Deno.test( + "ReconcileFromDisk: dryRun collects transitions without applying", + async () => { + await withFixtureRepo( + async ({ repoDir, repository, catalog, lockfileRepository }) => { + const ts = Date.now(); 
+ const typeId = `@test/reconcile-dry-${ts}`; + const modelPath = join(repoDir, "extensions", "models", "dry.ts"); + await Deno.writeTextFile(modelPath, MINIMAL_MODEL_CODE(typeId)); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + + const rowsBefore = catalog.findAll().length; + const result = await service.execute({ dryRun: true }); + + assertGreater(result.transitions.length, 0); + assertEquals(result.applied, false, "dryRun must not apply"); + assertEquals( + catalog.findAll().length, + rowsBefore, + "catalog unchanged in dryRun", + ); + }, + ); + }, +); + +Deno.test({ + name: "ReconcileFromDisk: idempotence — second run produces zero transitions", + ignore: Deno.build.os === "windows", + fn: async () => { + await withFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + const ts = Date.now(); + const typeId = `@test/reconcile-idem-${ts}`; + const modelPath = join(repoDir, "extensions", "models", "idem.ts"); + await Deno.writeTextFile(modelPath, MINIMAL_MODEL_CODE(typeId)); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + + const first = await service.execute(); + assertGreater(first.transitions.length, 0); + assertEquals(first.applied, true); + + const second = await service.execute(); + assertEquals( + second.transitions.length, + 0, + "second reconcile must produce zero transitions (idempotence)", + ); + assertEquals(second.applied, false); + }, + ); + }, +}); + +Deno.test( + "ReconcileFromDisk: deleted local source → tombstoned", + async () => { + await withFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + const ts = Date.now(); + const typeId = `@test/reconcile-del-${ts}`; + const modelPath = join(repoDir, "extensions", "models", "del.ts"); + await Deno.writeTextFile(modelPath, MINIMAL_MODEL_CODE(typeId)); + + const service = new 
ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + + await service.execute(); + + await Deno.remove(modelPath); + const result = await service.execute(); + + const tombstone = result.transitions.find( + (t) => + t.toState === "Tombstoned" || t.toState === "OrphanedBundleOnly", + ); + assertEquals( + tombstone !== undefined, + true, + "deleted source must produce a tombstone/orphan transition", + ); + assertEquals(result.applied, true); + }, + ); + }, +); + +Deno.test( + "ReconcileFromDisk: transition-count guardrail aborts on mass tombstone", + async () => { + await withFixtureRepo( + async ({ repoDir, repository, catalog, lockfileRepository }) => { + // Seed the catalog with 10 fake rows directly. Then run + // reconcile with an empty disk — all 10 would tombstone, + // exceeding the 50% guardrail (10/10 = 100%). + for (let i = 0; i < 10; i++) { + catalog.upsertWithIdentity({ + source_path: join( + repoDir, + "extensions", + "models", + `fake${i}.ts`, + ), + type_normalized: `@test/fake${i}`, + kind: "model", + bundle_path: "", + version: "0.0.0", + description: "", + extends_type: "", + source_mtime: "", + source_fingerprint: "fp", + state: "Indexed", + extension_name: `@local/${pathBasename(repoDir)}`, + extension_version: "0.0.0", + }); + } + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + const result = await service.execute(); + + assertEquals( + result.applied, + false, + "guardrail must abort when > 50% of rows would transition", + ); + assertGreater(result.transitions.length, 0); + }, + ); + }, +); + +Deno.test( + "ReconcileFromDisk: ReconcileTransition has structured fields", + async () => { + await withFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + const ts = Date.now(); + const typeId = `@test/reconcile-struct-${ts}`; + const modelPath = join( + repoDir, + "extensions", + 
"models", + "struct.ts", + ); + await Deno.writeTextFile(modelPath, MINIMAL_MODEL_CODE(typeId)); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + const result = await service.execute({ dryRun: true }); + + for (const t of result.transitions) { + assertEquals(typeof t.source.canonicalPath, "string"); + assertEquals(typeof t.toState, "string"); + assertEquals(typeof t.reason, "string"); + } + }, + ); + }, +); + +// -- Regression tests for the three bug classes W3 structurally fixes ----- + +Deno.test({ + name: + "ReconcileFromDisk regression #208: broken transitive dep → stable state, no rebundle loop", + ignore: Deno.build.os === "windows", + fn: async () => { + await withFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + const ts = Date.now(); + const typeId = `@test/dep-break-${ts}`; + const modelsDir = join(repoDir, "extensions", "models"); + const entry = join(modelsDir, "entry.ts"); + await Deno.writeTextFile(entry, MINIMAL_MODEL_CODE(typeId)); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + + // First reconcile: everything works. + const first = await service.execute(); + assertEquals(first.applied, true); + + // Now corrupt the file — make it import a nonexistent dep. + // This changes the fingerprint, and bundleAndIndexOne will + // either fail or skip it. + await Deno.writeTextFile( + entry, + `import { x } from './nonexistent.ts';\nexport const broken = x;\n`, + ); + + // Second reconcile: detects fingerprint change, tries to + // rebundle. Either BundleBuildFailed or skipped (null return). + await service.execute(); + + // Third reconcile: whatever state we're in, it must be STABLE. 
+ const third = await service.execute(); + assertEquals( + third.transitions.length, + 0, + "#208: broken dep state must be stable — no rebundle loop", + ); + }, + ); + }, +}); + +Deno.test({ + name: + "ReconcileFromDisk regression #209: schema-invalid extension → stable state, no rebundle loop", + ignore: Deno.build.os === "windows", + fn: async () => { + await withFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + const modelsDir = join(repoDir, "extensions", "models"); + // This file exports something that isn't a valid model schema. + // bundleAndIndexOne will fail (either during bundle or validation). + const file = join(modelsDir, "broken_schema.ts"); + await Deno.writeTextFile( + file, + 'export const model = { not: "a valid model schema" };\n', + ); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + + // First reconcile: bundleAndIndexOne fails → BundleBuildFailed. + await service.execute(); + // The file might be skipped entirely (bundleAndIndexOne returns + // null for non-model exports) or fail. Either way, subsequent + // reconciles must not loop. + + // Second reconcile: stable state → zero transitions. 
+ const result = await service.execute(); + assertEquals( + result.transitions.length, + 0, + "#209: schema-invalid extension must converge to stable state", + ); + }, + ); + }, +}); + +Deno.test({ + name: + "ReconcileFromDisk regression #212: cached bundle missing → rebundles once, not in a loop", + ignore: Deno.build.os === "windows", + fn: async () => { + await withFixtureRepo( + async ({ repoDir, repository, lockfileRepository, catalog }) => { + const ts = Date.now(); + const typeId = `@test/reconcile-212-${ts}`; + const modelPath = join( + repoDir, + "extensions", + "models", + "missing_bundle.ts", + ); + await Deno.writeTextFile(modelPath, MINIMAL_MODEL_CODE(typeId)); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + + // First reconcile: indexes the model. + const first = await service.execute(); + assertEquals(first.applied, true); + + // Find the bundle path from the catalog and delete it. + const rows = catalog.findAll(); + const row = rows.find((r) => + r.source_path.includes("missing_bundle.ts") + ); + assertEquals(row !== undefined, true, "row must exist in catalog"); + if (row?.bundle_path) { + try { + await Deno.remove(row.bundle_path); + } catch { + // Bundle might not exist on disk (Deno bundle output location) + } + } + + // Second reconcile: detects issue, re-indexes. + await service.execute(); + // May or may not produce transitions depending on whether + // fingerprint changed. The key assertion is the THIRD run. + + // Third reconcile: must be stable — zero transitions. 
+ const third = await service.execute(); + assertEquals( + third.transitions.length, + 0, + "#212: after rebundle, state must be stable (no loop)", + ); + }, + ); + }, +}); + +// -- Pulled extension reconcile matrix ------------------------------------ + +async function withPulledFixtureRepo( + fn: (args: { + repoDir: string; + repository: ExtensionRepository; + catalog: ExtensionCatalogStore; + lockfileRepository: LockfileRepository; + }) => Promise, + lockfileContent: Record, +): Promise { + const repoDir = await Deno.makeTempDir({ + prefix: "swamp_reconcile_pulled_", + }); + await ensureDir(join(repoDir, ".swamp")); + await ensureDir(join(repoDir, "extensions", "models")); + const dbPath = join(repoDir, ".swamp", "_extension_catalog.db"); + const lockfilePath = join( + repoDir, + "extensions", + "models", + "upstream_extensions.json", + ); + await Deno.writeTextFile(lockfilePath, JSON.stringify(lockfileContent)); + + const catalog = new ExtensionCatalogStore(dbPath); + const lockfileRepository = await LockfileRepository.create(lockfilePath); + const repository = new ExtensionRepository({ + catalog, + lockfileRepository, + repoRoot: repoDir, + }); + + try { + await fn({ repoDir, repository, catalog, lockfileRepository }); + } finally { + catalog.close(); + if (Deno.build.os === "windows") { + await Deno.remove(repoDir, { recursive: true }).catch(() => {}); + } else { + await Deno.remove(repoDir, { recursive: true }); + } + } +} + +Deno.test( + "ReconcileFromDisk pulled: new source on disk + lockfile entry → indexed", + async () => { + const ts = Date.now(); + const extName = `@test/pulled-new-${ts}`; + const typeId = `@test/pulled-model-${ts}`; + await withPulledFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + // Stage a model in the pulled-extensions directory. 
+ const extRoot = join( + swampPath(repoDir, "pulled-extensions"), + extName, + "models", + ); + await ensureDir(extRoot); + await Deno.writeTextFile( + join(extRoot, "noop.ts"), + MINIMAL_MODEL_CODE(typeId), + ); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + const result = await service.execute(); + + const indexed = result.transitions.find( + (t) => t.toState === "Indexed", + ); + assertEquals( + indexed !== undefined, + true, + "pulled: new source must be indexed", + ); + assertEquals(result.applied, true); + }, + { [extName]: { version: "1.0.0", files: [] } }, + ); + }, +); + +Deno.test( + "ReconcileFromDisk pulled: source missing + no lockfile entry → tombstoned (orphan)", + async () => { + const ts = Date.now(); + const extName = `@test/pulled-orphan-${ts}`; + await withPulledFixtureRepo( + async ({ repoDir, repository, catalog, lockfileRepository }) => { + // Seed a catalog row for a pulled extension that has NO + // lockfile entry and NO on-disk files. This simulates an + // orphan from a failed rm. 
+ const extRoot = join( + swampPath(repoDir, "pulled-extensions"), + extName, + ); + catalog.upsertWithIdentity({ + source_path: join(extRoot, "models", "ghost.ts"), + type_normalized: `${extName}/ghost`, + kind: "model", + bundle_path: "", + version: "1.0.0", + description: "", + extends_type: "", + source_mtime: "", + source_fingerprint: "fp", + state: "Indexed", + extension_name: extName, + extension_version: "1.0.0", + }); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + const result = await service.execute(); + + const tombstone = result.transitions.find( + (t) => t.toState === "Tombstoned", + ); + assertEquals( + tombstone !== undefined, + true, + "pulled orphan must be tombstoned", + ); + assertEquals(result.applied, true); + }, + {}, + ); + }, +); + +Deno.test( + "ReconcileFromDisk pulled: source missing + lockfile present → EntryPointUnreadable", + async () => { + const ts = Date.now(); + const extName = `@test/pulled-missing-${ts}`; + const typeId = `@test/pulled-missing-model-${ts}`; + await withPulledFixtureRepo( + async ({ repoDir, repository, lockfileRepository }) => { + // Stage, reconcile to index, then delete the source file. + const extRoot = join( + swampPath(repoDir, "pulled-extensions"), + extName, + "models", + ); + await ensureDir(extRoot); + await Deno.writeTextFile( + join(extRoot, "noop.ts"), + MINIMAL_MODEL_CODE(typeId), + ); + + const service = new ReconcileFromDiskService({ + denoRuntime: testDenoRuntime, + repository, + lockfileRepository, + repoDir, + }); + + // First reconcile: indexes it. + await service.execute(); + + // Delete the source file (but lockfile entry remains). + await Deno.remove(join(extRoot, "noop.ts")); + + // Second reconcile: source missing + lockfile present → + // EntryPointUnreadable. 
+ const result = await service.execute(); + const unreadable = result.transitions.find( + (t) => t.toState === "EntryPointUnreadable", + ); + assertEquals( + unreadable !== undefined, + true, + "pulled: missing source with lockfile entry → EntryPointUnreadable", + ); + }, + { [extName]: { version: "1.0.0", files: [] } }, + ); + }, +); diff --git a/src/libswamp/mod.ts b/src/libswamp/mod.ts index ddd4091c..2e86ea35 100644 --- a/src/libswamp/mod.ts +++ b/src/libswamp/mod.ts @@ -672,6 +672,13 @@ export { } from "./extensions/remove_extension_service.ts"; export { UpgradeExtensionService } from "./extensions/upgrade_extension_service.ts"; +// W3 reconcile service — post-hoc state repair from disk. +export { + ReconcileFromDiskService, + type ReconcileResult, + type ReconcileTransition, +} from "./extensions/reconcile_from_disk_service.ts"; + // Extension layout detection export { classifyExtensionFile,