diff --git a/packages/analysis/src/dead-code.ts b/packages/analysis/src/dead-code.ts index 0e41a082..3b2d6701 100644 --- a/packages/analysis/src/dead-code.ts +++ b/packages/analysis/src/dead-code.ts @@ -239,7 +239,7 @@ function compareDeadSymbol(a: DeadSymbol, b: DeadSymbol): number { } async function fetchSymbols(store: IGraphStore): Promise { - // AC-A-6b: typed `listNodes({kinds: SYMBOL_KINDS})` replaces a `WHERE kind + // Typed `listNodes({kinds: SYMBOL_KINDS})` replaces a `WHERE kind // IN (...)` raw SELECT. The narrowed kind set guarantees every returned // node carries `start_line`/`is_exported` (Function/Method/etc. are all // LocatedNodes), so the JS-side coercion is a one-shot cast. @@ -271,7 +271,7 @@ async function fetchReferrers( ids: readonly string[], ): Promise { if (ids.length === 0) return []; - // AC-A-6b: typed `listEdges({types, toIds})` replaces a raw `WHERE r.to_id + // Typed `listEdges({types, toIds})` replaces a raw `WHERE r.to_id // IN (...) AND r.type IN (...)` SELECT joined to nodes. The TS-side join // hydrates source-file metadata via `listNodes({ids})`. const edges = await store.listEdges({ @@ -301,7 +301,7 @@ async function fetchCommunityMembership( ids: readonly string[], ): Promise { if (ids.length === 0) return []; - // AC-A-6b: typed `listEdgesByType("MEMBER_OF", {fromIds})` replaces a + // Typed `listEdgesByType("MEMBER_OF", {fromIds})` replaces a // `WHERE type = 'MEMBER_OF' AND from_id IN (...)` raw SELECT. const edges = await store.listEdgesByType("MEMBER_OF", { fromIds: ids }); const out: MembershipRow[] = []; diff --git a/packages/analysis/src/detect-changes.ts b/packages/analysis/src/detect-changes.ts index 0a0d5413..31256510 100644 --- a/packages/analysis/src/detect-changes.ts +++ b/packages/analysis/src/detect-changes.ts @@ -101,7 +101,7 @@ function hunkOverlaps( } async function symbolsForFile(store: IGraphStore, filePath: string): Promise { - // AC-A-6b: typed `listNodes({filePath})` replaces a `WHERE file_path = ? 
+ // Typed `listNodes({filePath})` replaces a `WHERE file_path = ? // AND kind NOT IN ('File','Folder') AND start_line IS NOT NULL AND // end_line IS NOT NULL` raw SELECT. The finder narrows to one file at the // adapter layer; the kind exclusion + line-presence guard run in JS. @@ -135,7 +135,7 @@ async function processesForSymbols( // participates in the process. Find the set of distinct Process ids that // have an edge into any of the affected symbols. // - // AC-A-6b: typed `listEdgesByType("PROCESS_STEP", {toIds})` replaces the + // Typed `listEdgesByType("PROCESS_STEP", {toIds})` replaces the // raw `WHERE r.type = 'PROCESS_STEP' AND r.to_id IN (...)` SELECT. The // `kind = 'Process'` predicate from the JOIN is enforced when we hydrate // the process metadata below. @@ -145,7 +145,7 @@ async function processesForSymbols( ); if (candidateProcessIds.length === 0) return []; - // AC-A-6b: typed `listNodes({ids, kinds:["Process"]})` replaces the + // Typed `listNodes({ids, kinds:["Process"]})` replaces the // `WHERE id IN (...) AND kind = 'Process'` lookup. const processNodes = await store.listNodes({ ids: candidateProcessIds, @@ -159,7 +159,7 @@ async function processesForSymbols( .filter((s) => s.length > 0); const entryMap = new Map(); if (entryIds.length > 0) { - // AC-A-6b: typed `listNodes({ids})` replaces the bulk `WHERE id IN (...)` + // Typed `listNodes({ids})` replaces the bulk `WHERE id IN (...)` // entry-point file_path lookup. const entryNodes = await store.listNodes({ ids: entryIds }); for (const node of entryNodes) { diff --git a/packages/analysis/src/group/__fixtures__/two-repo-contracts.ts b/packages/analysis/src/group/__fixtures__/two-repo-contracts.ts index e6900eb5..8501478b 100644 --- a/packages/analysis/src/group/__fixtures__/two-repo-contracts.ts +++ b/packages/analysis/src/group/__fixtures__/two-repo-contracts.ts @@ -1,5 +1,5 @@ /** - * Synthetic 2-repo cross-repo-contracts fixture (AC-M6-5 quickcheck). 
+ * Synthetic 2-repo cross-repo-contracts quickcheck fixture. * * Models a producer/consumer pair across two repos in the same group: * - `api-svc` — HTTP route producer + gRPC service producer @@ -18,9 +18,9 @@ * consumer to producer; consumer_of points from producer to consumer) * 4. Two runs on the same input are byte-identical (determinism contract) * - * All `repo_uri` values follow the Sourcegraph host/path scheme codified - * by AC-M6-1 (`packages/core-types/src/nodes.ts:524-552`) — see ADR 0012 - * for the rationale. + * All `repo_uri` values follow the Sourcegraph host/path scheme — see + * `packages/core-types/src/nodes.ts` for the typed RepoNode and ADR + * 0012 for the rationale. */ import type { ComputeCrossRepoLinksOpts } from "../cross-repo-links.js"; diff --git a/packages/analysis/src/group/cross-repo-links-quickcheck.test.ts b/packages/analysis/src/group/cross-repo-links-quickcheck.test.ts index fe781a99..bf84c08d 100644 --- a/packages/analysis/src/group/cross-repo-links-quickcheck.test.ts +++ b/packages/analysis/src/group/cross-repo-links-quickcheck.test.ts @@ -1,5 +1,5 @@ /** - * Quickcheck — populated-case 2-repo fixture (AC-M6-5). + * Quickcheck — populated-case 2-repo fixture. * * The existing `cross-repo-links.test.ts` covers the empty + alpha-sort * + dedup + skip + error paths. This file pins the populated-case diff --git a/packages/analysis/src/impact.ts b/packages/analysis/src/impact.ts index 97f2d230..3f27fc13 100644 --- a/packages/analysis/src/impact.ts +++ b/packages/analysis/src/impact.ts @@ -81,7 +81,7 @@ async function resolveByName( name: string, filters: { readonly filePath?: string; readonly kind?: string }, ): Promise { - // AC-A-6b: typed finder replaces a `WHERE name = ?` raw SELECT. + // Typed finder replaces a `WHERE name = ?` raw SELECT. 
const nodes = await store.listNodesByName(name); const all = nodes.map(nodeToNodeRef); // Prefer resolved nodes over unresolved placeholder Property rows when both @@ -102,7 +102,7 @@ async function resolveByName( } async function resolveById(store: IGraphStore, id: string): Promise { - // AC-A-6b: typed `listNodes({ids})` replaces a `WHERE id = ? LIMIT 1` raw SELECT. + // Typed `listNodes({ids})` replaces a `WHERE id = ? LIMIT 1` raw SELECT. const nodes = await store.listNodes({ ids: [id], limit: 1 }); const first = nodes[0]; return first ? nodeToNodeRef(first) : undefined; @@ -124,7 +124,7 @@ async function hydrateNodes( ): Promise> { const out = new Map(); if (ids.length === 0) return out; - // AC-A-6b: typed `listNodes({ids})` replaces a `WHERE id IN (?,?,...)` raw SELECT. + // Typed `listNodes({ids})` replaces a `WHERE id IN (?,?,...)` raw SELECT. // The adapter de-dupes the input set internally so callers can pass repeats. const nodes = await store.listNodes({ ids }); for (const node of nodes) { @@ -185,10 +185,11 @@ async function relationsByEdge( toIds.add(to); } if (fromIds.size === 0 || toIds.size === 0) return map; - // AC-A-6b: typed `listEdges({fromIds, toIds})` replaces a `WHERE from_id IN - // (?) AND to_id IN (?)` raw SELECT. The result is filtered down to the - // exact predecessor → successor pairs we walked, since `listEdges` returns - // every edge whose endpoints fall in the AND-combined sets. + // Typed `listEdges({fromIds, toIds})` replaces a `WHERE from_id IN + // (?) AND to_id IN (?)` raw SELECT. The result is filtered down to + // the exact predecessor → successor pairs we walked, since + // `listEdges` returns every edge whose endpoints fall in the + // AND-combined sets. 
const edges = await store.listEdges({ fromIds: [...fromIds], toIds: [...toIds], @@ -238,7 +239,7 @@ async function fetchAffectedModules( ): Promise { if (allIds.length === 0) return []; const unique = Array.from(new Set(allIds)); - // AC-A-6b: typed `listEdgesByType("MEMBER_OF", {fromIds})` replaces a + // Typed `listEdgesByType("MEMBER_OF", {fromIds})` replaces a // `WHERE type = 'MEMBER_OF' AND from_id IN (?)` raw SELECT. const membership = await store.listEdgesByType("MEMBER_OF", { fromIds: unique }); if (membership.length === 0) return []; @@ -256,9 +257,10 @@ async function fetchAffectedModules( if (communityHits.size === 0) return []; const communityIds = [...communityHits.keys()]; - // AC-A-6b: typed `listNodes({ids, kinds:["Community"]})` replaces a raw - // SELECT joined to the kind discriminator. We narrow to Community + cast - // because the `inferred_label` field lives on CommunityNode only. + // Typed `listNodes({ids, kinds:["Community"]})` replaces a raw + // SELECT joined to the kind discriminator. We narrow to Community + + // cast because the `inferred_label` field lives on CommunityNode + // only. const labelNodes = await store.listNodes({ ids: communityIds, kinds: ["Community"] }); const labelById = new Map(); for (const node of labelNodes) { @@ -305,11 +307,11 @@ async function fetchAffectedProcesses( // Process's entry point, then match Process nodes whose `entry_point_id` // equals any reached ancestor (including the target itself). // - // AC-A-6b: typed `traverseAncestors` replaces the `WITH RECURSIVE + // Typed `traverseAncestors` replaces the `WITH RECURSIVE // member_ancestors USING KEY (ancestor_id)` raw query. // `listNodesByEntryPoint(id)` replaces the `WHERE entry_point_id = ?` - // join. Each ancestor lookup is an independent traversal, so we run them - // in parallel and dedupe the union. + // join. Each ancestor lookup is an independent traversal, so we run + // them in parallel and dedupe the union. 
const ancestorIds = new Set(); for (const sid of symbolIds) ancestorIds.add(sid); // Limit per-target traversal to depth 8 to match the original diff --git a/packages/analysis/src/page-rank.test.ts b/packages/analysis/src/page-rank.test.ts index 46266e1e..9e966257 100644 --- a/packages/analysis/src/page-rank.test.ts +++ b/packages/analysis/src/page-rank.test.ts @@ -37,8 +37,8 @@ test("pageRank: 10-node fixture — mass concentrates on node C, sums to ~1", () assert.equal(adj.nodes.length, 10); const pr = pageRank(adj); const total = pr.reduce((acc, v) => acc + v, 0); - // Fixed 50 iterations is loose convergence by design (W-M5-3 bans - // tolerance-based termination); the sum stays ~1 within float + // Fixed 50 iterations is loose convergence by design + // (tolerance-based termination is forbidden); the sum stays ~1 within float // noise on a balanced graph. assert.ok(Math.abs(total - 1) < 1e-6, `pagerank sum should be ~1.0; got ${total}`); // C has 4 inbound edges (B->C plus E, G, I -> C); the other nodes @@ -60,7 +60,7 @@ test("pageRank: determinism snapshot — hex fingerprint is stable", () => { // If this hex changes, byte-identity of the kernel has drifted. // Investigate: did damping, iteration count, dangling-mass math, // or edge iteration order change? NONE of those are allowed to - // shift without an explicit, documented rev (see W-M5-3). + // shift without an explicit, documented rev. // // Captured on V8 (Node 24) from the lifted kernel. Little-endian // Float64 bytes for the 10-node PageRank output, in adj.nodes diff --git a/packages/analysis/src/page-rank.ts b/packages/analysis/src/page-rank.ts index 2f2dff3d..5c3982c9 100644 --- a/packages/analysis/src/page-rank.ts +++ b/packages/analysis/src/page-rank.ts @@ -1,11 +1,10 @@ /** * Request-time PageRank kernel for `@opencodehub/analysis`. * - * Lifted verbatim from `packages/scip-ingest/src/materialize.ts` - * (AC-M5-2). 
The algorithm uses fixed iterations + fixed damping — - * tolerance-based convergence is banned by W-M5-3, because any - * numerical drift breaks the byte-identity guarantee that the - * AC-M5-4 skeleton BOM item + future graphHash depend on. + * The algorithm uses fixed iterations + fixed damping — + * tolerance-based convergence is forbidden because any numerical drift + * breaks the byte-identity guarantee that the skeleton BOM item + + * future graphHash depend on. * * The kernel operates on an adjacency-list snapshot built from a * stream of directed edges. scip-ingest's `DerivedEdge` is a @@ -35,9 +34,8 @@ export interface Adjacency { * preserves the edge iteration order within each outgoing row so the * PageRank fold across `outAdj[u]` is reproducible. * - * Preserves the byte-identity of the pre-lift implementation (see - * `packages/scip-ingest/src/materialize.ts@` before - * AC-M5-2). + * Preserves the byte-identity of the implementation that originally + * lived in `packages/scip-ingest/src/materialize.ts`. */ export function buildAdjacency(edges: readonly EdgeLike[]): Adjacency { const nodeSet = new Set(); @@ -81,7 +79,7 @@ export function buildAdjacency(edges: readonly EdgeLike[]): Adjacency { * Compute PageRank over a directed, weighted adjacency. * * Fixed iterations (default 50) and fixed damping (default 0.85) — - * NO tolerance-based convergence (W-M5-3). Returns a Float64Array + * NO tolerance-based convergence. Returns a Float64Array * indexed by `adj.nodes` order. 
* * Dangling-mass distribution: at every iteration, mass held on diff --git a/packages/analysis/src/rename.ts b/packages/analysis/src/rename.ts index a3cb803e..23d9c125 100644 --- a/packages/analysis/src/rename.ts +++ b/packages/analysis/src/rename.ts @@ -49,7 +49,7 @@ async function findCandidates( symbolName: string, scopeFile: string | undefined, ): Promise { - // AC-A-6b: typed `listNodesByName(name, {filePath})` replaces a raw + // Typed `listNodesByName(name, {filePath})` replaces a raw // `WHERE name = ? [AND file_path = ?]` SELECT. The finder returns full // GraphNodes; we map onto the local SymbolLocation shape so downstream // rename logic stays unchanged. @@ -78,7 +78,7 @@ async function referrersOf( store: IGraphStore, targetId: string, ): Promise { - // AC-A-6b: typed `listEdges({types, toIds})` replaces a raw `WHERE + // Typed `listEdges({types, toIds})` replaces a raw `WHERE // r.to_id = ? AND r.type IN (...)` SELECT joined to nodes. The TS-side // join hydrates referrer node metadata via `listNodes({ids})`. const edges = await store.listEdges({ @@ -106,7 +106,7 @@ async function referrersOf( } async function allRepoFiles(store: IGraphStore): Promise { - // AC-A-6b: typed `listNodesByKind("File")` replaces a `SELECT DISTINCT + // Typed `listNodesByKind("File")` replaces a `SELECT DISTINCT // file_path FROM nodes WHERE kind = 'File'` raw SELECT. const files = await store.listNodesByKind("File"); const seen = new Set(); diff --git a/packages/analysis/src/risk-snapshot.ts b/packages/analysis/src/risk-snapshot.ts index 3bac3357..648ff1b9 100644 --- a/packages/analysis/src/risk-snapshot.ts +++ b/packages/analysis/src/risk-snapshot.ts @@ -117,7 +117,7 @@ export async function buildRiskSnapshot( ): Promise { const perCommunityRisk: Record = {}; - // AC-A-6b: typed `listNodesByKind("Community")` replaces a `WHERE kind = + // Typed `listNodesByKind("Community")` replaces a `WHERE kind = // 'Community'` raw SELECT. 
The finder rehydrates {@link CommunityNode} // directly so callers consume `inferredLabel`/`symbolCount`/`cohesion` via // typed fields rather than column casts. @@ -141,7 +141,7 @@ export async function buildRiskSnapshot( // Community nodes are optional. } - // AC-A-6b: typed `countNodesByKind` aggregates every kind into a single + // Typed `countNodesByKind` aggregates every kind into a single // round-trip; we sum the result to mirror the legacy `COUNT(*) FROM nodes`. // `countEdgesByType` does the same for relations. let totalNodeCount = 0; @@ -165,7 +165,7 @@ export async function buildRiskSnapshot( note: 0, }; try { - // AC-A-6b: typed `listFindings()` replaces the + // Typed `listFindings()` replaces the // `WHERE kind = 'Finding' GROUP BY severity` aggregate. The histogram is // built JS-side; the finding row count never blows up because Finding // nodes are bounded by the scanner output (typically O(100s)). diff --git a/packages/analysis/src/test-utils.ts b/packages/analysis/src/test-utils.ts index 14024c7a..ca04f74c 100644 --- a/packages/analysis/src/test-utils.ts +++ b/packages/analysis/src/test-utils.ts @@ -3,10 +3,9 @@ * settings as production code, and so tests can import it without reaching * across the dist boundary. * - * `FakeStore` is an in-memory stand-in for {@link IGraphStore}. AC-A-6b - * removed the SQL-regex dispatcher (formerly ~270 lines) and replaced it - * with direct implementations of every typed finder the analysis/ surface - * consumes — `listNodes`, `listNodesByKind`, `listNodesByName`, + * `FakeStore` is an in-memory stand-in for {@link IGraphStore} that + * implements every typed finder the analysis/ surface consumes — + * `listNodes`, `listNodesByKind`, `listNodesByName`, * `listNodesByEntryPoint`, `listEdges`, `listEdgesByType`, `listFindings`, * `countNodesByKind`, `countEdgesByType`, `traverseAncestors`, * `traverseDescendants`, `traverse`, plus the ITemporalStore-compat noops. 
diff --git a/packages/analysis/src/verdict.ts b/packages/analysis/src/verdict.ts index 0867dcff..468e5e4b 100644 --- a/packages/analysis/src/verdict.ts +++ b/packages/analysis/src/verdict.ts @@ -517,7 +517,7 @@ async function collectCommunities( ): Promise { if (symbolIds.length === 0) return; try { - // AC-A-6b: typed `listEdgesByType("MEMBER_OF", {fromIds})` replaces a + // Typed `listEdgesByType("MEMBER_OF", {fromIds})` replaces a // `WHERE r.type = 'MEMBER_OF' AND r.from_id IN (...)` raw SELECT. The // community label join becomes a TS-side `listNodes({ids})` lookup. const edges = await store.listEdgesByType("MEMBER_OF", { fromIds: symbolIds }); @@ -550,7 +550,7 @@ async function collectFindings( if (symbolIds.length > 0) { try { - // AC-A-6b: typed `listEdgesByType("FOUND_IN", {toIds})` replaces a + // Typed `listEdgesByType("FOUND_IN", {toIds})` replaces a // `WHERE r.type = 'FOUND_IN' AND r.to_id IN (...)` raw SELECT. The // join to `nodes WHERE kind = 'Finding'` becomes a typed // `listFindings()` filtered by id post-fetch. @@ -581,7 +581,7 @@ async function collectFindings( // to a specific symbol. if (files.length > 0) { try { - // AC-A-6b: typed `listFindings()` replaces a + // Typed `listFindings()` replaces a // `WHERE kind = 'Finding' AND file_path IN (...)` raw SELECT. The // file membership filter runs JS-side; finding rows are bounded by the // scanner output (typically O(100s)) so the filter is cheap. @@ -633,7 +633,7 @@ async function collectFileMeta( if (files.length === 0) return out; const fileSet = new Set(files); try { - // AC-A-6b: typed `listNodesByKind("File")` replaces a + // Typed `listNodesByKind("File")` replaces a // `WHERE kind = 'File' AND file_path IN (...)` raw SELECT. The file // membership filter runs JS-side because `listNodesByKind` exposes a // single-file-path option only. 
@@ -673,7 +673,7 @@ async function collectFileMeta( // separate set of finder calls because `cyclomatic_complexity` is // populated on child symbol rows, not on the File row itself. // - // AC-A-6b: typed `listNodesByKind` per callable kind replaces a + // Typed `listNodesByKind` per callable kind replaces a // `WHERE kind IN ('Function','Method','Constructor') AND file_path IN // (...) GROUP BY file_path MAX(cyclomatic_complexity)` aggregate. The MAX // reduction runs JS-side as a single linear sweep. @@ -709,7 +709,7 @@ async function collectReviewers( // Build a list of File node ids — the form `File::`. const fileNodeIds = files.map((f) => `File:${f}:${f}`); try { - // AC-A-6b: typed `listEdgesByType("OWNED_BY", {fromIds})` replaces a + // Typed `listEdgesByType("OWNED_BY", {fromIds})` replaces a // `WHERE r.type = 'OWNED_BY' AND r.from_id IN (...)` raw SELECT. The // SUM(confidence) GROUP BY contributor + JOIN to nodes both run TS-side // — `listNodes({ids})` materializes the contributor metadata. diff --git a/packages/cli/src/commands/analyze.test.ts b/packages/cli/src/commands/analyze.test.ts index d7253343..a4a9f330 100644 --- a/packages/cli/src/commands/analyze.test.ts +++ b/packages/cli/src/commands/analyze.test.ts @@ -163,7 +163,7 @@ test("resolveSummariesEnabled: explicit --no-summaries turns it off", () => { assert.equal(resolveSummariesEnabled(false, {}), false); }); -test("resolveSummariesEnabled: CODEHUB_BEDROCK_DISABLED=1 kills the phase (SUM-S-001)", () => { +test("resolveSummariesEnabled: CODEHUB_BEDROCK_DISABLED=1 kills the phase", () => { assert.equal(resolveSummariesEnabled(undefined, { CODEHUB_BEDROCK_DISABLED: "1" }), false); }); @@ -182,7 +182,7 @@ test("resolveSummariesEnabled: CODEHUB_BEDROCK_DISABLED=0 does not kill the phas }); // --------------------------------------------------------------------------- -// Dirty-tree bypass on the analyze fast-path (T-M1-1 / EARS requirement). +// Dirty-tree bypass on the analyze fast-path. 
// --------------------------------------------------------------------------- test("checkFastPath: dirty working tree bypasses the fast-path even when HEAD matches", async () => { diff --git a/packages/cli/src/commands/analyze.ts b/packages/cli/src/commands/analyze.ts index f54c4cfa..5eb5b032 100644 --- a/packages/cli/src/commands/analyze.ts +++ b/packages/cli/src/commands/analyze.ts @@ -212,11 +212,11 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi ? await openSummaryCacheAdapter(repoPath) : undefined; - // Mirror the same pattern for the embeddings phase's content-hash skip - // (T-M1-3). Only open when `--embeddings` is on AND `--force` is off — - // force re-embeds everything, so the adapter would do no useful work. - // When the prior DB is absent the adapter returns undefined and the - // phase degrades to "every chunk is new". + // Mirror the same pattern for the embeddings phase's content-hash skip. + // Only open when `--embeddings` is on AND `--force` is off — force + // re-embeds everything, so the adapter would do no useful work. When the + // prior DB is absent the adapter returns undefined and the phase + // degrades to "every chunk is new". const embeddingHashAdapter = opts.embeddings === true && opts.force !== true ? await openEmbeddingHashCacheAdapter(repoPath) @@ -518,8 +518,8 @@ export async function loadPreviousGraph( /** * Resolve the effective `summaries` flag, honoring the - * `CODEHUB_BEDROCK_DISABLED=1` env kill-switch (SUM-S-001) and the P04 - * default-on contract (absent flag → enabled). + * `CODEHUB_BEDROCK_DISABLED=1` env kill-switch and the P04 default-on + * contract (absent flag → enabled). * * Truth table (post-P04): * - env var set + flag undefined → false (kill-switch wins) @@ -654,7 +654,7 @@ async function openSummaryCacheAdapter( /** * Open a read-only DuckDB store scoped to the `embeddings` content-hash - * probe (T-M1-3). The returned adapter's `list()` loads every prior + * probe. 
The returned adapter's `list()` loads every prior * `(granularity, nodeId, chunkIndex) → content_hash` row in a single * round-trip so the embeddings phase can skip chunks whose source text is * unchanged across runs. Returns `undefined` when the store cannot be @@ -704,11 +704,11 @@ function fileFromNodeId(id: string): string | undefined { // `PREV_NODE_SELECT_COLUMNS` was the explicit column whitelist used by the // legacy SQL `SELECT * FROM nodes` round-trip in {@link loadPreviousGraph}. -// AC-A-6e migrated that read path to `store.graph.listNodes()`, which -// already returns rehydrated `GraphNode` objects, so the constant is no -// longer load-bearing here. The `rowToGraphNode` / `rowToCodeRelation` -// adapters below remain exported for external consumers that hand-roll -// over the DuckDB wide-column shape. +// That read path now goes through `store.graph.listNodes()`, which already +// returns rehydrated `GraphNode` objects, so the constant is no longer +// load-bearing here. The `rowToGraphNode` / `rowToCodeRelation` adapters +// below remain exported for external consumers that hand-roll over the +// DuckDB wide-column shape. const NODE_KIND_SET: ReadonlySet = new Set(NODE_KINDS); const RELATION_TYPE_SET: ReadonlySet = new Set(RELATION_TYPES); diff --git a/packages/cli/src/commands/code-pack.ts b/packages/cli/src/commands/code-pack.ts index 4428fc45..b3d5806a 100644 --- a/packages/cli/src/commands/code-pack.ts +++ b/packages/cli/src/commands/code-pack.ts @@ -12,14 +12,14 @@ * - `pack` (DEFAULT) — `@opencodehub/pack`'s `generatePack`. Opens a * read-only graph store via `openStore({ readOnly: true })` and walks * the indexed graph to produce the 8 mandatory BOM items + manifest + - * optional Parquet embeddings sidecar. AC-A-4 relocated the sidecar - * emitter into pack/; cli/ passes the composed `Store` and pack + * optional Parquet embeddings sidecar. 
The sidecar emitter lives in + * `@opencodehub/pack`; cli/ passes the composed `Store` and pack * dispatches on `store.backend` (DuckDB COPY for `duck`, degraded * stamp for `lbug` v1). * - `repomix` — legacy single-file snapshot via `npx repomix`. Retained - * under an opt-in flag for one milestone (drop deferred to M7 per - * spec 005 Q-DELTA-6). Internally delegates to `runPack` so the - * repomix shell-out is implemented exactly once. + * under an opt-in flag for one milestone before removal. Internally + * delegates to `runPack` so the repomix shell-out is implemented + * exactly once. * * The CLI surface is: * @@ -143,12 +143,13 @@ async function runPackEngine(repoPath: string, args: CodePackArgs): Promise`. The fake + * The command consumes the composed `Store` envelope and routes graph + * reads through `store.graph.`. The fake * below implements just the finders `runContext` calls * (`listNodes`, `listNodesByName`, `listEdgesByType`, `traverse`, * `search`, `close`) over an in-memory fixture, so the tests stay tied diff --git a/packages/cli/src/commands/context.ts b/packages/cli/src/commands/context.ts index 58244423..077f698e 100644 --- a/packages/cli/src/commands/context.ts +++ b/packages/cli/src/commands/context.ts @@ -9,7 +9,7 @@ * concept-phrase queries still work; when it yields more than one row * and no disambiguator narrows the set, we surface the candidate list. * - * Per AC-A-6e: this command is graph-only — the lifecycle owner + * This command is graph-only — the lifecycle owner * (`openStoreForCommand`) constructs the composed `Store` envelope, but * `runContext` reaches through `store.graph` for every read so the * `IGraphStore` typed-finder surface stays the only contract. 
diff --git a/packages/cli/src/commands/doctor.test.ts b/packages/cli/src/commands/doctor.test.ts index 8ff0164d..02dec63a 100644 --- a/packages/cli/src/commands/doctor.test.ts +++ b/packages/cli/src/commands/doctor.test.ts @@ -119,9 +119,9 @@ test("embedder weights check reports ok when fp32 weights present", async () => } }); -// DOC-E-002 — the int8 file on disk is `model_int8.onnx` (underscore), -// per `embedder/src/paths.ts:49`. The doctor check must use the same -// spelling; a hyphen-vs-underscore mismatch is how this historically +// The int8 file on disk is `model_int8.onnx` (underscore), per +// `embedder/src/paths.ts:49`. The doctor check must use the same spelling; +// a hyphen-vs-underscore mismatch is how this historically // false-negative'd. test("embedder weights check reports ok when int8 weights present (underscore filename)", async () => { const home = await mkdtemp(join(tmpdir(), "codehub-doctor-emb-int8-")); @@ -141,9 +141,9 @@ test("embedder weights check reports ok when int8 weights present (underscore fi } }); -// DOC-E-002 (negative control) — the old hyphenated `model-int8.onnx` -// must NOT count as a match. If it did, we'd silently accept a stale -// artefact the embedder can't actually load. +// Negative control — the old hyphenated `model-int8.onnx` must NOT count +// as a match. If it did, we'd silently accept a stale artefact the +// embedder can't actually load. test("embedder weights check reports warn when only hyphenated int8 file is present", async () => { const home = await mkdtemp(join(tmpdir(), "codehub-doctor-emb-hyphen-")); try { @@ -160,7 +160,7 @@ test("embedder weights check reports warn when only hyphenated int8 file is pres } }); -// DOC-E-001 — the tree-sitter and duckdb checks resolve from the CLI's own +// The tree-sitter and duckdb checks resolve from the CLI's own // node_modules first, then fall back to --repoRoot. 
In a workspace install // the CLI's own resolution context already sees the dependencies (hoisted // or otherwise), so passing a non-existent --repoRoot should still succeed @@ -198,10 +198,10 @@ test("native-binding checks tolerate a missing --repoRoot fallback (workspace in } }); -// DOC-E-001 (wiring) — runDoctor should thread `repoRoot` through -// DoctorOptions so the --repoRoot CLI flag has a visible effect on check -// construction. We don't need to actually execute the checks — just -// confirm the override is accepted and the report still comes back. +// Wiring — runDoctor should thread `repoRoot` through DoctorOptions so the +// --repoRoot CLI flag has a visible effect on check construction. We don't +// need to actually execute the checks — just confirm the override is +// accepted and the report still comes back. test("runDoctor accepts --repoRoot override via DoctorOptions", async () => { const home = await mkdtemp(join(tmpdir(), "codehub-doctor-reporoot-")); try { diff --git a/packages/cli/src/commands/find-enclosing-symbol.ts b/packages/cli/src/commands/find-enclosing-symbol.ts index 4c25d32a..d340c718 100644 --- a/packages/cli/src/commands/find-enclosing-symbol.ts +++ b/packages/cli/src/commands/find-enclosing-symbol.ts @@ -36,11 +36,10 @@ export type NodesByFile = ReadonlyMap; /** * Code-kind allow set used when resolving SARIF findings back to an - * enclosing symbol. Matches the set enumerated in the T-M1-4 packet - * conventions (Function, Method, Constructor, Class, Interface, Struct, - * Enum, Trait) and is a strict superset of `SCIP_SYMBOL_KINDS` — we - * additionally allow `Constructor` here because SARIF tooling routinely - * emits findings inside constructor bodies. + * enclosing symbol. Covers Function, Method, Constructor, Class, + * Interface, Struct, Enum, and Trait — a strict superset of + * `SCIP_SYMBOL_KINDS`; we additionally allow `Constructor` here because + * SARIF tooling routinely emits findings inside constructor bodies. 
*/ export const ENCLOSING_SYMBOL_KINDS: ReadonlySet = new Set([ "Function", diff --git a/packages/cli/src/commands/open-store.ts b/packages/cli/src/commands/open-store.ts index 59fadba3..74c8a8fb 100644 --- a/packages/cli/src/commands/open-store.ts +++ b/packages/cli/src/commands/open-store.ts @@ -7,8 +7,8 @@ * so callers can route graph-tier queries through `store.graph` and * temporal-tier queries (cochanges, summaries, `--sql` escape hatch) * through `store.temporal`. Backend selection follows the standard - * `openStore` resolution (env-driven `CODEHUB_STORE`, defaulting to - * `"duck"` until AC-A-9 flips the default). + * `openStore` resolution (env-driven `CODEHUB_STORE`, with auto-detect + * when unset). */ import { resolve } from "node:path"; diff --git a/packages/cli/src/commands/sql.ts b/packages/cli/src/commands/sql.ts index 362e2082..4fe53831 100644 --- a/packages/cli/src/commands/sql.ts +++ b/packages/cli/src/commands/sql.ts @@ -4,9 +4,9 @@ * rejects any mutation, and a per-statement JS timer interrupts long * queries. * - * Per AC-A-6e: routes through `store.temporal.exec()` rather than the - * graph-tier escape hatch — `--sql` is the one CLI surface that consumes - * the tabular view directly. Graph-only commands stay on + * Routes through `store.temporal.exec()` rather than the graph-tier + * escape hatch — `--sql` is the one CLI surface that consumes the + * tabular view directly. Graph-only commands stay on * `store.graph.()`. */ diff --git a/packages/cli/src/lib/is-indexed.ts b/packages/cli/src/lib/is-indexed.ts index c03c92c9..292c5cd2 100644 --- a/packages/cli/src/lib/is-indexed.ts +++ b/packages/cli/src/lib/is-indexed.ts @@ -9,7 +9,7 @@ * - The `graphFile` for any in-tree backend (currently `duck` → * `graph.duckdb`, `lbug` → `graph.lbug`). Filenames come from the * storage `describeArtifacts` helper so two-store deployments share a - * single source of truth (see AC-A-8). + * single source of truth. 
* * Returns a plain boolean — UI surfaces (e.g. `codehub list`) want to * render a single column without leaking which backend produced the diff --git a/packages/cli/src/scip-downloader.test.ts b/packages/cli/src/scip-downloader.test.ts index 5a6df957..beb92787 100644 --- a/packages/cli/src/scip-downloader.test.ts +++ b/packages/cli/src/scip-downloader.test.ts @@ -312,8 +312,8 @@ describe("installScipTool", () => { const dir = await mkdtemp(join(tmpdir(), "och-scip-placeholder-")); try { // All 4 adapter pins (clang/ruby/dotnet/kotlin) now ship real sha256 - // digests post AC-M4-1..4. To exercise the placeholder-refusal path - // we synthesize a placeholder pin and install via override. + // digests. To exercise the placeholder-refusal path we synthesize a + // placeholder pin and install via override. const PLACEHOLDER = "0".repeat(64); const replacement: ScipToolPin = { ...SCIP_PINS.clang, diff --git a/packages/cli/src/scip-downloader.ts b/packages/cli/src/scip-downloader.ts index a333d2f0..99dc0f84 100644 --- a/packages/cli/src/scip-downloader.ts +++ b/packages/cli/src/scip-downloader.ts @@ -24,10 +24,10 @@ * concurrent setup is out of scope — the atomic-rename still means no half- * written binary ever appears at the final path. * - * Placeholder SHA256 handling: AC-M4-0 ships with all-zero placeholder hashes - * in `scip-pins.ts`. We refuse to verify against placeholder hashes at - * runtime. The adapter first-install smoke tests (AC-M4-1..4) pass - * `allowPlaceholder: true` so they can compute the real hash and substitute + * Placeholder SHA256 handling: some pins ship with all-zero placeholder + * hashes in `scip-pins.ts`. We refuse to verify against placeholder hashes + * at runtime. Each adapter's first-install smoke test passes + * `allowPlaceholder: true` so it can compute the real hash and substitute * it back into the pin file. 
*/ @@ -161,8 +161,8 @@ export class PlaceholderHashError extends Error { constructor(tool: ScipTool) { super( `scip-${tool} pin still carries placeholder SHA256 digests. ` + - `The real hash is computed by AC-M4-1..4 at adapter first-install time. ` + - `Pass allowPlaceholder: true from a smoke test, or wait for the adapter PR.`, + `The real hash is computed at adapter first-install time. ` + + `Pass allowPlaceholder: true from a smoke test, or wait for the adapter to land.`, ); this.name = "PlaceholderHashError"; this.tool = tool; diff --git a/packages/cli/src/scip-pins.ts b/packages/cli/src/scip-pins.ts index 13f86161..74b12ea4 100644 --- a/packages/cli/src/scip-pins.ts +++ b/packages/cli/src/scip-pins.ts @@ -11,16 +11,16 @@ * digest, and (optionally) the binary's executable name on * disk. * - * AC-M4-0 shipped PLACEHOLDER SHA256 hashes (64 zeros) for the standalone - * binaries. Each AC-M4-1..4 adapter PR computes and substitutes the real - * digest against the upstream release asset. The `placeholder: true` flag is - * the canonical "do NOT trust this hash at runtime" marker — `installScipTool()` - * refuses to run when the selected pin has `placeholder: true` unless the - * caller sets `opts.allowPlaceholder` (reserved for adapter first-install - * smoke tests). + * Some pins ship PLACEHOLDER SHA256 hashes (64 zeros) for standalone + * binaries until each adapter's first-install smoke test computes and + * substitutes the real digest against the upstream release asset. The + * `placeholder: true` flag is the canonical "do NOT trust this hash at + * runtime" marker — `installScipTool()` refuses to run when the selected pin + * has `placeholder: true` unless the caller sets `opts.allowPlaceholder` + * (reserved for adapter first-install smoke tests). 
* - * As of AC-M4-4 (2026-05-05), `scip-kotlin` is the first pin promoted to real - * digests: upstream ships the plugin as a Maven Central JAR + * `scip-kotlin` ships a real SHA256 computed against Maven Central: upstream + * publishes the plugin as a Maven Central JAR * (`com.sourcegraph:semanticdb-kotlinc:0.6.0`) whose SHA256 is stable and * publicly verifiable — no first-install smoke test needed. * @@ -231,7 +231,7 @@ const SCIP_DOTNET_PIN: ScipToolPin = { * `binName` is the JAR filename inside `~/.codehub/bin/` — the adapter * references it by absolute path when invoking `kotlinc -Xplugin=`. * - * SHA256 computed against Maven Central at implementation time (AC-M4-4). + * SHA256 computed against Maven Central at implementation time. */ const SCIP_KOTLIN_JAR_SHA256 = "bd6abb49d95a909c48dbf1bc2ce27f5ebcd871952f2f5683edb72a806db9b8ba"; const SCIP_KOTLIN_JAR_URL = diff --git a/packages/cli/src/skills-gen.test.ts b/packages/cli/src/skills-gen.test.ts index e8541487..f435b0d9 100644 --- a/packages/cli/src/skills-gen.test.ts +++ b/packages/cli/src/skills-gen.test.ts @@ -1,7 +1,7 @@ /** * Tests for `generateSkills`. * - * Post AC-A-6e the generator consumes a typed-finder surface + * The generator consumes a typed-finder surface * (`Pick`). The fake store below * implements those four methods over an in-memory fixture so the tests @@ -64,9 +64,8 @@ interface Fixture { // --------------------------------------------------------------------------- // Fake store — implements the four typed finders the generator needs over an -// in-memory fixture. The legacy SQL-dispatch fake was retired with AC-A-6e; -// matching the production interface keeps tests honest about which finders -// the generator actually calls. +// in-memory fixture. Matching the production interface keeps tests +// honest about which finders the generator actually calls. 
// --------------------------------------------------------------------------- function makeFakeStore(fixture: Fixture): SkillsGenStore { diff --git a/packages/cobol-proleap/README.md b/packages/cobol-proleap/README.md index 3b22d449..0ba5ed82 100644 --- a/packages/cobol-proleap/README.md +++ b/packages/cobol-proleap/README.md @@ -19,7 +19,7 @@ const result = await parseCobolDeep(["a.cbl", "b.cob"], { Returns `{ elements, diagnostics, fellBackToRegex }`. On a JVM crash or malformed JSON, every input file is silently reparsed through the regex hot path so a -single bad file never aborts the run (spec AC-M4-6 success criterion #3). +single bad file never aborts the run. ## Install @@ -63,7 +63,7 @@ design. If `java --version` reports < 17, both `codehub setup --cobol-proleap` and `codehub analyze --allow-build-scripts=proleap` refuse to run with a clear -install hint (spec S-M4-2). +install hint. ## Anti-goals diff --git a/packages/cobol-proleap/src/jre-probe.ts b/packages/cobol-proleap/src/jre-probe.ts index ad70f451..a43b4cda 100644 --- a/packages/cobol-proleap/src/jre-probe.ts +++ b/packages/cobol-proleap/src/jre-probe.ts @@ -1,7 +1,7 @@ /** * JRE probe — spawns `java --version` and parses the major version from * stdout/stderr. The ProLeap wrapper compiles against Java 17 source/target, - * so any JRE < 17 refuses to run with a clear install hint (spec S-M4-2). + * so any JRE < 17 refuses to run with a clear install hint. * * `java --version` historically printed to stderr on some distributions * and stdout on others; we concatenate both for robust matching. The diff --git a/packages/core-types/src/language-id.ts b/packages/core-types/src/language-id.ts index 2df57202..455ae9ec 100644 --- a/packages/core-types/src/language-id.ts +++ b/packages/core-types/src/language-id.ts @@ -27,5 +27,6 @@ export type LanguageId = | "php" | "dart" // COBOL ships via the regex-provider discriminator in the ingestion grammar - // registry — there is no tree-sitter grammar for it. 
See T-M4-5. + // registry — there is no tree-sitter grammar for it. The regex provider + // handles COBOL. | "cobol"; diff --git a/packages/core-types/src/nodes.test.ts b/packages/core-types/src/nodes.test.ts index 32e7b46b..567c99cc 100644 --- a/packages/core-types/src/nodes.test.ts +++ b/packages/core-types/src/nodes.test.ts @@ -24,7 +24,7 @@ test("NODE_KINDS: contains all v1.0 + M6 additions (append-only)", () => { // Appended, not inserted: the original last MVP kind stays at its prior slot. const firstNewIdx = NODE_KINDS.indexOf("Finding"); assert.equal(NODE_KINDS[firstNewIdx - 1], "Section"); - // Appended in the spec order. AC-M6-1 adds `Repo` at the tail. + // Appended in order; `Repo` is the most recent addition at the tail. assert.deepEqual(NODE_KINDS.slice(firstNewIdx), [ "Finding", "Dependency", diff --git a/packages/core-types/src/nodes.ts b/packages/core-types/src/nodes.ts index 067c2769..428e7495 100644 --- a/packages/core-types/src/nodes.ts +++ b/packages/core-types/src/nodes.ts @@ -516,10 +516,10 @@ export interface ProjectProfileNode extends NodeBase { * * Singleton per graph — constructed via `makeNodeId("Repo", "", "repo")` so * the id stays stable across clones of the same repo on different absolute - * paths (mirroring ProjectProfileNode). The 9 attributes below match spec - * 005 AC-M6-1 E-M6-1 exactly; `indexTime` is deliberately kept OUT of - * `pack_hash` / `graphHash` inputs (it serializes as a node field but does - * not feed determinism-sensitive pipelines). + * paths (mirroring ProjectProfileNode). `indexTime` is deliberately kept OUT + * of `pack_hash` / `graphHash` inputs (it serializes as a node field but does + * not feed determinism-sensitive pipelines) so two indexes built from the + * same commit yield byte-identical graph hashes. */ export interface RepoNode extends NodeBase { readonly kind: "Repo"; @@ -529,7 +529,7 @@ export interface RepoNode extends NodeBase { * Sourcegraph-style host-path key. 
Example: `github.com/org/repo`. * * When `originUrl` is null, this is `local:<sha256-prefix>` - * so the handle remains deterministic and distinguishable per S-M6-1. + * so the handle remains deterministic and distinguishable. */ readonly repoUri: string; /** Default branch at index time. Example: `main`. Null when detached or unknown. */ diff --git a/packages/frameworks/src/catalog.ts b/packages/frameworks/src/catalog.ts index 954d69fe..f5790b90 100644 --- a/packages/frameworks/src/catalog.ts +++ b/packages/frameworks/src/catalog.ts @@ -2,8 +2,10 @@ * Top-20 framework detection catalog. * * A typed, declarative table of `FrameworkRule` entries covering the - * 20 frameworks enumerated in - * `.erpaval/sessions/2026-04-24-v1-backlog-and-framework-detection/research/frameworks-top20.md`. + * top-20 framework set OpenCodeHub recognizes today (React, Next.js, Vue, + * Angular, Svelte, Express, FastAPI, Django, Flask, Spring Boot, Ruby on + * Rails, Laravel, .NET, Gin, Fiber, NestJS, Astro, Remix, SolidStart, and + * Nuxt). * * Each rule is self-describing: category + tier + manifest fingerprint + * optional file / regex / variant markers + optional `parent` for wrapping diff --git a/packages/ingestion/src/parse/cobol-regex.test.ts b/packages/ingestion/src/parse/cobol-regex.test.ts index adfd57c8..4ccdcef2 100644 --- a/packages/ingestion/src/parse/cobol-regex.test.ts +++ b/packages/ingestion/src/parse/cobol-regex.test.ts @@ -19,7 +19,7 @@ import { parseCobolFile } from "./cobol-regex.js"; const HELLO_CBL = [ "000100 IDENTIFICATION DIVISION.", "000200 PROGRAM-ID. HELLO-WORLD.", - "000300 AUTHOR. T-M4-5.", + "000300 AUTHOR. INGESTION-FIXTURE.", "000400*> Minimal hello-world program for the regex hot path fixture suite.", "000500 ENVIRONMENT DIVISION.", "000600 DATA DIVISION.", @@ -298,7 +298,7 @@ describe("parseCobolFile — performance", () => { it("p50 parse time ≤ 2 ms on a 1000-line fixture", () => { // Tile the accounts fixture up to ~1000 lines for a realistic workload.
// The fixture is 28 lines; 40 repeats + tail = 1120 lines, which covers - // the "1000-line fixture" invariant from the T-M4-5 success criteria. + // the 1000-line-fixture performance invariant for COBOL regex parsing. // // Budget is 2ms (not 1ms) to survive concurrent test-runner contention on // CI and shared devboxes. Isolated runs stay at ~0.5ms p50; the 2ms diff --git a/packages/ingestion/src/parse/cobol-regex.ts b/packages/ingestion/src/parse/cobol-regex.ts index bd269042..132675d8 100644 --- a/packages/ingestion/src/parse/cobol-regex.ts +++ b/packages/ingestion/src/parse/cobol-regex.ts @@ -23,14 +23,14 @@ * Columns 73-80 identification (ignored) * * The default parse path runs at ≤ 1 ms on 1000-line fixtures; a p50 - * regression in that number is a graph-ingestion regression (T-M4-5 SC). + * regression in that number is a graph-ingestion regression. * * ## Anti-goals * * - NOT a full parse: `PERFORM ... THRU ... VARYING`, `COPY ... REPLACING * ==tag== BY ==value==`, and nested `EXEC SQL` blocks are all resolved - * heuristically. The deep-parse path (ProLeap, T-M4-6) owns the precise - * AST. + * heuristically. The deep-parse path (ProLeap, when wired in) owns the + * precise AST. * - NOT free-format aware: the 99% legacy estate is fixed-format; * free-format COBOL (column-0 start) lands with the ProLeap backend. * - NO filesystem I/O, NO subprocesses, NO external deps. The function diff --git a/packages/ingestion/src/parse/fixtures/cobol/hello.cbl b/packages/ingestion/src/parse/fixtures/cobol/hello.cbl index e238031b..f8727f43 100644 --- a/packages/ingestion/src/parse/fixtures/cobol/hello.cbl +++ b/packages/ingestion/src/parse/fixtures/cobol/hello.cbl @@ -1,6 +1,6 @@ 000100 IDENTIFICATION DIVISION. 000200 PROGRAM-ID. HELLO-WORLD. -000300 AUTHOR. T-M4-5. +000300 AUTHOR. INGESTION-FIXTURE. 000400*> Minimal hello-world program for the regex hot path fixture suite. 000500 ENVIRONMENT DIVISION. 000600 DATA DIVISION. 
diff --git a/packages/ingestion/src/parse/grammar-registry.ts b/packages/ingestion/src/parse/grammar-registry.ts index 80b59b6f..36387ced 100644 --- a/packages/ingestion/src/parse/grammar-registry.ts +++ b/packages/ingestion/src/parse/grammar-registry.ts @@ -18,7 +18,7 @@ * * This module abstracts those differences behind {@link loadGrammar}. * - * ## Regex-provider escape hatch (T-M4-5) + * ## Regex-provider escape hatch * * Some languages — COBOL is the first — have no maintained tree-sitter * grammar and ship via a pure-regex extractor instead. The registry encodes diff --git a/packages/ingestion/src/parse/language-detector.ts b/packages/ingestion/src/parse/language-detector.ts index bbfffb14..87040398 100644 --- a/packages/ingestion/src/parse/language-detector.ts +++ b/packages/ingestion/src/parse/language-detector.ts @@ -48,7 +48,7 @@ const EXTENSION_MAP: ReadonlyMap = new Map([ [".dart", "dart"], // --- COBOL (regex hot path; see parse/cobol-regex.ts). Fixed-format .cbl / // .cob programs and .cpy copybooks. Free-format COBOL is NOT handled - // in v1 — that's T-M4-6 (ProLeap deep-parse). --- + // in v1 — the ProLeap deep-parse path will own that AST when wired in. --- [".cbl", "cobol"], [".cob", "cobol"], [".cpy", "cobol"], diff --git a/packages/ingestion/src/parse/wasm-grammar-resolution.test.ts b/packages/ingestion/src/parse/wasm-grammar-resolution.test.ts index 412c8315..3068a398 100644 --- a/packages/ingestion/src/parse/wasm-grammar-resolution.test.ts +++ b/packages/ingestion/src/parse/wasm-grammar-resolution.test.ts @@ -16,7 +16,7 @@ * (verifies the commit + build-script loop landed correctly). * - A known per-grammar-package entry (python) still resolves — the * refactor must not regress the 11-entry primary mapping. - * - PHP resolves to the `php_only` variant (AC-4 invariant). + * - PHP resolves to the `php_only` variant. 
*/ import { strict as assert } from "node:assert"; @@ -57,7 +57,7 @@ describe("resolveGrammarWasmPath — per-grammar package path unchanged", () => ); }); - it("php resolves to php_only.wasm (AC-4 invariant)", () => { + it("php resolves to php_only.wasm", () => { const wasmPath = _resolveGrammarWasmPathForTests("php"); assert.ok(wasmPath !== undefined); assert.ok( diff --git a/packages/ingestion/src/pipeline/gitignore.test.ts b/packages/ingestion/src/pipeline/gitignore.test.ts index 0ed5dd2f..5b5c1595 100644 --- a/packages/ingestion/src/pipeline/gitignore.test.ts +++ b/packages/ingestion/src/pipeline/gitignore.test.ts @@ -1,5 +1,5 @@ /** - * Nested `.gitignore` regression suite — DET-E-004 and DET-U-003. + * Nested `.gitignore` regression suite. * * Builds a 3-level fixture where each layer either ignores or re-includes * paths the parent layer decided. The loader must stack rules from repo diff --git a/packages/ingestion/src/pipeline/gitignore.ts b/packages/ingestion/src/pipeline/gitignore.ts index b5ac2853..d95f8a9a 100644 --- a/packages/ingestion/src/pipeline/gitignore.ts +++ b/packages/ingestion/src/pipeline/gitignore.ts @@ -7,10 +7,9 @@ * - Leading-slash anchored-to-root matches. * - Negation (`!`) re-includes a previously excluded path. * - `*` (single segment), `?` (single char), `**` (any number of segments). - * - Nested `.gitignore` files with layered negation (DET-U-003 / - * DET-E-004). Rules stack from repo root downward; deeper layers - * override shallower ones so `docs/.gitignore` can negate rules set - * by the repo-root file. + * - Nested `.gitignore` files with layered negation. Rules stack from + * repo root downward; deeper layers override shallower ones so + * `docs/.gitignore` can negate rules set by the repo-root file. * * Not supported today: character classes (`[abc]`), escaped metacharacters * (`\*`). 
We surface them as warnings when the operator enables verbose diff --git a/packages/ingestion/src/pipeline/orchestrator.test.ts b/packages/ingestion/src/pipeline/orchestrator.test.ts index 5d214dd7..368bb339 100644 --- a/packages/ingestion/src/pipeline/orchestrator.test.ts +++ b/packages/ingestion/src/pipeline/orchestrator.test.ts @@ -41,7 +41,7 @@ describe("runIngestion (end-to-end)", () => { "incremental-scope", "profile", "dependencies", - // `repo-node` (AC-M6-1) depends on `profile` only, so the topological + // `repo-node` depends on `profile` only, so the topological // alphabetic tiebreak lands it after `dependencies` and before `sbom`. "repo-node", "sbom", diff --git a/packages/ingestion/src/pipeline/orchestrator.ts b/packages/ingestion/src/pipeline/orchestrator.ts index 66a62452..489472e0 100644 --- a/packages/ingestion/src/pipeline/orchestrator.ts +++ b/packages/ingestion/src/pipeline/orchestrator.ts @@ -115,7 +115,7 @@ export interface RunIngestionOptions extends PipelineOptions { * Optional adapter the embeddings phase probes before issuing embedder * calls. Production wires this to the DuckDB store's * `listEmbeddingHashes` implementation so re-analyze runs skip chunks - * whose `content_hash` matches a prior row (T-M1-3). Absent by default — + * whose `content_hash` matches a prior row. Absent by default — * the phase degrades to "every chunk is new" which is still correct, * just more expensive. Ignored when `options.force === true`. */ @@ -140,7 +140,7 @@ export async function runIngestion( (normalizedOptions as unknown as Record)[SUMMARY_CACHE_OPTIONS_KEY] = options.summaryCacheAdapter; } - // Same trick for the embeddings phase's content-hash cache (T-M1-3). + // Same trick for the embeddings phase's content-hash cache. // Attached here (not in stripPhaseKeys) so the typed option shape stays // minimal — this is a well-known extension point, not a first-class // `PipelineOptions` field. 
diff --git a/packages/ingestion/src/pipeline/phases/default-set.ts b/packages/ingestion/src/pipeline/phases/default-set.ts index d7a8bf00..2983ab80 100644 --- a/packages/ingestion/src/pipeline/phases/default-set.ts +++ b/packages/ingestion/src/pipeline/phases/default-set.ts @@ -55,7 +55,7 @@ import { toolsPhase } from "./tools.js"; export const DEFAULT_PHASES: readonly PipelinePhase[] = [ scanPhase, profilePhase, - // `repo-node` emits one RepoNode (AC-M6-1) and runs immediately after + // `repo-node` emits one RepoNode and runs immediately after // `profile` so it inherits the detected-languages list when deriving // `languageStats`. It has no downstream dependents — the node is read // from the graph by MCP tools at query time, not consumed by later phases. diff --git a/packages/ingestion/src/pipeline/phases/embeddings.test.ts b/packages/ingestion/src/pipeline/phases/embeddings.test.ts index 9b26f8bf..3766dc75 100644 --- a/packages/ingestion/src/pipeline/phases/embeddings.test.ts +++ b/packages/ingestion/src/pipeline/phases/embeddings.test.ts @@ -472,14 +472,14 @@ describe("embeddingsPhase — hierarchical tiers (P03)", () => { }); // --------------------------------------------------------------------------- -// T-M1-3 content-hash skip: integration-style tests that run the phase twice -// against the same graph and verify the second run short-circuits on every -// chunk whose prior hash matches. Uses the same HTTP-embedder stub as the P03 -// tier tests above (fetch stub installed there would already be torn down, so -// we install a fresh one scoped to this describe block). +// Content-hash skip: integration-style tests that run the phase twice against +// the same graph and verify the second run short-circuits on every chunk +// whose prior hash matches. Uses the same HTTP-embedder stub as the P03 tier +// tests above (fetch stub installed there would already be torn down, so we +// install a fresh one scoped to this describe block). 
// --------------------------------------------------------------------------- -describe("embeddingsPhase — content-hash skip (T-M1-3)", () => { +describe("embeddingsPhase — content-hash skip", () => { const originalUrl = process.env["CODEHUB_EMBEDDING_URL"]; const originalModel = process.env["CODEHUB_EMBEDDING_MODEL"]; const originalDims = process.env["CODEHUB_EMBEDDING_DIMS"]; diff --git a/packages/ingestion/src/pipeline/phases/embeddings.ts b/packages/ingestion/src/pipeline/phases/embeddings.ts index f8ceefb6..70a80e19 100644 --- a/packages/ingestion/src/pipeline/phases/embeddings.ts +++ b/packages/ingestion/src/pipeline/phases/embeddings.ts @@ -216,7 +216,7 @@ export interface EmbedderPhaseOutput { */ readonly summaryFused: boolean; /** - * Chunks short-circuited by the content-hash skip (T-M1-3). Counts + * Chunks short-circuited by the content-hash skip. Counts * chunks whose `(granularity, node_id, chunk_index)` had a prior row * with identical `content_hash` in the store — so the phase neither * embedded them nor emitted a row. `0` when `options.force === true`, @@ -566,14 +566,15 @@ async function runEmbeddings(ctx: PipelineContext): Promise community: 0, }; - // Prior-hash cache (T-M1-3). When the CLI plugs an adapter AND the caller + // Prior-hash cache. When the CLI plugs an adapter AND the caller // did not pass `force: true`, we load every prior `content_hash` from the // `embeddings` table in a single round-trip. Chunks whose // `(granularity, nodeId, chunkIndex)` key maps to an identical freshly- // computed hash skip both `embedder.embed()` and the upsert batch — // unchanged source reduces a full re-analyze to a no-op for the // embeddings phase. Under `force`, or with no adapter installed, the map - // is empty and the phase behaves exactly as it did pre-M1-3. + // is empty and the phase behaves exactly as it did before the + // content-hash skip landed. 
const forceFlag = ctx.options.force === true; const hashCache = resolveEmbeddingHashCacheAdapter(ctx); const priorHashes: Map = @@ -651,7 +652,7 @@ async function runEmbeddings(ctx: PipelineContext): Promise continue; } chunksTotal += chunks.length; - // Content-hash skip (T-M1-3). A symbol can emit multiple chunks + // Content-hash skip. A symbol can emit multiple chunks // (long signature+summary+body). We only skip when *every* fresh // chunk hash matches its prior row — otherwise one mismatched chunk // would leave the tier partially updated with stale neighbours. @@ -718,7 +719,7 @@ async function runEmbeddings(ctx: PipelineContext): Promise continue; } chunksTotal += 1; - // Content-hash skip (T-M1-3). Single-chunk tier — the compare is + // Content-hash skip. Single-chunk tier — the compare is // straightforward: if the prior row's hash equals the fresh hash, // bail before queuing work. const contentHash = hashText("file", firstChunk); @@ -791,7 +792,7 @@ async function runEmbeddings(ctx: PipelineContext): Promise continue; } chunksTotal += 1; - // Content-hash skip (T-M1-3). Community tier is also single-chunk. + // Content-hash skip. Community tier is also single-chunk. const contentHash = hashText("community", firstChunk); if ( priorHashes.size > 0 && diff --git a/packages/ingestion/src/pipeline/phases/parse-external-stubs.test.ts b/packages/ingestion/src/pipeline/phases/parse-external-stubs.test.ts index 2c60b792..e40ede93 100644 --- a/packages/ingestion/src/pipeline/phases/parse-external-stubs.test.ts +++ b/packages/ingestion/src/pipeline/phases/parse-external-stubs.test.ts @@ -1,5 +1,5 @@ /** - * Parse phase — external-specifier stubs (DET-E-003). + * Parse phase — external-specifier stubs. * * Previously, unresolved external imports (`import { foo } from "some-lib"`) * were silently dropped by the parse phase. 
P06 emits one diff --git a/packages/ingestion/src/pipeline/phases/parse.test.ts b/packages/ingestion/src/pipeline/phases/parse.test.ts index 3e454001..f246fa68 100644 --- a/packages/ingestion/src/pipeline/phases/parse.test.ts +++ b/packages/ingestion/src/pipeline/phases/parse.test.ts @@ -716,7 +716,7 @@ describe("parsePhase (cache key determinism)", () => { }); }); -describe("parsePhase — COBOL regex hot path (T-M4-5)", () => { +describe("parsePhase — COBOL regex hot path", () => { let repo: string; beforeEach(async () => { diff --git a/packages/ingestion/src/pipeline/phases/parse.ts b/packages/ingestion/src/pipeline/phases/parse.ts index 1c9f7803..96ea4f97 100644 --- a/packages/ingestion/src/pipeline/phases/parse.ts +++ b/packages/ingestion/src/pipeline/phases/parse.ts @@ -137,7 +137,7 @@ async function runParse( ); // Partition the candidates by provider kind. Regex-provider languages - // (currently only `cobol` via T-M4-5) bypass the worker pool entirely — + // (currently only `cobol`) bypass the worker pool entirely — // they carry no tree-sitter grammar, so the content-addressed parse // cache, the piscina worker, the unified-query evaluator, and the // three-tier resolver chain are all skipped. The regex handler lower @@ -612,7 +612,7 @@ async function runParse( } } - // ---- Regex-provider dispatch: COBOL (T-M4-5). ------------------------- + // ---- Regex-provider dispatch: COBOL. ---------------------------------- // // COBOL files bypass the tree-sitter worker pool entirely. `parseCobolFile` // returns `CobolElement` records that we map to `CodeElement` graph nodes diff --git a/packages/ingestion/src/pipeline/phases/repo-node.test.ts b/packages/ingestion/src/pipeline/phases/repo-node.test.ts index 0294c02a..31cd1582 100644 --- a/packages/ingestion/src/pipeline/phases/repo-node.test.ts +++ b/packages/ingestion/src/pipeline/phases/repo-node.test.ts @@ -1,5 +1,5 @@ /** - * Tests for the `repo-node` phase (AC-M6-1). + * Tests for the `repo-node` phase. 
* * Covers: * - RepoNode output shape conforms to the core-types interface. @@ -142,7 +142,7 @@ describe("runRepoNodePhase", () => { assert.equal(repoNode.name, "github.com/acme/example"); }); - it("falls back to local: when no origin remote exists (S-M6-1)", async () => { + it("falls back to local: when no origin remote exists", async () => { const probe = stubProbe({ originUrl: async () => null, defaultBranch: async () => null, diff --git a/packages/ingestion/src/pipeline/phases/repo-node.ts b/packages/ingestion/src/pipeline/phases/repo-node.ts index 5de831d7..3c4ffc35 100644 --- a/packages/ingestion/src/pipeline/phases/repo-node.ts +++ b/packages/ingestion/src/pipeline/phases/repo-node.ts @@ -1,5 +1,5 @@ /** - * Repo-node phase (AC-M6-1) — emits one first-class `RepoNode` per graph. + * Repo-node phase — emits one first-class `RepoNode` per graph. * * Runs after the `profile` phase so we can inherit `ProjectProfileNode.languages` * when deriving `languageStats`. Probes three git endpoints via @@ -10,14 +10,13 @@ * * All probes fail-safe: when git is absent, the repo is not a git working * tree, or the command exits non-zero, the phase returns a deterministic - * `local:<sha256-prefix>` handle (S-M6-1). The phase never throws on - * git failures — it downgrades to the local-only shape. + * `local:<sha256-prefix>` handle. The phase never throws on git + * failures — it downgrades to the local-only shape. * * `indexTime` is populated inside this phase but is explicitly kept out of - * graphHash determinism inputs by the spec (W-M6-1) — graphHash hashes the - * node verbatim, so callers that need fixture-stable hashes must freeze - * `indexTime` at the fixture level or omit the phase from the determinism - * gate. + * graphHash determinism inputs — graphHash hashes the node verbatim, so + * callers that need fixture-stable hashes must freeze `indexTime` at the + * fixture level or omit the phase from the determinism gate.
*/ import { execFile } from "node:child_process"; @@ -102,8 +101,8 @@ export const defaultGitProbe: GitProbe = { /** * Fixed sentinel used when we can't resolve a deterministic per-commit * timestamp. Anchored to the Unix epoch so it clearly signals "unknown" and - * carries NO run-to-run variance — this is the core of W-M6-1's determinism - * guarantee when the phase runs outside a git working tree. + * carries NO run-to-run variance — this preserves graphHash determinism when + * the phase runs outside a git working tree. */ const UNKNOWN_INDEX_TIME = "1970-01-01T00:00:00Z"; @@ -114,9 +113,10 @@ const UNKNOWN_INDEX_TIME = "1970-01-01T00:00:00Z"; * unavailable or the repo is not a git working tree. * * graphHash determinism requires this: `new Date().toISOString()` would - * inject wall-clock noise into every node, breaking W-M6-1 on any pipeline - * run where the repo-node phase is active. Pinning to the HEAD commit time - * gives us "stable per commit" without excluding the field from graphHash. + * inject wall-clock noise into every node, breaking determinism on any + * pipeline run where the repo-node phase is active. Pinning to the HEAD + * commit time gives us "stable per commit" without excluding the field + * from graphHash. */ async function probeCommitTime(repoPath: string): Promise { const out = await tryGit(repoPath, ["show", "-s", "--format=%cI", "HEAD"]); @@ -193,7 +193,7 @@ function finalizeRepoUri(host: string, path: string): string | null { return `${cleanHost}/${cleanPath}`; } -/** `local:<sha256-prefix>` — the S-M6-1 fallback handle. */ +/** `local:<sha256-prefix>` — fallback handle when no git remote exists.
*/ export function deriveLocalRepoUri(absolutePath: string): string { const digest = createHash("sha256").update(absolutePath, "utf8").digest("hex"); return `local:${digest.slice(0, 12)}`; @@ -239,7 +239,7 @@ export async function runRepoNodePhase(input: RepoNodePhaseInput): Promise = { const out = await runRepoNodePhase({ repoPath: ctx.repoPath, // The pipeline does not yet thread group / visibility / indexer through - // PipelineOptions — reserve those for a later AC. For now we surface - // deterministic defaults that match the RepoNode interface contract. + // PipelineOptions — that wiring lands in a later iteration. For now we + // surface deterministic defaults that match the RepoNode interface + // contract. indexer: `opencodehub@${resolveIndexerVersion()}`, detectedLanguages, }); diff --git a/packages/ingestion/src/pipeline/phases/scan.test.ts b/packages/ingestion/src/pipeline/phases/scan.test.ts index d9bed9d6..b6f2e1b9 100644 --- a/packages/ingestion/src/pipeline/phases/scan.test.ts +++ b/packages/ingestion/src/pipeline/phases/scan.test.ts @@ -130,7 +130,7 @@ describe("scanPhase", () => { }); }); -describe("scanPhase — submodule enumeration (ING-E-002, ING-S-001)", () => { +describe("scanPhase — submodule enumeration", () => { let outerRepo: string; let innerRepo: string; diff --git a/packages/ingestion/src/pipeline/phases/scan.ts b/packages/ingestion/src/pipeline/phases/scan.ts index 71cf3a8c..48013076 100644 --- a/packages/ingestion/src/pipeline/phases/scan.ts +++ b/packages/ingestion/src/pipeline/phases/scan.ts @@ -84,7 +84,7 @@ async function runScan(ctx: PipelineContext): Promise { const maxTotalFiles = ctx.options.maxTotalFiles ?? DEFAULT_MAX_TOTAL_FILES; // Layered gitignore chain — nested `.gitignore` files stack from repo - // root downward; deeper layers override shallower ones (DET-E-004). + // root downward; deeper layers override shallower ones. 
const chain = await loadGitignoreChain(ctx.repoPath); const hardcoded = new Set(HARDCODED_IGNORES); diff --git a/packages/ingestion/src/pipeline/phases/summarize.test.ts b/packages/ingestion/src/pipeline/phases/summarize.test.ts index 31885d5d..c9a62699 100644 --- a/packages/ingestion/src/pipeline/phases/summarize.test.ts +++ b/packages/ingestion/src/pipeline/phases/summarize.test.ts @@ -541,7 +541,7 @@ describe("summarizePhase — phase name constant", () => { }); }); -describe("summarizePhase — credential soft-fail (SUM-UN-001)", () => { +describe("summarizePhase — credential soft-fail", () => { it("returns skippedReason=no-credentials when the summarizer throws NoCredentialsError", async () => { const graph = new KnowledgeGraph(); const funcId = makeNodeId("Function", "src/a.py", "alpha") as NodeId; @@ -571,8 +571,8 @@ describe("summarizePhase — credential soft-fail (SUM-UN-001)", () => { // Fake summarizer whose first call throws a credential-missing error. // The phase must convert that into a soft-fail (no rows, no failure - // counter) because SUM-UN-001 guarantees analyze stays green for - // contributors without AWS credentials. + // counter) so analyze stays green for contributors without AWS + // credentials. const credErr = new Error("Could not load credentials from any providers"); (credErr as { name: string }).name = "CredentialsProviderError"; const adapter: SummarizerAdapter = { diff --git a/packages/ingestion/src/pipeline/phases/summarize.ts b/packages/ingestion/src/pipeline/phases/summarize.ts index 38d1e7d8..ab30f02d 100644 --- a/packages/ingestion/src/pipeline/phases/summarize.ts +++ b/packages/ingestion/src/pipeline/phases/summarize.ts @@ -282,8 +282,9 @@ async function runSummarize(ctx: PipelineContext): Promise // Instantiating the summarizer resolves the AWS SDK credential chain, which // throws `CredentialsProviderError` / `NoCredentialsError` when no creds // are configured. 
Catch that family here so contributors without Bedrock - // access still get a green analyze — see SUM-S-002 / SUM-UN-001. Any other - // factory error continues to surface so real bugs don't go silent. + // access still get a green analyze — the missing-credentials path emits a + // skip note and zero rows, while every other factory error continues to + // surface so real bugs don't go silent. let summarizer: SummarizerAdapter; try { summarizer = (testHooks?.summarizerFactory ?? defaultSummarizerFactory)({ modelId }); diff --git a/packages/ingestion/src/pipeline/profile-detectors/framework-detector.ts b/packages/ingestion/src/pipeline/profile-detectors/framework-detector.ts index 67f6993d..e6727732 100644 --- a/packages/ingestion/src/pipeline/profile-detectors/framework-detector.ts +++ b/packages/ingestion/src/pipeline/profile-detectors/framework-detector.ts @@ -3,7 +3,7 @@ * * Re-exports the framework dispatcher from `@opencodehub/frameworks` so * callers that still import from the old profile-detectors path continue - * to compile. Slated for removal after one release per roadmap §M4 T-M4-7. + * to compile. Slated for removal after one release. * * @deprecated Import from `@opencodehub/frameworks` instead. */ diff --git a/packages/ingestion/src/providers/cobol.ts b/packages/ingestion/src/providers/cobol.ts index 3bd0d347..806650fd 100644 --- a/packages/ingestion/src/providers/cobol.ts +++ b/packages/ingestion/src/providers/cobol.ts @@ -4,7 +4,7 @@ * COBOL has no tree-sitter grammar, so the parse pipeline does NOT route * `.cbl` / `.cob` / `.cpy` files through the worker pool or this provider's * extract methods. Instead, `packages/ingestion/src/parse/cobol-regex.ts` - * emits `CodeElement` graph nodes directly from a regex pass; see T-M4-5. + * emits `CodeElement` graph nodes directly from a regex pass. 
* * This stub exists solely to satisfy the compile-time * `satisfies Record` constraint in diff --git a/packages/ingestion/src/providers/registry.test.ts b/packages/ingestion/src/providers/registry.test.ts index dc3df5e9..e7365106 100644 --- a/packages/ingestion/src/providers/registry.test.ts +++ b/packages/ingestion/src/providers/registry.test.ts @@ -20,8 +20,8 @@ const ALL_LANGUAGES: readonly LanguageId[] = [ "swift", "php", "dart", - // --- Regex-provider languages (T-M4-5). The cobol provider is a stub; the - // regex hot path in `parse/cobol-regex.ts` owns the actual extraction. + // --- Regex-provider languages. The cobol provider is a stub; the regex + // hot path in `parse/cobol-regex.ts` owns the actual extraction. "cobol", ]; diff --git a/packages/mcp/src/connection-pool.ts b/packages/mcp/src/connection-pool.ts index 6f0be19d..6e7ca026 100644 --- a/packages/mcp/src/connection-pool.ts +++ b/packages/mcp/src/connection-pool.ts @@ -19,14 +19,13 @@ * * `shutdown()` drains the pool on stdio close so the server exits cleanly. * - * AC-A-6c migration: previously held `DuckDbStore` directly. Now caches - * the composed `OpenStoreResult` so MCP tools can route graph-tier calls - * through `store.graph` and temporal-tier calls (cochanges, summaries, - * `--sql` escape hatch) through `store.temporal`. Backend selection - * follows the standard `openStore` resolution (env-driven `CODEHUB_STORE`, - * defaulting to `"duck"`); `OpenStoreResult.close()` is the deterministic - * composite close — for the DuckDB-only deployment that's a single - * underlying close, identical to the prior behavior. + * The pool caches the composed `OpenStoreResult` so MCP tools can route + * graph-tier calls through `store.graph` and temporal-tier calls + * (cochanges, summaries, `--sql` escape hatch) through `store.temporal`. + * Backend selection follows the standard `openStore` resolution (env- + * driven `CODEHUB_STORE`, with auto-detect when unset). 
+ * `OpenStoreResult.close()` is the deterministic composite close — for + * the DuckDB-only deployment that's a single underlying close. */ import { openStore, type Store } from "@opencodehub/storage"; diff --git a/packages/mcp/src/error-envelope.test.ts b/packages/mcp/src/error-envelope.test.ts index bff9506b..c1bf96de 100644 --- a/packages/mcp/src/error-envelope.test.ts +++ b/packages/mcp/src/error-envelope.test.ts @@ -56,7 +56,7 @@ test("toolError round-trips AMBIGUOUS_REPO with hint", () => { }); // --------------------------------------------------------------------------- -// AC-M6-2 — structured AMBIGUOUS_REPO with choices[] + total_matches. +// Structured AMBIGUOUS_REPO with choices[] + total_matches. // --------------------------------------------------------------------------- test("toolAmbiguousRepoError populates structured fields alongside legacy ones", () => { @@ -82,7 +82,7 @@ test("toolAmbiguousRepoError populates structured fields alongside legacy ones", assert.ok(detail.message.includes("2 repos")); assert.ok(detail.hint?.includes("alpha")); - // New structured contract — AC-M6-2 §5. + // Structured contract — error_code + jsonrpc_code + counts. assert.equal(detail.error_code, "AMBIGUOUS_REPO"); assert.equal(detail.jsonrpc_code, -32602); assert.equal(detail.total_matches, 2); diff --git a/packages/mcp/src/error-envelope.ts b/packages/mcp/src/error-envelope.ts index a5fc2a4c..4e0998eb 100644 --- a/packages/mcp/src/error-envelope.ts +++ b/packages/mcp/src/error-envelope.ts @@ -40,9 +40,9 @@ export interface ErrorDetail { * are intentional — this shape crosses the MCP boundary to an agent, and * the research spec (§6.2 of research-m5m6.yaml) names them that way. * - * `repo_uri` is derived from the registry at error-construction time. Once - * AC-M6-1's `RepoNode` type lands in M7, this field will be pulled from - * the registry-backed node instead of being computed from + * `repo_uri` is derived from the registry at error-construction time. 
+ * Once the registry surfaces the persisted RepoNode, this field will + * be pulled from there instead of being computed from * `RegistryEntry.name`. */ export interface RepoChoice { @@ -63,7 +63,7 @@ export interface AmbiguousRepoDetail extends ErrorDetail { readonly error_code: "AMBIGUOUS_REPO"; /** JSON-RPC code for "invalid params" — per MCP spec. */ readonly jsonrpc_code: -32602; - /** Capped at 10 — see AC-M6-2 §5. */ + /** Capped at 10. */ readonly choices: readonly RepoChoice[]; /** Full count of matching registry entries (may exceed `choices.length`). */ readonly total_matches: number; diff --git a/packages/mcp/src/repo-resolver.test.ts b/packages/mcp/src/repo-resolver.test.ts index 8fdd2b4f..922ed72f 100644 --- a/packages/mcp/src/repo-resolver.test.ts +++ b/packages/mcp/src/repo-resolver.test.ts @@ -147,7 +147,7 @@ test("resolveRepo throws NOT_FOUND for unknown name", async () => { }); // --------------------------------------------------------------------------- -// AC-M6-2 — repo_uri alias + structured AMBIGUOUS_REPO payload. +// repo_uri alias + structured AMBIGUOUS_REPO payload. // --------------------------------------------------------------------------- test("deriveRepoUri passes through URI-shaped names and hashes local-only paths", () => { diff --git a/packages/mcp/src/repo-resolver.ts b/packages/mcp/src/repo-resolver.ts index 59c0c8f3..7027c3ff 100644 --- a/packages/mcp/src/repo-resolver.ts +++ b/packages/mcp/src/repo-resolver.ts @@ -121,7 +121,7 @@ export async function resolveRepo( let entry: RegistryEntry | undefined; let resolvedName: string | undefined; - // `repo_uri` wins when both are provided (per AC-M6-2 §5). + // `repo_uri` wins when both are provided. if (repoUri !== undefined) { const wanted = normalizeRepoUri(repoUri); for (const key of names) { @@ -189,10 +189,10 @@ function normalizeResolveArg(arg: ResolveRepoArg): { * Build the structured AMBIGUOUS_REPO error with a `choices[]` payload * derived from registry entries. 
* - * TODO(M7 / AC-M6-1): once `RepoNode` lands in core-types and the registry - * is reshaped to expose `default_branch` + `group`, switch this to pull - * those fields from the node instead of defaulting to `null`. For now - * they're placeholders so the wire shape is stable. + * Once the registry is reshaped to expose `default_branch` + `group` + * from the persisted RepoNode, switch this to pull those fields from + * the node instead of defaulting to `null`. For now they're + * placeholders so the wire shape is stable. */ function buildAmbiguousError( registry: Record, @@ -227,8 +227,9 @@ function buildAmbiguousError( * - Else, fall back to `local:` so two local repos * with colliding short names still have distinct URIs. * - * M7 will replace this with the registry-backed RepoNode.repo_uri once - * AC-M6-1 lands. Kept deterministic so tests can assert exact values. + * Future work will replace this with the registry-backed + * RepoNode.repo_uri. Kept deterministic so tests can assert exact + * values. */ export function deriveRepoUri(entry: RegistryEntry): string { if (entry.name.includes("/")) return entry.name; diff --git a/packages/mcp/src/repo-uri-for-entry.ts b/packages/mcp/src/repo-uri-for-entry.ts index 2cdd9452..96f93bfa 100644 --- a/packages/mcp/src/repo-uri-for-entry.ts +++ b/packages/mcp/src/repo-uri-for-entry.ts @@ -1,17 +1,17 @@ /** - * `repoUriForEntry` — resolve a `repo_uri` for a registry entry, preferring - * the graph-backed `RepoNode.repoUri` when the repo has been indexed with - * AC-M6-1's phase, otherwise falling back to `deriveRepoUri(entry)` from - * `repo-resolver.ts` (shipped by AC-M6-2). + * `repoUriForEntry` — resolve a `repo_uri` for a registry entry, + * preferring the graph-backed `RepoNode.repoUri` when the repo's index + * carries one, otherwise falling back to `deriveRepoUri(entry)` from + * `repo-resolver.ts`. 
* - * Used by the `group_*` MCP tools (AC-M6-4) so that every repo-identified - * response row carries a stable `repo_uri` alongside its legacy `name` / - * `_repo` string. Lookups are best-effort — any DB-open / query failure - * falls back silently to the derived URI so a single unhealthy repo cannot - * break the whole response. + * Used by the `group_*` MCP tools so that every repo-identified + * response row carries a stable `repo_uri` alongside its legacy `name` + * / `_repo` string. Lookups are best-effort — any DB-open / query + * failure falls back silently to the derived URI so a single unhealthy + * repo cannot break the whole response. * - * Determinism: `deriveRepoUri` is pure; `RepoNode.repoUri` is byte-stable - * after AC-M6-1 lands. Neither path depends on wall-clock. + * Determinism: `deriveRepoUri` is pure; `RepoNode.repoUri` is byte- + * stable when present. Neither path depends on wall-clock. */ // biome-ignore-all lint/complexity/useLiteralKeys: dot-access disallowed on Record index signatures @@ -23,9 +23,9 @@ import type { ConnectionPool } from "./connection-pool.js"; import { deriveRepoUri, type RegistryEntry } from "./repo-resolver.js"; /** - * Preferred: read `RepoNode.repoUri` from DuckDB. Only repos indexed AFTER - * AC-M6-1 landed carry this row — earlier indexes fall back to the - * derived URI. + * Preferred: read `RepoNode.repoUri` from the persisted Repo row. + * Only repos that were indexed with the first-class Repo entity carry + * this row — earlier indexes fall back to the derived URI. */ async function readRepoNodeUri(graph: IGraphStore): Promise { const repoId = makeNodeId("Repo", "", "repo"); @@ -59,7 +59,8 @@ export async function repoUriForEntry( } catch { // Fall through to derived URI — a missing DB file, an unreadable // nodes table, or any other transient failure must not break the - // group response. AC-M6-4 is additive; legacy fields stay correct. + // group response. 
The repo_uri output is additive; legacy fields + // stay correct. } } return deriveRepoUri(entry); diff --git a/packages/mcp/src/test-utils.ts b/packages/mcp/src/test-utils.ts index 2c871183..8a60d87b 100644 --- a/packages/mcp/src/test-utils.ts +++ b/packages/mcp/src/test-utils.ts @@ -2,8 +2,8 @@ /** * Shared MCP test fixtures. * - * After AC-A-6c the production tools/resources call typed finders on - * `IGraphStore` (`listNodes`, `listNodesByKind`, `listEdges`, + * The production tools/resources call typed finders on `IGraphStore` + * (`listNodes`, `listNodesByKind`, `listEdges`, * `listEdgesByType`, `listFindings`, `listRoutes`, `getRepoNode`, * `traverseAncestors`, `listEmbeddingHashes`, etc.) rather than raw * `query()`. This file gives every mcp test a small, composable @@ -65,13 +65,13 @@ import { ConnectionPool } from "./connection-pool.js"; // ───────────────────────────────────────────────────────────────────────────── // Store wrapper — composes the IGraphStore-shaped fake into the OpenStoreResult -// shape the connection pool returns post AC-A-6c. +// shape the connection pool returns. // ───────────────────────────────────────────────────────────────────────────── /** * Wrap an in-memory IGraphStore-shaped fake as the composed `Store` - * (`OpenStoreResult`) that the connection pool returns post AC-A-6c. - * The same instance backs both `graph` and `temporal` because DuckDbStore + * (`OpenStoreResult`) that the connection pool returns. The same + * instance backs both `graph` and `temporal` because DuckDbStore * implements both interfaces over a single connection in production. 
*/ export function wrapAsStore(fake: unknown): Store { diff --git a/packages/mcp/src/tools/group-contracts.ts b/packages/mcp/src/tools/group-contracts.ts index d5b5f356..16e847ce 100644 --- a/packages/mcp/src/tools/group-contracts.ts +++ b/packages/mcp/src/tools/group-contracts.ts @@ -40,11 +40,11 @@ const GroupContractsInput = { interface ContractRow { readonly consumerRepo: string; - /** Additive per AC-M6-4 — cross-repo handle for the consumer repo. */ + /** Cross-repo handle for the consumer repo. */ readonly consumerRepoUri: string; readonly consumerSymbol: string; readonly producerRepo: string; - /** Additive per AC-M6-4 — cross-repo handle for the producer repo. */ + /** Cross-repo handle for the producer repo. */ readonly producerRepoUri: string; readonly producerRoute: string; readonly method: string; @@ -141,8 +141,8 @@ export async function runGroupContracts( const missing: string[] = []; const consumersByRepo = new Map(); const producersByRepo = new Map(); - // AC-M6-4: resolve `repo_uri` for every registered member so every - // ContractRow carries `consumerRepoUri` / `producerRepoUri` additively. + // Resolve `repo_uri` for every registered member so every + // ContractRow carries `consumerRepoUri` / `producerRepoUri`. const repoUriByName = new Map(); for (const repo of sortedRepos) { diff --git a/packages/mcp/src/tools/group-list.ts b/packages/mcp/src/tools/group-list.ts index 7961309a..5b0aa5ba 100644 --- a/packages/mcp/src/tools/group-list.ts +++ b/packages/mcp/src/tools/group-list.ts @@ -13,9 +13,9 @@ import { repoUriForEntry } from "../repo-uri-for-entry.js"; import { fromToolResult, type ToolContext, type ToolResult, toToolResult } from "./shared.js"; /** - * One repo entry as surfaced by `group_list`. `repo_uri` is additive per - * AC-M6-4 and is the authoritative cross-repo handle going forward; the - * legacy `name` field stays through M7 so existing consumers keep working. + * One repo entry as surfaced by `group_list`. 
`repo_uri` is the + * authoritative cross-repo handle going forward; the legacy `name` + * field is additive so existing consumers keep working. */ interface GroupRepoSummary { readonly name: string; @@ -40,7 +40,7 @@ export async function runGroupList(ctx: ToolContext): Promise { const repos: GroupRepoSummary[] = []; for (const r of g.repos) { const entry: RegistryEntry | undefined = registry[r.name]; - // Prefer the graph-backed RepoNode.repoUri (AC-M6-1) when the repo + // Prefer the graph-backed RepoNode.repoUri when the repo // is registered; otherwise fall back to deriveRepoUri against a // synthetic entry built from the group record so orphan references // still receive a stable `local:`. diff --git a/packages/mcp/src/tools/group-query.ts b/packages/mcp/src/tools/group-query.ts index cad16fec..8612d10d 100644 --- a/packages/mcp/src/tools/group-query.ts +++ b/packages/mcp/src/tools/group-query.ts @@ -67,7 +67,7 @@ const GroupQueryInput = { interface ResultRow { readonly _repo: string; /** - * Additive per AC-M6-4. Authoritative cross-repo handle alongside the + * Additive — the authoritative cross-repo handle alongside the * legacy `_repo` (registry name). Derived from the graph-backed * `RepoNode.repoUri` when available, otherwise `deriveRepoUri`. */ @@ -155,8 +155,8 @@ export async function runGroupQuery(ctx: ToolContext, args: GroupQueryArgs): Pro ); continue; } - // AC-M6-4 additive field — resolve once per repo so every result row - // from this repo receives the same `_repo_uri`. Best-effort: the + // Additive field — resolve once per repo so every result row from + // this repo receives the same `_repo_uri`. Best-effort: the // helper falls back to `deriveRepoUri` on any DB failure. 
const repoUri = await repoUriForEntry(hit, ctx.pool); const repoPath = resolve(hit.path); diff --git a/packages/mcp/src/tools/group-status.ts b/packages/mcp/src/tools/group-status.ts index 01914ff0..354acbba 100644 --- a/packages/mcp/src/tools/group-status.ts +++ b/packages/mcp/src/tools/group-status.ts @@ -31,10 +31,10 @@ const GroupStatusInput = { interface RepoStatusRow { readonly name: string; /** - * Cross-repo handle. Additive per AC-M6-4: prefers the graph-backed - * `RepoNode.repoUri` when the repo has been indexed with AC-M6-1's - * phase; falls back to `deriveRepoUri` for orphan references / pre-M6 - * indexes. Legacy `name` field stays through M7. + * Cross-repo handle. Prefers the graph-backed `RepoNode.repoUri` + * when the repo's index carries one; falls back to `deriveRepoUri` + * for orphan references / pre-RepoNode indexes. Legacy `name` field + * stays through the next major. */ readonly repo_uri: string; readonly path: string; @@ -73,7 +73,7 @@ export async function runGroupStatus(ctx: ToolContext, args: GroupStatusArgs): P const hit = registry[repo.name]; if (!hit) { // Orphan reference — still emit a deterministic repo_uri so - // consumers always receive the additive AC-M6-4 field. + // consumers always receive the additive `repo_uri` field. const orphanUri = deriveRepoUri({ name: repo.name, path: repo.path, diff --git a/packages/mcp/src/tools/group-sync.ts b/packages/mcp/src/tools/group-sync.ts index 777a8c87..eb8105ab 100644 --- a/packages/mcp/src/tools/group-sync.ts +++ b/packages/mcp/src/tools/group-sync.ts @@ -61,10 +61,10 @@ export async function runGroupSyncTool(ctx: ToolContext, args: GroupSyncArgs): P ); const inputs: SyncRepoInput[] = []; const missing: string[] = []; - // AC-M6-4: additive per-repo `{name, repo_uri}` rows surfaced in the - // structured response so agents that consume `group_sync` can key on - // the new handle without re-running `group_list`. 
Legacy top-level - // `repos: string[]` (from `ContractRegistry`) stays intact. + // Additive per-repo `{name, repo_uri}` rows surfaced in the + // structured response so agents that consume `group_sync` can key + // on the new handle without re-running `group_list`. Legacy top- + // level `repos: string[]` (from `ContractRegistry`) stays intact. const reposWithUri: { readonly name: string; readonly repo_uri: string }[] = []; for (const repo of sortedRepos) { const hit = registry[repo.name]; @@ -114,7 +114,7 @@ export async function runGroupSyncTool(ctx: ToolContext, args: GroupSyncArgs): P crossLinkCount: registryResult.crossLinks.length, missingRepos: missing, repos: registryResult.repos, - // AC-M6-4 additive field — per-repo `{name, repo_uri}` rows. + // Additive field — per-repo `{name, repo_uri}` rows. reposWithUri, }, next, diff --git a/packages/mcp/src/tools/group-tools.test.ts b/packages/mcp/src/tools/group-tools.test.ts index 45efd7de..cac3a849 100644 --- a/packages/mcp/src/tools/group-tools.test.ts +++ b/packages/mcp/src/tools/group-tools.test.ts @@ -33,7 +33,7 @@ interface FakeRepoData { /** * Optional: the graph-backed `RepoNode.repoUri`. When set, the typed * `getRepoNode("Repo::::repo")` finder returns this URI; otherwise - * `repoUriForEntry` falls back to `deriveRepoUri` (AC-M6-4). + * `repoUriForEntry` falls back to `deriveRepoUri`. */ readonly repoNodeUri?: string; /** Optional seed for FETCHES edges returned by group_contracts. */ @@ -119,9 +119,9 @@ interface RepoFixture { readonly edgeCount: number; readonly searchResults: readonly SearchResult[]; /** - * Optional: graph-backed `RepoNode.repoUri` for AC-M6-4 assertions. - * When set, the typed `getRepoNode` finder surfaces the URI; otherwise - * the tool falls back to `deriveRepoUri`. + * Optional: graph-backed `RepoNode.repoUri`. When set, the typed + * `getRepoNode` finder surfaces the URI; otherwise the tool falls + * back to `deriveRepoUri`. 
*/ readonly repoNodeUri?: string; readonly fetchesEdges?: readonly { @@ -632,7 +632,7 @@ test("query without repo arg returns AMBIGUOUS_REPO when >1 repo registered", as error: { code: string; hint?: string; - // AC-M6-2: structured disambiguation payload. + // Structured disambiguation payload. error_code?: string; jsonrpc_code?: number; total_matches?: number; @@ -648,7 +648,7 @@ test("query without repo arg returns AMBIGUOUS_REPO when >1 repo registered", as // Hint names both registered repos so the agent can retry. assert.ok(sc.error.hint?.includes("alpha")); assert.ok(sc.error.hint?.includes("bravo")); - // New structured contract (AC-M6-2). + // Structured contract — error_code + jsonrpc_code + counts. assert.equal(sc.error.error_code, "AMBIGUOUS_REPO"); assert.equal(sc.error.jsonrpc_code, -32602); assert.equal(sc.error.total_matches, 2); @@ -717,12 +717,12 @@ test("group_query is deterministic across 3 successive runs (byte-equal structur }); // --------------------------------------------------------------------------- -// AC-M6-4 — additive `repo_uri` across group_* tool responses. -// Legacy fields (`name`, `_repo`, `consumerRepo`, `producerRepo`) stay -// byte-for-byte; the new fields augment them without altering ordering. +// Additive `repo_uri` across group_* tool responses. Legacy fields +// (`name`, `_repo`, `consumerRepo`, `producerRepo`) stay byte-for-byte; +// the new fields augment them without altering ordering. 
// --------------------------------------------------------------------------- -test("group_list emits repo_uri derived from deriveRepoUri when no RepoNode exists (AC-M6-4)", async () => { +test("group_list emits repo_uri derived from deriveRepoUri when no RepoNode exists", async () => { await withTestHarness( [ { name: "alpha", nodeCount: 1, edgeCount: 0, searchResults: [] }, @@ -759,7 +759,7 @@ test("group_list emits repo_uri derived from deriveRepoUri when no RepoNode exis ); }); -test("group_list emits repo_uri from RepoNode.repoUri when the graph has one (AC-M6-4)", async () => { +test("group_list emits repo_uri from RepoNode.repoUri when the graph has one", async () => { await withTestHarness( [ { @@ -800,7 +800,7 @@ test("group_list emits repo_uri from RepoNode.repoUri when the graph has one (AC ); }); -test("group_status per-member row carries both name and repo_uri (AC-M6-4)", async () => { +test("group_status per-member row carries both name and repo_uri", async () => { await withTestHarness( [ { @@ -841,7 +841,7 @@ test("group_status per-member row carries both name and repo_uri (AC-M6-4)", asy ); }); -test("group_status emits repo_uri for orphan references (not in registry) (AC-M6-4)", async () => { +test("group_status emits repo_uri for orphan references (not in registry)", async () => { await withTestHarness( [{ name: "alpha", nodeCount: 1, edgeCount: 0, searchResults: [] }], [{ name: "mixed", repos: ["alpha", "ghost"] }], @@ -874,7 +874,7 @@ test("group_status emits repo_uri for orphan references (not in registry) (AC-M6 ); }); -test("group_query result row carries both _repo and _repo_uri (AC-M6-4)", async () => { +test("group_query result row carries both _repo and _repo_uri", async () => { await withTestHarness( [ { @@ -926,7 +926,7 @@ test("group_query result row carries both _repo and _repo_uri (AC-M6-4)", async ); }); -test("group_contracts ContractRow carries both legacy and *RepoUri fields (AC-M6-4)", async () => { +test("group_contracts 
ContractRow carries both legacy and *RepoUri fields", async () => { await withTestHarness( [ { @@ -981,7 +981,7 @@ test("group_contracts ContractRow carries both legacy and *RepoUri fields (AC-M6 ); }); -test("group_sync structuredContent carries reposWithUri {name, repo_uri} additively (AC-M6-4)", async () => { +test("group_sync structuredContent carries reposWithUri {name, repo_uri} additively", async () => { await withTestHarness( [ { @@ -1016,7 +1016,7 @@ test("group_sync structuredContent carries reposWithUri {name, repo_uri} additiv ); }); -test("group_list repo_uri for bare names is byte-equal to deriveRepoUri (AC-M6-4)", async () => { +test("group_list repo_uri for bare names is byte-equal to deriveRepoUri", async () => { await withTestHarness( [{ name: "solo", nodeCount: 1, edgeCount: 0, searchResults: [] }], [{ name: "only", repos: ["solo"] }], diff --git a/packages/mcp/src/tools/list-dead-code.test.ts b/packages/mcp/src/tools/list-dead-code.test.ts index fc738122..53324568 100644 --- a/packages/mcp/src/tools/list-dead-code.test.ts +++ b/packages/mcp/src/tools/list-dead-code.test.ts @@ -42,8 +42,8 @@ import type { ToolContext } from "./shared.js"; /** * Wrap an in-memory IGraphStore-shaped fake as the composed `Store` - * (`OpenStoreResult`) that the connection pool returns post AC-A-6c. - * The same instance backs both `graph` and `temporal` because DuckDbStore + * (`OpenStoreResult`) that the connection pool returns. The same + * instance backs both `graph` and `temporal` because DuckDbStore * implements both interfaces over a single connection in production. */ function wrapAsStore(fake: unknown): import("@opencodehub/storage").Store { @@ -78,9 +78,9 @@ interface FakeEdge { /** * In-memory fake of the typed-finder surface `classifyDeadness` consumes: - * `listNodes`, `listEdges`, `listEdgesByType`. 
AC-A-6b dropped the SQL-regex - * dispatcher from the production code path; the fake mirrors the same - * filtering semantics directly against the seeded `nodes` / `edges` arrays. + * `listNodes`, `listEdges`, `listEdgesByType`. The fake mirrors the same + * filtering semantics directly against the seeded `nodes` / `edges` + * arrays. */ function makeFakeStore(nodes: readonly FakeNode[], edges: readonly FakeEdge[]): DuckDbStore { const nodeAsGraphNode = (n: FakeNode): GraphNode => n as unknown as GraphNode; diff --git a/packages/mcp/src/tools/list-findings-delta.test.ts b/packages/mcp/src/tools/list-findings-delta.test.ts index b2afe66b..5976ad6a 100644 --- a/packages/mcp/src/tools/list-findings-delta.test.ts +++ b/packages/mcp/src/tools/list-findings-delta.test.ts @@ -33,8 +33,8 @@ import type { ToolContext } from "./shared.js"; /** * Wrap an in-memory IGraphStore-shaped fake as the composed `Store` - * (`OpenStoreResult`) that the connection pool returns post AC-A-6c. - * The same instance backs both `graph` and `temporal` because DuckDbStore + * (`OpenStoreResult`) that the connection pool returns. The same + * instance backs both `graph` and `temporal` because DuckDbStore * implements both interfaces over a single connection in production. */ function wrapAsStore(fake: unknown): import("@opencodehub/storage").Store { diff --git a/packages/mcp/src/tools/pack-codebase.ts b/packages/mcp/src/tools/pack-codebase.ts index 293b0033..adc01e3e 100644 --- a/packages/mcp/src/tools/pack-codebase.ts +++ b/packages/mcp/src/tools/pack-codebase.ts @@ -291,7 +291,7 @@ async function callRealPackEngine(args: { } } -/** Real-world repomix shell-out. Mirrors the pre-AC-M5-7 implementation. */ +/** Real-world repomix shell-out. 
*/ async function callRealRepomixEngine(args: { repoPath: string; style: "xml" | "markdown" | "json" | "plain"; diff --git a/packages/mcp/src/tools/query.test.ts b/packages/mcp/src/tools/query.test.ts index 9bd652ad..ced9867b 100644 --- a/packages/mcp/src/tools/query.test.ts +++ b/packages/mcp/src/tools/query.test.ts @@ -55,8 +55,8 @@ import type { EmbedderFactory, ToolContext } from "./shared.js"; /** * Wrap an in-memory IGraphStore-shaped fake as the composed `Store` - * (`OpenStoreResult`) that the connection pool returns post AC-A-6c. - * The same instance backs both `graph` and `temporal` because DuckDbStore + * (`OpenStoreResult`) that the connection pool returns. The same + * instance backs both `graph` and `temporal` because DuckDbStore * implements both interfaces over a single connection in production. */ function wrapAsStore(fake: unknown): import("@opencodehub/storage").Store { diff --git a/packages/mcp/src/tools/remove-dead-code.test.ts b/packages/mcp/src/tools/remove-dead-code.test.ts index 4178537d..09b1cc5e 100644 --- a/packages/mcp/src/tools/remove-dead-code.test.ts +++ b/packages/mcp/src/tools/remove-dead-code.test.ts @@ -43,8 +43,8 @@ import { type RemoveDeadCodeContext, registerRemoveDeadCodeTool } from "./remove /** * Wrap an in-memory IGraphStore-shaped fake as the composed `Store` - * (`OpenStoreResult`) that the connection pool returns post AC-A-6c. - * The same instance backs both `graph` and `temporal` because DuckDbStore + * (`OpenStoreResult`) that the connection pool returns. The same + * instance backs both `graph` and `temporal` because DuckDbStore * implements both interfaces over a single connection in production. */ function wrapAsStore(fake: unknown): import("@opencodehub/storage").Store { @@ -73,9 +73,10 @@ interface FakeNode { /** * In-memory fake of the typed-finder surface that `classifyDeadness` and - * `enrichWithEndLines` consume post AC-A-6c: `listNodes`, `listEdges`, - * `listEdgesByType`. 
Edges are absent from these tests (the dead-code path - * looks for inbound referrers but we only seed isolated dead candidates). + * `enrichWithEndLines` consume: `listNodes`, `listEdges`, + * `listEdgesByType`. Edges are absent from these tests (the dead-code + * path looks for inbound referrers but we only seed isolated dead + * candidates). */ function makeFakeStore(nodes: readonly FakeNode[]): DuckDbStore { const nodeAsGraphNode = (n: FakeNode): GraphNode => n as unknown as GraphNode; diff --git a/packages/mcp/src/tools/run-smoke.test.ts b/packages/mcp/src/tools/run-smoke.test.ts index c9a715d2..4174f891 100644 --- a/packages/mcp/src/tools/run-smoke.test.ts +++ b/packages/mcp/src/tools/run-smoke.test.ts @@ -63,8 +63,8 @@ import { runVerdict } from "./verdict.js"; /** * Wrap an in-memory IGraphStore-shaped fake as the composed `Store` - * (`OpenStoreResult`) that the connection pool returns post AC-A-6c. - * The same instance backs both `graph` and `temporal` because DuckDbStore + * (`OpenStoreResult`) that the connection pool returns. The same + * instance backs both `graph` and `temporal` because DuckDbStore * implements both interfaces over a single connection in production. */ function wrapAsStore(fake: unknown): import("@opencodehub/storage").Store { diff --git a/packages/mcp/src/tools/shared.ts b/packages/mcp/src/tools/shared.ts index 7e78b85f..91846a7d 100644 --- a/packages/mcp/src/tools/shared.ts +++ b/packages/mcp/src/tools/shared.ts @@ -95,7 +95,7 @@ export function fromToolResult(r: ToolResult): CallToolResult { * Shared zod shape for `{ repo, repo_uri }` — every per-repo MCP tool * spreads this into its `inputSchema` so callers can pass either the * registry name (`repo`) or a Sourcegraph-style URI (`repo_uri`). When - * both are provided, `repo_uri` wins at the resolver. See AC-M6-2 §5. + * both are provided, `repo_uri` wins at the resolver. 
*/ export const repoArgShape = { repo: z @@ -167,7 +167,7 @@ export async function withStore( // Enumerate every in-tree backend's artifact filename so the hint is // useful regardless of which backend produced the index. Pulling the // filenames from `describeArtifacts` keeps two-store deployments in - // sync with a single source of truth (AC-A-8). + // sync with a single source of truth. const candidates = (["duck", "lbug"] as const) .map((b) => `.codehub/${describeArtifacts(b).graphFile}`) .join(" or "); diff --git a/packages/mcp/src/tools/sql.test.ts b/packages/mcp/src/tools/sql.test.ts index a402b0a2..157b5e88 100644 --- a/packages/mcp/src/tools/sql.test.ts +++ b/packages/mcp/src/tools/sql.test.ts @@ -30,9 +30,8 @@ import { registerSqlTool } from "./sql.js"; /** * Captured call to `temporal.exec()` (SQL path) or `graph.execCypher()` - * (Cypher path). The original test recorded "store.query" — post AC-A-6c - * the SQL path routes through `temporal.exec()` and the Cypher path - * routes through `graph.execCypher()`. + * (Cypher path). The SQL path routes through `temporal.exec()` and the + * Cypher path routes through `graph.execCypher()`. */ interface ExecCall { readonly statement: string; @@ -405,12 +404,11 @@ test("sql: cypher read path tolerates an unknown keyword that is NOT a write ver }); test("sql: cypher timeout_ms is forwarded to store.query opts", async () => { - // The original test asserted the SQL `timeout_ms` was forwarded to a - // `query()` call's third arg. Post AC-A-6c the SQL path routes through - // `temporal.exec(sql, params, { timeoutMs })`. The tool currently does - // NOT forward `timeout_ms` to the cypher path — `execCypher` only - // accepts (statement, params). To preserve test intent we exercise the - // SQL path here and assert the `opts.timeoutMs` plumbing. + // The SQL path routes through `temporal.exec(sql, params, + // { timeoutMs })`. 
The tool currently does NOT forward `timeout_ms` + // to the cypher path — `execCypher` only accepts (statement, params). + // To preserve test intent we exercise the SQL path here and assert + // the `opts.timeoutMs` plumbing. await withHarness( { rows: [{ x: 1 }], diff --git a/packages/mcp/src/tools/sql.ts b/packages/mcp/src/tools/sql.ts index 5eddbd28..6c245e28 100644 --- a/packages/mcp/src/tools/sql.ts +++ b/packages/mcp/src/tools/sql.ts @@ -133,9 +133,9 @@ export async function runSql(ctx: ToolContext, args: SqlArgs): Promise[]; if (isCypher) { diff --git a/packages/pack/README.md b/packages/pack/README.md index 236c4c86..3e1f896b 100644 --- a/packages/pack/README.md +++ b/packages/pack/README.md @@ -1,3 +1,3 @@ # @opencodehub/pack -Deterministic code-pack generator producing the M5 9-item BOM (manifest, skeleton, file-tree, deps, ast-chunks, xrefs, embeddings-sidecar, findings, licenses). Scaffolded in AC-M5-1; BOM body implementations land in AC-M5-3..9. See `.erpaval/specs/005-m5-m6/spec.md` for the contract. +Deterministic code-pack generator producing the 9-item BOM (manifest, skeleton, file-tree, deps, ast-chunks, xrefs, embeddings-sidecar, findings, licenses). See `src/types.ts` for the contract types (`PackManifest`, `BomItem`, `PackPins`, `DeterminismClass`, `PackOpts`). diff --git a/packages/pack/src/ast-chunker.test.ts b/packages/pack/src/ast-chunker.test.ts index df06baf6..eba1359a 100644 --- a/packages/pack/src/ast-chunker.test.ts +++ b/packages/pack/src/ast-chunker.test.ts @@ -1,5 +1,5 @@ /** - * Tests for the AST-chunker BOM body (AC-M5-5 — item 5/9). + * Tests for the AST-chunker BOM body (item 5/9). * * Covers: * - A. Determinism on the strict path (mock chonkie that returns fixed chunks). @@ -108,7 +108,7 @@ test("C. 
CRLF input yields offsets against the LF-normalized text", async () => const fromCrlf = await buildAstChunks(crlf, { _loadChonkie: makeFakeLoader() }); const fromLf = await buildAstChunks(lf, { _loadChonkie: makeFakeLoader() }); // After CRLF→LF the texts are byte-identical, so the chunks must match - // byte-for-byte regardless of input line-ending style (W-M5-4). + // byte-for-byte regardless of input line-ending style. assert.equal(canonicalJson(fromCrlf.chunks), canonicalJson(fromLf.chunks)); assert.equal(fromCrlf.chunks[0]?.startByte, 0); assert.equal(fromCrlf.chunks[0]?.endByte, 4); diff --git a/packages/pack/src/ast-chunker.ts b/packages/pack/src/ast-chunker.ts index 1d01c3ea..60aa37f1 100644 --- a/packages/pack/src/ast-chunker.ts +++ b/packages/pack/src/ast-chunker.ts @@ -1,10 +1,10 @@ /** - * BOM body item: AST-aware code chunks (AC-M5-5 — item 5/9). + * BOM body item: AST-aware code chunks (item 5/9). * * Wraps `@chonkiejs/core`'s `CodeChunker`, which builds chunks from a * tree-sitter AST (children grouped by token budget). Each input file is - * CRLF→LF normalized BEFORE chunking — W-M5-4 requires that two repos - * differing only by line-ending style produce the same `pack_hash`. + * CRLF→LF normalized BEFORE chunking — two repos differing only by + * line-ending style must produce the same `pack_hash`. * * Determinism: * - Strict path: `CodeChunker.create({language})` succeeds for every @@ -282,7 +282,7 @@ function pushLineSplitChunks( } } -/** Decode raw bytes as UTF-8 and CRLF→LF normalize for W-M5-4. */ +/** Decode raw bytes as UTF-8 and CRLF→LF normalize for line-ending byte-identity. 
*/ function decodeAndNormalize(bytes: Uint8Array): string { // `fatal: false` so malformed sequences become U+FFFD instead of throwing — // the BOM is best-effort over arbitrary repo bytes; it does not validate diff --git a/packages/pack/src/deps.test.ts b/packages/pack/src/deps.test.ts index 180bb7a1..3a43b4f9 100644 --- a/packages/pack/src/deps.test.ts +++ b/packages/pack/src/deps.test.ts @@ -1,5 +1,5 @@ /** - * Tests for the dependency BOM body (AC-M5-4 — item 4/9). + * Tests for the dependency BOM body (item 4/9). * * Covers: * - A. Determinism: two consecutive calls return deep-equal output. diff --git a/packages/pack/src/deps.ts b/packages/pack/src/deps.ts index bdddab95..fc86691d 100644 --- a/packages/pack/src/deps.ts +++ b/packages/pack/src/deps.ts @@ -1,5 +1,5 @@ /** - * BOM body item: dependency graph / lockfile slice (AC-M5-4 — item 4/9). + * BOM body item: dependency graph / lockfile slice (item 4/9). * * Reads `Dependency` nodes via `IGraphStore.listNodes()` and projects * each onto a flat `DepRow`. Mirrors the shape of the MCP `dependencies` @@ -32,7 +32,7 @@ export interface DepRow { * Resolved package version. The `DependencyNode` schema defines * `version: string` (non-optional), but we keep the row shape lenient * so future graphs that allow optional version (e.g. workspace `*` - * pins) round-trip without coercion. See AC-M5-4 anti-goals. + * pins) round-trip without coercion. */ readonly version: string; /** Ecosystem — `npm` / `pypi` / `go` / `cargo` / `maven` / `nuget`. */ diff --git a/packages/pack/src/embeddings-sidecar.test.ts b/packages/pack/src/embeddings-sidecar.test.ts index 88dc92c5..52b29c2b 100644 --- a/packages/pack/src/embeddings-sidecar.test.ts +++ b/packages/pack/src/embeddings-sidecar.test.ts @@ -1,8 +1,8 @@ /** - * Tests for the Parquet embeddings sidecar (AC-M5-6 + AC-A-4 relocation). + * Tests for the Parquet embeddings sidecar. * - * AC-A-4 moved sidecar emission OUT of `@opencodehub/storage` and INTO - * pack/. 
The sidecar now consumes embeddings via the portable + * Sidecar emission lives in pack/, not in `@opencodehub/storage`. The + * sidecar consumes embeddings via the portable * {@link IGraphStore.listEmbeddings} stream and writes Parquet via * DuckDB COPY. Tests cover three tiers: * @@ -10,14 +10,14 @@ * - Duck-path fake exposing the @internal `exportEmbeddingsParquet` * helper → `written: true`, `writerBackend: "duck-copy"`. * - Duck-path fake reporting `rowCount: 0` → `written: false`, - * `writerBackend: "absent"`, `determinismClass: "strict"` (S-M5-3). + * `writerBackend: "absent"`, `determinismClass: "strict"`. * - lbug-path fake → `written: false`, `writerBackend: "absent"`, - * `determinismClass: "degraded"` when embeddings exist (v1 - * deferred — AC-A-4 anti-goal §10). + * `determinismClass: "degraded"` when embeddings exist (v1 defers + * Parquet emission on lbug-only deployments). * * 2. Real-DuckDB byte-identity test (skipped when `@duckdb/node-api` - * native binding fails to load — the worktree native-binding - * lesson from `T-W3-1.md §11`). When it runs: + * native binding fails to load — worktree native bindings may not + * always rebuild cleanly). When it runs: * - 100 row × 384-dim Float32Array fixture. * - Two consecutive `writeEmbeddingsSidecar` runs against the same * store produce byte-identical Parquet files. @@ -51,7 +51,7 @@ function makeMockGraph(rows: readonly EmbeddingRow[] = []): IGraphStore { /** * Wrap a graph store + optional COPY helper into the {@link Store} shape - * the AC-A-4 sidecar consumes. `backend` is the dispatch axis the sidecar + * the sidecar consumes. `backend` is the dispatch axis the sidecar * narrows on; `temporal` is unused on the duck path so we cast the graph * stand-in into temporal-shape when the caller wants the duck-typed COPY * helper attached to the graph view. 
@@ -92,7 +92,7 @@ async function tempDir(): Promise { // --------------------------------------------------------------------------- describe("writeEmbeddingsSidecar — duck-path dispatch (mock)", () => { - it("returns written=false, writerBackend=absent when COPY reports rowCount=0 (S-M5-3)", async () => { + it("returns written=false, writerBackend=absent when COPY reports rowCount=0", async () => { const dir = await tempDir(); try { let calls = 0; @@ -177,7 +177,7 @@ describe("writeEmbeddingsSidecar — lbug-path degraded stamp (mock)", () => { assert.equal( result.determinismClass, "degraded", - "lbug + non-empty embeddings must stamp degraded (AC-A-4 §10 v1)", + "lbug + non-empty embeddings must stamp degraded for v1", ); assert.equal(result.rowCount, 2); assert.equal(result.bytesWritten, 0); @@ -206,9 +206,9 @@ describe("writeEmbeddingsSidecar — lbug-path degraded stamp (mock)", () => { // --------------------------------------------------------------------------- // Byte-identity test against a real DuckDbStore. The native binding may // fail to rebuild in worktrees — wrap the entire test in a try/catch and -// skip with a logged note when DuckDB cannot be loaded. This follows the -// worktree native-binding lesson in T-W3-1.md §11; the orchestrator's -// main checkout re-validates with bindings present. +// skip with a logged note when DuckDB cannot be loaded. The main +// checkout re-validates with bindings present so any divergence still +// gets caught upstream. // --------------------------------------------------------------------------- test("writeEmbeddingsSidecar — populated duck path is byte-identical across two runs", async () => { @@ -234,7 +234,8 @@ test("writeEmbeddingsSidecar — populated duck path is byte-identical across tw store = new DuckDbStore(dbPath, { embeddingDim: 384 }); await store.open(); } catch (err) { - // Native binding load failure — log and skip per worktree lesson. 
+ // Native binding load failure — log and skip; worktree bindings + // may not always rebuild cleanly. await rm(dir, { recursive: true, force: true }); assert.ok( true, @@ -322,7 +323,7 @@ test("writeEmbeddingsSidecar — populated duck path is byte-identical across tw /** * Generate a deterministic Float32 vector. Uses a simple LCG seeded by * `(rowIndex, dimIndex)` so the same call returns the same vector across - * runs — matches the AC-M5-6 byte-identity contract without dragging in a + * runs — matches the byte-identity contract without dragging in a * crypto-grade RNG. */ function deterministicVector(rowIndex: number, dim: number): Float32Array { diff --git a/packages/pack/src/embeddings-sidecar.ts b/packages/pack/src/embeddings-sidecar.ts index 08f1a908..18bc173b 100644 --- a/packages/pack/src/embeddings-sidecar.ts +++ b/packages/pack/src/embeddings-sidecar.ts @@ -1,16 +1,16 @@ /** - * BOM body item #7: Parquet embeddings sidecar (AC-M5-6, AC-A-4 relocation). + * BOM body item #7: Parquet embeddings sidecar. * - * AC-A-4 moved sidecar emission OUT of `@opencodehub/storage` and into the - * pack layer. The sidecar is now a packaging concern: it consumes - * embeddings via {@link IGraphStore.listEmbeddings} (a portable graph-side - * method shipped by both adapters in AC-A-6a) and writes Parquet via the - * temporal store's DuckDB `COPY ... TO ... (FORMAT PARQUET, COMPRESSION - * ZSTD)`. Third-party graph adapters (AGE, Memgraph, Neo4j, Neptune) - * therefore do NOT implement Parquet emission themselves — pack handles - * it from the deterministic row stream. + * Sidecar emission lives in the pack layer, not in `@opencodehub/storage`. + * The sidecar is a packaging concern: it consumes embeddings via the + * portable {@link IGraphStore.listEmbeddings} method shipped by every + * adapter and writes Parquet via the temporal store's DuckDB + * `COPY ... TO ... (FORMAT PARQUET, COMPRESSION ZSTD)`. 
Third-party + * graph adapters (AGE, Memgraph, Neo4j, Neptune) therefore do NOT + * implement Parquet emission themselves — pack handles it from the + * deterministic row stream. * - * Backend dispatch (per architecture-revised.md §AC-A-4): + * Backend dispatch: * * - `backend === "duck"`: temporal IS the same DuckDB connection that * owns the `embeddings` table. We call the @internal helper @@ -21,11 +21,10 @@ * - `backend === "lbug"`: graph rows live in `@ladybugdb/core`; the paired * temporal DuckDB has no embeddings table. v1 stamps * `determinismClass: "degraded"`, `writerBackend: "absent"` and emits - * no file. AC-A-4 anti-goal §10 explicitly permits this: - * "accept `determinism_class: degraded` on lbug-only deployments for - * v1". A future iteration can stage rows into the temporal store - * before COPY (or fall back to `@dsnp/parquetjs`) once the dep - * footprint is acceptable. + * no file — lbug-only deployments accept `determinism_class: + * degraded` for v1. A future iteration can stage rows into the + * temporal store before COPY (or fall back to `@dsnp/parquetjs`) + * once the dep footprint is acceptable. * * Determinism contract — non-negotiable, mirrored by the byte-identity * test in `embeddings-sidecar.test.ts` for the duck path: @@ -48,7 +47,7 @@ import { readFile } from "node:fs/promises"; import { DuckDbStore, type IGraphStore, type Store } from "@opencodehub/storage"; /** - * Inputs to {@link writeEmbeddingsSidecar}. AC-A-4 takes a composed + * Inputs to {@link writeEmbeddingsSidecar}. Takes a composed * {@link Store} (= `OpenStoreResult`) so the sidecar can dispatch on * backend and route through whichever adapter owns the embeddings. */ @@ -123,7 +122,7 @@ interface ParquetCopyCapableStore { * * Returns `{ written: false, rowCount: 0, writerBackend: "absent", ... 
}` * when: - * - the `embeddings` table is empty (S-M5-3 — pack omits the BomItem); + * - the `embeddings` table is empty (pack omits the BomItem); * - the backend is `lbug` (v1 degraded path — no temporal embeddings * table to COPY from). * @@ -165,8 +164,8 @@ export async function writeEmbeddingsSidecar(opts: SidecarOptions): Promise { +describe("@opencodehub/pack public entry", () => { it("exports generatePack as a function", () => { assert.equal(typeof generatePack, "function"); }); @@ -154,8 +155,8 @@ const COMMON_INTERNAL = { duckdbVersion: "1.1.3", grammarCommits: { typescript: "b".repeat(40) }, // Provide a deterministic chonkie loader for the strict path so tests - // never depend on the real `@chonkiejs/core` install (the worktree - // native-binding lesson — onnxruntime-node may not rebuild cleanly). + // never depend on the real `@chonkiejs/core` install (worktree native + // bindings such as onnxruntime-node may not rebuild cleanly). chonkieLoader: async () => ({ version: "0.0.9", CodeChunker: { @@ -182,11 +183,11 @@ async function runFixture( }, { ...COMMON_INTERNAL, - // AC-A-4 widened the seam to `Store`, but tests that don't exercise - // the sidecar can still pass a graph-only store via `graphOnly`. - // generatePack auto-wraps it into a Store with backend: "duck" and - // a no-op temporal — the sidecar's COPY-helper probe finds nothing - // and resolves to absent (S-M5-3). + // The seam accepts a composed `Store`, but tests that don't + // exercise the sidecar can still pass a graph-only store via + // `graphOnly`. generatePack auto-wraps it into a Store with + // backend: "duck" and a no-op temporal — the sidecar's COPY-helper + // probe finds nothing and resolves to absent. graphOnly: makeFixtureStore(), chunkerFiles: FIXTURE_FILES, ...internalOverrides, @@ -283,7 +284,7 @@ test("E2E-D. 
expected 9 files appear on disk after a run; no Parquet sidecar", a ]) { assert.ok(names.has(n), `missing BOM file: ${n}`); } - // No Parquet sidecar — T-W3-1 owns it. + // No Parquet sidecar in this variant — covered by a dedicated test. for (const n of names) { assert.ok(!n.endsWith(".parquet"), `unexpected Parquet file: ${n}`); } @@ -330,7 +331,7 @@ test("E2E-F. production store path throws cleanly when no internal store provide budgetTokens: 64, tokenizerId: "openai:o200k_base@0.8.0", }), - /AC-M5-7/, + /production store lookup is wired by the CLI/, ); } finally { await rm(dir, { recursive: true, force: true }); @@ -338,7 +339,7 @@ test("E2E-F. production store path throws cleanly when no internal store provide }); // --------------------------------------------------------------------------- -// AC-M5-6 — sidecar wiring. The fixture store does not implement +// Sidecar wiring. The fixture store does not implement // `exportEmbeddingsParquet`, so the sidecar resolves to `absent: true`; the // manifest must therefore NOT list `embeddings.parquet` and the file must // NOT exist on disk. When the store DOES implement the export hook, the @@ -357,7 +358,7 @@ test("E2E-G. sidecar absent — manifest.files[] does not list embeddings.parque const entries = await readdir(dir); assert.ok( !entries.includes("embeddings.parquet"), - "absent sidecar must not produce a file on disk (S-M5-3)", + "absent sidecar must not produce a file on disk", ); } finally { await rm(dir, { recursive: true, force: true }); @@ -368,10 +369,10 @@ test("E2E-H. sidecar present — manifest lists it; pins.duckdbVersion overrides const dir = await tempDir(); try { // Inject a Store whose graph view duck-types the @internal COPY - // helper. AC-A-4's `writeEmbeddingsSidecar` narrows on - // `backend === "duck"` and finds the helper attached to the graph - // view. The fake writes 4 magic bytes ("PAR1") to the path so we - // can verify the hash round-trips into manifest.files[]. + // helper. 
`writeEmbeddingsSidecar` narrows on `backend === "duck"` + // and finds the helper attached to the graph view. The fake writes + // 4 magic bytes ("PAR1") to the path so we can verify the hash + // round-trips into manifest.files[]. const baseStore = makeFixtureStore() as unknown as Record; baseStore["exportEmbeddingsParquet"] = async (absPath: string) => { await (await import("node:fs/promises")).writeFile( diff --git a/packages/pack/src/index.ts b/packages/pack/src/index.ts index 2edf8cbc..733155db 100644 --- a/packages/pack/src/index.ts +++ b/packages/pack/src/index.ts @@ -1,12 +1,12 @@ /** - * @opencodehub/pack — deterministic M5 code-pack BOM. + * @opencodehub/pack — deterministic code-pack BOM. * * Public surface: * - generatePack(opts): assembles the 9-item BOM (skeleton, file-tree, * deps, ast-chunks, xrefs, findings, licenses.md, readme.md, optional * Parquet embeddings sidecar) plus the manifest. The Parquet sidecar - * (AC-M5-6) is absent when no embeddings exist (S-M5-3). - * - buildManifest / serializeManifest: BOM manifest + pack_hash (AC-M5-3). + * is absent when no embeddings exist. + * - buildManifest / serializeManifest: BOM manifest + pack_hash. * - Per-BOM-item builders re-exported for direct use (skeleton, file-tree, * deps, ast-chunker, xrefs, findings, licenses, readme, * embeddings-sidecar). @@ -68,12 +68,11 @@ export { buildXrefs } from "./xrefs.js"; * loader). Callers in production never set this; the public `PackOpts` * surface is unchanged. * - * `store` is the composed {@link Store} (= `OpenStoreResult`) — AC-A-4 - * widened the seam from `IGraphStore` so the embeddings sidecar can - * dispatch on `store.backend` and reach the temporal-tier DuckDB COPY - * helper. Tests that only need graph-side reads can pass an - * {@link IGraphStore} via the `graphOnly` field; the sidecar then takes - * the absent path automatically. 
+ * `store` is the composed {@link Store} (= `OpenStoreResult`) — the + * embeddings sidecar dispatches on `store.backend` and reaches the + * temporal-tier DuckDB COPY helper through this seam. Tests that only + * need graph-side reads can pass an {@link IGraphStore} via the + * `graphOnly` field; the sidecar then takes the absent path automatically. */ export interface GeneratePackInternalOpts { readonly store?: Store; @@ -101,7 +100,7 @@ export interface GeneratePackInternalOpts { * * Writes the 8 always-present BOM files plus the manifest into * `opts.outDir`, plus an optional Parquet sidecar when the underlying - * embeddings table has rows (AC-M5-6): + * embeddings table has rows: * - skeleton.jsonl * - file-tree.jsonl * - deps.jsonl @@ -110,13 +109,14 @@ export interface GeneratePackInternalOpts { * - findings.jsonl * - licenses.md * - readme.md - * - embeddings.parquet (optional — absent when no embeddings, S-M5-3) + * - embeddings.parquet (optional — absent when no embeddings) * - manifest.json * * Determinism class: * - `"strict"` by default. - * - `"best_effort"` when `tokenizerId` starts with `"anthropic:"` (S-M5-2). - * - `"degraded"` when the AST chunker fell back to line-split (S-M5-1). + * - `"best_effort"` when `tokenizerId` starts with `"anthropic:"` (Claude + * tokenizers are not guaranteed stable across versions). + * - `"degraded"` when the AST chunker fell back to line-split. * * The function always writes the manifest LAST so a partial run never * leaves a manifest pointing at hashes that don't match the on-disk @@ -173,14 +173,13 @@ export async function generatePack( bomItem("licenses", "licenses.md", licensesBytes), ]; - // --- Optional Parquet embeddings sidecar (BOM item #7, AC-M5-6 + - // AC-A-4 relocation). The sidecar dispatches on `store.backend`: - // `duck` runs DuckDB COPY directly, `lbug` stamps a degraded - // determinism class for v1 (no temporal embeddings table to COPY - // from). 
When written, the sidecar's runtime `SELECT version()` - // overrides `pins.duckdbVersion` so the manifest binds determinism - // to the engine version that produced the file — the parquet - // `created_by` metadata embeds it. --- + // --- Optional Parquet embeddings sidecar (BOM item #7). The sidecar + // dispatches on `store.backend`: `duck` runs DuckDB COPY directly, + // `lbug` stamps a degraded determinism class for v1 (no temporal + // embeddings table to COPY from). When written, the sidecar's + // runtime `SELECT version()` overrides `pins.duckdbVersion` so the + // manifest binds determinism to the engine version that produced + // the file — the parquet `created_by` metadata embeds it. --- await mkdir(opts.outDir, { recursive: true }); const sidecarPath = path.join(opts.outDir, "embeddings.parquet"); const sidecar = await writeEmbeddingsSidecar({ store, outPath: sidecarPath }); @@ -292,7 +291,7 @@ async function writeBytes(p: string, bytes: Uint8Array): Promise { /** * Resolve the determinism class. `degraded` (from either the chunker - * fallback or the AC-A-4 sidecar lbug-path stamp) dominates everything; + * fallback or the sidecar's lbug-path stamp) dominates everything; * Anthropic tokenizers downgrade to `best_effort`; otherwise `strict`. */ function resolveDeterminism( @@ -306,11 +305,11 @@ function resolveDeterminism( } /** - * Resolve the composed store. AC-A-4 widened the seam from `IGraphStore` - * to `Store`; tests that don't exercise the sidecar can still pass an - * `IGraphStore` via `internal.graphOnly` and we wrap it into a minimal - * `Store` shape that funnels the sidecar to its absent path automatically - * (no `temporal` DuckDB → no COPY helper → `writerBackend: "absent"`). + * Resolve the composed store. 
The seam accepts a composed `Store`; tests + * that don't exercise the sidecar can still pass an `IGraphStore` via + * `internal.graphOnly` and we wrap it into a minimal `Store` shape that + * funnels the sidecar to its absent path automatically (no `temporal` + * DuckDB → no COPY helper → `writerBackend: "absent"`). */ async function resolveStore(internal: GeneratePackInternalOpts, repoPath: string): Promise { if (internal.store !== undefined) return internal.store; @@ -348,10 +347,11 @@ function wrapGraphOnly(graph: IGraphStore): Store { * `internal.graphOnly`) instead. */ async function openStoreFromRepoPath(_repoPath: string): Promise { - // M5 leaves the production lookup wiring to AC-M5-7 (CLI integration). - // Keep a clear failure mode here so the wiring AC catches it loudly. + // Production store lookup is wired by the CLI integration layer. + // Keep a clear failure mode here so callers that forget to inject a + // store in tests (or skip the CLI in production) fail loudly. throw new Error( - "generatePack: production store lookup is owned by AC-M5-7; pass internal.store in tests.", + "generatePack: production store lookup is wired by the CLI; pass internal.store in tests.", ); } diff --git a/packages/pack/src/licenses.test.ts b/packages/pack/src/licenses.test.ts index 2f5ec0cb..d2558697 100644 --- a/packages/pack/src/licenses.test.ts +++ b/packages/pack/src/licenses.test.ts @@ -1,5 +1,5 @@ /** - * Tests for the licenses BOM body (AC-M5-5 — item 9 partial). + * Tests for the licenses BOM body (item 9 partial). * * Covers: * - A. Determinism across two consecutive calls. diff --git a/packages/pack/src/licenses.ts b/packages/pack/src/licenses.ts index 53242dd4..ae256c7d 100644 --- a/packages/pack/src/licenses.ts +++ b/packages/pack/src/licenses.ts @@ -1,9 +1,9 @@ /** - * BOM body item: aggregated LICENSES + NOTICES (AC-M5-5 — item 9 partial). + * BOM body item: aggregated LICENSES + NOTICES (item 9 partial). 
* * Reads `Dependency` nodes via `IGraphStore.listNodes()`, classifies them - * via `classifyDependencies` from `@opencodehub/analysis` (lifted in - * AC-M5-3), and renders both: + * via `classifyDependencies` from `@opencodehub/analysis`, and renders + * both: * * - `licensesMd` — Markdown body listing every dependency by tier * (BLOCK / WARN / OK) and a per-package section in @@ -16,7 +16,7 @@ * before rendering — same key as `deps.ts` so the two BOM items agree * on order. * - The markdown body is reconstructed from the sorted rows; LF-only - * line endings (W-M5-4). + * line endings. * - NOTICE file lookup probes a fixed list in lex order; the first * match wins, but the function still concatenates every match found * so two repos with the same NOTICES content produce byte-identical @@ -154,7 +154,7 @@ async function readNotices(opts: LicensesOpts): Promise { if (content === undefined || content.length === 0) continue; chunks.push(`# ${filename}`); chunks.push(""); - // CRLF→LF normalize for byte-identity (W-M5-4). + // CRLF→LF normalize for byte-identity. chunks.push(content.replace(/\r\n/g, "\n").trimEnd()); chunks.push(""); } diff --git a/packages/pack/src/manifest.test.ts b/packages/pack/src/manifest.test.ts index be68d91a..9d362da1 100644 --- a/packages/pack/src/manifest.test.ts +++ b/packages/pack/src/manifest.test.ts @@ -1,7 +1,7 @@ /** - * Tests for the BOM manifest builder (AC-M5-3). + * Tests for the BOM manifest builder. * - * Covers the four success criteria from the packet: + * Covers four core invariants: * A. Byte-identity: two runs on the same opts produce === manifest JSON. * B. Hash sensitivity: each input field propagates to packHash. * C. packHash is not part of its own preimage. 
diff --git a/packages/pack/src/pack-determinism.test.ts b/packages/pack/src/pack-determinism.test.ts index f9d3a0d9..30e189ef 100644 --- a/packages/pack/src/pack-determinism.test.ts +++ b/packages/pack/src/pack-determinism.test.ts @@ -1,8 +1,8 @@ /** - * End-to-end byte-identity determinism suite (AC-M5-8 / U2 / E-M5-3). + * End-to-end byte-identity determinism suite. * - * The per-module tests in this package each pin one slice of the U2 - * invariant ("same inputs → same bytes"). This suite exercises the + * The per-module tests in this package each pin one slice of the + * "same inputs → same bytes" invariant. This suite exercises the * composition: it runs `generatePack` twice over a richer fixture and * asserts every file under `outDir` is byte-identical across runs. * @@ -13,12 +13,12 @@ * 3. For every file `f` in the directory: * `Buffer.compare(readFile(outA/f), readFile(outB/f)) === 0` * - * Variant matrix (≥ 4 per the AC-M5-8 packet): + * Variant matrix: * V1. Empty embeddings — store has no `exportEmbeddingsParquet` hook; * sidecar is absent; manifest.files[] lists 7 BOM bodies (excluding * manifest+readme). 9 files on disk: 7 bodies + readme.md + manifest.json. * V2. Populated embeddings — fake @internal `exportEmbeddingsParquet` - * (duck-typed onto the graph view, AC-A-4) writes a deterministic + * (duck-typed onto the graph view) writes a deterministic * parquet body; sidecar is present; embeddings.parquet bytes are * identical across runs. * V3. Mixed framework labels — ProjectProfile.frameworks is a duplicated, @@ -28,7 +28,8 @@ * must group stably; findings.jsonl bytes match across runs. * * The chonkie loader is a deterministic stub so the test never depends on - * the real `@chonkiejs/core` install (worktree native-binding lesson). + * the real `@chonkiejs/core` install (worktree native bindings may not + * always rebuild cleanly). 
*/ import { strict as assert } from "node:assert"; @@ -47,9 +48,9 @@ import { type GeneratePackInternalOpts, generatePack } from "./index.js"; interface FixtureKnobs { /** * Attach a duck-typed @internal `exportEmbeddingsParquet` helper to the - * graph fake so AC-A-4's sidecar emits 4 deterministic bytes. The - * helper lives on the graph view because `runVariant` wraps the fake - * with `backend: "duck"`, where the sidecar narrows on `store.graph`. + * graph fake so the sidecar emits 4 deterministic bytes. The helper + * lives on the graph view because `runVariant` wraps the fake with + * `backend: "duck"`, where the sidecar narrows on `store.graph`. */ readonly withEmbeddings: boolean; /** Use a duplicated, reverse-sorted ProjectProfile.frameworks list. */ @@ -270,8 +271,8 @@ function makeRichFixtureStore(knobs: FixtureKnobs): IGraphStore { if (knobs.withEmbeddings) { // Deterministic 4-byte parquet stand-in. Real DuckDB Parquet output is - // also byte-stable for the same input set on the same engine version - // (S-M5-3 / AC-M5-6); the test exercises the wiring path only. + // also byte-stable for the same input set on the same engine version; + // the test exercises the wiring path only. store["exportEmbeddingsParquet"] = async (absPath: string): Promise => { const fs = await import("node:fs/promises"); await fs.writeFile(absPath, new Uint8Array([0x50, 0x41, 0x52, 0x31])); @@ -314,8 +315,7 @@ const COMMON_INTERNAL: GeneratePackInternalOpts = { duckdbVersion: "1.1.3", grammarCommits: { typescript: "b".repeat(40) }, // Deterministic chonkie stub — emits one chunk per file. Avoids the real - // import path so the test runs even when native bindings are unavailable - // (worktree lesson). + // import path so the test runs even when native bindings are unavailable. 
chonkieLoader: async () => ({ version: "0.0.9", CodeChunker: { @@ -335,7 +335,7 @@ async function tempDir(prefix: string): Promise { async function runVariant(outDir: string, knobs: FixtureKnobs): Promise<{ packHash: string }> { const fakeGraph = makeRichFixtureStore(knobs); // V2 attaches a duck-typed COPY helper to the graph — wrap into a - // backend:"duck" Store so the AC-A-4 sidecar narrows correctly. V1/V3/V4 + // backend:"duck" Store so the sidecar narrows correctly. V1/V3/V4 // never invoke the helper; the wrapper just exposes the graph view. const composedStore: Store = { backend: "duck", @@ -398,7 +398,7 @@ async function assertByteIdentical(label: string, knobs: FixtureKnobs): Promise< } // --------------------------------------------------------------------------- -// Variant tests — 4 distinct shapes per the AC-M5-8 matrix. +// Variant tests — 4 distinct shapes covering the determinism matrix. // --------------------------------------------------------------------------- test("V1. empty embeddings — sidecar absent, 9 files on disk, byte-identical", async () => { diff --git a/packages/pack/src/readme.test.ts b/packages/pack/src/readme.test.ts index f45deefd..40f3ea51 100644 --- a/packages/pack/src/readme.test.ts +++ b/packages/pack/src/readme.test.ts @@ -1,5 +1,5 @@ /** - * Tests for the BOM README renderer (AC-M5-5 — item 9 partial). + * Tests for the BOM README renderer (item 9 partial). * * Covers: * - A. Pure-function determinism: same inputs → same bytes. diff --git a/packages/pack/src/readme.ts b/packages/pack/src/readme.ts index 10996c8e..621a0613 100644 --- a/packages/pack/src/readme.ts +++ b/packages/pack/src/readme.ts @@ -1,15 +1,15 @@ /** - * BOM body item: README.md with the determinism contract (AC-M5-5 — item 9 partial). + * BOM body item: README.md with the determinism contract (item 9 partial). * * Pure-string renderer; deterministic by construction. 
The README pastes - * the M5 determinism contract verbatim and interpolates the manifest's + * the determinism contract verbatim and interpolates the manifest's * commit / tokenizer / class / pack hash so consumers can verify byte * identity without parsing `manifest.json`. * * Determinism contract: * - Pure function of `manifest` + `bomItemPaths`. No clocks, no random * ids, no environment lookups. - * - LF-only line endings (W-M5-4). + * - LF-only line endings. * - `bomItemPaths` is rendered alpha-sorted; the function does NOT * mutate the caller's array. */ diff --git a/packages/pack/src/skeleton.test.ts b/packages/pack/src/skeleton.test.ts index 7ffffa02..db823931 100644 --- a/packages/pack/src/skeleton.test.ts +++ b/packages/pack/src/skeleton.test.ts @@ -1,5 +1,5 @@ /** - * Tests for the PageRank-ranked symbol skeleton (AC-M5-4 — item 2/9). + * Tests for the PageRank-ranked symbol skeleton (item 2/9). * * Covers: * - A. Determinism: two consecutive calls return deep-equal output. diff --git a/packages/pack/src/skeleton.ts b/packages/pack/src/skeleton.ts index c9cc4461..e5173c26 100644 --- a/packages/pack/src/skeleton.ts +++ b/packages/pack/src/skeleton.ts @@ -1,11 +1,11 @@ /** - * BOM body item: PageRank-ranked symbol skeleton (AC-M5-4 — item 2/9). + * BOM body item: PageRank-ranked symbol skeleton (item 2/9). * * The skeleton is the deterministic "what matters here?" view of a repo, * built from `Function`/`Class`/`Method` nodes ranked by call-graph * PageRank. The output is a flat row stream that downstream tooling - * (the pack writer in T-W2-5; the future `code_skeleton` MCP surface) - * consumes as a strictly-ordered table. + * (the pack writer; the future `code_skeleton` MCP surface) consumes as + * a strictly-ordered table. * * Algorithm: * 1. `store.listNodes({ kinds: ["Function","Class","Method"] })` @@ -13,18 +13,16 @@ * 2. 
Pull every `CALLS` edge via `IGraphStore.listEdgesByType('CALLS')` * (typed `CodeRelation`) and feed `EdgeLike[]` into * `buildAdjacency` from `@opencodehub/analysis`. - * 3. Run `pageRank(adj, 0.85, 50)` — fixed iterations + damping per - * W-M5-3 (no tolerance-based convergence; numerical drift would - * break the byte-identity guarantee that `pack_hash` and the - * future `graphHash` both depend on). + * 3. Run `pageRank(adj, 0.85, 50)` — fixed iterations + damping (no + * tolerance-based convergence; numerical drift would break the + * byte-identity guarantee that `pack_hash` and the future + * `graphHash` both depend on). * 4. Sort rows by `score DESC` with `id ASC` as the lex-stable - * tiebreak. Per the BM25-over-node-id stub-pollution lesson - * (`.erpaval/solutions/conventions/bm25-over-node-id-favors-stubs.md`) - * the packet flags this as a known consideration: stub - * re-export nodes can outrank real call-targets when the call - * graph is sparse. For now we surface every callable kind and - * let downstream consumers filter; refining the kind set is a - * future-work item, not an AC-M5-4 deliverable. + * tiebreak. Stub re-export nodes can outrank real call-targets + * when the call graph is sparse (a known BM25-over-node-id + * stub-pollution caveat); for now we surface every callable kind + * and let downstream consumers filter — refining the kind set is + * future work. * * Determinism contract — non-negotiable: * - Output ordering is the result of `Array.prototype.sort` over a diff --git a/packages/pack/src/types.ts b/packages/pack/src/types.ts index 4818e086..6c38bd4e 100644 --- a/packages/pack/src/types.ts +++ b/packages/pack/src/types.ts @@ -1,9 +1,10 @@ /** - * @opencodehub/pack — public type surface for the M5 9-item BOM. + * @opencodehub/pack — public type surface for the 9-item BOM. * - * These interfaces are the contract consumed by AC-M5-3..9. 
Fields are - * `readonly` by convention (see sibling packages in this workspace for - * precedent) so downstream code cannot mutate a manifest in-place. + * These interfaces are the contract every BOM body builder consumes. + * Fields are `readonly` by convention (see sibling packages in this + * workspace for precedent) so downstream code cannot mutate a manifest + * in-place. */ /** A single item in the 9-item BOM. */ diff --git a/packages/pack/src/xrefs.test.ts b/packages/pack/src/xrefs.test.ts index 4e51a0d7..e42df53f 100644 --- a/packages/pack/src/xrefs.test.ts +++ b/packages/pack/src/xrefs.test.ts @@ -1,5 +1,5 @@ /** - * Tests for the xrefs BOM body (AC-M5-5 — item 6/9). + * Tests for the xrefs BOM body (item 6/9). * * Covers: * - A. Determinism across two consecutive calls. diff --git a/packages/pack/src/xrefs.ts b/packages/pack/src/xrefs.ts index c63c12ef..99ef4ec9 100644 --- a/packages/pack/src/xrefs.ts +++ b/packages/pack/src/xrefs.ts @@ -1,5 +1,5 @@ /** - * BOM body item: SCIP-grounded cross-references (AC-M5-5 — item 6/9). + * BOM body item: SCIP-grounded cross-references (item 6/9). * * Two-shape union row stream: * - `community` rows expose architectural clusters (`Community` nodes). @@ -10,15 +10,14 @@ * - Call rows follow, sorted `(from ASC, to ASC, id ASC)` — the id is * the deterministic last-resort tiebreak when the same callsite has * two relation rows (e.g. duplicate CALLS edges across SCIP indexes). - * - The CALLS edge stream comes from `IGraphStore.listEdgesByType('CALLS')` - * (AC-A-6a). Result rows are typed `CodeRelation` and ordered - * `(from_id, to_id, type)` by the storage layer; this module re-sorts to - * the BOM contract `(from, to, id)` so the wire form stays byte-stable + * - The CALLS edge stream comes from `IGraphStore.listEdgesByType('CALLS')`. 
+ * Result rows are typed `CodeRelation` and ordered `(from_id, to_id, + * type)` by the storage layer; this module re-sorts to the BOM + * contract `(from, to, id)` so the wire form stays byte-stable * regardless of which finder ordering the adapter chose. * - PageRank is NOT used here; this is a pure relations-table slice - * plus a Community-node enumeration. W-M5-3 (no tolerance-based - * convergence) is therefore not in scope but worth flagging for the - * reader. + * plus a Community-node enumeration (so the no-tolerance-based- + * convergence rule that governs the skeleton is not in scope). * * Confidence column: chonkie / SCIP indexes typically emit `1.0` for * resolved CALLS edges. We surface it raw so downstream tools can filter diff --git a/packages/policy/src/load.ts b/packages/policy/src/load.ts index a1b9405b..56a82014 100644 --- a/packages/policy/src/load.ts +++ b/packages/policy/src/load.ts @@ -1,7 +1,7 @@ /** * loadPolicy — read opencodehub.policy.yaml, parse, Zod-validate. * - * Behavior (hard-pinned by T-M2-4's EARS requirements): + * Behavior: * * - File missing on disk → resolve to `undefined`. `codehub verdict` must * skip the policy step entirely in this state. diff --git a/packages/scanners/src/converters/detect-secrets-to-sarif.test.ts b/packages/scanners/src/converters/detect-secrets-to-sarif.test.ts index 4ab07295..129ae873 100644 --- a/packages/scanners/src/converters/detect-secrets-to-sarif.test.ts +++ b/packages/scanners/src/converters/detect-secrets-to-sarif.test.ts @@ -109,7 +109,7 @@ test("detectSecretsJsonToSarif stamps hashed_secret on partialFingerprints (not const r = log.runs[0]?.results?.[0]; // SARIF §3.27.18: partialFingerprints are plugin-defined identifiers, // NOT a security claim. The slot is named `detect_secrets_sha1` to - // make the (non-cryptographic) algorithm explicit (W-B-1). + // make the (non-cryptographic) algorithm explicit. 
assert.equal(r?.partialFingerprints?.["detect_secrets_sha1"], "deadbeef"); }); @@ -124,7 +124,7 @@ test("detectSecretsJsonToSarif uses 1-indexed startLine matching SARIF", () => { assert.equal(region?.startLine, 42); }); -test("detectSecretsJsonToSarif passes overlapping findings through (W-B-2)", () => { +test("detectSecretsJsonToSarif passes overlapping findings through", () => { // Two detectors fire on the same line — both must pass through and let // OCH's downstream SARIF dedupe handle merging. const json = { @@ -183,7 +183,7 @@ test("detectSecretsJsonToSarif emits empty (but valid) SARIF for garbage input", assert.equal( detectSecretsJsonToSarif(null).runs[0]?.tool.driver.name, "detect-secrets", - "tool.driver.name must be preserved on empty SARIF (E-B-2)", + "tool.driver.name must be preserved on empty SARIF", ); }); diff --git a/packages/scanners/src/converters/detect-secrets-to-sarif.ts b/packages/scanners/src/converters/detect-secrets-to-sarif.ts index 432d367a..c9208c03 100644 --- a/packages/scanners/src/converters/detect-secrets-to-sarif.ts +++ b/packages/scanners/src/converters/detect-secrets-to-sarif.ts @@ -31,14 +31,14 @@ * - properties.opencodehub.is_verified = boolean * - partialFingerprints.detect_secrets_sha1 = hashed_secret * - * We do NOT advertise hashed_secret as a cryptographic fingerprint - * (W-B-1) — SHA-1 is not collision-resistant. The + * We do NOT advertise hashed_secret as a cryptographic fingerprint — + * SHA-1 is not collision-resistant. The * `partialFingerprints.detect_secrets_sha1` slot is documented as a * plugin-defined identifier per SARIF §3.27.18, not a security claim. * * Overlapping findings (KeywordDetector + AWSKeyDetector on the same - * line) are NOT deduplicated here (W-B-2) — both pass through and rely - * on OCH's downstream SARIF dedupe at merge time. + * line) are NOT deduplicated here — both pass through and rely on + * OCH's downstream SARIF dedupe at merge time. 
* * The output is validated against `SarifLogSchema` from @opencodehub/sarif * before being returned, so malformed emissions never leak downstream. diff --git a/packages/scanners/src/wrappers/wrappers.test.ts b/packages/scanners/src/wrappers/wrappers.test.ts index 468d4ea7..1335311f 100644 --- a/packages/scanners/src/wrappers/wrappers.test.ts +++ b/packages/scanners/src/wrappers/wrappers.test.ts @@ -199,7 +199,7 @@ test("detect-secrets wrapper returns empty SARIF + skipped when binary missing", const { deps } = makeFakeDeps(() => ({ stdout: "" }), { missing: ["detect-secrets"] }); const wrapper = createDetectSecretsWrapper(deps); const out = await wrapper.run(ctx); - // E-B-2: tool.driver.name must be preserved even when skipped. + // tool.driver.name must be preserved even when skipped. assert.equal(out.sarif.runs[0]?.tool.driver.name, "detect-secrets"); assert.equal(out.sarif.runs[0]?.results?.length, 0); assert.ok(out.skipped?.includes("not found on PATH")); @@ -213,7 +213,7 @@ test("detect-secrets wrapper emits empty SARIF when stdout is malformed", async assert.equal(out.sarif.runs[0]?.results?.length, 0); }); -test("detect-secrets wrapper passes overlapping findings through (W-B-2)", async () => { +test("detect-secrets wrapper passes overlapping findings through", async () => { // KeywordDetector + AWSKeyDetector firing on the same line: both must // appear in the SARIF output; OCH's downstream merge handles dedupe. const json = { diff --git a/packages/scip-ingest/src/materialize.test.ts b/packages/scip-ingest/src/materialize.test.ts index a95272ed..31f63e85 100644 --- a/packages/scip-ingest/src/materialize.test.ts +++ b/packages/scip-ingest/src/materialize.test.ts @@ -15,9 +15,9 @@ function loadFixture(): Uint8Array { } test("materialize: blast ranking surfaces a connected leader with backward reach", () => { - // Before AC-M5-2 this test asserted `add()` as the POC leader when - // the blast formula included a `gamma * pagerank * n` term. 
- // PageRank was lifted to @opencodehub/analysis and is now a + // The previous version of this test asserted `add()` as the POC + // leader when the blast formula included a `gamma * pagerank * n` + // term. PageRank was lifted to @opencodehub/analysis and is now a // request-time kernel; the ingest-time blast formula leans on // reach + SCC only, which shifts the top-ranked symbol on this // fixture. The invariant we still care about at this layer is diff --git a/packages/scip-ingest/src/materialize.ts b/packages/scip-ingest/src/materialize.ts index d3691c14..0565a63f 100644 --- a/packages/scip-ingest/src/materialize.ts +++ b/packages/scip-ingest/src/materialize.ts @@ -7,9 +7,9 @@ * the BFS closures run on the same scale as the Python+NetworkX * implementation for ~10k-node repos (OCH's analyze target). * - * PageRank was lifted to `@opencodehub/analysis/page-rank.ts` - * (AC-M5-2). It's now a request-time kernel; this file no longer - * computes per-symbol PageRank during ingest. + * PageRank was lifted to `@opencodehub/analysis/page-rank.ts`. It's now + * a request-time kernel; this file no longer computes per-symbol + * PageRank during ingest. */ import { type Adjacency, buildAdjacency } from "@opencodehub/analysis"; @@ -198,9 +198,9 @@ export function materialize( const sccEntry = scc[u] ?? { sccId: -1, size: 0 }; const sccContribution = sccEntry.size > 1 ? sccEntry.size : 0; // PageRank term (`gamma * pr * n`) was removed with the lift to - // @opencodehub/analysis (AC-M5-2). The field was never consumed - // outside this file; ranking now leans on reach closures + SCC - // membership until AC-M5-4 reintroduces PageRank at request time. + // @opencodehub/analysis. The field was never consumed outside this + // file; ranking now leans on reach closures + SCC membership until + // PageRank is reintroduced at request time. const raw = alpha * (fwdReach[u] ?? 0) + beta * (bwdReach[u] ?? 
0) + delta * sccContribution; const blast = Math.log1p(raw); metrics.set(sym, { diff --git a/packages/scip-ingest/src/runners/clang.test.ts b/packages/scip-ingest/src/runners/clang.test.ts index 0e582176..df3e8fee 100644 --- a/packages/scip-ingest/src/runners/clang.test.ts +++ b/packages/scip-ingest/src/runners/clang.test.ts @@ -1,5 +1,5 @@ /** - * Tests for the scip-clang adapter (AC-M4-1). + * Tests for the scip-clang adapter. * * Coverage mirrors the other adapter contracts: * 1. `buildCommand("clang", ...)` shell shape matches scip-clang v0.4.0: diff --git a/packages/scip-ingest/src/runners/index.test.ts b/packages/scip-ingest/src/runners/index.test.ts index 0ab6e66d..db4ccbfe 100644 --- a/packages/scip-ingest/src/runners/index.test.ts +++ b/packages/scip-ingest/src/runners/index.test.ts @@ -1,11 +1,11 @@ /** - * Tests for the cobol-proleap gating logic added in T-M4-6. + * Tests for the cobol-proleap gating logic. * * We cannot spawn a JVM in CI, so these tests exercise the gating surface: * - Without `--allow-build-scripts=proleap` the runner skips with a - * clear "falling back to regex" reason (spec W-M4-1). + * clear "falling back to regex" reason. * - With the flag but no JAR installed, the runner skips with the - * missing-jar hint (spec S-M4-3). + * missing-jar hint pointing at `codehub setup --cobol-proleap`. * - With flag + JAR present, the runner activates (skipped=false). * * The scip-java / rust / python / go branches are already covered by the diff --git a/packages/scip-ingest/src/runners/index.ts b/packages/scip-ingest/src/runners/index.ts index c0f63eba..f718d50c 100644 --- a/packages/scip-ingest/src/runners/index.ts +++ b/packages/scip-ingest/src/runners/index.ts @@ -5,9 +5,6 @@ * writes `.codehub/scip/.scip`. The factory `runIndexer` is * fan-out friendly — callers invoke it once per detected language in * parallel via `Promise.all`. 
- * - * See `.erpaval/sessions/session-f8a300bc/research-scip-indexers.yaml` - * for indexer versions + known issues as of 2026-04-26. */ import { spawn } from "node:child_process"; @@ -100,11 +97,11 @@ export interface IndexerResult { * Note on `cobol-proleap`: the detector never infers the proleap kind * from disk alone — it is strictly gated behind * `allowedBuildScripts.includes("proleap")`, which the CLI surface only - * sets in response to an explicit user opt-in (spec W-M4-1). Callers - * that opted in append `"cobol-proleap"` to the detected set themselves. + * sets in response to an explicit user opt-in. Callers that opted in + * append `"cobol-proleap"` to the detected set themselves. * - * Kotlin note (AC-M4-4): before scip-kotlin existed as a standalone SCIP - * adapter, Kotlin projects rode on the `java` adapter + the tree-sitter-kotlin + * Kotlin note: before scip-kotlin existed as a standalone SCIP adapter, + * Kotlin projects rode on the `java` adapter + the tree-sitter-kotlin * grammar. With scip-kotlin v0.6.0 promoted in, we detect `.kt`/`.kts` source * files directly and emit `"kotlin"` as its own candidate. Pure-Kotlin * projects (Kotlin sources, no Java sources, no `pom.xml` / `build.sbt` / diff --git a/packages/scip-ingest/src/runners/kotlin.test.ts b/packages/scip-ingest/src/runners/kotlin.test.ts index c4732091..5c5d1b51 100644 --- a/packages/scip-ingest/src/runners/kotlin.test.ts +++ b/packages/scip-ingest/src/runners/kotlin.test.ts @@ -1,5 +1,5 @@ /** - * Unit tests for the scip-kotlin v0.6.0 adapter (AC-M4-4). + * Unit tests for the scip-kotlin v0.6.0 adapter. 
* * Covered paths: * - `detectLanguages`: pure-Kotlin projects drop the legacy `"java"` candidate; diff --git a/packages/scip-ingest/src/runners/ruby.test.ts b/packages/scip-ingest/src/runners/ruby.test.ts index fbd1fcf6..4390d2cb 100644 --- a/packages/scip-ingest/src/runners/ruby.test.ts +++ b/packages/scip-ingest/src/runners/ruby.test.ts @@ -3,9 +3,8 @@ * * These tests assert on the shell plan + skip semantics without spawning the * real `scip-ruby` binary. A missing-binary skip test exercises `runIndexer` - * with a bogus `$PATH` so `spawn` returns ENOENT, validating the S-M4-1 - * state requirement: when the indexer binary is absent, analyze must skip - * cleanly with a setup hint. + * with a bogus `$PATH` so `spawn` returns ENOENT, validating that when the + * indexer binary is absent, analyze skips cleanly with a setup hint. */ import { strict as assert } from "node:assert"; diff --git a/packages/search/src/hybrid.test.ts b/packages/search/src/hybrid.test.ts index 63ec4ab6..b2818cd2 100644 --- a/packages/search/src/hybrid.test.ts +++ b/packages/search/src/hybrid.test.ts @@ -45,8 +45,8 @@ class StubStore implements IGraphStore { vectorQueries: VectorQuery[] = []; /** * Fixture File-node rows the zoom path resolves through `listNodesByKind('File')`. - * The pre-AC-A-6d shape captured raw `{id, file_path}` query rows; the - * post-migration shape is the typed FileNode contract — `id` + `filePath`. + * The pre-typed-finder shape captured raw `{id, file_path}` query + * rows; the typed shape is the FileNode contract — `id` + `filePath`. */ fileNodes: FileNode[] = []; /** Captured `listNodesByKind` calls so tests can assert tier + filter shape. 
*/ diff --git a/packages/storage/src/column-encode.test.ts b/packages/storage/src/column-encode.test.ts index 46a8c231..3f8027d6 100644 --- a/packages/storage/src/column-encode.test.ts +++ b/packages/storage/src/column-encode.test.ts @@ -1,11 +1,10 @@ /** * Unit tests for `./column-encode.ts` — every encoder and every sentinel. * - * The hoist is a pure refactor (AC-A-2); these tests pin the helper-level - * contracts so a future edit to `column-encode.ts` cannot silently change - * behaviour without tripping a focused failure here. The cross-adapter - * round-trip is covered by `graph-hash-parity.test.ts`; this file owns the - * unit-level shape. + * These tests pin the helper-level contracts so a future edit to + * `column-encode.ts` cannot silently change behaviour without tripping + * a focused failure here. The cross-adapter round-trip is covered by + * `graph-hash-parity.test.ts`; this file owns the unit-level shape. */ import assert from "node:assert/strict"; @@ -187,8 +186,8 @@ test("frameworksJsonOrNull: legacy flat shape when frameworksDetected is absent/ assert.equal(frameworksJsonOrNull([], undefined), "[]"); }); -test("frameworksJsonOrNull: returns null when both flat and detected are absent (AC-A-7)", () => { - // AC-A-7 fix: nodes that never declared `frameworks` (every kind except +test("frameworksJsonOrNull: returns null when both flat and detected are absent", () => { + // Nodes that never declared `frameworks` (every kind except // ProjectProfile in practice) must store SQL NULL — otherwise the // public-interface parity rebuilder re-attaches a spurious // `frameworks: []` field and graphHash byte-identity breaks across the diff --git a/packages/storage/src/column-encode.ts b/packages/storage/src/column-encode.ts index fd0c4f9c..52109738 100644 --- a/packages/storage/src/column-encode.ts +++ b/packages/storage/src/column-encode.ts @@ -5,7 +5,7 @@ * (`./graphdb-adapter.ts`) write a 73-column row per node where every column * matches the canonical 
{@link NODE_COLUMNS} order. The two adapters used to * carry duplicate `nodeToRow` / `nodeToParams` / `*OrNull` / `dedupeLastById` - * helpers; per AC-A-2 they now consume one canonical implementation here. + * helpers; both now consume one canonical implementation here. * * The module is `internal-only` — it is NOT re-exported from * `packages/storage/src/index.ts`. Adapters import directly from @@ -41,8 +41,8 @@ * `{keywords: []}` round-trips byte-identically to itself instead of * collapsing to `{}` (canonical-JSON / graphHash distinction preserved). * - * **`frameworks_json` unification note (AC-A-2)** — before the hoist, the - * DuckDB adapter wrote the v2.0 polymorphic shape via `frameworksJsonOrNull` + * **`frameworks_json` unification** — before the hoist, the DuckDB + * adapter wrote the v2.0 polymorphic shape via `frameworksJsonOrNull` * while the graph-db adapter wrote the legacy flat shape via * `jsonArrayOrNull`. Both adapters' readers already support both shapes * (`applyFrameworksJsonReadback`, `applyFrameworksJsonReadbackGd`). The @@ -145,7 +145,7 @@ export const NODE_COLUMNS: readonly string[] = [ "partial_fingerprint", "baseline_state", "suppressed_json", - // Repo (AC-M6-1). + // Repo. "origin_url", "repo_uri", "default_branch", @@ -251,8 +251,8 @@ export function nodeToColumns(node: GraphNode): Record { partial_fingerprint: stringOrNull(n["partialFingerprint"]), baseline_state: stringOrNull(n["baselineState"]), suppressed_json: stringOrNull(n["suppressedJson"]), - // Repo (AC-M6-1). Each column is populated only when - // `node.kind === "Repo"` and stays NULL for every other kind. + // Repo. Each column is populated only when `node.kind === "Repo"` + // and stays NULL for every other kind. // `originUrl` / `defaultBranch` / `group` are nullable on the interface // — `repoStringOrNull` collapses null and missing alike to SQL NULL. 
origin_url: repoStringOrNull(n, "originUrl"), @@ -435,16 +435,16 @@ export function normalizeDeadness(v: unknown): unknown { * version bump. The read side in `packages/mcp/src/tools/project-profile.ts` * sniffs the shape. * - * Both adapters now call this function (AC-A-2). The graph-db writer - * previously emitted only the legacy flat shape; with the unification it - * gains the v2.0 envelope when callers populate `frameworksDetected`. The - * legacy path is byte-identical to the old graph-db output, so existing - * graphs keep round-tripping unchanged. + * Both adapters call this function. The graph-db writer previously + * emitted only the legacy flat shape; with the unification it gains the + * v2.0 envelope when callers populate `frameworksDetected`. The legacy + * path is byte-identical to the old graph-db output, so existing graphs + * keep round-tripping unchanged. * - * **AC-A-7 fix:** when both `flat` is absent / non-array AND `detected` is - * empty, return `null` so the column stays NULL for nodes that never - * declared a `frameworks` field (every node kind except ProjectProfile, - * in practice). Previously this branch returned `"[]"` for every node, + * When both `flat` is absent / non-array AND `detected` is empty, + * return `null` so the column stays NULL for nodes that never declared + * a `frameworks` field (every node kind except ProjectProfile, in + * practice). Previously this branch returned `"[]"` for every node, * which polluted the polymorphic column and — once the public-interface * parity harness landed — broke graphHash byte-identity (the rebuilder * would re-attach `frameworks: []` on every rebuilt node). 
Callers that diff --git a/packages/storage/src/cypher-guard.ts b/packages/storage/src/cypher-guard.ts index 1dbef5a6..56e3e911 100644 --- a/packages/storage/src/cypher-guard.ts +++ b/packages/storage/src/cypher-guard.ts @@ -8,7 +8,7 @@ * typed rejection earlier in the stack plus a consistent user-facing * message regardless of backend. * - * Scope (AC-M3-5): + * Scope: * - Allowlist of reader clauses: MATCH, RETURN, WITH, WHERE, ORDER BY, * LIMIT, SKIP, UNWIND. * - `CALL` is rejected unless the invocation is exactly one of the two diff --git a/packages/storage/src/duckdb-adapter.test.ts b/packages/storage/src/duckdb-adapter.test.ts index b8680937..c11fd3f5 100644 --- a/packages/storage/src/duckdb-adapter.test.ts +++ b/packages/storage/src/duckdb-adapter.test.ts @@ -461,7 +461,7 @@ test("vectorSearch with granularity filter restricts to that tier", async () => }); // --------------------------------------------------------------------------- -// listEmbeddingHashes (T-M1-3 content-hash skip helper) +// listEmbeddingHashes — content-hash skip helper // --------------------------------------------------------------------------- test("listEmbeddingHashes returns an empty Map on a fresh database", async () => { @@ -934,7 +934,7 @@ test("bulkLoad stores Finding / Dependency / Operation / Contributor / ProjectPr } }); -test("bulkLoad stores Repo columns (AC-M6-1 first-class repo node)", async () => { +test("bulkLoad stores Repo columns (first-class repo node)", async () => { const dbPath = await scratchDbPath(); const store = new DuckDbStore(dbPath); await store.open(); @@ -982,7 +982,7 @@ test("bulkLoad stores Repo columns (AC-M6-1 first-class repo node)", async () => } }); -test("bulkLoad stores Repo columns with explicit-null nullable fields (S-M6-1)", async () => { +test("bulkLoad stores Repo columns with explicit-null nullable fields", async () => { const dbPath = await scratchDbPath(); const store = new DuckDbStore(dbPath); await store.open(); @@ -2145,7 +2145,7 @@ 
test("listNodes() returns [] from an unknown kind", async () => { }); // --------------------------------------------------------------------------- -// v1.0 community-adapter conformance suite (AC-A-11) +// v1.0 community-adapter conformance suite // // DuckDb is the flagship reference implementation, so it MUST pass every // block of the shared conformance contract. A regression here would mean diff --git a/packages/storage/src/duckdb-adapter.ts b/packages/storage/src/duckdb-adapter.ts index 4c78e0bf..8378d2c8 100644 --- a/packages/storage/src/duckdb-adapter.ts +++ b/packages/storage/src/duckdb-adapter.ts @@ -1,11 +1,11 @@ /** * DuckDB-backed adapter for the storage interfaces. * - * Per AC-A-1, this class implements BOTH {@link IGraphStore} and - * {@link ITemporalStore} over a single `DuckDBConnection`. The legacy - * `DuckDbStore` class export is retained as the bridge type for the - * 41 type-pin call sites that AC-A-5 will migrate gradually — its - * instances satisfy the union of both surfaces. + * This class implements BOTH {@link IGraphStore} and {@link ITemporalStore} + * over a single `DuckDBConnection`. The legacy `DuckDbStore` class export + * is retained as the bridge type for the type-pin call sites that still + * consume the merged surface — its instances satisfy the union of both + * surfaces. * * When a caller composes a {@link OpenStoreResult} with `backend: "duck"`, * the same `DuckDbStore` instance is returned as both the `graph` view @@ -138,16 +138,17 @@ const DEFAULT_COCHANGE_MIN_LIFT = 1.0; /** * Concrete adapter that satisfies both {@link IGraphStore} (graph-tier) * and {@link ITemporalStore} (tabular-tier) over a single DuckDB - * connection. The class export remains the legacy bridge type that the - * 41 AC-A-5 type-pin sites continue to consume; new code should call - * `openStore(...)` and route through `OpenStoreResult.graph` / - * `OpenStoreResult.temporal` rather than reaching for the concrete class. + * connection. 
The class export remains the legacy bridge type that + * existing type-pin sites consume; new code should call `openStore(...)` + * and route through `OpenStoreResult.graph` / `OpenStoreResult.temporal` + * rather than reaching for the concrete class. */ export class DuckDbStore implements IGraphStore, ITemporalStore { /** * DuckDB exposes no public Cypher entry point — typed finders cover the - * graph reads. Stamped as `"none"` for the {@link IGraphStore.dialect} - * marker introduced in AC-A-1. + * graph reads. Stamped as `"none"` on the {@link IGraphStore.dialect} + * marker so callers can branch between Cypher-aware and Cypher-free + * adapters. */ readonly dialect: GraphDialect = "none"; private readonly path: string; @@ -487,13 +488,13 @@ export class DuckDbStore implements IGraphStore, ITemporalStore { /** * @internal * Stream the `embeddings` table to a Parquet file via DuckDB's built-in - * `COPY ... TO ... (FORMAT PARQUET, COMPRESSION ZSTD)`. Backs the M5 BOM - * item #7 (Parquet sidecar) for `@opencodehub/pack`. + * `COPY ... TO ... (FORMAT PARQUET, COMPRESSION ZSTD)`. Backs the + * Parquet sidecar BOM item for `@opencodehub/pack`. * - * **NOT part of the public storage surface.** AC-A-4 reframed the - * embeddings sidecar as a packaging concern, owned by `@opencodehub/pack`. - * This method survives as a DuckDB-only helper that pack's - * `writeEmbeddingsSidecar` invokes after narrowing `store.temporal` (or + * **NOT part of the public storage surface.** The embeddings sidecar is + * a packaging concern owned by `@opencodehub/pack`. This method survives + * as a DuckDB-only helper that pack's `writeEmbeddingsSidecar` invokes + * after narrowing `store.temporal` (or * `store.graph` when `backend === "duck"`) to a {@link DuckDbStore}. 
* Third-party {@link IGraphStore} / {@link ITemporalStore} implementations * MUST NOT implement it — pack stamps `determinismClass: "degraded"` @@ -513,8 +514,8 @@ export class DuckDbStore implements IGraphStore, ITemporalStore { * surface that string to the caller via `duckdbVersion` and the pack * manifest pins it (`PackPins.duckdbVersion`). * - * When the embeddings table is empty, NO file is written (S-M5-3 contract - * for the pack BOM); the caller is expected to skip the BomItem entirely. + * When the embeddings table is empty, NO file is written; the caller + * is expected to skip the BomItem entirely. * * Caller MUST pass an absolute path. Path is interpolated into the SQL * statement after a strict format check (alphanumerics + `/_-.` only and @@ -580,7 +581,7 @@ export class DuckDbStore implements IGraphStore, ITemporalStore { * Load every prior `content_hash` from the `embeddings` table keyed by the * composite `(granularity, node_id, chunk_index)` tuple. Used by the * ingestion embeddings phase to skip re-embedding chunks whose source - * text is unchanged across runs (T-M1-3). + * text is unchanged across runs. * * A single `SELECT` round-trip is cheaper than per-chunk lookups and * keeps the API surface narrow: the caller gets a `Map` it owns. @@ -905,10 +906,9 @@ export class DuckDbStore implements IGraphStore, ITemporalStore { /** * {@link ITemporalStore.exec} implementation — delegates to {@link query}. - * AC-A-1 introduced this name on the temporal interface so callers that - * route through `OpenStoreResult.temporal` use the new vocabulary; the - * original `query()` method stays for the 41 type-pin sites AC-A-5 will - * migrate. + * Callers that route through `OpenStoreResult.temporal` use this name; + * the original `query()` method stays for legacy type-pin sites that + * still consume the merged surface. 
*/ async exec( sql: string, @@ -1007,12 +1007,12 @@ export class DuckDbStore implements IGraphStore, ITemporalStore { } // -------------------------------------------------------------------------- - // Typed finders — AC-A-6 service-layer foundation + // Typed finders — service-layer foundation // -------------------------------------------------------------------------- // - // Every method below replaces a pattern-matched raw-SQL site identified in - // architecture-revised.md §5. SQL strings stay LOCAL to this file — they are - // never exported from the package surface so consumers cannot reach for the + // Every method below replaces a raw-SQL pattern that consumers used to + // reach for. SQL strings stay LOCAL to this file — they are never + // exported from the package surface so consumers cannot reach for the // dialect directly. // // Determinism contract: every finder returns rows in deterministic order so @@ -1530,8 +1530,8 @@ export class DuckDbStore implements IGraphStore, ITemporalStore { * * Repo membership is resolved by walking the `Repo` row whose `id` is * the prefix of the consumer/producer node ids. The current ingestion - * stamps `repo_uri` directly on every node via the AC-M6-1 column — - * we read it inline rather than re-traversing the graph. + * stamps `repo_uri` directly on every node via the persisted Repo + * column — we read it inline rather than re-traversing the graph. */ async listConsumerProducerEdges( opts: { readonly repoUris?: readonly string[] } = {}, @@ -1640,9 +1640,9 @@ export class DuckDbStore implements IGraphStore, ITemporalStore { for (const r of rows) { const row = r as Record; const stepVal = row["step"]; - // Match the AC-A-2 step-zero sentinel: DuckDB stores `INT NOT NULL - // DEFAULT 0` for absent step values; collapse 0 to "field absent" - // so the wire shape matches the source `CodeRelation`. 
+ // Step-zero sentinel: DuckDB stores `INT NOT NULL DEFAULT 0` + // for absent step values; collapse 0 to "field absent" so the + // wire shape matches the source `CodeRelation`. const step = stepVal === null || stepVal === undefined || Number(stepVal) === 0 ? undefined @@ -2264,7 +2264,7 @@ function rowToGraphNode(row: Record): GraphNode | undefined { setStringField(out, "partialFingerprint", row["partial_fingerprint"]); setStringField(out, "baselineState", row["baseline_state"]); setStringField(out, "suppressedJson", row["suppressed_json"]); - // Repo (AC-M6-1). The interface marks `originUrl` / `defaultBranch` / + // Repo. The interface marks `originUrl` / `defaultBranch` / // `group` as `string | null` so the round-trip preserves an explicit // null when the column is NULL. Other Repo fields are populated only // when `kind === "Repo"`; for non-Repo rows the columns stay NULL and diff --git a/packages/storage/src/finders.test.ts b/packages/storage/src/finders.test.ts index 0b213a30..eb3b35c8 100644 --- a/packages/storage/src/finders.test.ts +++ b/packages/storage/src/finders.test.ts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // -// AC-A-6a — typed-finder tests for both adapters. +// Typed-finder tests for both adapters. // // Each finder is exercised against a small fixture loaded into a DuckDbStore. // Where the native graph-db binding is available, the same fixture is loaded @@ -8,8 +8,8 @@ // results (so the cross-adapter Liskov contract holds for the finder family // the same way it does for `listNodes` / `bulkLoad`). // -// Per the AC-A-6a packet anti-goal #1, NO consumer is touched here — the -// fixtures and assertions live entirely inside `packages/storage`. +// Fixtures and assertions live entirely inside `packages/storage`; no +// consumer package is touched here. 
import assert from "node:assert/strict"; import { mkdtemp } from "node:fs/promises"; @@ -267,10 +267,10 @@ function buildFinderFixture(): { graph: KnowledgeGraph; ids: FixtureIds } { // FETCHES edge from a consumer Function on the consumer side to the // Operation on the producer side. The producer carries a `repo_uri` - // matching `repoProducer.repoUri` via the AC-M6-1 column. We synthesize - // the cross-repo wiring by adding an Operation node whose `repo_uri` - // column will be set after node insertion through the bulkLoad column - // encoder. + // matching `repoProducer.repoUri` via the persisted Repo column. We + // synthesize the cross-repo wiring by adding an Operation node whose + // `repo_uri` column will be set after node insertion through the + // bulkLoad column encoder. g.addEdge({ from: fnFoo, to: op1, type: "FETCHES", confidence: 0.95 }); return { @@ -647,7 +647,7 @@ test("DuckDb listConsumerProducerEdges returns the FETCHES + Operation join", as // those columns NULL on Function/Operation nodes (only Repo nodes carry // repo_uri today), so the cross-repo predicate resolves to the empty // string for both endpoints. This test confirms the SHAPE of the result - // — the full cross-repo join is exercised by the AC-M6-1 / AC-M6-3 + // — the full cross-repo join is exercised by the cross-repo contract // integration suites, which run against repos whose ingestion has // populated repo_uri on every node. await withDuckStore(async (store) => { diff --git a/packages/storage/src/graph-hash-parity.test.ts b/packages/storage/src/graph-hash-parity.test.ts index b06a73b4..f9d978af 100644 --- a/packages/storage/src/graph-hash-parity.test.ts +++ b/packages/storage/src/graph-hash-parity.test.ts @@ -1,8 +1,8 @@ /** - * graphHash parity gate (architecture-revised.md §AC-A-7). + * graphHash parity gate. 
* - * Enforces the v1.0 byte-identity invariant (validation constraint #6) - * across every IGraphStore backend: for every fixture graph, + * Enforces the v1.0 byte-identity invariant across every IGraphStore + * backend: for every fixture graph, * * graphHash(graph) * === graphHash(rebuildFromStore(duckGraph)) @@ -10,16 +10,16 @@ * * If these hashes diverge, one of the adapters dropped, reordered, or * coerced a field on the round-trip — which would silently break the - * incremental re-index contract (T-M7-4) and the Reindex parity gate. - * This file is the CI tripwire. + * incremental re-index contract and the Reindex parity gate. This file + * is the CI tripwire. * - * AC-A-7 hoisted the per-backend rebuilders into - * `./test-utils/parity-harness.ts`. The parity harness now uses ONLY - * `IGraphStore.listNodes({})` + `IGraphStore.listEdges({})` — a third- - * party AGE / Memgraph / Neo4j / Neptune adapter can prove conformance - * by importing `assertGraphParity` from `@opencodehub/storage/test-utils` - * and running it against its own adapter. This test reduces to fixture - * builders + a single `assertGraphParity` call per fixture. + * The per-backend rebuilders live in `./test-utils/parity-harness.ts`. + * The parity harness uses ONLY `IGraphStore.listNodes({})` + + * `IGraphStore.listEdges({})` — a third-party AGE / Memgraph / Neo4j / + * Neptune adapter can prove conformance by importing `assertGraphParity` + * from `@opencodehub/storage/test-utils` and running it against its own + * adapter. This test reduces to fixture builders + a single + * `assertGraphParity` call per fixture. * * Three fixtures exercise progressively larger shapes: * - small: ≤10 nodes, DEFINES + CALLS only (sanity shape). @@ -28,15 +28,14 @@ * CALLS / OWNED_BY so the v1.1 node + edge surface is visible. 
* - large: ≥500 nodes built as a long CALLS chain with shortcuts, plus * a companion sweep that emits at least one edge for every - * entry in `getAllRelationTypes()` (24 kinds as of AC-M3-3). - * - repo / repo-null: AC-M6-1 RepoNode round-trip — populated AND - * explicit-null variants of `originUrl` / `defaultBranch` / - * `group`. + * entry in `getAllRelationTypes()` (24 kinds today). + * - repo / repo-null: RepoNode round-trip — populated AND explicit-null + * variants of `originUrl` / `defaultBranch` / `group`. * - * Step-zero contract (AC-M3-3 + AC-A-2): both adapters' read paths drop - * `step` when the stored value reads back as 0/null so the rebuilt graph - * is byte-identical across backends. Fixtures avoid `step: 0` anyway to - * keep the original-graph comparison clean. + * Step-zero contract: both adapters' read paths drop `step` when the + * stored value reads back as 0/null so the rebuilt graph is byte- + * identical across backends. Fixtures avoid `step: 0` anyway to keep + * the original-graph comparison clean. */ import { mkdtemp } from "node:fs/promises"; @@ -404,7 +403,7 @@ function buildMediumWithoutKeywordsFixture(): KnowledgeGraph { } /** - * AC-M6-1 fixture: a RepoNode exercising every field — populated + + * Repo fixture: a RepoNode exercising every field — populated + * explicit-null variants of `originUrl` / `defaultBranch` / `group`, and * a non-empty `languageStats` record. The fixture must round-trip * through both stores with matching graphHash, proving the new Repo @@ -438,7 +437,7 @@ function buildRepoFixture(): KnowledgeGraph { /** * Parallel RepoNode fixture with the nullable string fields explicitly set - * to `null` — covers the S-M6-1 "no remote" branch where originUrl is + * to `null` — covers the "no remote" branch where originUrl is * absent, defaultBranch is unknown, and the repo is group-less. Empty * languageStats ({}) is normalised to NULL on the wire; the reader * reconstructs it as `{}` so canonical-JSON parity holds. 
diff --git a/packages/storage/src/graphdb-adapter.test.ts b/packages/storage/src/graphdb-adapter.test.ts index 295574f3..27fa7d9e 100644 --- a/packages/storage/src/graphdb-adapter.test.ts +++ b/packages/storage/src/graphdb-adapter.test.ts @@ -55,14 +55,14 @@ test("GraphDbStore honours option overrides", () => { }); // --------------------------------------------------------------------------- -// Surface separation (AC-A-1): cochange + symbol-summary methods removed +// Surface separation: cochange + symbol-summary methods live on ITemporalStore // --------------------------------------------------------------------------- test("GraphDbStore no longer exposes cochange or symbol-summary methods", () => { - // Per AC-A-1 the temporal surface (cochanges + symbol summaries) lives - // exclusively on `ITemporalStore`; `GraphDbStore` is graph-only and - // does not even declare these names. The runtime check guards against - // accidental re-introduction of the merged shape. + // The temporal surface (cochanges + symbol summaries) lives exclusively + // on `ITemporalStore`; `GraphDbStore` is graph-only and does not even + // declare these names. The runtime check guards against accidental + // re-introduction of the merged shape. 
const s = new GraphDbStore("/tmp/graph.db"); const removed: readonly string[] = [ "bulkLoadCochanges", @@ -76,7 +76,7 @@ test("GraphDbStore no longer exposes cochange or symbol-summary methods", () => assert.equal( typeof (s as unknown as Record)[name], "undefined", - `GraphDbStore must not expose ${name} after AC-A-1`, + `GraphDbStore must not expose ${name}`, ); } // NotImplementedError is still exported for adapter-internal use even @@ -84,7 +84,7 @@ test("GraphDbStore no longer exposes cochange or symbol-summary methods", () => assert.equal(typeof NotImplementedError, "function"); }); -test("query before open rejects with a clear error (pool-wired in AC-M3-2)", async () => { +test("query before open rejects with a clear error", async () => { const s = new GraphDbStore("/tmp/graph.db"); await assert.rejects(() => s.query("RETURN 1"), /before open/); }); @@ -162,9 +162,9 @@ test("resolveStoreBackend rejects unknown CODEHUB_STORE values", () => { test("openStore composes a DuckDbStore graph + temporal pair when backend=duck", async () => { const store = await openStore({ path: ":memory:", backend: "duck" }); - // AC-A-1: the duck backend wires BOTH views to the same DuckDbStore - // instance. Identity check — not just constructor-name — pins the - // single-connection invariant. + // The duck backend wires BOTH views to the same DuckDbStore instance. + // Identity check — not just constructor-name — pins the single- + // connection invariant. 
assert.equal(store.backend, "duck"); assert.equal(store.graph.constructor.name, "DuckDbStore"); assert.equal(store.temporal.constructor.name, "DuckDbStore"); @@ -175,10 +175,10 @@ test("openStore composes a DuckDbStore graph + temporal pair when backend=duck", }); test("openStore composes GraphDbStore + DuckDbStore pair when backend=lbug", async () => { - // AC-A-3 tightens the artifact split: the graph file is renamed to - // `graph.lbug` and the temporal file is its sibling `temporal.duckdb` - // inside the same directory, regardless of the legacy filename the - // caller supplies (typically `/.codehub/graph.duckdb`). + // The graph file is renamed to `graph.lbug` and the temporal file is + // its sibling `temporal.duckdb` inside the same directory, regardless + // of the legacy filename the caller supplies (typically + // `/.codehub/graph.duckdb`). const store = await openStore({ path: "/tmp/och-test/graph.duckdb", backend: "lbug" }); assert.equal(store.backend, "lbug"); assert.equal(store.graph.constructor.name, "GraphDbStore"); @@ -188,12 +188,12 @@ test("openStore composes GraphDbStore + DuckDbStore pair when backend=lbug", asy }); // --------------------------------------------------------------------------- -// Integration: createSchema + bulkLoad (AC-M3-3 Commit 1) +// Integration: createSchema + bulkLoad // --------------------------------------------------------------------------- // // These tests require the native binding. On platforms without the prebuilt // `.node` the suite gracefully skips; every one of the code paths still gets -// exercised by the unit tests above plus the AC-M3-4 round-trip suite. +// exercised by the unit tests above plus the round-trip suite. 
test("createSchema runs the full DDL against a fresh store", async () => { if (!(await hasNativeBinding())) { @@ -377,7 +377,7 @@ test("bulkLoad cycles through every declared edge kind without fault", async () }); // --------------------------------------------------------------------------- -// Cypher write-guard (AC-M3-3 Commit 2) +// Cypher write-guard // --------------------------------------------------------------------------- test("assertReadOnlyCypher accepts plain MATCH ... RETURN", () => { @@ -571,10 +571,9 @@ test("search: BM25 index finds a distinct symbol name", async () => { } }); -// NOTE: a real vectorSearch integration test lands in AC-M3-3 Commit 3 -// alongside upsertEmbeddings — the vector query path is already wired here -// but it needs at least one embedding row to return non-empty results, and -// upsertEmbeddings is still a stub at this commit. +// A real vectorSearch integration test lives below alongside +// upsertEmbeddings — the vector query path needs at least one embedding +// row to return non-empty results. 
test("vectorSearch rejects vectors with the wrong dimension", async () => { const store = new GraphDbStore("/tmp/graph-vec-dim.db", { embeddingDim: 4 }); @@ -646,7 +645,7 @@ test("healthCheck returns ok once the pool is open", async () => { }); // --------------------------------------------------------------------------- -// Integration: upsertEmbeddings + listEmbeddingHashes (AC-M3-3 Commit 3) +// Integration: upsertEmbeddings + listEmbeddingHashes // --------------------------------------------------------------------------- test("upsertEmbeddings dimension mismatch throws without touching the store", async () => { @@ -1132,7 +1131,7 @@ test("listNodes() cross-adapter parity: DuckStore ≡ GraphDbStore on the shared }); // --------------------------------------------------------------------------- -// v1.0 community-adapter conformance suite (AC-A-11) +// v1.0 community-adapter conformance suite // // GraphDb is graph-only; it MUST satisfy every block of the shared v1.0 // conformance contract. Binding probe is performed once at module load diff --git a/packages/storage/src/graphdb-adapter.ts b/packages/storage/src/graphdb-adapter.ts index 50d7537e..329b5b8a 100644 --- a/packages/storage/src/graphdb-adapter.ts +++ b/packages/storage/src/graphdb-adapter.ts @@ -2,12 +2,11 @@ * Graph-database backend for {@link IGraphStore} (phase-2 implementation). * * This adapter is the second implementation behind the `IGraphStore` seam. - * DuckDbStore remains the default through M7; this file ships the full - * lifecycle + bulk-load surface so `CODEHUB_STORE=lbug` can already drive a - * round-trip-clean graph write. Query, search, vector, and embedding - * surfaces follow in AC-M3-3 sibling commits. + * DuckDbStore remains the default; this file ships the full lifecycle + + * bulk-load surface so `CODEHUB_STORE=lbug` can already drive a + * round-trip-clean graph write. * - * Design notes (spec 004 §Architectural decisions): + * Design notes: * 1. 
Rel tables are polymorphic per edge kind — one named rel table per * relation type, each with multiple `FROM/TO` pairs. The DDL lives in * {@link graphdb-schema.ts}; this file never emits DDL inline. @@ -85,11 +84,10 @@ const DEFAULT_EMBEDDING_DIM = 768; const DEFAULT_TIMEOUT_MS = 5_000; /** - * Thrown by adapter surfaces that are not yet wired. AC-A-1 deleted the - * cochange + summary stubs from this adapter (those methods now live on - * {@link ITemporalStore}, never on the graph adapter). The class export - * is retained because downstream packages still import it for typed - * fallback handling on graph-only failure modes. + * Thrown by adapter surfaces that are not yet wired. The cochange + symbol + * summary surfaces live on {@link ITemporalStore}, never on the graph + * adapter. The class export is retained because downstream packages still + * import it for typed fallback handling on graph-only failure modes. */ export class NotImplementedError extends Error { constructor(method: string) { @@ -101,8 +99,7 @@ export class NotImplementedError extends Error { /** * Missing peer-binding error. Surfaced when the native `@ladybugdb/core` * module is not available on the current platform (no prebuilt binary, or - * the package was pruned by a `--production` install). The message - * satisfies spec 004 §S-M3-2. + * the package was pruned by a `--production` install). */ export class GraphDbBindingError extends Error { constructor(cause: unknown) { @@ -147,8 +144,8 @@ const EMBEDDING_COLUMNS: readonly string[] = [ /** * Column → node-field descriptors used by the round-trip readback path. - * AC-M3-3 Commit 4's `rebuildGraphFromStore` walks this list so the - * returned graph carries the same field set the bulk writer ingested. + * `rebuildGraphFromStore` walks this list so the returned graph carries + * the same field set the bulk writer ingested. 
*/ export const ROUND_TRIP_COLUMN_MAP: readonly (readonly [ string, @@ -214,10 +211,10 @@ function buildEmbeddingCreateCypher(): string { export class GraphDbStore implements IGraphStore { /** - * Cypher dialect marker introduced by AC-A-1. The graph-db backend - * speaks Cypher natively; the optional {@link IGraphStore.execCypher} - * escape hatch is wired below so community tooling that needs raw - * Cypher (APOC analogues, etc.) can call through. + * Cypher dialect marker. The graph-db backend speaks Cypher natively; + * the optional {@link IGraphStore.execCypher} escape hatch is wired + * below so community tooling that needs raw Cypher (APOC analogues, + * etc.) can call through. */ readonly dialect: GraphDialect = "cypher"; private readonly path: string; @@ -245,10 +242,10 @@ export class GraphDbStore implements IGraphStore { async open(): Promise { if (this.pool?.isOpen()) return; - // Surface missing-binding failures as a typed error per spec 004 §S-M3-2. - // The pool's own lazy import would produce a raw module-not-found error - // otherwise. When the caller injected a `binding` in `poolConfig` (tests) - // we skip the probe — the fake already provides the types. + // Surface missing-binding failures as a typed error so the pool's own + // lazy import doesn't produce a raw module-not-found error. When the + // caller injected a `binding` in `poolConfig` (tests) we skip the + // probe — the fake already provides the types. if (!this.poolConfig.binding) { try { await import("@ladybugdb/core"); @@ -381,7 +378,7 @@ export class GraphDbStore implements IGraphStore { // "explicit zero". DuckDbStore stores 0 in both cases because the // column is NOT NULL; the graph-db schema declares it as nullable // INT32 and the canonical-JSON hash stays stable across backends as - // long as both adapters agree on the sentinel (AC-M3-4 gate). + // long as both adapters agree on the sentinel. 
const params: SqlParam[] = [ e.from, e.to, @@ -498,10 +495,10 @@ export class GraphDbStore implements IGraphStore { if (!this.pool) { throw new Error("graph-db: query called before open()"); } - // Refuse write keywords so the user surface stays read-only. A full - // Cypher-guard lands in AC-M3-5; this minimal deny-list matches the - // DuckDB backend's assertReadOnlySql approach and trips every write - // verb the native binding accepts. + // Refuse write keywords so the user surface stays read-only. The + // full Cypher-guard lives in `cypher-guard.ts`; this call mirrors + // the DuckDB backend's `assertReadOnlySql` approach and trips every + // write verb the native binding accepts. assertReadOnlyCypher(sql); const timeoutMs = opts?.timeoutMs ?? this.defaultTimeoutMs; return this.pool.query(sql, params, { timeoutMs }); @@ -593,7 +590,7 @@ export class GraphDbStore implements IGraphStore { } // -------------------------------------------------------------------------- - // Typed finders — AC-A-6 service-layer foundation + // Typed finders — service-layer foundation // -------------------------------------------------------------------------- // // Cypher stays LOCAL to this file — never exported. Determinism: node @@ -1410,7 +1407,7 @@ export class GraphDbStore implements IGraphStore { } // -------------------------------------------------------------------------- - // execCypher — IGraphStore optional escape hatch (AC-A-1) + // execCypher — IGraphStore optional escape hatch // -------------------------------------------------------------------------- /** diff --git a/packages/storage/src/graphdb-pool.test.ts b/packages/storage/src/graphdb-pool.test.ts index 4443e570..1a632728 100644 --- a/packages/storage/src/graphdb-pool.test.ts +++ b/packages/storage/src/graphdb-pool.test.ts @@ -1,5 +1,5 @@ /** - * Concurrency regression suite for {@link GraphDbPool} (spec 004 §AC-M3-2). + * Concurrency regression suite for {@link GraphDbPool}. 
* * Every test injects a fake `NativeBinding` into the pool so the suite * runs without touching the native binding. That lets us drive exact diff --git a/packages/storage/src/graphdb-pool.ts b/packages/storage/src/graphdb-pool.ts index 553c68aa..bd0e6803 100644 --- a/packages/storage/src/graphdb-pool.ts +++ b/packages/storage/src/graphdb-pool.ts @@ -1,7 +1,7 @@ /** * Connection pool for the graph-database backend. * - * Design goals (spec 004 §AC-M3-2 / §W-M3-1): + * Design goals: * * 1. **Single-writer-multi-reader model.** One native `Database` per store * path, with a bounded fan-out of `Connection` objects on top of it. @@ -241,10 +241,10 @@ function closeEntry(path: string): void { async function loadDefaultBinding(): Promise { // Dynamic import keeps the native dep off the startup path when the - // default DuckDB backend is in use (spec 004 §S-M3-1). The cast - // passes through `unknown` because the native binding's typed surface - // is richer than the structural shape this module uses — we only - // require `{ Database, Connection }` constructors, nothing more. + // DuckDB backend is in use. The cast passes through `unknown` because + // the native binding's typed surface is richer than the structural + // shape this module uses — we only require `{ Database, Connection }` + // constructors, nothing more. const mod = (await import("@ladybugdb/core")) as unknown as { default?: NativeBinding; } & NativeBinding; diff --git a/packages/storage/src/graphdb-roundtrip.test.ts b/packages/storage/src/graphdb-roundtrip.test.ts index b5c69d61..f389d134 100644 --- a/packages/storage/src/graphdb-roundtrip.test.ts +++ b/packages/storage/src/graphdb-roundtrip.test.ts @@ -1,10 +1,10 @@ /** - * Round-trip parity tests for {@link GraphDbStore} (spec 004 §AC-M3-3). + * Round-trip parity tests for {@link GraphDbStore}. * * These tests verify that a knowledge graph survives a bulk-load + rebuild - * cycle byte-identical under `graphHash`. 
The AC-M3-4 CI gate pairs this - * with the DuckDbStore round-trip to guarantee cross-backend parity; this - * file establishes the correctness half. + * cycle byte-identical under `graphHash`. A CI gate pairs this with the + * DuckDbStore round-trip to guarantee cross-backend parity; this file + * establishes the correctness half. * * Three fixture sizes: * - small: 2 files + 8 functions + 15 edges (mixed DEFINES / CALLS). @@ -268,7 +268,7 @@ const NODE_COLUMN_MAP: readonly (readonly [string, string, "number" | "string" | ["content_hash", "contentHash", "string"], ["email_hash", "emailHash", "string"], ["email_plain", "emailPlain", "string"], - // Repo (AC-M6-1). See graph-hash-parity.test.ts for the parallel mapping. + // Repo. See graph-hash-parity.test.ts for the parallel mapping. ["origin_url", "originUrl", "string"], ["repo_uri", "repoUri", "string"], ["default_branch", "defaultBranch", "string"], @@ -463,7 +463,7 @@ test("every declared edge kind round-trips at least one row", async () => { assert.equal(rebuilt, original, "graphHash parity broken for all-kinds fixture"); }); -test("round-trip parity: RepoNode fixture (AC-M6-1 first-class repo entity)", async () => { +test("round-trip parity: RepoNode fixture (first-class repo entity)", async () => { if (!(await hasNativeBinding())) { assert.ok(true, "native binding unavailable — skipping round-trip"); return; @@ -492,7 +492,7 @@ test("round-trip parity: RepoNode fixture (AC-M6-1 first-class repo entity)", as assert.equal(rebuilt, original, "graphHash parity broken for RepoNode fixture"); }); -test("round-trip parity: RepoNode with explicit-null origin / branch / group (S-M6-1)", async () => { +test("round-trip parity: RepoNode with explicit-null origin / branch / group", async () => { if (!(await hasNativeBinding())) { assert.ok(true, "native binding unavailable — skipping round-trip"); return; @@ -515,11 +515,7 @@ test("round-trip parity: RepoNode with explicit-null origin / branch / group (S- 
languageStats: {}, } as unknown as GraphNode); const { original, rebuilt } = await runRoundTrip(g); - assert.equal( - rebuilt, - original, - "graphHash parity broken for RepoNode no-remote fixture (S-M6-1)", - ); + assert.equal(rebuilt, original, "graphHash parity broken for RepoNode no-remote fixture"); }); test("round-trip is deterministic across independent writes of the same graph", async () => { diff --git a/packages/storage/src/graphdb-schema.test.ts b/packages/storage/src/graphdb-schema.test.ts index 3455b9bd..43192e1b 100644 --- a/packages/storage/src/graphdb-schema.test.ts +++ b/packages/storage/src/graphdb-schema.test.ts @@ -30,9 +30,8 @@ function decode(codes: readonly number[]): string { test("generateSchemaDdl emits the expected number of node tables", () => { const ddl = generateSchemaDdl(); const nodeMatches = ddl.match(/CREATE NODE TABLE IF NOT EXISTS \w+/g) ?? []; - // AC-A-1 deleted Cochange + SymbolSummary NODE TABLEs (those rows now - // live exclusively on a paired ITemporalStore). The graph-side schema - // is therefore CodeNode + Embedding + StoreMeta = 3. + // Cochange + SymbolSummary live exclusively on a paired ITemporalStore; + // the graph-side schema is CodeNode + Embedding + StoreMeta = 3. assert.equal(nodeMatches.length, 3, nodeMatches.join("\n")); }); @@ -118,8 +117,8 @@ test("getAllRelationTypes returns every OCH edge kind in canonical order", () => test("statements are semicolon-terminated", () => { const ddl = generateSchemaDdl(); - // 3 node tables (post AC-A-1: CodeNode + Embedding + StoreMeta) + - // 24 rel tables + 1 EMBEDS rel = 28 statements → 28 semicolons. + // 3 node tables (CodeNode + Embedding + StoreMeta) + 24 rel tables + + // 1 EMBEDS rel = 28 statements → 28 semicolons. const count = (ddl.match(/;\n/g) ?? 
[]).length; assert.equal(count, 3 + EXPECTED_RELATION_COUNT + 1); }); diff --git a/packages/storage/src/graphdb-schema.ts b/packages/storage/src/graphdb-schema.ts index b07d87d8..2b3d3fa0 100644 --- a/packages/storage/src/graphdb-schema.ts +++ b/packages/storage/src/graphdb-schema.ts @@ -71,8 +71,8 @@ const RELATION_KINDS: readonly string[] = [ ]; /** - * Exported for AC-M3-3/4 round-trip tests so they can compare against the - * same source of truth as the DDL emitter. + * Exported for the round-trip parity tests so they can compare against + * the same source of truth as the DDL emitter. */ export function getAllRelationTypes(): readonly string[] { return RELATION_KINDS; @@ -203,17 +203,15 @@ export function generateSchemaDdl(opts: GraphDbSchemaOptions = {}): string { PRIMARY KEY (id) )`); - // AC-A-1 — Cochange + SymbolSummary NODE TABLEs deleted. The graph - // adapter never stored cochange / symbol-summary data; the M3+M6 - // reframe (AC-A-3) routes those rows to a paired DuckDB-backed - // ITemporalStore on every deployment, so the Cypher schema no longer - // needs to declare them. + // Cochange + SymbolSummary live exclusively on the paired DuckDB-backed + // ITemporalStore — the graph adapter never stores those rows, so the + // Cypher schema does not declare them. // ------------------------------------------------------------------------- - // Rel tables — one per edge kind. FROM/TO is CodeNode on both sides; an - // AC-M3-3 follow-up may narrow the endpoints per kind once the node-kind - // split lands. We DO NOT emit a single CodeRelation rel table with a type - // column — that defeats the predicate push-down the graph-db gives us (spec - // 004 §Architectural decisions #1). + // Rel tables — one per edge kind. FROM/TO is CodeNode on both sides; + // a future schema revision may narrow the endpoints per kind once the + // node-kind split lands. 
We DO NOT emit a single CodeRelation rel + // table with a type column — that defeats the predicate push-down the + // graph-db gives us. // ------------------------------------------------------------------------- for (const kind of RELATION_KINDS) { statements.push(`CREATE REL TABLE IF NOT EXISTS ${kind} ( diff --git a/packages/storage/src/index.ts b/packages/storage/src/index.ts index 70de164c..9c2ec0f7 100644 --- a/packages/storage/src/index.ts +++ b/packages/storage/src/index.ts @@ -79,8 +79,7 @@ import { describeArtifacts } from "./paths.js"; * superset of the spec-level {@link ApiOpenStoreOptions}: keeps the * `duckOptions` / `graphDbOptions` adapter-specific bag so existing * callers (analyze CLI, ingestion harness) can continue passing through - * the precise per-backend tuning while AC-A-9 finishes the auto-detect - * resolver. + * the precise per-backend tuning alongside the auto-detect resolver. */ export interface OpenStoreOptions extends ApiOpenStoreOptions { readonly duckOptions?: DuckDbStoreOptions; @@ -104,7 +103,7 @@ type ResolvedBackend = "duck" | "lbug"; * - `CODEHUB_STORE=lbug` → `"lbug"`. * - any other value → throw. * - * The async sibling {@link resolveStoreBackendAsync} adds the AC-A-9 + * The async sibling {@link resolveStoreBackendAsync} adds the * binding-availability probe: when env is unset, it calls * `import("@ladybugdb/core")` and prefers `"lbug"` on success. The sync * resolver here intentionally returns `"duck"` for `auto+unset` because @@ -185,9 +184,9 @@ function shouldEmitAdvisory(env: NodeJS.ProcessEnv = process.env): boolean { } /** - * Async backend resolver — the AC-A-9 default-flip entry point. Honors - * the explicit env var first, then probes `@ladybugdb/core` when the - * caller asked for `"auto"` and `CODEHUB_STORE` is unset. + * Async backend resolver — the graph-default entry point. 
Honors the + * explicit env var first, then probes `@ladybugdb/core` when the caller + * asked for `"auto"` and `CODEHUB_STORE` is unset. * * The probe runs at most once per process via {@link probeLbugBinding}; * subsequent calls hit the cached result. On binding failure the resolver @@ -312,7 +311,6 @@ function composeArtifactPaths( /** * Factory that returns a composed graph + temporal {@link OpenStoreResult}. - * Per AC-A-3 (architecture-revised.md §AC-A-3): * * - `backend: "duck"` → a single `DuckDbStore` instance is returned as * BOTH the `graph` and `temporal` views over the same connection. @@ -331,9 +329,9 @@ function composeArtifactPaths( * lifecycle cleanup symmetry. */ export async function openStore(opts: OpenStoreOptions): Promise { - // AC-A-9: async resolver — runs the cached `@ladybugdb/core` probe - // when the caller asked for `"auto"` and `CODEHUB_STORE` is unset. - // Explicit backend / env var paths skip the probe. + // Async resolver — runs the cached `@ladybugdb/core` probe when the + // caller asked for `"auto"` and `CODEHUB_STORE` is unset. Explicit + // backend / env var paths skip the probe. const initialBackend: ResolvedBackend = await resolveStoreBackendAsync(opts.backend); // Compose the canonical artifact paths for the initial backend, then // run dual-artifact detection. 
When both `graph.duckdb` and diff --git a/packages/storage/src/interface.test.ts b/packages/storage/src/interface.test.ts index 34c97244..2ee7db90 100644 --- a/packages/storage/src/interface.test.ts +++ b/packages/storage/src/interface.test.ts @@ -3,7 +3,7 @@ import { test } from "node:test"; import type { CochangeRow, IGraphStore, ITemporalStore, Store } from "./interface.js"; // --------------------------------------------------------------------------- -// AC-A-1 — structural separation between IGraphStore and ITemporalStore +// Structural separation between IGraphStore and ITemporalStore // --------------------------------------------------------------------------- /** @@ -62,9 +62,9 @@ void _graphLeakWedge; test("IGraphStore-shaped value lacks temporal methods at runtime", () => { // Minimal IGraphStore stub. Intentionally typed precisely as IGraphStore // so the structural shape is enforced by the checker. - // AC-A-6 widened the IGraphStore surface with the typed-finder family; - // the minimal stub gains thin no-op implementations for each new finder - // so the structural shape continues to be enforced by the checker. + // The minimal stub carries thin no-op implementations for each typed + // finder so the structural shape continues to be enforced by the + // checker. // eslint-disable-next-line require-yield async function* emptyEmbeddings() { // intentionally empty diff --git a/packages/storage/src/interface.ts b/packages/storage/src/interface.ts index abe4b624..960517bc 100644 --- a/packages/storage/src/interface.ts +++ b/packages/storage/src/interface.ts @@ -1,7 +1,7 @@ /** * Storage abstractions for OpenCodeHub knowledge graphs. * - * AC-A-1 split this surface into two cohesive interfaces: + * The surface is split into two cohesive interfaces: * * 1. {@link IGraphStore} — graph-tier, pure graph operations only: * nodes, edges, traversals, BM25 search, vector search, embeddings. @@ -21,7 +21,7 @@ * file when DuckDB is the only backend). 
The graph-db adapter (via * `@ladybugdb/core`) is graph-only and pairs with a DuckDB temporal store. * - * ## Sentinel rules (AC-A-2) + * ## Sentinel rules * * Every adapter that implements {@link IGraphStore} MUST honour four * sentinel coercions so the cross-adapter `graphHash` parity invariant @@ -112,8 +112,8 @@ export type GraphDialect = "cypher" | "none"; * * `assertIGraphStoreConformance(name, factory)` from * `@opencodehub/storage/test-utils` is the formal v1.0 conformance test - * suite for community adapters (architecture-revised.md §AC-A-11). A - * third-party adapter author imports it from their own test file: + * suite for community adapters. A third-party adapter author imports it + * from their own test file: * * ```ts * import { test } from "node:test"; @@ -497,9 +497,9 @@ export interface OpenStoreOptions { * - `"duck"` — single DuckDB file backs BOTH graph and temporal views. * - `"lbug"` — graph-db backend (`@ladybugdb/core`) for graph; a paired * DuckDB file at `.temporal.duckdb` for temporal. - * - `"auto"` — read the `CODEHUB_STORE` env var (AC-A-9 will flip the - * default once binding-availability detection lands). For now - * `"auto"` resolves to the legacy default. + * - `"auto"` — read the `CODEHUB_STORE` env var; when unset, probe + * `@ladybugdb/core` and prefer the graph backend on success, else + * fall back to DuckDB. */ readonly backend?: BackendKind | "auto"; readonly readOnly?: boolean; @@ -552,8 +552,8 @@ export interface CochangeLookupOptions { } /** - * @deprecated AC-A-1 folded the cochange surface into {@link ITemporalStore}. - * The named alias is retained for one AC cycle so test fakes that satisfy + * @deprecated The cochange surface is folded into {@link ITemporalStore}. + * The named alias is retained transiently so test fakes that satisfy * the older shape keep compiling. New code consumes `ITemporalStore` * directly via {@link OpenStoreResult.temporal}. 
*/ @@ -605,10 +605,10 @@ export interface SymbolSummaryRow { } /** - * @deprecated AC-A-1 folded the symbol-summary surface into - * {@link ITemporalStore}. The named alias is retained for one AC cycle so - * test fakes that satisfy the older shape keep compiling. New code consumes - * `ITemporalStore` directly via {@link OpenStoreResult.temporal}. + * @deprecated The symbol-summary surface is folded into {@link ITemporalStore}. + * The named alias is retained transiently so test fakes that satisfy + * the older shape keep compiling. New code consumes `ITemporalStore` + * directly via {@link OpenStoreResult.temporal}. */ export interface SymbolSummaryStore { bulkLoadSymbolSummaries(rows: readonly SymbolSummaryRow[]): Promise; @@ -867,10 +867,10 @@ export interface VectorQuery { * to `nodes` (aliased `n`). Example: `n.kind = ?`. Use `?` placeholders and * supply values via `params`. * - * NOTE — Layer-2 leak (architecture-revised §AC-A-6). This raw SQL - * predicate is a temporary surface; AC-A-6 replaces it with typed - * finder shapes (`kindFilter`, `confidenceFloor`, etc.). Do not add - * new callers that depend on raw SQL here. + * NOTE — Layer-2 leak. This raw SQL predicate is a temporary surface + * to be replaced with typed finder shapes (`kindFilter`, + * `confidenceFloor`, etc.). Do not add new callers that depend on raw + * SQL here. */ readonly whereClause?: string; readonly params?: readonly SqlParam[]; diff --git a/packages/storage/src/resolver.test.ts b/packages/storage/src/resolver.test.ts index 6f3680cc..464e400c 100644 --- a/packages/storage/src/resolver.test.ts +++ b/packages/storage/src/resolver.test.ts @@ -1,10 +1,10 @@ /** - * AC-A-9: tests for the async backend resolver + dual-artifact detection. + * Tests for the async backend resolver + dual-artifact detection. * * The sync `resolveStoreBackend` env-var resolution lives next door in - * `graphdb-adapter.test.ts:141-161`.
This file covers the new surface: + * `graphdb-adapter.test.ts:141-161`. This file covers: * - * - `resolveStoreBackendAsync` — the AC-A-9 default-flip resolver. + * - `resolveStoreBackendAsync` — graph-default async resolver. * - `detectDualArtifacts` — the newer-mtime-wins helper. */ diff --git a/packages/storage/src/schema-ddl.ts b/packages/storage/src/schema-ddl.ts index b7212cff..32869ddd 100644 --- a/packages/storage/src/schema-ddl.ts +++ b/packages/storage/src/schema-ddl.ts @@ -104,10 +104,10 @@ export function generateSchemaDDL(opts: SchemaOptions): readonly string[] { partial_fingerprint TEXT, baseline_state TEXT, suppressed_json TEXT, - -- Repo (AC-M6-1). One row per indexed repository. The "group" field - -- is a reserved SQL keyword, so the column is named repo_group. The - -- index_time field is node-level metadata that is kept out of - -- graphHash determinism inputs per E-M6-1 / W-M6-1. + -- Repo. One row per indexed repository. The "group" field is a + -- reserved SQL keyword, so the column is named repo_group. The + -- index_time field is node-level metadata that is deliberately + -- excluded from graphHash determinism inputs. origin_url TEXT, repo_uri TEXT, default_branch TEXT, diff --git a/packages/storage/src/temporal-parity.test.ts b/packages/storage/src/temporal-parity.test.ts index 851fecea..6d608c95 100644 --- a/packages/storage/src/temporal-parity.test.ts +++ b/packages/storage/src/temporal-parity.test.ts @@ -1,11 +1,10 @@ /** - * ITemporalStore parity gate (architecture-revised.md §AC-A-3). + * ITemporalStore parity gate. * - * After AC-A-1 split the storage interface into {@link IGraphStore} - * (graph-only) and {@link ITemporalStore} (tabular-only), AC-A-3 deleted - * the residual cochange + symbol-summary methods from {@link GraphDbStore} - * — those rows now live exclusively on the DuckDB-backed temporal view - * regardless of which graph backend the caller picked. 
+ * The storage interface is split into {@link IGraphStore} (graph-only) + * and {@link ITemporalStore} (tabular-only). Cochange + symbol-summary + * rows live exclusively on the DuckDB-backed temporal view regardless + * of which graph backend the caller picked. * * This file is the parity tripwire for that contract: * @@ -237,9 +236,9 @@ test("temporal-parity: openStore composes identical temporal snapshots across ba }); test("openStore({backend:'lbug'}) splits artifacts into graph.lbug + temporal.duckdb siblings", async () => { - // AC-A-3 §4 — the temporal store lives at /temporal.duckdb, the - // graph store at /graph.lbug, regardless of the legacy filename - // the caller passes through. + // The temporal store lives at /temporal.duckdb, the graph store + // at /graph.lbug, regardless of the legacy filename the caller + // passes through. const dbPath = await scratchDbPath("och-temporal-parity-paths-"); const store = await openStore({ path: dbPath, backend: "lbug" }); try { diff --git a/packages/storage/src/test-utils/index.ts b/packages/storage/src/test-utils/index.ts index ffefe6d3..2c2db3ba 100644 --- a/packages/storage/src/test-utils/index.ts +++ b/packages/storage/src/test-utils/index.ts @@ -5,8 +5,7 @@ * `IGraphStore` adapter authors (community AGE / Memgraph / Neo4j / * Neptune forks) import {@link assertIGraphStoreConformance} from here and * run it against their own implementation to prove they satisfy the v1.0 - * graphHash byte-identity + typed-finder contract (architecture-revised.md - * §AC-A-11). + * graphHash byte-identity + typed-finder contract. 
* * {@link assertGraphParity} + {@link rebuildFromStore} are the lower-level * primitives that the conformance suite is built on; they are re-exported diff --git a/packages/storage/src/test-utils/parity-harness.ts b/packages/storage/src/test-utils/parity-harness.ts index af28d13e..7586551c 100644 --- a/packages/storage/src/test-utils/parity-harness.ts +++ b/packages/storage/src/test-utils/parity-harness.ts @@ -1,14 +1,13 @@ /** - * Public-interface parity harness (architecture-revised.md §AC-A-7). + * Public-interface parity harness. * - * Hoists what used to live in `graph-hash-parity.test.ts` as a pair of - * hand-written per-backend rebuild helpers — each issuing raw SQL or - * Cypher — into one backend-agnostic rebuilder that uses ONLY public - * {@link IGraphStore} methods: {@link IGraphStore.listNodes} and - * {@link IGraphStore.listEdges}. + * One backend-agnostic rebuilder that uses ONLY public {@link IGraphStore} + * methods: {@link IGraphStore.listNodes} and {@link IGraphStore.listEdges}. + * Replaces a pair of hand-written per-backend rebuild helpers — each + * issuing raw SQL or Cypher — with a single dialect-free path. * - * After this AC, a community AGE / Memgraph / Neo4j / Neptune adapter can - * prove conformance by importing {@link assertGraphParity} and running it + * A community AGE / Memgraph / Neo4j / Neptune adapter can prove + * conformance by importing {@link assertGraphParity} and running it * against its own `IGraphStore` implementation — no per-backend SQL * dialect required, no escape hatch into `query()` or `execCypher()`. * diff --git a/packages/wiki/src/index.test.ts b/packages/wiki/src/index.test.ts index 14b3e70c..687b8754 100644 --- a/packages/wiki/src/index.test.ts +++ b/packages/wiki/src/index.test.ts @@ -2,11 +2,10 @@ * Wiki generation tests — confirm the deterministic-output + success-criteria * contract without spinning up DuckDB. 
* - * The post-AC-A-6d `WikiFakeStore` implements `IGraphStore` finder methods - * directly over in-memory `nodes` + `edges` arrays. The earlier - * SQL-regex `dispatch()` (~400 LOC of pattern-matching) is gone — every - * helper in `wiki/wiki-render/shared.ts` now reaches the same fixture - * data via typed finders. + * `WikiFakeStore` implements `IGraphStore` finder methods directly + * over in-memory `nodes` + `edges` arrays. Every helper in + * `wiki/wiki-render/shared.ts` reaches the same fixture data via + * typed finders. */ import assert from "node:assert/strict"; diff --git a/packages/wiki/src/index.ts b/packages/wiki/src/index.ts index 5286f81d..36d0e0ed 100644 --- a/packages/wiki/src/index.ts +++ b/packages/wiki/src/index.ts @@ -241,10 +241,11 @@ async function renderLlmOverviewPage( * by kind priority then name. Used by the LLM overview page to feed key * symbols into each summarizer prompt. * - * Implementation: walk MEMBER_OF edges via `listEdgesByType` (post-AC-A-6a), - * lift the typed Class/Function/Method node lists via `listNodesByKind`, - * then JS-side join the edge endpoints to the symbol nodes. Sort by the - * (kind-priority, name ASC) key the SQL formerly applied via `CASE n.kind`. + * Implementation: walk MEMBER_OF edges via `listEdgesByType`, lift the + * typed Class/Function/Method node lists via `listNodesByKind`, then + * JS-side join the edge endpoints to the symbol nodes. Sort by the + * (kind-priority, name ASC) key the SQL formerly applied via + * `CASE n.kind`. */ async function loadCommunityTopSymbols( store: IGraphStore, diff --git a/packages/wiki/src/wiki-render/shared.ts b/packages/wiki/src/wiki-render/shared.ts index 6ed8ec4a..4fd3dc9f 100644 --- a/packages/wiki/src/wiki-render/shared.ts +++ b/packages/wiki/src/wiki-render/shared.ts @@ -1,10 +1,10 @@ /** * Shared helpers for wiki renderers. * - * Everything here is pure: no LLM calls, no network, no clock. 
The only side - * effect is reading from the graph store via typed `IGraphStore` finders - * (post-AC-A-6). Each helper returns structured data the render modules - * turn into Markdown. + * Everything here is pure: no LLM calls, no network, no clock. The only + * side effect is reading from the graph store via typed `IGraphStore` + * finders. Each helper returns structured data the render modules turn + * into Markdown. */ import type { IGraphStore } from "@opencodehub/storage"; diff --git a/plugins/opencodehub/hooks/docs-staleness.sh b/plugins/opencodehub/hooks/docs-staleness.sh index 728c815b..9134612b 100755 --- a/plugins/opencodehub/hooks/docs-staleness.sh +++ b/plugins/opencodehub/hooks/docs-staleness.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash # Non-blocking docs-staleness hook — fires after codehub auto-reindex. -# Per spec 001 AC-2-8: when .codehub/docs/.docmeta.json exists and the -# graph_hash in the manifest disagrees with the live hash, emit a -# systemMessage suggesting /codehub-document --refresh. Never regenerates -# automatically — regeneration spends LLM credits and requires consent. +# When .codehub/docs/.docmeta.json exists and the graph_hash in the +# manifest disagrees with the live hash, emit a systemMessage suggesting +# /codehub-document --refresh. Never regenerates automatically — +# regeneration spends LLM credits and requires consent. set -uo pipefail diff --git a/plugins/opencodehub/skills/codehub-code-pack/references/determinism-contract.md b/plugins/opencodehub/skills/codehub-code-pack/references/determinism-contract.md index 95d51f07..3da92089 100644 --- a/plugins/opencodehub/skills/codehub-code-pack/references/determinism-contract.md +++ b/plugins/opencodehub/skills/codehub-code-pack/references/determinism-contract.md @@ -1,100 +1,94 @@ # Determinism contract — auditor reference -Ground truth for the `codehub-code-pack` skill. 
Cite this file when -the user disputes a `packHash` mismatch, when a CI determinism gate -fails, or when a future contributor proposes adding a non-deterministic -emitter to `@opencodehub/pack`. All requirements below are excerpted -verbatim from `.erpaval/specs/005-m5-m6/spec.md` and -`.erpaval/ROADMAP.md` — do not paraphrase. - -## Source — ROADMAP §M5: 9-item code-pack BOM (verbatim) - -> **9-item code-pack BOM** (byte-identical given same commit, -> tokenizer, budget): -> -> 1. `manifest.json` — pack_hash, commit SHA, tokenizer ID, schema version, counts -> 2. PageRank-ranked symbol skeleton -> 3. File tree with framework labels -> 4. Dependency graph / lockfile slice (exact versions) -> 5. Top-N AST-chunked files with byte offsets -> 6. SCIP-grounded cross-refs (community clusters + call graph) -> 7. Optional embeddings sidecar (`.parquet`) -> 8. Salient docstrings / SARIF findings by severity + rule -> 9. LICENSES / NOTICES + README.md + full determinism contract - -## Source — Spec 005 §M5 ubiquitous requirements (verbatim) - -> - **U1**: `graphHash` byte-identity invariant MUST hold before and -> after every M5+M6 commit — existing `DuckDbStore` / `GraphDbStore` -> parity suite stays green. -> - **U2**: `pack_hash` byte-identity invariant — same -> `(commit, tokenizer, budget, chonkie_version, duckdb_version, -> grammar_commits)` → same `pack_hash`. Verified by a determinism -> suite. -> - **U3**: No tracked source file MUST introduce banned literals. -> `bash scripts/check-banned-strings.sh` MUST exit 0 post-commit. -> - **U4**: `mise run check` MUST exit 0 after every commit. -> - **U5**: Every new package MUST carry `@opencodehub/` naming, -> Apache-2.0 license, `type: module`, `tsc --noEmit` clean. -> - **U6**: No LLM calls outside `@opencodehub/summarizer`. -> - **U7**: Every MCP tool and CLI output MUST remain deterministic -> (alpha-sort, lex-stable tiebreak) — preserves the existing -> group-query convention at `group-query.ts`. 
- -## Source — Spec 005 §M5 event-driven requirements (verbatim) - -> - **E-M5-1**: When a user runs `codehub code-pack --budget `, -> the CLI MUST produce a directory containing all 9 BOM items plus -> `manifest.json` at `/.codehub/packs//`. -> - **E-M5-2**: When `pack_codebase` MCP tool is called with a pack-id -> arg, it MUST route through `@opencodehub/pack`, not `repomix`. The -> legacy repomix path stays available under an `--engine repomix` -> opt-in flag for one milestone, then removes in M7. -> - **E-M5-3**: When `codehub code-pack` is called twice on the same -> `(commit, tokenizer, budget)`, every file under the output -> directory MUST be byte-identical on second run (cmp -s). -> - **E-M5-4**: When the BOM is written, `manifest.json` MUST include -> `{commit, repo_origin_url, tokenizer_id, determinism_class, -> budget_tokens, grammar_commits, chonkie_version, duckdb_version, -> files[], pack_hash}` with -> `pack_hash = sha256(canonicalJson(all-other-fields))`. -> - **E-M5-5**: When PageRank is computed, it MUST be at request time -> from the loaded `KnowledgeGraph` (per ROADMAP §Target package -> layout — "`@opencodehub/analysis` — request-time queries (PageRank, -> blast, impact)"), NOT at index time in `materialize.ts`. The -> dead-code `pagerank()` call at `materialize.ts:231` MUST be -> removed in the same commit that lifts the function. - -## Source — Spec 005 §M5 state-driven requirements (verbatim) - -> - **S-M5-1**: While `@chonkiejs/core` fails to install or load -> (native-binding unavailable on CI platform), `@opencodehub/pack` -> MUST degrade to a line-split fallback and stamp -> `determinism_class: degraded` in the manifest — NOT silently emit -> byte-different output claiming strict determinism. -> - **S-M5-2**: While `tokenizer_id` names a Claude model, the -> manifest MUST set `determinism_class: best_effort` and the BOM -> verifier MUST warn when asked to check byte-identity against such -> a pack. 
-> - **S-M5-3**: While the target repo has no embeddings computed, BOM -> item #7 (Parquet sidecar) MUST be absent entirely (not an empty -> file) and `manifest.files[]` MUST NOT list a path to it. - -## Source — Spec 005 §M5 unwanted-behavior requirements (verbatim) - -> - **W-M5-1**: `@opencodehub/pack` MUST NOT call any LLM (enforced -> by the existing `scripts/check-banned-strings.sh`-style audit + -> a new `no-bedrock-outside-summarizer` test). -> - **W-M5-2**: `codehub code-pack` MUST NOT emit writer metadata -> (DuckDB `created_by`, chonkie writer tags) as top-level fields in -> `manifest.json` — all tool-version pins live in a single -> `pins: {}` nested object so the BOM schema is stable across tool -> upgrades. -> - **W-M5-3**: `codehub code-pack` MUST NOT use tolerance-based -> PageRank convergence — fixed iterations only. -> - **W-M5-4**: CRLF files on Windows checkouts MUST NOT produce a -> different `pack_hash` than LF on Linux — ingest normalizes to LF -> before hashing content. +Ground truth for the `codehub-code-pack` skill. Cite this file when the +user disputes a `packHash` mismatch, when a CI determinism gate fails, +or when a future contributor proposes adding a non-deterministic emitter +to `@opencodehub/pack`. The reference implementation in +`packages/pack/src/` is authoritative; this document describes the +contract that the implementation enforces. + +## 9-item code-pack BOM + +Every `codehub code-pack` invocation produces a directory of nine BOM +items plus a manifest. Same `(commit, tokenizer, budget)` → +byte-identical output: + +1. `manifest.json` — pack_hash, commit SHA, tokenizer ID, schema version, counts +2. PageRank-ranked symbol skeleton +3. File tree with framework labels +4. Dependency graph / lockfile slice (exact versions) +5. Top-N AST-chunked files with byte offsets +6. SCIP-grounded cross-refs (community clusters + call graph) +7. Optional embeddings sidecar (`.parquet`) +8.
Salient docstrings / SARIF findings by severity + rule +9. LICENSES / NOTICES + README.md + full determinism contract + +## Invariants + +- **graphHash byte-identity** holds before and after every + pack-affecting commit — the `DuckDbStore` / `GraphDbStore` parity suite + stays green. +- **packHash byte-identity** — same + `(commit, tokenizer, budget, chonkie_version, duckdb_version, + grammar_commits)` → same `packHash`. Verified by the determinism + suite at `packages/pack/src/pack-determinism.test.ts`. +- **No banned literals** in tracked source — + `bash scripts/check-banned-strings.sh` exits 0 post-commit. +- **`mise run check`** exits 0 after every commit. +- **Naming + license** — every new package carries `@opencodehub/` + naming, Apache-2.0 license, `type: module`, `tsc --noEmit` clean. +- **No LLM calls** outside `@opencodehub/summarizer`. +- **Deterministic output** — every MCP tool and CLI output is + alpha-sorted with a lex-stable tiebreak. + +## Behavior + +### Pack invocation + +- `codehub code-pack --budget ` produces a directory + containing all 9 BOM items plus `manifest.json` at + `/.codehub/packs//`. +- The `pack_codebase` MCP tool routes through `@opencodehub/pack`. The + legacy `repomix` path remains available under an `--engine repomix` + opt-in flag for one milestone before removal. +- Two invocations of `codehub code-pack` with the same + `(commit, tokenizer, budget)` produce byte-identical output (`cmp -s` + on every file under the output directory). +- `manifest.json` carries + `{commit, repo_origin_url, tokenizer_id, determinism_class, + budget_tokens, grammar_commits, chonkie_version, duckdb_version, + files[], pack_hash}` with + `pack_hash = sha256(canonicalJson(all-other-fields))`. +- PageRank is computed at request time from the loaded + `KnowledgeGraph` via `@opencodehub/analysis` — never at index time.
+ +### Degraded modes + +- When `@chonkiejs/core` fails to install or load (native binding + unavailable on a CI platform), pack degrades to a line-split + fallback and stamps `determinism_class: degraded` in the manifest — + it does NOT silently emit byte-different output claiming strict + determinism. +- When `tokenizer_id` names a Claude model, the manifest sets + `determinism_class: best_effort`. The BOM verifier warns when asked + to check byte-identity against such a pack. +- When the target repo has no embeddings computed, BOM item #7 (the + Parquet sidecar) is absent entirely (not an empty file) and + `manifest.files[]` does NOT list a path to it. + +### Forbidden + +- No LLM calls in `@opencodehub/pack` (enforced by + `scripts/check-banned-strings.sh`-style audit + a + `no-bedrock-outside-summarizer` test). +- No writer metadata (DuckDB `created_by`, chonkie writer tags) as + top-level fields in `manifest.json` — all tool-version pins live in + a single nested `pins: {}` object so the BOM schema is stable across + tool upgrades. +- No tolerance-based PageRank convergence — fixed iterations only. +- CRLF files on Windows checkouts MUST NOT produce a different + `pack_hash` than LF on Linux — ingest normalizes to LF before + hashing content. ## packHash construction algorithm @@ -102,17 +96,16 @@ The exact preimage shape that produces `packHash`: 1. Compute `fileHash = sha256_hex(raw_bytes)` for every emitted BOM file (items 2-9 from the contract above). CRLF files are - normalized to LF **at ingest** before hashing content (per W-M5-4) - — the on-disk bytes after normalization are the bytes that get - hashed. + normalized to LF **at ingest** before hashing content — the + on-disk bytes after normalization are the bytes that get hashed. 2. Construct the manifest object with `packHash: ""` as a placeholder and `files[]` populated with `{kind, path, fileHash}` rows in the order they appear in `BomItem.kind` (the type union enumerates a stable order). 3. 
Serialize the manifest to RFC 8785-shaped canonical JSON (sorted keys, no whitespace, no trailing newline). All tool-version pins - live in a single nested `pins: {}` object (per W-M5-2) — the - top-level `manifest.json` schema does not carry writer metadata. + live in a single nested `pins: {}` object — the top-level + `manifest.json` schema does not carry writer metadata. 4. `packHash = sha256_hex(canonicalJson(manifest_with_packHash_omitted))`. 5. Replace the placeholder. Write `manifest.json` with `packHash` set and `files[]` unchanged. The wire form serializes camelCase TS @@ -122,29 +115,27 @@ The exact preimage shape that produces `packHash`: The reference implementation is `packages/pack/src/manifest.ts` (the `buildManifest()` helper). The serializer reuses -`packages/core-types/src/graph-hash.ts` `writeCanonicalJson` per the -spec context note ("OCH's existing `graphHash` helper is already the -right pattern"). +`packages/core-types/src/graph-hash.ts` `writeCanonicalJson` — the +same canonical-JSON pattern that `graphHash` uses. ## Determinism class triage The manifest's `determinism_class` (snake_case on disk, `determinismClass` -in TS) takes one of three values. Each maps to a state-driven -requirement above. +in TS) takes one of three values: -| Class | Trigger | Requirement | +| Class | Trigger | Implication | |-------|---------|-------------| -| `strict` | None of the degraded triggers fire | U2 holds in full: same `(commit, tokenizer, budget, chonkie_version, duckdb_version, grammar_commits)` → same `pack_hash`. | -| `best_effort` | `tokenizer_id` resolves to a Claude model | S-M5-2 — verifier MUST warn callers checking byte-identity. | -| `degraded` | `@chonkiejs/core` native binding fails to load | S-M5-1 — line-split fallback used; pack still self-consistent locally but not portable. 
| +| `strict` | None of the degraded triggers fire | The byte-identity invariant holds in full: same `(commit, tokenizer, budget, chonkie_version, duckdb_version, grammar_commits)` → same `pack_hash`. | +| `best_effort` | `tokenizer_id` resolves to a Claude model | The verifier MUST warn callers checking byte-identity. | +| `degraded` | `@chonkiejs/core` native binding fails to load | Line-split fallback used; pack still self-consistent locally but not portable. | ## Determinism suite location The byte-identity test suite lives at -`packages/pack/src/pack-determinism.test.ts` (delivered by T-W3-3 in -this same M5 wave). It runs `generatePack` twice against a fixture -repo, computes `cmp -s` over every output file, and asserts manifest -`pack_hash` equality. CI gates on this suite. +`packages/pack/src/pack-determinism.test.ts`. It runs `generatePack` +twice against a fixture repo, computes `cmp -s` over every output +file, and asserts manifest `pack_hash` equality. CI gates on this +suite. When debugging a `pack_hash` drift: diff --git a/plugins/opencodehub/skills/codehub-contract-map/SKILL.md b/plugins/opencodehub/skills/codehub-contract-map/SKILL.md index 3359c65b..6d399e72 100644 --- a/plugins/opencodehub/skills/codehub-contract-map/SKILL.md +++ b/plugins/opencodehub/skills/codehub-contract-map/SKILL.md @@ -14,7 +14,7 @@ Standalone group-only skill. Renders `group_contracts` into a Markdown + Mermaid ## Preconditions 1. A `` positional argument is required. If missing or if `mcp__opencodehub__group_list` does not return the name, refuse with: - `Contract map requires a named group — run 'codehub group list' to see registered groups.` (Spec 001 AC-3-4.) + `Contract map requires a named group — run 'codehub group list' to see registered groups.` 2. `mcp__opencodehub__group_status({group})` must return `fresh: true` for every member. If any member is stale, abort and name each stale repo. ## Arguments @@ -32,8 +32,8 @@ Default output path: 1. Run the preconditions. 
Refuse on missing/unknown group. 2. `mcp__opencodehub__group_list` — confirm `` exists; read member list. 3. `mcp__opencodehub__group_status({group})` — confirm freshness per member. Abort with named stale repos otherwise. -4. `mcp__opencodehub__group_contracts({group})` — the spine. Returns `{consumerRepo, consumerRepoUri, consumerSymbol, producerRepo, producerRepoUri, producerRoute, method, path}` per row (legacy `consumerRepo`/`producerRepo` are the registry names; the `*RepoUri` siblings are the Sourcegraph-style cross-repo handle added in AC-M6-4 and are the preferred handle going forward). -5. If `group_contracts` returns `[]` (zero inter-repo contracts): still write the artifact with a `No inter-repo contracts detected` banner and an empty matrix. Do not error. (Spec 001 AC-5-5.) +4. `mcp__opencodehub__group_contracts({group})` — the spine. Returns `{consumerRepo, consumerRepoUri, consumerSymbol, producerRepo, producerRepoUri, producerRoute, method, path}` per row (legacy `consumerRepo`/`producerRepo` are the registry names; the `*RepoUri` siblings are the Sourcegraph-style cross-repo handle and are the preferred handle going forward). +5. If `group_contracts` returns `[]` (zero inter-repo contracts): still write the artifact with a `No inter-repo contracts detected` banner and an empty matrix. Do not error. 6. `mcp__opencodehub__group_query({group, text: "api handlers"})` — disambiguate producer-side locations. 7. For each member repo: `mcp__opencodehub__route_map({repo})` for handler-path citations. 8. Build the consumer/producer matrix: rows = producers, columns = consumers, cell = contract count. 
diff --git a/plugins/opencodehub/skills/codehub-document/references/cross-reference-spec.md b/plugins/opencodehub/skills/codehub-document/references/cross-reference-spec.md index d776f5e2..94d2c2a4 100644 --- a/plugins/opencodehub/skills/codehub-document/references/cross-reference-spec.md +++ b/plugins/opencodehub/skills/codehub-document/references/cross-reference-spec.md @@ -27,12 +27,12 @@ The assembler scans only between backtick pairs — never raw prose. 6. **Append** a `## See also` footer to every doc with ≥ 1 sibling. Use Markdown reference-style links, not inline URLs. 7. **Group mode**: for every `cross-repo/*.md` file, additionally append `## See also (other repos in group)` listing relative paths into sibling repos' generated docs (e.g., `../../billing/.codehub/docs/reference/public-api.md`). 8. **Dedup** sibling paths across both footer sections. -9. **Strip** any YAML frontmatter blocks on generated docs and record a `frontmatter_removed: []` entry in `.docmeta.json` (per spec AC-5-3). +9. **Strip** any YAML frontmatter blocks on generated docs and record a `frontmatter_removed: []` entry in `.docmeta.json`. 10. **Write** `README.md` (landing page with the "Prose is LLM-generated; structure is graph-derived" disclaimer) and `.docmeta.json` (schema below). ## `.docmeta.json` schema -The file carries a `schema_version` integer. **v2 is the current schema** (ships with AC-M6-3); v1 files on disk remain readable — the orchestrator lazily upgrades them on the next regeneration by re-running Phase E and writing v2. v2 adds one new field — `cross_repo_links[]` — populated in group mode from the `group_cross_repo_links` MCP tool. All v1 fields carry through unchanged. +The file carries a `schema_version` integer. **v2 is the current schema**; v1 files on disk remain readable — the orchestrator lazily upgrades them on the next regeneration by re-running Phase E and writing v2. 
v2 adds one new field — `cross_repo_links[]` — populated in group mode from the `group_cross_repo_links` MCP tool. All v1 fields carry through unchanged. ```json { diff --git a/plugins/opencodehub/skills/codehub-onboarding/SKILL.md b/plugins/opencodehub/skills/codehub-onboarding/SKILL.md index b355149f..274ac60a 100644 --- a/plugins/opencodehub/skills/codehub-onboarding/SKILL.md +++ b/plugins/opencodehub/skills/codehub-onboarding/SKILL.md @@ -14,7 +14,7 @@ Produces a single ONBOARDING.md with a ranked reading order drawn from graph cen ## Preconditions 1. `mcp__opencodehub__list_repos` returns the target. If not, emit `Run codehub analyze first — repo is not indexed.` and stop. -2. `codehub status` is fresh. If stale, emit `Run 'codehub analyze' first — index is stale` and stop. (Spec 001 AC-3-1.) +2. `codehub status` is fresh. If stale, emit `Run 'codehub analyze' first — index is stale` and stop. ## Arguments diff --git a/plugins/opencodehub/skills/codehub-pr-description/SKILL.md b/plugins/opencodehub/skills/codehub-pr-description/SKILL.md index 118dc930..ed975d64 100644 --- a/plugins/opencodehub/skills/codehub-pr-description/SKILL.md +++ b/plugins/opencodehub/skills/codehub-pr-description/SKILL.md @@ -14,7 +14,7 @@ Generates a Markdown PR body from graph primitives. Linear (no subagents). Sonne ## Preconditions 1. Resolve `--base` (default `main`) and `--head` (default `HEAD`) via `git rev-parse`. -2. `git diff --name-only ..` must return ≥ 1 path. If empty, emit `No diff detected — resolve base/head or stage changes.` and stop. (Spec 001 AC-5-4.) +2. `git diff --name-only ..` must return ≥ 1 path. If empty, emit `No diff detected — resolve base/head or stage changes.` and stop. ## Arguments diff --git a/scripts/acceptance.sh b/scripts/acceptance.sh index 37472312..b1802fbf 100755 --- a/scripts/acceptance.sh +++ b/scripts/acceptance.sh @@ -24,8 +24,8 @@ # 13. sarif-validation (zod schema vs emitted SARIF) [NEW v1.0] # 14. 
license-audit-smoke (analyze + license_audit tool) [NEW v1.0] # 15. verdict-smoke (2-commit fixture → tier) [NEW v1.0] -# 16. pack-determinism (code-pack ×2 → diff -r, U2) [NEW v1.0] -# 17. m7-parity-audit (analyze ×2 backends → graphHash, U1) [NEW v1.0] +# 16. pack-determinism (code-pack ×2 → diff -r) [NEW v1.0] +# 17. m7-parity-audit (analyze ×2 backends → graphHash) [NEW v1.0] # # Gates 10-17 MUST degrade gracefully: when their dependency binary is not # available (semgrep, embedder weights, codehub verdict command, populated @@ -551,7 +551,7 @@ fi echo # --------------------------------------------------------------------------- -# 16. Pack determinism: `codehub code-pack` ×2 → `diff -r` (U2 / E-M5-3) +# 16. Pack determinism: `codehub code-pack` ×2 → `diff -r` # --------------------------------------------------------------------------- echo "16/${TOTAL_GATES}: pack-determinism (code-pack ×2 → diff -r)" # The audit script SKIPs cleanly when the CLI isn't built or the repo lacks @@ -572,14 +572,15 @@ fi echo # --------------------------------------------------------------------------- -# 17. M7 parity audit: analyze ×2 backends → graphHash byte-identity (U1) +# 17. M7 parity audit: analyze ×2 backends → graphHash byte-identity # --------------------------------------------------------------------------- echo "17/${TOTAL_GATES}: m7-parity-audit (analyze ×2 backends → graphHash)" # The audit script runs `codehub analyze --force` under both `CODEHUB_STORE=duck` # and `CODEHUB_STORE=lbug`, then compares the `graph ` summary line. It # SKIPs cleanly when the CLI isn't built or the `@ladybugdb/core` binding is # not importable on this host. Companion to the in-memory parity harness -# (AC-A-7); together they pin U1 from both layers. +# (`packages/storage/src/test-utils/parity-harness.ts`); together they +# pin graphHash byte-identity from both layers. 
PARITY_LOG="$tmpdir/m7-parity-audit.log" if bash "$ROOT/scripts/m7-parity-audit.sh" > "$PARITY_LOG" 2>&1; then PARITY_LINE=$(head -1 "$PARITY_LOG" || true) @@ -589,7 +590,7 @@ if bash "$ROOT/scripts/m7-parity-audit.sh" > "$PARITY_LOG" 2>&1; then *) pass "m7-parity-audit: ${PARITY_LINE:-byte-identical}" ;; esac else - fail "m7-parity-audit: graphHash divergence across backends (U1 breach)" + fail "m7-parity-audit: graphHash divergence across backends" tail -20 "$PARITY_LOG" fi echo diff --git a/scripts/pack-determinism-audit.sh b/scripts/pack-determinism-audit.sh index 9523bfe9..fa61c931 100755 --- a/scripts/pack-determinism-audit.sh +++ b/scripts/pack-determinism-audit.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# scripts/pack-determinism-audit.sh — shell-level pack determinism gate (AC-M5-8). +# scripts/pack-determinism-audit.sh — shell-level pack determinism gate. # # Runs `codehub code-pack` twice against the same repo with identical args, # then `diff -r`'s the two output directories. PASS = byte-identical; @@ -8,7 +8,7 @@ # This is the shell-level companion to `packages/pack/src/pack-determinism.test.ts`. # The TS test pins the in-memory generatePack contract; this script pins the # real CLI binary against a real DuckStore — together they cover both layers -# of the U2 invariant. +# of the byte-identity invariant. # # Usage: # bash scripts/pack-determinism-audit.sh # uses repo root @@ -46,7 +46,7 @@ OUT_A="$TMP/pack-a" OUT_B="$TMP/pack-b" # Run the CLI twice with identical args. The two output dirs MUST match -# byte-for-byte (U2 / E-M5-3). +# byte-for-byte. node "$CLI" code-pack "$REPO" \ --budget 50000 \ --tokenizer "openai:o200k_base@tiktoken-0.8.0" \