From ad3b9ad58c393ec6451d563baaffaf4aaec74003 Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Sat, 11 Apr 2026 20:19:37 +0200 Subject: [PATCH 1/4] feat(v2.1): upgrade map to structural skeleton MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Key Interfaces section: top types/interfaces/classes by import centrality with signature hints - Add API Surface section: top 5 named exports per entrypoint file - Add Dependency Hotspots section: top 5 files by combined import+importedBy count - Enrich Architecture Layers with hub file + top exports per layer - Add --export flag: writes CODEBASE_MAP.md to project root - Update fixture relationships.json with exports field - Full test coverage for new sections + graceful degradation + --export flag - Gitignore CODEBASE_MAP.md (generated) and repos/ (benchmark fixtures) All data derived from existing index.json + relationships.json — no new I/O. --- .gitignore | 2 + src/cli-map.ts | 9 + src/core/codebase-map.ts | 230 +++++++++++++++++- src/types/index.ts | 35 +++ tests/__snapshots__/codebase-map.test.ts.snap | 36 ++- tests/cli-map-export.test.ts | 63 +++++ tests/codebase-map.test.ts | 107 +++++++- .../.codebase-context/relationships.json | 11 +- 8 files changed, 474 insertions(+), 19 deletions(-) create mode 100644 tests/cli-map-export.test.ts diff --git a/.gitignore b/.gitignore index 3c945b4..0b214a1 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,5 @@ docs/visuals.md .repolore/ .opencode/ .agents/ +CODEBASE_MAP.md +repos/ diff --git a/src/cli-map.ts b/src/cli-map.ts index 070ef1c..0f13606 100644 --- a/src/cli-map.ts +++ b/src/cli-map.ts @@ -32,6 +32,7 @@ function printMapUsage(): void { console.log('Output the conventions map for the current codebase.'); console.log(''); console.log('Options:'); + console.log(' --export Write CODEBASE_MAP.md to project root (overrides other flags)'); console.log(' --json Output raw JSON (CodebaseMapSummary)'); console.log(' --pretty Terminal-friendly box layout'); console.log(' --help Show this help'); @@ -42,6 +43,7 @@ function printMapUsage(): void { export async function handleMapCli(args: string[]): Promise { const useJson = args.includes('--json'); const usePretty = args.includes('--pretty'); + const useExport = args.includes('--export'); const showHelp = args.includes('--help') || args.includes('-h'); if (showHelp) { @@ -77,6 +79,13 @@ export async function handleMapCli(args: string[]): Promise { try { const map = await buildCodebaseMap(project); + if (useExport) { + const outPath = path.join(rootPath, 'CODEBASE_MAP.md'); + await fs.writeFile(outPath, renderMapMarkdown(map), 'utf-8'); + console.log(`Wrote ${outPath}`); + return; + } + if (useJson) { console.log(JSON.stringify(map, null, 2)); } else if (usePretty) { diff --git a/src/core/codebase-map.ts b/src/core/codebase-map.ts index a9bdb30..e12b803 100644 --- a/src/core/codebase-map.ts +++ b/src/core/codebase-map.ts @@ -17,10 +17,14 @@ import type { CodebaseMapPattern, CodebaseMapExample, CodebaseMapNextCall, + CodebaseMapKeyInterface, + CodebaseMapApiSurface, + CodebaseMapHotspot, IntelligenceData, - PatternsData + PatternsData, + CodeChunk } from '../types/index.js'; -import { RELATIONSHIPS_FILENAME } from '../constants/codebase-context.js'; +import { RELATIONSHIPS_FILENAME, KEYWORD_INDEX_FILENAME } from '../constants/codebase-context.js'; // --------------------------------------------------------------------------- // Internal types for relationships.json @@ -29,6 +33,7 @@ import { RELATIONSHIPS_FILENAME } from '../constants/codebase-context.js'; interface RelationshipsGraph { imports?: Record; importedBy?: Record; + exports?: Record>; stats?: { files?: number; edges?: number; @@ -58,7 +63,7 @@ const ENTRYPOINT_EXCLUSION_RE = /** * Build a `CodebaseMapSummary` from the project's index artifacts. - * Reads `intelligence.json` and `relationships.json` from project paths. + * Reads `intelligence.json`, `relationships.json`, and `index.json` from project paths. * Degrades gracefully when artifacts are missing. */ export async function buildCodebaseMap(project: ProjectState): Promise { @@ -83,9 +88,21 @@ export async function buildCodebaseMap(project: ProjectState): Promise ({ name, fileCount })), (l) => l.fileCount, (l) => l.name ); + const layers = enrichLayers(rawLayers, graphImportedBy, graphExports); // --- Entrypoints --- const entrypoints: string[] = []; @@ -135,6 +153,15 @@ export async function buildCodebaseMap(project: ProjectState): Promise x.file); + // --- Key interfaces --- + const keyInterfaces = deriveKeyInterfaces(chunks, graphImportedBy); + + // --- API surface --- + const apiSurface = deriveApiSurface(entrypoints, graphExports); + + // --- Dependency hotspots --- + const hotspots = deriveHotspots(graphImports, graphImportedBy); + // --- Active patterns --- const patterns: PatternsData = intelligence.patterns ?? {}; const activePatterns: CodebaseMapPattern[] = []; @@ -183,7 +210,7 @@ export async function buildCodebaseMap(project: ProjectState): Promise l.trim()).filter(Boolean); + const hint = lines.slice(0, 3).join('\n'); + const truncated = hint.length > 200 ? hint.slice(0, 197) + '...' : hint; + return truncated.replace(/\s*\{$/, '').trim(); +} + +function deriveKeyInterfaces( + chunks: CodeChunk[], + graphImportedBy: Record +): CodebaseMapKeyInterface[] { + const symbolChunks = chunks.filter( + (c) => c.metadata?.symbolAware === true && SYMBOL_KINDS.has(c.metadata.symbolKind ?? '') + ); + const scored = symbolChunks.map((c) => ({ + chunk: c, + importerCount: graphImportedBy[c.relativePath]?.length ?? 0 + })); + scored.sort((a, b) => { + if (b.importerCount !== a.importerCount) return b.importerCount - a.importerCount; + const lenDiff = a.chunk.content.length - b.chunk.content.length; + if (lenDiff !== 0) return lenDiff; + return a.chunk.relativePath.localeCompare(b.chunk.relativePath); + }); + return scored.slice(0, 10).map(({ chunk, importerCount }) => ({ + name: chunk.metadata.symbolName ?? path.basename(chunk.relativePath), + kind: chunk.metadata.symbolKind ?? 'unknown', + file: chunk.relativePath, + importerCount, + signatureHint: buildSignatureHint(chunk.content) + })); +} + +function deriveApiSurface( + entrypoints: string[], + graphExports: Record> +): CodebaseMapApiSurface[] { + const results: CodebaseMapApiSurface[] = []; + for (const ep of entrypoints) { + const exps = graphExports[ep]; + if (!exps || exps.length === 0) continue; + const names = exps + .map((e) => e.name) + .filter((n) => n && n !== 'default') + .slice(0, 5); + if (names.length === 0) continue; + results.push({ file: ep, exports: names }); + } + return results; +} + +function deriveHotspots( + graphImports: Record, + graphImportedBy: Record +): CodebaseMapHotspot[] { + const allFiles = new Set([...Object.keys(graphImports), ...Object.keys(graphImportedBy)]); + const hotspots: CodebaseMapHotspot[] = []; + for (const file of allFiles) { + const importerCount = graphImportedBy[file]?.length ?? 0; + const importCount = graphImports[file]?.length ?? 0; + const combined = importerCount + importCount; + if (combined === 0) continue; + hotspots.push({ file, importerCount, importCount, combined }); + } + hotspots.sort((a, b) => { + if (b.combined !== a.combined) return b.combined - a.combined; + return a.file.localeCompare(b.file); + }); + return hotspots.slice(0, 5); +} + +function enrichLayers( + layers: CodebaseMapLayer[], + graphImportedBy: Record, + graphExports: Record> +): CodebaseMapLayer[] { + return layers.map((layer) => { + let bestFile: string | undefined; + let bestCount = 0; + for (const [file, importers] of Object.entries(graphImportedBy)) { + if (file.split('/')[0] !== layer.name) continue; + if (importers.length > bestCount) { + bestCount = importers.length; + bestFile = file; + } + } + if (!bestFile) return layer; + const exps = graphExports[bestFile]; + const hubExports = exps + ? exps + .map((e) => e.name) + .filter((n) => n && n !== 'default') + .slice(0, 3) + : []; + return { + ...layer, + hubFile: bestFile, + ...(hubExports.length > 0 ? { hubExports } : {}) + }; + }); +} + // --------------------------------------------------------------------------- // Suggested next calls // --------------------------------------------------------------------------- @@ -253,16 +388,22 @@ export function renderMapMarkdown(map: CodebaseMapSummary): string { lines.push(`# Codebase Map — ${map.project}`); lines.push(''); - // Architecture + // Architecture layers lines.push('## Architecture Layers'); lines.push(''); if (map.architecture.layers.length === 0) { lines.push('_No index data available._'); } else { for (const layer of map.architecture.layers) { - lines.push( - `- **${layer.name}** (${layer.fileCount} file${layer.fileCount === 1 ? '' : 's'})` - ); + let line = `- **${layer.name}** (${layer.fileCount} file${layer.fileCount === 1 ? '' : 's'})`; + if (layer.hubFile) { + const exStr = + layer.hubExports && layer.hubExports.length > 0 + ? ` → ${layer.hubExports.join(', ')}` + : ''; + line += ` — hub: \`${layer.hubFile}\`${exStr}`; + } + lines.push(line); } } lines.push(''); @@ -291,6 +432,51 @@ export function renderMapMarkdown(map: CodebaseMapSummary): string { } lines.push(''); + // Key Interfaces + lines.push('## Key Interfaces'); + lines.push(''); + if (map.architecture.keyInterfaces.length === 0) { + lines.push('_None detected._'); + } else { + for (const ki of map.architecture.keyInterfaces) { + lines.push( + `- **${ki.name}** \`${ki.kind}\` — \`${ki.file}\` (imported by ${ki.importerCount})` + ); + if (ki.signatureHint) { + lines.push(' ```'); + lines.push(` ${ki.signatureHint.split('\n').join('\n ')}`); + lines.push(' ```'); + } + } + } + lines.push(''); + + // API Surface + lines.push('## API Surface'); + lines.push(''); + if (map.architecture.apiSurface.length === 0) { + lines.push('_None detected._'); + } else { + for (const s of map.architecture.apiSurface) { + lines.push(`- \`${s.file}\` — exports: ${s.exports.join(', ')}`); + } + } + lines.push(''); + + // Dependency Hotspots + lines.push('## Dependency Hotspots'); + lines.push(''); + if (map.architecture.hotspots.length === 0) { + lines.push('_None detected._'); + } else { + for (const h of map.architecture.hotspots) { + lines.push( + `- \`${h.file}\` — imported by ${h.importerCount}, imports ${h.importCount} (combined: ${h.combined})` + ); + } + } + lines.push(''); + // Patterns lines.push('## Active Patterns'); lines.push(''); @@ -376,7 +562,9 @@ export function renderMapPretty(map: CodebaseMapSummary): string { const layerLines = map.architecture.layers.length === 0 ? ['(none)'] - : map.architecture.layers.map((l) => `${l.name} ${l.fileCount} files`); + : map.architecture.layers.map((l) => + l.hubFile ? `${l.name} ${l.fileCount} files [${l.hubFile}]` : `${l.name} ${l.fileCount} files` + ); sections.push(box('Architecture Layers', layerLines)); const epLines = @@ -387,6 +575,28 @@ export function renderMapPretty(map: CodebaseMapSummary): string { map.architecture.hubFiles.length === 0 ? ['(none detected)'] : map.architecture.hubFiles; sections.push(box('Hub Files', hubLines)); + const kiLines = + map.architecture.keyInterfaces.length === 0 + ? ['(none detected)'] + : map.architecture.keyInterfaces.map( + (ki) => `${ki.name} ${ki.kind} ${ki.file} (×${ki.importerCount})` + ); + sections.push(box('Key Interfaces', kiLines)); + + const apiLines = + map.architecture.apiSurface.length === 0 + ? ['(none detected)'] + : map.architecture.apiSurface.map((s) => `${s.file}: ${s.exports.join(', ')}`); + sections.push(box('API Surface', apiLines)); + + const hotspotLines = + map.architecture.hotspots.length === 0 + ? ['(none detected)'] + : map.architecture.hotspots.map( + (h) => `${h.file} +${h.importerCount}/-${h.importCount}` + ); + sections.push(box('Dependency Hotspots', hotspotLines)); + const patternLines = map.activePatterns.length === 0 ? ['(no patterns)'] diff --git a/src/types/index.ts b/src/types/index.ts index 7661123..2ae6ca2 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -651,6 +651,35 @@ export interface IntelligenceGoldenFile { export interface CodebaseMapLayer { name: string; fileCount: number; + /** Most-imported file whose first path segment matches this layer */ + hubFile?: string; + /** Top 3 named exports from hubFile */ + hubExports?: string[]; +} + +/** Key interface/class/type from the index, ranked by import centrality */ +export interface CodebaseMapKeyInterface { + name: string; + kind: string; + file: string; + importerCount: number; + /** First 1–3 non-empty content lines, trimmed, max 200 chars, trailing { stripped */ + signatureHint: string; +} + +/** Exported symbols per entrypoint file */ +export interface CodebaseMapApiSurface { + file: string; + /** Up to 5 named exports (no 'default') */ + exports: string[]; +} + +/** File ranked by combined import + importer count */ +export interface CodebaseMapHotspot { + file: string; + importerCount: number; + importCount: number; + combined: number; } /** Active pattern from intelligence.json with adoption and trend */ @@ -693,6 +722,12 @@ export interface CodebaseMapSummary { entrypoints: string[]; /** Top most-imported internal files by importedBy count */ hubFiles: string[]; + /** Top 10 interfaces/classes/types by import centrality */ + keyInterfaces: CodebaseMapKeyInterface[]; + /** Exported API surface per entrypoint */ + apiSurface: CodebaseMapApiSurface[]; + /** Top 5 files by combined import + importer count */ + hotspots: CodebaseMapHotspot[]; }; activePatterns: CodebaseMapPattern[]; bestExamples: CodebaseMapExample[]; diff --git a/tests/__snapshots__/codebase-map.test.ts.snap b/tests/__snapshots__/codebase-map.test.ts.snap index f8c05dc..be94e64 100644 --- a/tests/__snapshots__/codebase-map.test.ts.snap +++ b/tests/__snapshots__/codebase-map.test.ts.snap @@ -5,9 +5,9 @@ exports[`renderMapMarkdown > renders deterministic markdown from fixture — sna ## Architecture Layers -- **src** (5 files) +- **src** (5 files) — hub: \`src/core/search.ts\` - **tests** (2 files) -- **lib** (1 file) +- **lib** (1 file) — hub: \`lib/utils.ts\` ## Entrypoints @@ -20,6 +20,38 @@ exports[`renderMapMarkdown > renders deterministic markdown from fixture — sna - \`src/utils/helpers.ts\` - \`lib/utils.ts\` +## Key Interfaces + +- **SearchOptions** \`interface\` — \`src/core/search.ts\` (imported by 3) + \`\`\` + export interface SearchOptions { + query: string; + limit?: number; + \`\`\` +- **CodebaseSearcher** \`class\` — \`src/core/search.ts\` (imported by 3) + \`\`\` + export class CodebaseSearcher { + private rootPath: string; + constructor(rootPath: string) + \`\`\` +- **SearchResult** \`type\` — \`src/types.ts\` (imported by 0) + \`\`\` + export type SearchResult = { chunk: CodeChunk; score: number; }; + \`\`\` + +## API Surface + +- \`src/cli.ts\` — exports: runCli, parseArgs +- \`src/index.ts\` — exports: main, createServer + +## Dependency Hotspots + +- \`src/core/search.ts\` — imported by 3, imports 2 (combined: 5) +- \`src/utils/helpers.ts\` — imported by 3, imports 0 (combined: 3) +- \`lib/utils.ts\` — imported by 2, imports 0 (combined: 2) +- \`src/cli.ts\` — imported by 0, imports 2 (combined: 2) +- \`src/index.ts\` — imported by 0, imports 2 (combined: 2) + ## Active Patterns - **Injectable**: 100% (Stable) diff --git a/tests/cli-map-export.test.ts b/tests/cli-map-export.test.ts new file mode 100644 index 0000000..78dd9cc --- /dev/null +++ b/tests/cli-map-export.test.ts @@ -0,0 +1,63 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { mkdtemp, rm, access, readFile } from 'fs/promises'; +import os from 'os'; +import path from 'path'; +import { handleMapCli } from '../src/cli-map.js'; + +describe('handleMapCli --export', () => { + let tmpDir: string; + + afterEach(async () => { + if (tmpDir) await rm(tmpDir, { recursive: true, force: true }); + }); + + it('writes CODEBASE_MAP.md to rootPath', async () => { + tmpDir = await mkdtemp(path.join(os.tmpdir(), 'cli-map-export-')); + const original = process.env.CODEBASE_ROOT; + process.env.CODEBASE_ROOT = tmpDir; + try { + await handleMapCli(['--export']); + // File must exist + await access(path.join(tmpDir, 'CODEBASE_MAP.md')); + // Content must be markdown (starts with # Codebase Map) + const content = await readFile(path.join(tmpDir, 'CODEBASE_MAP.md'), 'utf-8'); + expect(content).toContain('# Codebase Map'); + } finally { + if (original === undefined) delete process.env.CODEBASE_ROOT; + else process.env.CODEBASE_ROOT = original; + } + }); + + it('--export takes precedence over --json', async () => { + tmpDir = await mkdtemp(path.join(os.tmpdir(), 'cli-map-export-')); + const original = process.env.CODEBASE_ROOT; + process.env.CODEBASE_ROOT = tmpDir; + try { + await handleMapCli(['--export', '--json']); + // Must write the file (not print JSON and skip) + await access(path.join(tmpDir, 'CODEBASE_MAP.md')); + // File content must be markdown, not JSON + const content = await readFile(path.join(tmpDir, 'CODEBASE_MAP.md'), 'utf-8'); + expect(content).toContain('# Codebase Map'); + expect(content).not.toMatch(/^\{/); + } finally { + if (original === undefined) delete process.env.CODEBASE_ROOT; + else process.env.CODEBASE_ROOT = original; + } + }); + + it('--export takes precedence over --pretty', async () => { + tmpDir = await mkdtemp(path.join(os.tmpdir(), 'cli-map-export-')); + const original = process.env.CODEBASE_ROOT; + process.env.CODEBASE_ROOT = tmpDir; + try { + await handleMapCli(['--export', '--pretty']); + await access(path.join(tmpDir, 'CODEBASE_MAP.md')); + const content = await readFile(path.join(tmpDir, 'CODEBASE_MAP.md'), 'utf-8'); + expect(content).toContain('# Codebase Map'); + } finally { + if (original === undefined) delete process.env.CODEBASE_ROOT; + else process.env.CODEBASE_ROOT = original; + } + }); +}); diff --git a/tests/codebase-map.test.ts b/tests/codebase-map.test.ts index 718aaee..debc20d 100644 --- a/tests/codebase-map.test.ts +++ b/tests/codebase-map.test.ts @@ -24,11 +24,11 @@ describe('buildCodebaseMap', () => { it('derives architecture layers from graph keys, sorted by count desc then alpha', async () => { const project = createProjectState(FIXTURE_ROOT); const map = await buildCodebaseMap(project); - expect(map.architecture.layers).toEqual([ - { name: 'src', fileCount: 5 }, - { name: 'tests', fileCount: 2 }, - { name: 'lib', fileCount: 1 } - ]); + // Use objectContaining — layers may now have hubFile/hubExports from enrichLayers + expect(map.architecture.layers).toHaveLength(3); + expect(map.architecture.layers[0]).toMatchObject({ name: 'src', fileCount: 5 }); + expect(map.architecture.layers[1]).toMatchObject({ name: 'tests', fileCount: 2 }); + expect(map.architecture.layers[2]).toMatchObject({ name: 'lib', fileCount: 1 }); }); it('derives entrypoints: files with imports but zero importers, excluding tests/scripts', async () => { @@ -103,6 +103,9 @@ describe('buildCodebaseMap', () => { expect(map.architecture.layers).toEqual([]); expect(map.architecture.entrypoints).toEqual([]); expect(map.architecture.hubFiles).toEqual([]); + expect(map.architecture.keyInterfaces).toEqual([]); + expect(map.architecture.apiSurface).toEqual([]); + expect(map.architecture.hotspots).toEqual([]); expect(map.activePatterns).toEqual([]); expect(map.bestExamples).toEqual([]); expect(map.graphStats).toEqual({ files: 0, edges: 0, avgDependencies: 0 }); @@ -115,6 +118,88 @@ describe('buildCodebaseMap', () => { const map = await buildCodebaseMap(project); expect(map.suggestedNextCalls.length).toBeLessThanOrEqual(3); }); + + // --- Structural skeleton (Phase 13) --- + + it('derives keyInterfaces from symbolAware chunks, sorted by importer count', async () => { + const project = createProjectState(FIXTURE_ROOT); + const map = await buildCodebaseMap(project); + // SearchOptions and CodebaseSearcher are both in src/core/search.ts (3 importers) + // SearchResult is in src/types.ts (0 importers) + // helperUtil is not symbolAware — excluded + expect(map.architecture.keyInterfaces.length).toBeGreaterThanOrEqual(2); + // Items with same importerCount: shorter content first → SearchOptions before CodebaseSearcher + expect(map.architecture.keyInterfaces[0].name).toBe('SearchOptions'); + expect(map.architecture.keyInterfaces[0].importerCount).toBe(3); + expect(map.architecture.keyInterfaces[0].kind).toBe('interface'); + expect(map.architecture.keyInterfaces[0].file).toBe('src/core/search.ts'); + }); + + it('signatureHint strips trailing { and caps at 200 chars', async () => { + const project = createProjectState(FIXTURE_ROOT); + const map = await buildCodebaseMap(project); + for (const ki of map.architecture.keyInterfaces) { + expect(ki.signatureHint).not.toMatch(/\{$/); + expect(ki.signatureHint.length).toBeLessThanOrEqual(200); + } + }); + + it('signatureHint contains the symbol name', async () => { + const project = createProjectState(FIXTURE_ROOT); + const map = await buildCodebaseMap(project); + const iface = map.architecture.keyInterfaces.find((k) => k.name === 'SearchOptions')!; + expect(iface.signatureHint).toContain('SearchOptions'); + }); + + it('derives apiSurface from entrypoints x graph.exports', async () => { + const project = createProjectState(FIXTURE_ROOT); + const map = await buildCodebaseMap(project); + // src/cli.ts and src/index.ts are entrypoints; both have exports in fixture + const cli = map.architecture.apiSurface.find((s) => s.file === 'src/cli.ts'); + expect(cli).toBeDefined(); + expect(cli!.exports).toContain('runCli'); + expect(cli!.exports).toContain('parseArgs'); + expect(cli!.exports.length).toBeLessThanOrEqual(5); + }); + + it('apiSurface excludes default exports', async () => { + const project = createProjectState(FIXTURE_ROOT); + const map = await buildCodebaseMap(project); + for (const surface of map.architecture.apiSurface) { + expect(surface.exports).not.toContain('default'); + } + }); + + it('derives hotspots sorted by combined import + importer count', async () => { + const project = createProjectState(FIXTURE_ROOT); + const map = await buildCodebaseMap(project); + expect(map.architecture.hotspots.length).toBeLessThanOrEqual(5); + // src/core/search.ts: importedBy=3, imports=2 → combined=5 (highest) + expect(map.architecture.hotspots[0].file).toBe('src/core/search.ts'); + expect(map.architecture.hotspots[0].combined).toBe(5); + // combined is always importerCount + importCount + for (const h of map.architecture.hotspots) { + expect(h.combined).toBe(h.importerCount + h.importCount); + } + }); + + it('enriches layers with hubFile from importedBy data', async () => { + const project = createProjectState(FIXTURE_ROOT); + const map = await buildCodebaseMap(project); + const srcLayer = map.architecture.layers.find((l) => l.name === 'src')!; + // src/core/search.ts has 3 importers — highest in the src layer + expect(srcLayer.hubFile).toBe('src/core/search.ts'); + }); + + it('enriches layers with hubExports when graph.exports has data', async () => { + const project = createProjectState(FIXTURE_ROOT); + const map = await buildCodebaseMap(project); + // src/cli.ts has exports in fixture but is not the hub of the src layer + // src/index.ts has exports and is also in src — but search.ts (hub) has no exports in fixture + const srcLayer = map.architecture.layers.find((l) => l.name === 'src')!; + // search.ts has no exports in fixture → hubExports should be absent + expect(srcLayer.hubExports).toBeUndefined(); + }); }); // --------------------------------------------------------------------------- @@ -137,6 +222,9 @@ describe('renderMapMarkdown', () => { expect(md).toContain('## Architecture Layers'); expect(md).toContain('## Entrypoints'); expect(md).toContain('## Hub Files'); + expect(md).toContain('## Key Interfaces'); + expect(md).toContain('## API Surface'); + expect(md).toContain('## Dependency Hotspots'); expect(md).toContain('## Active Patterns'); expect(md).toContain('## Best Examples'); expect(md).toContain('## Graph Stats'); @@ -146,7 +234,14 @@ describe('renderMapMarkdown', () => { it('renders empty map sections gracefully', () => { const emptyMap = { project: 'empty', - architecture: { layers: [], entrypoints: [], hubFiles: [] }, + architecture: { + layers: [], + entrypoints: [], + hubFiles: [], + keyInterfaces: [], + apiSurface: [], + hotspots: [] + }, activePatterns: [], bestExamples: [], graphStats: { files: 0, edges: 0, avgDependencies: 0 }, diff --git a/tests/fixtures/map-fixture/.codebase-context/relationships.json b/tests/fixtures/map-fixture/.codebase-context/relationships.json index 196f6d3..fd3fca1 100644 --- a/tests/fixtures/map-fixture/.codebase-context/relationships.json +++ b/tests/fixtures/map-fixture/.codebase-context/relationships.json @@ -17,7 +17,16 @@ "lib/utils.ts": ["src/core/search.ts", "src/cli.ts"] }, "importDetails": {}, - "exports": {} + "exports": { + "src/cli.ts": [ + { "name": "runCli", "type": "function" }, + { "name": "parseArgs", "type": "function" } + ], + "src/index.ts": [ + { "name": "main", "type": "function" }, + { "name": "createServer", "type": "function" } + ] + } }, "stats": { "files": 8, From 804c3d3f9bc9feae5aed42e8396eda4af80a76b4 Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Sat, 11 Apr 2026 20:19:49 +0200 Subject: [PATCH 2/4] feat(v2.1): surface structural metadata in search + fix reranker retry regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit search-codebase: compact results now include symbol, symbolKind, scope, signaturePreview; full results include chunk imports/exports/complexity. Surfaces reranker health in searchQualityBlock when unavailable. reranker: add RerankerStatus type + getRerankerStatus() export. Add cache-corruption detection (Protobuf/parse errors trigger cache clear). Fix retry regression: replace initPromise=null reset with initFailed guard so failed loads fast-fail on subsequent calls instead of retrying the expensive model download — restoring test suite stability. --- src/core/reranker.ts | 61 +++++++++++++++++++++++++++++------- src/core/search.ts | 2 +- src/tools/search-codebase.ts | 38 ++++++++++++++++++++-- 3 files changed, 86 insertions(+), 15 deletions(-) diff --git a/src/core/reranker.ts b/src/core/reranker.ts index 43b4a90..458fd6d 100644 --- a/src/core/reranker.ts +++ b/src/core/reranker.ts @@ -34,29 +34,67 @@ interface CrossEncoderModel { let cachedTokenizer: CrossEncoderTokenizer | null = null; let cachedModel: CrossEncoderModel | null = null; let initPromise: Promise | null = null; +/** Set permanently after a non-recoverable load failure so subsequent calls fast-fail. */ +let initFailed = false; + +/** Tracks reranker operational health for surfacing in search quality */ +export type RerankerStatus = 'active' | 'fallback' | 'unavailable'; +let rerankerHealth: RerankerStatus = 'fallback'; + +/** Returns the current reranker health status */ +export function getRerankerStatus(): RerankerStatus { + return rerankerHealth; +} async function ensureModelLoaded(): Promise { if (cachedModel && cachedTokenizer) return; + // Fast-fail if a prior attempt already determined the model is unavailable. + if (initFailed) throw new Error('[reranker] Model unavailable (prior load failed)'); if (initPromise) return initPromise; initPromise = (async () => { - const { AutoTokenizer, AutoModelForSequenceClassification } = + const { AutoTokenizer, AutoModelForSequenceClassification, env } = await import('@huggingface/transformers'); console.error(`[reranker] Loading cross-encoder: ${DEFAULT_RERANKER_MODEL}`); console.error('[reranker] (First run will download the model - this may take a moment)'); - cachedTokenizer = await AutoTokenizer.from_pretrained(DEFAULT_RERANKER_MODEL); - cachedModel = await AutoModelForSequenceClassification.from_pretrained(DEFAULT_RERANKER_MODEL, { - dtype: 'q8', - // Limit ONNX Runtime to half cores by default — prevents system freeze during indexing. - session_options: { - intraOpNumThreads: Math.max(1, Math.floor(os.cpus().length / 2)), - interOpNumThreads: 1 + try { + cachedTokenizer = await AutoTokenizer.from_pretrained(DEFAULT_RERANKER_MODEL); + cachedModel = await AutoModelForSequenceClassification.from_pretrained(DEFAULT_RERANKER_MODEL, { + dtype: 'q8', + // Limit ONNX Runtime to half cores by default — prevents system freeze during indexing. + session_options: { + intraOpNumThreads: Math.max(1, Math.floor(os.cpus().length / 2)), + interOpNumThreads: 1 + } + }); + rerankerHealth = 'fallback'; // loaded but not yet triggered + console.error('[reranker] Cross-encoder loaded successfully'); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + if (msg.includes('Protobuf') || msg.includes('parse') || msg.includes('corrupt')) { + // Corrupted cache — clear it so next session re-downloads + console.error(`[reranker] Cache corruption detected: ${msg}`); + console.error('[reranker] Clearing corrupted cache. Next startup will re-download.'); + try { + const cacheDir = env.cacheDir ?? null; + if (cacheDir) { + const { rmSync, existsSync } = await import('fs'); + const modelCacheDir = `${cacheDir}/Xenova/ms-marco-MiniLM-L-6-v2`; + if (existsSync(modelCacheDir)) { + rmSync(modelCacheDir, { recursive: true, force: true }); + console.error('[reranker] Corrupted cache cleared. Will re-download on next call.'); + } + } + } catch { + // Cache clear is best-effort + } } - }); - - console.error('[reranker] Cross-encoder loaded successfully'); + rerankerHealth = 'unavailable'; + initFailed = true; + throw err; + } })(); return initPromise; @@ -141,6 +179,7 @@ export async function rerank(query: string, results: SearchResult[]): Promise { const importedByCount = getImportedByCount(r); const topExports = getTopExports(r.filePath); + const scope = buildScopeHeader(r.metadata); + // First 3 lines of chunk content as a lightweight signature preview + const signaturePreview = r.snippet + ? r.snippet + .replace(/^\r?\n+/, '') + .split('\n') + .slice(0, 3) + .join('\n') + .trim() || undefined + : undefined; return { file: `${r.filePath}:${r.startLine}-${r.endLine}`, summary: r.summary, @@ -1079,7 +1093,12 @@ export async function handle( ...(r.patternWarning && { patternWarning: r.patternWarning }), importedByCount, ...(topExports.length > 0 && { topExports }), - ...(r.layer && r.layer !== 'unknown' && { layer: r.layer }) + ...(r.layer && r.layer !== 'unknown' && { layer: r.layer }), + // Structural metadata: surface AST intelligence already computed at index time + ...(r.metadata?.symbolName && { symbol: r.metadata.symbolName }), + ...(r.metadata?.symbolKind && { symbolKind: r.metadata.symbolKind }), + ...(scope && { scope }), + ...(signaturePreview && { signaturePreview }) }; }), ...(strongMemories.length > 0 && { @@ -1110,6 +1129,10 @@ export async function handle( const enrichedSnippet = includeSnippets ? enrichSnippetWithScope(r.snippet, r.metadata, r.filePath, r.startLine) : undefined; + const scope = buildScopeHeader(r.metadata); + // Chunk-level imports/exports (top 5 each) + complexity + const chunkImports = (r as unknown as { imports?: string[] }).imports?.slice(0, 5); + const chunkExports = (r as unknown as { exports?: string[] }).exports?.slice(0, 5); return { file: `${r.filePath}:${r.startLine}-${r.endLine}`, @@ -1125,7 +1148,16 @@ export async function handle( relationships: relationshipsAndHints.relationships }), ...(relationshipsAndHints.hints && { hints: relationshipsAndHints.hints }), - ...(enrichedSnippet && { snippet: enrichedSnippet }) + ...(enrichedSnippet && { snippet: enrichedSnippet }), + // Structural metadata + ...(r.metadata?.symbolName && { symbol: r.metadata.symbolName }), + ...(r.metadata?.symbolKind && { symbolKind: r.metadata.symbolKind }), + ...(scope && { scope }), + ...(chunkImports && chunkImports.length > 0 && { imports: chunkImports }), + ...(chunkExports && chunkExports.length > 0 && { exports: chunkExports }), + ...(r.metadata?.cyclomaticComplexity && { + complexity: r.metadata.cyclomaticComplexity + }) }; }), totalResults: results.length, From 37b43cd1efa19863c96372dd71219848a462fb79 Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Sat, 11 Apr 2026 20:20:41 +0200 Subject: [PATCH 3/4] chore: fix prettier formatting in codebase-map and reranker --- src/core/codebase-map.ts | 13 ++++++++----- src/core/reranker.ts | 17 ++++++++++------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/core/codebase-map.ts b/src/core/codebase-map.ts index e12b803..5b69f7a 100644 --- a/src/core/codebase-map.ts +++ b/src/core/codebase-map.ts @@ -225,7 +225,10 @@ export async function buildCodebaseMap(project: ProjectState): Promise l.trim()).filter(Boolean); + const lines = content + .split('\n') + .map((l) => l.trim()) + .filter(Boolean); const hint = lines.slice(0, 3).join('\n'); const truncated = hint.length > 200 ? hint.slice(0, 197) + '...' : hint; return truncated.replace(/\s*\{$/, '').trim(); @@ -563,7 +566,9 @@ export function renderMapPretty(map: CodebaseMapSummary): string { map.architecture.layers.length === 0 ? ['(none)'] : map.architecture.layers.map((l) => - l.hubFile ? `${l.name} ${l.fileCount} files [${l.hubFile}]` : `${l.name} ${l.fileCount} files` + l.hubFile + ? `${l.name} ${l.fileCount} files [${l.hubFile}]` + : `${l.name} ${l.fileCount} files` ); sections.push(box('Architecture Layers', layerLines)); @@ -592,9 +597,7 @@ export function renderMapPretty(map: CodebaseMapSummary): string { const hotspotLines = map.architecture.hotspots.length === 0 ? ['(none detected)'] - : map.architecture.hotspots.map( - (h) => `${h.file} +${h.importerCount}/-${h.importCount}` - ); + : map.architecture.hotspots.map((h) => `${h.file} +${h.importerCount}/-${h.importCount}`); sections.push(box('Dependency Hotspots', hotspotLines)); const patternLines = diff --git a/src/core/reranker.ts b/src/core/reranker.ts index 458fd6d..91d4d4b 100644 --- a/src/core/reranker.ts +++ b/src/core/reranker.ts @@ -61,14 +61,17 @@ async function ensureModelLoaded(): Promise { try { cachedTokenizer = await AutoTokenizer.from_pretrained(DEFAULT_RERANKER_MODEL); - cachedModel = await AutoModelForSequenceClassification.from_pretrained(DEFAULT_RERANKER_MODEL, { - dtype: 'q8', - // Limit ONNX Runtime to half cores by default — prevents system freeze during indexing. - session_options: { - intraOpNumThreads: Math.max(1, Math.floor(os.cpus().length / 2)), - interOpNumThreads: 1 + cachedModel = await AutoModelForSequenceClassification.from_pretrained( + DEFAULT_RERANKER_MODEL, + { + dtype: 'q8', + // Limit ONNX Runtime to half cores by default — prevents system freeze during indexing. + session_options: { + intraOpNumThreads: Math.max(1, Math.floor(os.cpus().length / 2)), + interOpNumThreads: 1 + } } - }); + ); rerankerHealth = 'fallback'; // loaded but not yet triggered console.error('[reranker] Cross-encoder loaded successfully'); } catch (err) { From 762ab9de6269f0f8e99db14364ca3c59470fbdd6 Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Sat, 11 Apr 2026 20:47:18 +0200 Subject: [PATCH 4/4] fix(reranker): narrow initFailed guard to corrupt-cache errors only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Transient load failures (network, timeout, etc.) now reset initPromise=null so the next call can retry. Only Protobuf/parse/corrupt errors are marked permanently failed — those require a cache re-download in a new session. Long-lived MCP servers can now recover from transient load failures without requiring a restart. Addresses grey-area identified during PR #95 Greptile audit. --- src/core/reranker.ts | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/core/reranker.ts b/src/core/reranker.ts index 91d4d4b..df8b3bd 100644 --- a/src/core/reranker.ts +++ b/src/core/reranker.ts @@ -76,7 +76,10 @@ async function ensureModelLoaded(): Promise { console.error('[reranker] Cross-encoder loaded successfully'); } catch (err) { const msg = err instanceof Error ? err.message : String(err); - if (msg.includes('Protobuf') || msg.includes('parse') || msg.includes('corrupt')) { + const isCorrupt = + msg.includes('Protobuf') || msg.includes('parse') || msg.includes('corrupt'); + + if (isCorrupt) { // Corrupted cache — clear it so next session re-downloads console.error(`[reranker] Cache corruption detected: ${msg}`); console.error('[reranker] Clearing corrupted cache. Next startup will re-download.'); @@ -93,9 +96,15 @@ async function ensureModelLoaded(): Promise { } catch { // Cache clear is best-effort } + rerankerHealth = 'unavailable'; + // Permanent fail — corrupt cache can't be retried in this session. + initFailed = true; + throw err; } + + // Transient error (network, timeout, etc.) — allow retry on next call. rerankerHealth = 'unavailable'; - initFailed = true; + initPromise = null; throw err; } })();