From 24de50e86091d3fc4011d944f097adb039bfc3a4 Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Sun, 8 Feb 2026 21:50:52 +0100 Subject: [PATCH 1/3] fix: harden search reliability and indexing hygiene --- .gitignore | 2 + src/core/analyzer-registry.ts | 4 +- src/core/indexer.ts | 21 ++ src/core/search-quality.ts | 83 +++++ src/core/search.ts | 430 ++++++++++++++++++++---- src/index.ts | 219 ++++++++++-- src/patterns/semantics.ts | 35 ++ src/preflight/query-scope.ts | 41 +++ src/resources/uri.ts | 14 + src/storage/lancedb.ts | 4 +- src/types/index.ts | 15 + src/utils/git-dates.ts | 24 ++ src/utils/usage-tracker.ts | 8 +- tests/analyzer-registry.test.ts | 93 +++-- tests/incremental-indexing.test.ts | 53 +++ tests/lancedb-corruption.test.ts | 46 ++- tests/pattern-detector.test.ts | 30 ++ tests/pattern-semantics.test.ts | 38 +++ tests/query-scope.test.ts | 49 +++ tests/resource-uri.test.ts | 24 ++ tests/search-quality.test.ts | 53 +++ tests/search-ranking.test.ts | 106 ++++++ tests/search-retrieval-strategy.test.ts | 121 +++++++ 23 files changed, 1380 insertions(+), 133 deletions(-) create mode 100644 src/core/search-quality.ts create mode 100644 src/patterns/semantics.ts create mode 100644 src/preflight/query-scope.ts create mode 100644 src/resources/uri.ts create mode 100644 tests/pattern-detector.test.ts create mode 100644 tests/pattern-semantics.test.ts create mode 100644 tests/query-scope.test.ts create mode 100644 tests/resource-uri.test.ts create mode 100644 tests/search-quality.test.ts create mode 100644 tests/search-ranking.test.ts create mode 100644 tests/search-retrieval-strategy.test.ts diff --git a/.gitignore b/.gitignore index 8f69e9d..7273e62 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ dist/ *.log .DS_Store .env +opencode.jsonc +nul .vscode/ *.swp *.swo diff --git a/src/core/analyzer-registry.ts b/src/core/analyzer-registry.ts index d600422..e264b1b 100644 --- a/src/core/analyzer-registry.ts +++ b/src/core/analyzer-registry.ts @@ -70,7 +70,9 @@ 
export class AnalyzerRegistry { const analyzer = this.findAnalyzer(filePath, content); if (!analyzer) { - console.warn(`No analyzer found for file: ${filePath}`); + if (process.env.CODEBASE_CONTEXT_DEBUG) { + console.error(`[DEBUG] No analyzer found for file: ${filePath}`); + } return null; } diff --git a/src/core/indexer.ts b/src/core/indexer.ts index 437cee0..eb5289d 100644 --- a/src/core/indexer.ts +++ b/src/core/indexer.ts @@ -210,6 +210,20 @@ export class CodebaseIndexer { this.updateProgress('complete', 100); stats.duration = Date.now() - startTime; stats.completedAt = new Date(); + + // Preserve accurate counts from the existing index (nothing changed, index is intact) + try { + const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME); + const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8')); + if (Array.isArray(existingChunks)) { + stats.totalChunks = existingChunks.length; + const uniqueFiles = new Set(existingChunks.map((c: { filePath?: string }) => c.filePath)); + stats.indexedFiles = uniqueFiles.size; + } + } catch { + // Keyword index doesn't exist yet — keep counts as 0 + } + return stats; } } @@ -591,6 +605,7 @@ export class CodebaseIndexer { private async scanFiles(): Promise { const files: string[] = []; + const seen = new Set(); // Read .gitignore if respecting it let ig: ReturnType | null = null; @@ -617,6 +632,12 @@ export class CodebaseIndexer { }); for (const file of matches) { + const normalizedFile = file.replace(/\\/g, '/'); + if (seen.has(normalizedFile)) { + continue; + } + seen.add(normalizedFile); + const relativePath = path.relative(this.rootPath, file); // Check gitignore diff --git a/src/core/search-quality.ts b/src/core/search-quality.ts new file mode 100644 index 0000000..4b89ef7 --- /dev/null +++ b/src/core/search-quality.ts @@ -0,0 +1,83 @@ +import type { SearchResult } from '../types/index.js'; +import { isTestingRelatedQuery } from '../preflight/query-scope.js'; + +export interface 
SearchQualityAssessment { + status: 'ok' | 'low_confidence'; + confidence: number; + signals: string[]; + nextSteps?: string[]; +} + +export function isTestArtifactPath(filePath: string): boolean { + const normalized = filePath.toLowerCase().replace(/\\/g, '/'); + return ( + normalized.includes('.spec.') || + normalized.includes('.test.') || + normalized.includes('/e2e/') || + normalized.includes('/__tests__/') + ); +} + +export function assessSearchQuality( + query: string, + results: SearchResult[] +): SearchQualityAssessment { + if (results.length === 0) { + return { + status: 'low_confidence', + confidence: 0, + signals: ['no results returned'], + nextSteps: [ + 'Try a narrower query with one concrete symbol, route, or file hint.', + 'Apply search filters (framework/language/componentType/layer).', + 'Use get_component_usage for dependency or wiring lookups.' + ] + }; + } + + const topSlice = results.slice(0, Math.min(3, results.length)); + const topScore = results[0].score; + const secondScore = results[1]?.score ?? 
topScore; + const topAverage = topSlice.reduce((sum, result) => sum + result.score, 0) / topSlice.length; + const topSeparation = Math.max(0, topScore - secondScore); + const testRatio = + topSlice.filter((result) => isTestArtifactPath(result.filePath)).length / topSlice.length; + const queryIsTesting = isTestingRelatedQuery(query); + + const signals: string[] = []; + if (topScore < 0.3) { + signals.push(`low top score (${topScore.toFixed(2)})`); + } + if (topAverage < 0.32) { + signals.push(`weak top-${topSlice.length} average (${topAverage.toFixed(2)})`); + } + if (topSlice.length > 1 && topSeparation < 0.03) { + signals.push(`tight top spread (${topSeparation.toFixed(2)})`); + } + if (!queryIsTesting && testRatio >= 0.67) { + signals.push( + `test artifacts dominate top-${topSlice.length} (${Math.round(testRatio * 100)}%)` + ); + } + + let confidence = topScore; + if (topAverage < 0.32) confidence -= 0.08; + if (topSlice.length > 1 && topSeparation < 0.03) confidence -= 0.05; + if (!queryIsTesting && testRatio >= 0.67) confidence -= 0.15; + confidence = Math.max(0, Math.min(1, Number(confidence.toFixed(2)))); + + const lowConfidence = signals.length >= 2 || confidence < 0.35; + + return { + status: lowConfidence ? 'low_confidence' : 'ok', + confidence, + signals, + ...(lowConfidence && { + nextSteps: [ + 'Add one or two concrete symbols, routes, or file hints to the query.', + 'Apply filters (framework/language/componentType/layer) to narrow candidates.', + 'Use get_component_usage when the question is about wiring or usages.' 
+ ] + }) + }; +} diff --git a/src/core/search.ts b/src/core/search.ts index 8979b27..5373de4 100644 --- a/src/core/search.ts +++ b/src/core/search.ts @@ -11,6 +11,8 @@ import { EmbeddingProvider, getEmbeddingProvider } from '../embeddings/index.js' import { VectorStorageProvider, getStorageProvider } from '../storage/index.js'; import { analyzerRegistry } from './analyzer-registry.js'; import { IndexCorruptedError } from '../errors/index.js'; +import { isTestingRelatedQuery } from '../preflight/query-scope.js'; +import { assessSearchQuality } from './search-quality.js'; import { CODEBASE_CONTEXT_DIRNAME, INTELLIGENCE_FILENAME, @@ -23,15 +25,77 @@ export interface SearchOptions { useKeywordSearch?: boolean; semanticWeight?: number; keywordWeight?: number; + profile?: SearchIntentProfile; + enableQueryExpansion?: boolean; + enableLowConfidenceRescue?: boolean; + candidateFloor?: number; +} + +export type SearchIntentProfile = 'explore' | 'edit' | 'refactor' | 'migrate'; + +interface QueryVariant { + query: string; + weight: number; } const DEFAULT_SEARCH_OPTIONS: SearchOptions = { useSemanticSearch: true, useKeywordSearch: true, semanticWeight: 0.7, - keywordWeight: 0.3 + keywordWeight: 0.3, + profile: 'explore', + enableQueryExpansion: true, + enableLowConfidenceRescue: true, + candidateFloor: 30 }; +const QUERY_EXPANSION_HINTS: Array<{ pattern: RegExp; terms: string[] }> = [ + { + pattern: /\b(auth|authentication|login|signin|sign-in|session|token|oauth)\b/i, + terms: ['auth', 'login', 'token', 'session', 'guard', 'oauth'] + }, + { + pattern: /\b(route|routes|routing|router|navigate|navigation|redirect|path)\b/i, + terms: ['router', 'route', 'navigation', 'redirect', 'path'] + }, + { + pattern: /\b(config|configuration|configure|setup|register|provider|providers|bootstrap)\b/i, + terms: ['config', 'setup', 'register', 'provider', 'bootstrap'] + }, + { + pattern: /\b(role|roles|permission|permissions|authorization|authorisation|access)\b/i, + terms: ['roles', 
'permissions', 'access', 'policy', 'guard'] + }, + { + pattern: /\b(interceptor|middleware|request|response|http)\b/i, + terms: ['interceptor', 'middleware', 'http', 'request', 'response'] + }, + { + pattern: /\b(theme|styles?|styling|palette|color|branding|upload)\b/i, + terms: ['theme', 'styles', 'palette', 'color', 'branding', 'upload'] + } +]; + +const QUERY_STOP_WORDS = new Set([ + 'the', + 'a', + 'an', + 'to', + 'of', + 'for', + 'and', + 'or', + 'with', + 'in', + 'on', + 'by', + 'how', + 'are', + 'is', + 'after', + 'before' +]); + export class CodebaseSearcher { private rootPath: string; private storagePath: string; @@ -197,71 +261,116 @@ export class CodebaseSearcher { return { trend: 'Stable' }; } - async search( - query: string, - limit: number = 5, - filters?: SearchFilters, - options: SearchOptions = DEFAULT_SEARCH_OPTIONS - ): Promise { - if (!this.initialized) { - await this.initialize(); - } + private isTestFile(filePath: string): boolean { + const normalized = filePath.toLowerCase().replace(/\\/g, '/'); + return ( + normalized.includes('.spec.') || + normalized.includes('.test.') || + normalized.includes('/e2e/') || + normalized.includes('/__tests__/') + ); + } - const { useSemanticSearch, useKeywordSearch, semanticWeight, keywordWeight } = { - ...DEFAULT_SEARCH_OPTIONS, - ...options - }; + private normalizeQueryTerms(query: string): string[] { + return query + .toLowerCase() + .split(/[^a-z0-9_]+/) + .filter((term) => term.length > 2 && !QUERY_STOP_WORDS.has(term)); + } - const results: Map = new Map(); + private buildQueryVariants(query: string, maxExpansions: number): QueryVariant[] { + const variants: QueryVariant[] = [{ query, weight: 1 }]; + if (maxExpansions <= 0) return variants; - if (useSemanticSearch && this.embeddingProvider && this.storageProvider) { - try { - const vectorResults = await this.semanticSearch(query, limit * 2, filters); - - vectorResults.forEach((result) => { - const id = result.chunk.id; - const existing = 
results.get(id); - - if (existing) { - existing.scores.push(result.score * (semanticWeight || 0.7)); - } else { - results.set(id, { - chunk: result.chunk, - scores: [result.score * (semanticWeight || 0.7)] - }); - } - }); - } catch (error) { - if (error instanceof IndexCorruptedError) { - throw error; // Propagate to handler for auto-heal + const normalized = query.toLowerCase(); + const terms = new Set(this.normalizeQueryTerms(query)); + + for (const hint of QUERY_EXPANSION_HINTS) { + if (!hint.pattern.test(query)) continue; + for (const term of hint.terms) { + if (!normalized.includes(term)) { + terms.add(term); } - console.warn('Semantic search failed:', error); } } - if (useKeywordSearch && this.fuseIndex) { - try { - const keywordResults = await this.keywordSearch(query, limit * 2, filters); - - keywordResults.forEach((result) => { - const id = result.chunk.id; - const existing = results.get(id); - - if (existing) { - existing.scores.push(result.score * (keywordWeight || 0.3)); - } else { - results.set(id, { - chunk: result.chunk, - scores: [result.score * (keywordWeight || 0.3)] - }); - } - }); - } catch (error) { - console.warn('Keyword search failed:', error); + const addedTerms = Array.from(terms).filter((term) => !normalized.includes(term)); + if (addedTerms.length === 0) return variants; + + const firstExpansion = `${query} ${addedTerms.slice(0, 6).join(' ')}`.trim(); + if (firstExpansion !== query) { + variants.push({ query: firstExpansion, weight: 0.35 }); + } + + if (maxExpansions > 1 && addedTerms.length > 6) { + const secondExpansion = `${query} ${addedTerms.slice(6, 12).join(' ')}`.trim(); + if (secondExpansion !== query) { + variants.push({ query: secondExpansion, weight: 0.25 }); } } - const combinedResults: SearchResult[] = Array.from(results.entries()) + return variants.slice(0, 1 + maxExpansions); + } + + private isCompositionRootFile(filePath: string): boolean { + const normalized = filePath.toLowerCase().replace(/\\/g, '/'); + const base = 
path.basename(normalized); + + if (/^(main|index|bootstrap|startup)\./.test(base)) return true; + + return ( + normalized.includes('/routes') || + normalized.includes('/routing') || + normalized.includes('/router') || + normalized.includes('/config') || + normalized.includes('/providers') + ); + } + + private queryPathTokenOverlap(filePath: string, query: string): number { + const queryTerms = new Set(this.normalizeQueryTerms(query)); + if (queryTerms.size === 0) return 0; + + const pathTerms = this.normalizeQueryTerms(filePath.replace(/\\/g, '/')); + return pathTerms.reduce((count, term) => (queryTerms.has(term) ? count + 1 : count), 0); + } + + private isLikelyWiringOrFlowQuery(query: string): boolean { + return /\b(route|router|routing|navigate|navigation|redirect|auth|authentication|login|provider|register|config|configuration|interceptor|middleware)\b/i.test( + query + ); + } + + private isActionOrHowQuery(query: string): boolean { + return /\b(how|where|configure|configured|setup|register|wire|wiring|navigate|redirect|login|authenticate|copy|upload|handle|create|update|delete)\b/i.test( + query + ); + } + + private isDefinitionHeavyResult(chunk: CodeChunk): boolean { + const normalizedPath = chunk.filePath.toLowerCase().replace(/\\/g, '/'); + const componentType = (chunk.componentType || '').toLowerCase(); + + if (['type', 'interface', 'enum', 'constant'].includes(componentType)) return true; + + return ( + normalizedPath.includes('/models/') || + normalizedPath.includes('/interfaces/') || + normalizedPath.includes('/types/') || + normalizedPath.includes('/constants') + ); + } + + private scoreAndSortResults( + query: string, + limit: number, + results: Map, + profile: SearchIntentProfile + ): SearchResult[] { + const likelyWiringQuery = this.isLikelyWiringOrFlowQuery(query); + const actionQuery = this.isActionOrHowQuery(query); + + return Array.from(results.entries()) .map(([_id, { chunk, scores }]) => { // Calculate base combined score let combinedScore = 
scores.reduce((sum, score) => sum + score, 0); @@ -270,9 +379,9 @@ export class CodebaseSearcher { // If both semantic and keyword matched, max possible is ~1.0 combinedScore = Math.min(1.0, combinedScore); - // Boost scores for Angular components with proper detection - if (chunk.componentType && chunk.framework === 'angular') { - combinedScore = Math.min(1.0, combinedScore * 1.3); + // Slight boost when analyzer identified a concrete component type + if (chunk.componentType && chunk.componentType !== 'unknown') { + combinedScore = Math.min(1.0, combinedScore * 1.1); } // Boost if layer is detected @@ -280,6 +389,36 @@ export class CodebaseSearcher { combinedScore = Math.min(1.0, combinedScore * 1.1); } + // Query-aware reranking to reduce noisy matches in practical workflows. + if (!isTestingRelatedQuery(query) && this.isTestFile(chunk.filePath)) { + combinedScore = combinedScore * 0.75; + } + + if (actionQuery && this.isDefinitionHeavyResult(chunk)) { + combinedScore = combinedScore * 0.82; + } + + if ( + actionQuery && + ['service', 'component', 'interceptor', 'guard', 'module', 'resolver'].includes( + (chunk.componentType || '').toLowerCase() + ) + ) { + combinedScore = Math.min(1.0, combinedScore * 1.06); + } + + // Light intent-aware boost for likely wiring/configuration queries. 
+ if (likelyWiringQuery && profile !== 'explore') { + if (this.isCompositionRootFile(chunk.filePath)) { + combinedScore = Math.min(1.0, combinedScore * 1.12); + } + } + + const pathOverlap = this.queryPathTokenOverlap(chunk.filePath, query); + if (pathOverlap >= 2) { + combinedScore = Math.min(1.0, combinedScore * 1.08); + } + // v1.2: Detect pattern trend and apply momentum boost const { trend, warning } = this.detectChunkTrend(chunk); if (trend === 'Rising') { @@ -311,8 +450,177 @@ export class CodebaseSearcher { }) .sort((a, b) => b.score - a.score) .slice(0, limit); + } + + private pickBetterResultSet( + query: string, + primary: SearchResult[], + rescue: SearchResult[] + ): SearchResult[] { + const primaryQuality = assessSearchQuality(query, primary); + const rescueQuality = assessSearchQuality(query, rescue); + + if ( + rescueQuality.status === 'ok' && + primaryQuality.status === 'low_confidence' && + rescueQuality.confidence >= primaryQuality.confidence + ) { + return rescue; + } + + if (rescueQuality.confidence >= primaryQuality.confidence + 0.05) { + return rescue; + } + + return primary; + } + + private async collectHybridMatches( + queryVariants: QueryVariant[], + candidateLimit: number, + filters: SearchFilters | undefined, + useSemanticSearch: boolean, + useKeywordSearch: boolean, + semanticWeight: number, + keywordWeight: number + ): Promise> { + const results: Map = new Map(); + + if (useSemanticSearch && this.embeddingProvider && this.storageProvider) { + try { + for (const variant of queryVariants) { + const vectorResults = await this.semanticSearch(variant.query, candidateLimit, filters); + + vectorResults.forEach((result) => { + const id = result.chunk.id; + const weightedScore = result.score * semanticWeight * variant.weight; + const existing = results.get(id); + + if (existing) { + existing.scores.push(weightedScore); + } else { + results.set(id, { + chunk: result.chunk, + scores: [weightedScore] + }); + } + }); + } + } catch (error) { + if 
(error instanceof IndexCorruptedError) { + throw error; // Propagate to handler for auto-heal + } + console.warn('Semantic search failed:', error); + } + } + + if (useKeywordSearch && this.fuseIndex) { + try { + for (const variant of queryVariants) { + const keywordResults = await this.keywordSearch(variant.query, candidateLimit, filters); + + keywordResults.forEach((result) => { + const id = result.chunk.id; + const weightedScore = result.score * keywordWeight * variant.weight; + const existing = results.get(id); + + if (existing) { + existing.scores.push(weightedScore); + } else { + results.set(id, { + chunk: result.chunk, + scores: [weightedScore] + }); + } + }); + } + } catch (error) { + console.warn('Keyword search failed:', error); + } + } + + return results; + } + + async search( + query: string, + limit: number = 5, + filters?: SearchFilters, + options: SearchOptions = DEFAULT_SEARCH_OPTIONS + ): Promise { + if (!this.initialized) { + await this.initialize(); + } + + const { + useSemanticSearch, + useKeywordSearch, + semanticWeight, + keywordWeight, + profile, + enableQueryExpansion, + enableLowConfidenceRescue, + candidateFloor + } = { + ...DEFAULT_SEARCH_OPTIONS, + ...options + }; + + const candidateLimit = Math.max(limit * 2, candidateFloor || 30); + const primaryVariants = this.buildQueryVariants(query, enableQueryExpansion ? 
1 : 0); + + const primaryMatches = await this.collectHybridMatches( + primaryVariants, + candidateLimit, + filters, + Boolean(useSemanticSearch), + Boolean(useKeywordSearch), + semanticWeight || 0.7, + keywordWeight || 0.3 + ); + + const primaryResults = this.scoreAndSortResults( + query, + limit, + primaryMatches, + (profile || 'explore') as SearchIntentProfile + ); + + if (!enableLowConfidenceRescue) { + return primaryResults; + } + + const primaryQuality = assessSearchQuality(query, primaryResults); + if (primaryQuality.status !== 'low_confidence') { + return primaryResults; + } + + const rescueVariants = this.buildQueryVariants(query, 2).slice(1); + if (rescueVariants.length === 0) { + return primaryResults; + } + + const rescueMatches = await this.collectHybridMatches( + rescueVariants.map((variant, index) => ({ + query: variant.query, + weight: index === 0 ? 1 : 0.8 + })), + candidateLimit, + filters, + Boolean(useSemanticSearch), + Boolean(useKeywordSearch), + semanticWeight || 0.7, + keywordWeight || 0.3 + ); + + const rescueResults = this.scoreAndSortResults( + query, + limit, + rescueMatches, + (profile || 'explore') as SearchIntentProfile + ); - return combinedResults; + return this.pickBetterResultSet(query, primaryResults, rescueResults); } private generateSummary(chunk: CodeChunk): string { @@ -364,7 +672,7 @@ export class CodebaseSearcher { return `${langMap[ext] || ext.toUpperCase()} in ${fileName}.`; } - private generateSnippet(content: string, maxLines: number = 100): string { + private generateSnippet(content: string, maxLines: number = 20): string { const lines = content.split('\n'); if (lines.length <= maxLines) { return content; diff --git a/src/index.ts b/src/index.ts index 972881c..084bc0b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -25,6 +25,7 @@ import { CodebaseIndexer } from './core/indexer.js'; import type { IndexingStats, SearchResult, + RelationshipData, Memory, MemoryCategory, MemoryType @@ -34,6 +35,7 @@ import { 
analyzerRegistry } from './core/analyzer-registry.js'; import { AngularAnalyzer } from './analyzers/angular/index.js'; import { GenericAnalyzer } from './analyzers/generic/index.js'; import { InternalFileGraph } from './utils/usage-tracker.js'; +import { getFileCommitDates } from './utils/git-dates.js'; import { IndexCorruptedError } from './errors/index.js'; import { CODEBASE_CONTEXT_DIRNAME, @@ -51,6 +53,14 @@ import { } from './memory/store.js'; import { parseGitLogLineToMemory } from './memory/git-memory.js'; import { buildEvidenceLock } from './preflight/evidence-lock.js'; +import { shouldIncludePatternConflictCategory } from './preflight/query-scope.js'; +import { + isComplementaryPatternCategory, + isComplementaryPatternConflict, + shouldSkipLegacyTestingFrameworkCategory +} from './patterns/semantics.js'; +import { CONTEXT_RESOURCE_URI, isContextResourceUri } from './resources/uri.js'; +import { assessSearchQuality } from './core/search-quality.js'; analyzerRegistry.register(new AngularAnalyzer()); analyzerRegistry.register(new GenericAnalyzer()); @@ -434,7 +444,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { // MCP Resources - Proactive context injection const RESOURCES: Resource[] = [ { - uri: 'codebase://context', + uri: CONTEXT_RESOURCE_URI, name: 'Codebase Intelligence', description: 'Automatic codebase context: libraries used, team patterns, and conventions. 
' + @@ -494,18 +504,48 @@ async function generateCodebaseContext(): Promise { // Pattern consensus if (intelligence.patterns && Object.keys(intelligence.patterns).length > 0) { + const patterns = intelligence.patterns as Record; lines.push("## YOUR Codebase's Actual Patterns (Not Generic Best Practices)"); lines.push(''); lines.push('These patterns were detected by analyzing your actual code.'); lines.push('This is what YOUR team does in practice, not what tutorials recommend.'); lines.push(''); - for (const [category, data] of Object.entries(intelligence.patterns)) { + for (const [category, data] of Object.entries(patterns)) { + if (shouldSkipLegacyTestingFrameworkCategory(category, patterns)) { + continue; + } + const patternData: any = data; const primary = patternData.primary; + const alternatives = patternData.alsoDetected ?? []; if (!primary) continue; + if ( + isComplementaryPatternCategory( + category, + [primary.name, ...alternatives.map((alt: any) => alt.name)].filter(Boolean) + ) + ) { + const secondary = alternatives[0]; + if (secondary) { + const categoryName = category + .replace(/([A-Z])/g, ' $1') + .trim() + .replace(/^./, (str: string) => str.toUpperCase()); + lines.push( + `### ${categoryName}: **${primary.name}** (${primary.frequency}) + **${secondary.name}** (${secondary.frequency})` + ); + lines.push( + ' → Computed and effect are complementary Signals primitives and are commonly used together.' 
+ ); + lines.push(' → Treat this as balanced usage, not a hard split decision.'); + lines.push(''); + continue; + } + } + const percentage = parseInt(primary.frequency); const categoryName = category .replace(/([A-Z])/g, ' $1') @@ -520,8 +560,8 @@ async function generateCodebaseContext(): Promise { `### ${categoryName}: **${primary.name}** (${primary.frequency} - strong consensus)` ); lines.push(` → Your team strongly prefers ${primary.name}`); - if (patternData.alsoDetected?.length) { - const alt = patternData.alsoDetected[0]; + if (alternatives.length) { + const alt = alternatives[0]; lines.push( ` → Minority pattern: ${alt.name} (${alt.frequency}) - avoid for new code` ); @@ -529,9 +569,9 @@ async function generateCodebaseContext(): Promise { } else if (percentage >= 60) { lines.push(`### ${categoryName}: **${primary.name}** (${primary.frequency} - majority)`); lines.push(` → Most code uses ${primary.name}, but not unanimous`); - if (patternData.alsoDetected?.length) { + if (alternatives.length) { lines.push( - ` → Also detected: ${patternData.alsoDetected[0].name} (${patternData.alsoDetected[0].frequency})` + ` → Also detected: ${alternatives[0].name} (${alternatives[0].frequency})` ); } } else { @@ -539,8 +579,8 @@ async function generateCodebaseContext(): Promise { lines.push(`### ${categoryName}: ⚠️ NO TEAM CONSENSUS`); lines.push(` Your codebase is split between multiple approaches:`); lines.push(` - ${primary.name} (${primary.frequency})`); - if (patternData.alsoDetected?.length) { - for (const alt of patternData.alsoDetected.slice(0, 2)) { + if (alternatives.length) { + for (const alt of alternatives.slice(0, 2)) { lines.push(` - ${alt.name} (${alt.frequency})`); } } @@ -566,13 +606,13 @@ async function generateCodebaseContext(): Promise { server.setRequestHandler(ReadResourceRequestSchema, async (request) => { const uri = request.params.uri; - if (uri === 'codebase://context') { + if (isContextResourceUri(uri)) { const content = await 
generateCodebaseContext(); return { contents: [ { - uri, + uri: CONTEXT_RESOURCE_URI, mimeType: 'text/plain', text: content } @@ -733,9 +773,15 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { const searcher = new CodebaseSearcher(ROOT_PATH); let results: SearchResult[]; + const searchProfile = + intent && ['explore', 'edit', 'refactor', 'migrate'].includes(intent) + ? intent + : 'explore'; try { - results = await searcher.search(query, limit || 5, filters); + results = await searcher.search(query, limit || 5, filters, { + profile: searchProfile + }); } catch (error) { if (error instanceof IndexCorruptedError) { console.error('[Auto-Heal] Index corrupted. Triggering full re-index...'); @@ -746,7 +792,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { console.error('[Auto-Heal] Success. Retrying search...'); const freshSearcher = new CodebaseSearcher(ROOT_PATH); try { - results = await freshSearcher.search(query, limit || 5, filters); + results = await freshSearcher.search(query, limit || 5, filters, { + profile: searchProfile + }); } catch (retryError) { return { content: [ @@ -801,14 +849,111 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { }) .sort((a, b) => b.effectiveConfidence - a.effectiveConfidence); + // Load intelligence data for enrichment (all intents, not just preflight) + let intelligence: any = null; + try { + const intelligenceContent = await fs.readFile(PATHS.intelligence, 'utf-8'); + intelligence = JSON.parse(intelligenceContent); + } catch { + /* graceful degradation — intelligence file may not exist yet */ + } + + // Build reverse import map from intelligence graph + const reverseImports = new Map(); + if (intelligence?.internalFileGraph?.imports) { + for (const [file, deps] of Object.entries( + intelligence.internalFileGraph.imports + )) { + for (const dep of deps) { + if (!reverseImports.has(dep)) reverseImports.set(dep, []); + reverseImports.get(dep)!.push(file); + } + } + } 
+ + // Load git dates for lastModified enrichment + let gitDates: Map | null = null; + try { + gitDates = await getFileCommitDates(ROOT_PATH); + } catch { + /* not a git repo */ + } + + // Enrich a search result with relationship data + function enrichResult(r: SearchResult): RelationshipData | undefined { + const rPath = r.filePath; + + // importedBy: files that import this result (reverse lookup) + const importedBy: string[] = []; + for (const [dep, importers] of reverseImports) { + if (dep.endsWith(rPath) || rPath.endsWith(dep)) { + importedBy.push(...importers); + } + } + + // imports: files this result depends on (forward lookup) + const imports: string[] = []; + if (intelligence?.internalFileGraph?.imports) { + for (const [file, deps] of Object.entries( + intelligence.internalFileGraph.imports + )) { + if (file.endsWith(rPath) || rPath.endsWith(file)) { + imports.push(...deps); + } + } + } + + // testedIn: heuristic — same basename with .spec/.test extension + const testedIn: string[] = []; + const baseName = path.basename(rPath).replace(/\.[^.]+$/, ''); + if (intelligence?.internalFileGraph?.imports) { + for (const file of Object.keys(intelligence.internalFileGraph.imports)) { + const fileBase = path.basename(file); + if ( + (fileBase.includes('.spec.') || fileBase.includes('.test.')) && + fileBase.startsWith(baseName) + ) { + testedIn.push(file); + } + } + } + + // lastModified: from git dates + let lastModified: string | undefined; + if (gitDates) { + // Try matching by relative path (git dates use repo-relative forward-slash paths) + const relPath = path.relative(ROOT_PATH, rPath).replace(/\\/g, '/'); + const date = gitDates.get(relPath); + if (date) { + lastModified = date.toISOString(); + } + } + + // Only return if we have at least one piece of data + if ( + importedBy.length === 0 && + imports.length === 0 && + testedIn.length === 0 && + !lastModified + ) { + return undefined; + } + + return { + ...(importedBy.length > 0 && { importedBy }), + 
...(imports.length > 0 && { imports }), + ...(testedIn.length > 0 && { testedIn }), + ...(lastModified && { lastModified }) + }; + } + + const searchQuality = assessSearchQuality(query, results); + // Compose preflight card for edit/refactor/migrate intents let preflight: any = undefined; const preflightIntents = ['edit', 'refactor', 'migrate']; - if (intent && preflightIntents.includes(intent)) { + if (intent && preflightIntents.includes(intent) && intelligence) { try { - const intelligenceContent = await fs.readFile(PATHS.intelligence, 'utf-8'); - const intelligence = JSON.parse(intelligenceContent); - // --- Avoid / Prefer patterns --- const avoidPatterns: any[] = []; const preferredPatterns: any[] = []; @@ -927,13 +1072,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { primary: { name: string; adoption: string }; alternative: { name: string; adoption: string }; }> = []; + const hasUnitTestFramework = Boolean((patterns as any).unitTestFramework?.primary); for (const [cat, data] of Object.entries(patterns)) { + if (shouldSkipLegacyTestingFrameworkCategory(cat, patterns as any)) continue; + if (!shouldIncludePatternConflictCategory(cat, query)) continue; if (!data.primary || !data.alsoDetected?.length) continue; const primaryFreq = parseFloat(data.primary.frequency) || 100; if (primaryFreq >= 80) continue; for (const alt of data.alsoDetected) { const altFreq = parseFloat(alt.frequency) || 0; if (altFreq >= 20) { + if (isComplementaryPatternConflict(cat, data.primary.name, alt.name)) continue; + if (hasUnitTestFramework && cat === 'testingFramework') continue; patternConflicts.push({ category: cat, primary: { name: data.primary.name, adoption: data.primary.frequency }, @@ -985,7 +1135,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { ...(failureWarnings.length > 0 && { failureWarnings }) }; } catch { - // Intelligence file not available — skip preflight, don't fail the search + // Preflight construction failed — skip 
preflight, don't fail the search } } @@ -997,18 +1147,23 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { { status: 'success', ...(preflight && { preflight }), - results: results.map((r) => ({ - summary: r.summary, - snippet: r.snippet, - filePath: `${r.filePath}:${r.startLine}-${r.endLine}`, - score: r.score, - relevanceReason: r.relevanceReason, - componentType: r.componentType, - layer: r.layer, - framework: r.framework, - trend: r.trend, - patternWarning: r.patternWarning - })), + searchQuality, + results: results.map((r) => { + const relationships = enrichResult(r); + return { + summary: r.summary, + snippet: r.snippet, + filePath: `${r.filePath}:${r.startLine}-${r.endLine}`, + score: r.score, + relevanceReason: r.relevanceReason, + componentType: r.componentType, + layer: r.layer, + framework: r.framework, + trend: r.trend, + patternWarning: r.patternWarning, + ...(relationships && { relationships }) + }; + }), totalResults: results.length, ...(relatedMemories.length > 0 && { relatedMemories: relatedMemories.slice(0, 5) @@ -1269,6 +1424,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } else if (category === 'state') { result.stateManagement = intelligence.patterns?.stateManagement; } else if (category === 'testing') { + result.unitTestFramework = intelligence.patterns?.unitTestFramework; + result.e2eFramework = intelligence.patterns?.e2eFramework; result.testingFramework = intelligence.patterns?.testingFramework; result.testMocking = intelligence.patterns?.testMocking; } else if (category === 'libraries') { @@ -1306,7 +1463,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { // Detect pattern conflicts: primary < 80% and any alternative > 20% const conflicts: any[] = []; const patternsData = intelligence.patterns || {}; + const hasUnitTestFramework = Boolean(patternsData.unitTestFramework?.primary); for (const [cat, data] of Object.entries(patternsData)) { + if 
(shouldSkipLegacyTestingFrameworkCategory(cat, patternsData)) continue; if (category && category !== 'all' && cat !== category) continue; if (!data.primary || !data.alsoDetected?.length) continue; @@ -1316,6 +1475,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { for (const alt of data.alsoDetected) { const altFreq = parseFloat(alt.frequency) || 0; if (altFreq < 20) continue; + if (isComplementaryPatternConflict(cat, data.primary.name, alt.name)) continue; + if (hasUnitTestFramework && cat === 'testingFramework') continue; conflicts.push({ category: cat, diff --git a/src/patterns/semantics.ts b/src/patterns/semantics.ts new file mode 100644 index 0000000..0b400f0 --- /dev/null +++ b/src/patterns/semantics.ts @@ -0,0 +1,35 @@ +const COMPLEMENTARY_REACTIVITY_PATTERNS = new Set(['computed', 'effect']); + +function normalizePatternName(name: string): string { + return name.trim().toLowerCase(); +} + +export function isComplementaryPatternConflict( + category: string, + primaryName: string, + alternativeName: string +): boolean { + if (category !== 'reactivity') return false; + + const primary = normalizePatternName(primaryName); + const alternative = normalizePatternName(alternativeName); + + if (!COMPLEMENTARY_REACTIVITY_PATTERNS.has(primary)) return false; + if (!COMPLEMENTARY_REACTIVITY_PATTERNS.has(alternative)) return false; + + return primary !== alternative; +} + +export function isComplementaryPatternCategory(category: string, patternNames: string[]): boolean { + if (category !== 'reactivity' || patternNames.length < 2) return false; + return patternNames.every((name) => + COMPLEMENTARY_REACTIVITY_PATTERNS.has(normalizePatternName(name)) + ); +} + +export function shouldSkipLegacyTestingFrameworkCategory( + category: string, + patterns: Record +): boolean { + return category === 'testingFramework' && Boolean(patterns.unitTestFramework?.primary); +} diff --git a/src/preflight/query-scope.ts b/src/preflight/query-scope.ts new file mode 
100644 index 0000000..48f0a89 --- /dev/null +++ b/src/preflight/query-scope.ts @@ -0,0 +1,41 @@ +const TESTING_QUERY_TERMS = [ + 'test', + 'tests', + 'testing', + 'unit', + 'integration', + 'spec', + 'jest', + 'testbed', + 'mock', + 'mocks', + 'mocking', + 'spy', + 'spyon', + 'coverage', + 'e2e', + 'playwright', + 'cypress', + 'vitest', + 'jasmine' +] as const; + +const TESTING_PATTERN_CATEGORIES = new Set([ + 'unitTestFramework', + 'testingFramework', + 'testMocking', + 'testUtility', + 'e2eFramework' +]); + +const TESTING_QUERY_REGEX = new RegExp(`\\b(${TESTING_QUERY_TERMS.join('|')})\\b`, 'i'); + +export function isTestingRelatedQuery(query: string): boolean { + if (!query || !query.trim()) return false; + return TESTING_QUERY_REGEX.test(query); +} + +export function shouldIncludePatternConflictCategory(category: string, query: string): boolean { + if (!TESTING_PATTERN_CATEGORIES.has(category)) return true; + return isTestingRelatedQuery(query); +} diff --git a/src/resources/uri.ts b/src/resources/uri.ts new file mode 100644 index 0000000..b88ad68 --- /dev/null +++ b/src/resources/uri.ts @@ -0,0 +1,14 @@ +const CONTEXT_RESOURCE_URI = 'codebase://context'; + +export function normalizeResourceUri(uri: string): string { + if (!uri) return uri; + if (uri === CONTEXT_RESOURCE_URI) return uri; + if (uri.endsWith(`/${CONTEXT_RESOURCE_URI}`)) return CONTEXT_RESOURCE_URI; + return uri; +} + +export function isContextResourceUri(uri: string): boolean { + return normalizeResourceUri(uri) === CONTEXT_RESOURCE_URI; +} + +export { CONTEXT_RESOURCE_URI }; diff --git a/src/storage/lancedb.ts b/src/storage/lancedb.ts index 1011fff..8310dd3 100644 --- a/src/storage/lancedb.ts +++ b/src/storage/lancedb.ts @@ -121,7 +121,7 @@ export class LanceDBStorageProvider implements VectorStorageProvider { whereConditions.push(`framework = '${filters.framework}'`); } if (filters.componentType) { - whereConditions.push(`componentType = '${filters.componentType}'`); + 
whereConditions.push(`"componentType" = '${filters.componentType}'`); } if (filters.layer) { whereConditions.push(`layer = '${filters.layer}'`); @@ -184,7 +184,7 @@ export class LanceDBStorageProvider implements VectorStorageProvider { // Escape single quotes in file paths to prevent SQL injection const escaped = filePaths.map((p) => p.replace(/'/g, "''")); const inClause = escaped.map((p) => `'${p}'`).join(', '); - await this.table.delete(`filePath IN (${inClause})`); + await this.table.delete(`"filePath" IN (${inClause})`); const countAfter = await this.table.countRows(); const deleted = countBefore - countAfter; diff --git a/src/types/index.ts b/src/types/index.ts index 8aad2a1..634a725 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -352,12 +352,27 @@ export interface SearchResult { /** Warning if this result uses declining/legacy patterns */ patternWarning?: string; + // v1.5: Relationship enrichment + /** Structured relationship data from the import graph and git history */ + relationships?: RelationshipData; + // Optional detailed context (for agent to request if needed) fullContent?: string; // Only included if explicitly requested relatedChunks?: CodeChunk[]; highlights?: TextHighlight[]; } +export interface RelationshipData { + /** Files that import this result */ + importedBy?: string[]; + /** Files this result depends on */ + imports?: string[]; + /** Test files covering this (heuristic: same basename + .spec/.test) */ + testedIn?: string[]; + /** ISO date from git log */ + lastModified?: string; +} + export interface TextHighlight { start: number; end: number; diff --git a/src/utils/git-dates.ts b/src/utils/git-dates.ts index e14ab44..42c7dc8 100644 --- a/src/utils/git-dates.ts +++ b/src/utils/git-dates.ts @@ -7,6 +7,20 @@ import { exec } from 'child_process'; import { promisify } from 'util'; const execAsync = promisify(exec); +const commitDateCache = new Map>(); + +function normalizeRootPath(rootPath: string): string { + return 
rootPath.replace(/\\/g, '/').toLowerCase(); +} + +export function clearFileCommitDatesCache(rootPath?: string): void { + if (rootPath) { + commitDateCache.delete(normalizeRootPath(rootPath)); + return; + } + + commitDateCache.clear(); +} /** * Get the last commit date for each file in the repository. @@ -16,6 +30,15 @@ const execAsync = promisify(exec); * @returns Map of relative file paths to their last commit date */ export async function getFileCommitDates(rootPath: string): Promise> { + const cacheKey = normalizeRootPath(rootPath); + const cached = commitDateCache.get(cacheKey); + if (cached) { + if (process.env.CODEBASE_CONTEXT_DEBUG) { + console.error(`[git-dates] Cache hit for ${cacheKey}`); + } + return new Map(cached); + } + const fileDates = new Map(); try { @@ -57,6 +80,7 @@ export async function getFileCommitDates(rootPath: string): Promise { + describe('missing analyzer logging', () => { + it('should stay quiet by default when no analyzer matches', async () => { + const registry = new AnalyzerRegistry(); + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); - describe('getAll', () => { - it('should return analyzers sorted by priority (highest first)', () => { - const analyzers = analyzerRegistry.getAll(); + const priorDebug = process.env.CODEBASE_CONTEXT_DEBUG; + delete process.env.CODEBASE_CONTEXT_DEBUG; - for (let i = 1; i < analyzers.length; i++) { - expect(analyzers[i - 1].priority).toBeGreaterThanOrEqual(analyzers[i].priority); - } - }); + try { + const result = await registry.analyzeFile('file.unknown', 'content'); + expect(result).toBeNull(); + expect(warnSpy).not.toHaveBeenCalled(); + expect(errorSpy).not.toHaveBeenCalled(); + } finally { + if (priorDebug === undefined) { + delete process.env.CODEBASE_CONTEXT_DEBUG; + } else { + process.env.CODEBASE_CONTEXT_DEBUG = priorDebug; + } + warnSpy.mockRestore(); + errorSpy.mockRestore(); + } + }); + }); + + 
describe('getAll', () => { + it('should return analyzers sorted by priority (highest first)', () => { + const analyzers = analyzerRegistry.getAll(); + + for (let i = 1; i < analyzers.length; i++) { + expect(analyzers[i - 1].priority).toBeGreaterThanOrEqual(analyzers[i].priority); + } + }); + + it('should include default analyzers (Angular, Generic)', () => { + const analyzers = analyzerRegistry.getAll(); + const names = analyzers.map((a) => a.name); - it('should include default analyzers (Angular, Generic)', () => { - const analyzers = analyzerRegistry.getAll(); - const names = analyzers.map(a => a.name); + expect(names).toContain('angular'); + expect(names).toContain('generic'); + }); + }); - expect(names).toContain('angular'); - expect(names).toContain('generic'); - }); + describe('get', () => { + it('should return analyzer by name', () => { + const angular = analyzerRegistry.get('angular'); + expect(angular).toBeDefined(); + expect(angular?.name).toBe('angular'); }); - describe('get', () => { - it('should return analyzer by name', () => { - const angular = analyzerRegistry.get('angular'); - expect(angular).toBeDefined(); - expect(angular?.name).toBe('angular'); - }); - - it('should return undefined for unknown analyzer', () => { - const unknown = analyzerRegistry.get('unknown-analyzer'); - expect(unknown).toBeUndefined(); - }); + it('should return undefined for unknown analyzer', () => { + const unknown = analyzerRegistry.get('unknown-analyzer'); + expect(unknown).toBeUndefined(); }); + }); - describe('priority ordering', () => { - it('should have Angular higher priority than Generic', () => { - const angular = analyzerRegistry.get('angular'); - const generic = analyzerRegistry.get('generic'); + describe('priority ordering', () => { + it('should have Angular higher priority than Generic', () => { + const angular = analyzerRegistry.get('angular'); + const generic = analyzerRegistry.get('generic'); - expect(angular).toBeDefined(); - expect(generic).toBeDefined(); 
- expect(angular!.priority).toBeGreaterThan(generic!.priority); - }); + expect(angular).toBeDefined(); + expect(generic).toBeDefined(); + expect(angular!.priority).toBeGreaterThan(generic!.priority); }); + }); }); diff --git a/tests/incremental-indexing.test.ts b/tests/incremental-indexing.test.ts index 85cc00b..4e706df 100644 --- a/tests/incremental-indexing.test.ts +++ b/tests/incremental-indexing.test.ts @@ -66,6 +66,59 @@ describe('Incremental Indexing', () => { expect(stats.duration).toBeLessThan(5000); }); + it('should preserve indexedFiles and totalChunks in short-circuit (nothing changed)', async () => { + // Use files substantial enough to produce chunks + await fs.writeFile(path.join(tempDir, 'service.ts'), [ + 'import { Injectable } from "@angular/core";', + '', + '@Injectable({ providedIn: "root" })', + 'export class UserService {', + ' private users: string[] = [];', + '', + ' getUsers(): string[] {', + ' return this.users;', + ' }', + '', + ' addUser(name: string): void {', + ' this.users.push(name);', + ' }', + '}' + ].join('\n')); + await fs.writeFile(path.join(tempDir, 'utils.ts'), [ + 'export function formatDate(date: Date): string {', + ' return date.toISOString().split("T")[0];', + '}', + '', + 'export function capitalize(str: string): string {', + ' return str.charAt(0).toUpperCase() + str.slice(1);', + '}', + '', + 'export function range(n: number): number[] {', + ' return Array.from({ length: n }, (_, i) => i);', + '}' + ].join('\n')); + + // Full index first + const indexer1 = new CodebaseIndexer({ + rootPath: tempDir, + config: { skipEmbedding: true } + }); + const fullStats = await indexer1.index(); + + // Incremental index — nothing changed (short-circuit) + const indexer2 = new CodebaseIndexer({ + rootPath: tempDir, + config: { skipEmbedding: true }, + incrementalOnly: true + }); + const incStats = await indexer2.index(); + + // Key invariant: short-circuit stats must match full index, not reset to 0 + 
expect(incStats.indexedFiles).toBe(fullStats.indexedFiles); + expect(incStats.totalChunks).toBe(fullStats.totalChunks); + expect(incStats.totalFiles).toBe(fullStats.totalFiles); + }); + it('should detect changed files in incremental mode', async () => { await fs.writeFile(path.join(tempDir, 'index.ts'), 'export const x = 1;'); diff --git a/tests/lancedb-corruption.test.ts b/tests/lancedb-corruption.test.ts index 2ec825b..8f758db 100644 --- a/tests/lancedb-corruption.test.ts +++ b/tests/lancedb-corruption.test.ts @@ -19,7 +19,7 @@ describe('LanceDBStorageProvider corruption detection', () => { beforeEach(async () => { tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'lancedb-test-')); lancedb.connect.mockReset(); - consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => { }); + consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); }); afterEach(async () => { @@ -103,4 +103,48 @@ describe('LanceDBStorageProvider corruption detection', () => { const results = await provider.search([0.1, 0.2], 5); expect(results).toEqual([]); }); + + it('quotes filePath column when deleting by file paths', async () => { + const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js'); + const provider = new LanceDBStorageProvider() as any; + + const deleteSpy = vi.fn(async () => {}); + provider.initialized = true; + provider.table = { + countRows: vi.fn().mockResolvedValueOnce(10).mockResolvedValueOnce(7), + delete: deleteSpy + }; + + const deleted = await provider.deleteByFilePaths(['C:/repo/src/a.ts', "C:/repo/src/b'test.ts"]); + + expect(deleted).toBe(3); + expect(deleteSpy).toHaveBeenCalledWith( + "\"filePath\" IN ('C:/repo/src/a.ts', 'C:/repo/src/b''test.ts')" + ); + }); + + it('quotes componentType column when applying search filters', async () => { + const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js'); + const provider = new LanceDBStorageProvider() as any; + + const whereSpy = vi.fn(() => query); + const 
query = { + limit: vi.fn(() => query), + where: whereSpy, + toArray: vi.fn(async () => []) + }; + + provider.initialized = true; + provider.table = { + vectorSearch: vi.fn(() => query) + }; + + const results = await provider.search([0.1, 0.2], 5, { + componentType: 'service', + layer: 'business' + }); + + expect(results).toEqual([]); + expect(whereSpy).toHaveBeenCalledWith("\"componentType\" = 'service' AND layer = 'business'"); + }); }); diff --git a/tests/pattern-detector.test.ts b/tests/pattern-detector.test.ts new file mode 100644 index 0000000..c5d668d --- /dev/null +++ b/tests/pattern-detector.test.ts @@ -0,0 +1,30 @@ +import { describe, it, expect } from 'vitest'; +import { PatternDetector } from '../src/utils/usage-tracker.js'; + +describe('PatternDetector testing categories', () => { + it('tracks e2e without polluting testingFramework', () => { + const detector = new PatternDetector(); + + detector.detectFromCode( + "import { test } from '@playwright/test'; test('smoke', async ({ page }) => { await page.goto('/'); });", + 'apps/app/e2e/session/session.spec.ts' + ); + + const patterns = detector.getAllPatterns(); + expect(patterns.e2eFramework?.primary.name).toBe('Playwright'); + expect(patterns.testingFramework).toBeUndefined(); + }); + + it('keeps unit testingFramework classification for unit tests', () => { + const detector = new PatternDetector(); + + detector.detectFromCode( + "describe('AuthService', () => { TestBed.configureTestingModule({}); jest.spyOn(service, 'load'); });", + 'libs/library/src/lib/auth/auth.service.spec.ts' + ); + + const patterns = detector.getAllPatterns(); + expect(patterns.unitTestFramework?.primary.name).toBe('Jest'); + expect(patterns.testingFramework?.primary.name).toBe('Jest'); + }); +}); diff --git a/tests/pattern-semantics.test.ts b/tests/pattern-semantics.test.ts new file mode 100644 index 0000000..7cd21a0 --- /dev/null +++ b/tests/pattern-semantics.test.ts @@ -0,0 +1,38 @@ +import { describe, it, expect } from 
'vitest'; +import { + isComplementaryPatternCategory, + isComplementaryPatternConflict, + shouldSkipLegacyTestingFrameworkCategory +} from '../src/patterns/semantics.js'; + +describe('pattern semantics helpers', () => { + it('treats computed/effect reactivity pair as complementary', () => { + expect(isComplementaryPatternConflict('reactivity', 'Computed', 'Effect')).toBe(true); + expect(isComplementaryPatternConflict('reactivity', 'Effect', 'Computed')).toBe(true); + }); + + it('does not mark unrelated categories as complementary conflicts', () => { + expect(isComplementaryPatternConflict('stateManagement', 'RxJS', 'Signals')).toBe(false); + expect(isComplementaryPatternConflict('reactivity', 'Computed', 'Signals')).toBe(false); + }); + + it('detects complementary reactivity categories', () => { + expect(isComplementaryPatternCategory('reactivity', ['Computed', 'Effect'])).toBe(true); + expect(isComplementaryPatternCategory('reactivity', ['Effect', 'Computed'])).toBe(true); + expect(isComplementaryPatternCategory('reactivity', ['Computed', 'RxJS'])).toBe(false); + }); + + it('suppresses legacy testingFramework when unitTestFramework exists', () => { + expect( + shouldSkipLegacyTestingFrameworkCategory('testingFramework', { + unitTestFramework: { primary: { name: 'Jest' } } + }) + ).toBe(true); + + expect( + shouldSkipLegacyTestingFrameworkCategory('testingFramework', { + testingFramework: { primary: { name: 'Jest' } } + }) + ).toBe(false); + }); +}); diff --git a/tests/query-scope.test.ts b/tests/query-scope.test.ts new file mode 100644 index 0000000..93f1a68 --- /dev/null +++ b/tests/query-scope.test.ts @@ -0,0 +1,49 @@ +import { describe, it, expect } from 'vitest'; +import { + isTestingRelatedQuery, + shouldIncludePatternConflictCategory +} from '../src/preflight/query-scope.js'; + +describe('Preflight query scope', () => { + it('detects testing-related queries', () => { + expect(isTestingRelatedQuery('Update unit tests for AuthInterceptor with jest 
mocks')).toBe( + true + ); + expect(isTestingRelatedQuery('Refactor TestBed setup and spyOn assertions')).toBe(true); + expect(isTestingRelatedQuery('Migrate e2e Playwright flows')).toBe(true); + }); + + it('ignores non-testing queries', () => { + expect( + isTestingRelatedQuery('Refactor AuthInterceptor registration and HTTP provider wiring') + ).toBe(false); + expect(isTestingRelatedQuery('')).toBe(false); + }); + + it('filters testing conflicts for non-testing prompts', () => { + expect( + shouldIncludePatternConflictCategory( + 'testingFramework', + 'Refactor AuthInterceptor registration and HTTP provider wiring' + ) + ).toBe(false); + expect( + shouldIncludePatternConflictCategory( + 'testMocking', + 'Refactor AuthInterceptor registration and HTTP provider wiring' + ) + ).toBe(false); + expect( + shouldIncludePatternConflictCategory('stateManagement', 'Refactor state service to signals') + ).toBe(true); + }); + + it('keeps testing conflicts for testing prompts', () => { + expect( + shouldIncludePatternConflictCategory( + 'testingFramework', + 'Fix failing unit tests in auth service' + ) + ).toBe(true); + }); +}); diff --git a/tests/resource-uri.test.ts b/tests/resource-uri.test.ts new file mode 100644 index 0000000..ff9fbba --- /dev/null +++ b/tests/resource-uri.test.ts @@ -0,0 +1,24 @@ +import { describe, it, expect } from 'vitest'; +import { + CONTEXT_RESOURCE_URI, + isContextResourceUri, + normalizeResourceUri +} from '../src/resources/uri.js'; + +describe('resource URI normalization', () => { + it('accepts canonical resource URI', () => { + expect(normalizeResourceUri(CONTEXT_RESOURCE_URI)).toBe(CONTEXT_RESOURCE_URI); + expect(isContextResourceUri(CONTEXT_RESOURCE_URI)).toBe(true); + }); + + it('accepts namespaced resource URI from some MCP hosts', () => { + const namespaced = `codebase-context/${CONTEXT_RESOURCE_URI}`; + expect(normalizeResourceUri(namespaced)).toBe(CONTEXT_RESOURCE_URI); + expect(isContextResourceUri(namespaced)).toBe(true); + }); + + 
it('rejects unknown URIs', () => { + expect(isContextResourceUri('codebase://other')).toBe(false); + expect(isContextResourceUri('other/codebase://other')).toBe(false); + }); +}); diff --git a/tests/search-quality.test.ts b/tests/search-quality.test.ts new file mode 100644 index 0000000..52adffe --- /dev/null +++ b/tests/search-quality.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it } from 'vitest'; +import type { SearchResult } from '../src/types/index.js'; +import { assessSearchQuality } from '../src/core/search-quality.js'; + +function makeResult(filePath: string, score: number): SearchResult { + return { + summary: 'summary', + snippet: 'snippet', + filePath, + startLine: 1, + endLine: 10, + score, + relevanceReason: 'match', + language: 'typescript', + framework: 'generic', + componentType: 'service', + layer: 'core', + metadata: {} + }; +} + +describe('assessSearchQuality', () => { + it('returns low confidence when no results are returned', () => { + const quality = assessSearchQuality('find authentication flow', []); + + expect(quality.status).toBe('low_confidence'); + expect(quality.confidence).toBe(0); + expect(quality.signals).toContain('no results returned'); + expect(quality.nextSteps?.length).toBeGreaterThan(0); + }); + + it('flags test-artifact dominance for non-testing queries', () => { + const quality = assessSearchQuality('find login redirect implementation', [ + makeResult('src/features/login/login.service.spec.ts', 0.31), + makeResult('tests/e2e/login-flow.test.ts', 0.29), + makeResult('src/features/login/login-helpers.spec.ts', 0.28) + ]); + + expect(quality.status).toBe('low_confidence'); + expect(quality.signals.some((signal) => signal.includes('test artifacts dominate'))).toBe(true); + }); + + it('returns ok when top results are strong and separated', () => { + const quality = assessSearchQuality('where is order validation implemented', [ + makeResult('src/domain/orders/order-validation.service.ts', 0.78), + 
makeResult('src/domain/orders/order-rules.ts', 0.61), + makeResult('src/domain/orders/order-types.ts', 0.53) + ]); + + expect(quality.status).toBe('ok'); + expect(quality.confidence).toBeGreaterThanOrEqual(0.5); + }); +}); diff --git a/tests/search-ranking.test.ts b/tests/search-ranking.test.ts new file mode 100644 index 0000000..24449e4 --- /dev/null +++ b/tests/search-ranking.test.ts @@ -0,0 +1,106 @@ +import { describe, it, expect, vi } from 'vitest'; +import type { CodeChunk } from '../src/types/index.js'; +import { CodebaseSearcher } from '../src/core/search.js'; + +function createChunk(id: string, filePath: string, content: string): CodeChunk { + return { + id, + content, + filePath, + relativePath: filePath.replace(/^.*?Repos\//, ''), + startLine: 1, + endLine: 40, + language: 'typescript', + framework: 'generic', + componentType: 'service', + layer: 'core', + dependencies: [], + imports: [], + exports: [], + tags: [], + metadata: {} + }; +} + +function setupSemanticOnlySearcher( + results: { chunk: CodeChunk; score: number }[] +): CodebaseSearcher { + const searcher = new CodebaseSearcher('C:/repo') as any; + searcher.initialized = true; + searcher.embeddingProvider = { + embed: vi.fn(async () => [0.1, 0.2]) + }; + searcher.storageProvider = { + search: vi.fn(async () => results), + count: vi.fn(async () => results.length) + }; + searcher.fuseIndex = null; + searcher.patternIntelligence = null; + return searcher as CodebaseSearcher; +} + +describe('CodebaseSearcher query-aware ranking', () => { + it('de-prioritizes spec files for non-testing queries', async () => { + const specChunk = createChunk( + 'spec', + 'C:/repo/src/domain/session/session-manager.spec.ts', + "describe('SessionManager', () => {})" + ); + const implChunk = createChunk( + 'impl', + 'C:/repo/src/domain/session/session-manager.ts', + 'export class SessionManager {}' + ); + + const searcher = setupSemanticOnlySearcher([ + { chunk: specChunk, score: 0.75 }, + { chunk: implChunk, score: 0.68 
} + ]); + + const results = await searcher.search('Refactor session management flow', 2); + expect(results[0].filePath).toContain('session-manager.ts'); + expect(results[0].filePath).not.toContain('.spec.ts'); + }); + + it('keeps spec files prioritized for testing queries', async () => { + const specChunk = createChunk( + 'spec', + 'C:/repo/src/domain/session/session-manager.spec.ts', + "describe('SessionManager', () => {})" + ); + const implChunk = createChunk( + 'impl', + 'C:/repo/src/domain/session/session-manager.ts', + 'export class SessionManager {}' + ); + + const searcher = setupSemanticOnlySearcher([ + { chunk: specChunk, score: 0.75 }, + { chunk: implChunk, score: 0.68 } + ]); + + const results = await searcher.search('Update unit tests for session manager with mocks', 2); + expect(results[0].filePath).toContain('.spec.ts'); + }); + + it('de-prioritizes Windows e2e paths for non-testing queries', async () => { + const e2eChunk = createChunk( + 'e2e', + 'C:\\repo\\apps\\app\\e2e\\src\\tests\\session-setup.ts', + "describe('session setup', () => {})" + ); + const implChunk = createChunk( + 'impl', + 'C:\\repo\\src\\domain\\session\\session-manager.ts', + 'export class SessionManager {}' + ); + + const searcher = setupSemanticOnlySearcher([ + { chunk: e2eChunk, score: 0.75 }, + { chunk: implChunk, score: 0.72 } + ]); + + const results = await searcher.search('session login flow', 2); + expect(results[0].filePath.toLowerCase()).toContain('session-manager.ts'); + }); +}); diff --git a/tests/search-retrieval-strategy.test.ts b/tests/search-retrieval-strategy.test.ts new file mode 100644 index 0000000..065f939 --- /dev/null +++ b/tests/search-retrieval-strategy.test.ts @@ -0,0 +1,121 @@ +import { describe, it, expect, vi } from 'vitest'; +import type { CodeChunk } from '../src/types/index.js'; +import { CodebaseSearcher } from '../src/core/search.js'; + +function createChunk(id: string, filePath: string, content: string): CodeChunk { + return { + id, + content, 
+ filePath, + relativePath: filePath, + startLine: 1, + endLine: 20, + language: 'typescript', + framework: 'generic', + componentType: 'service', + layer: 'core', + dependencies: [], + imports: [], + exports: [], + tags: [], + metadata: {} + }; +} + +describe('CodebaseSearcher retrieval strategy', () => { + it('enforces candidate floor independently from user limit', async () => { + const implChunk = createChunk( + 'impl', + 'src/core/auth-service.ts', + 'export class AuthService {}' + ); + const searcher = new CodebaseSearcher('C:/repo') as any; + + searcher.initialized = true; + searcher.embeddingProvider = {}; + searcher.storageProvider = {}; + searcher.fuseIndex = null; + searcher.patternIntelligence = null; + searcher.semanticSearch = vi.fn(async () => [{ chunk: implChunk, score: 0.7 }]); + searcher.keywordSearch = vi.fn(async () => []); + + await searcher.search('authentication login', 1, undefined, { + enableQueryExpansion: false, + enableLowConfidenceRescue: false, + candidateFloor: 30 + }); + + expect(searcher.semanticSearch).toHaveBeenCalledTimes(1); + expect(searcher.semanticSearch.mock.calls[0][1]).toBe(30); + }); + + it('uses bounded query expansion for intent-heavy queries', async () => { + const implChunk = createChunk( + 'impl', + 'src/core/router-service.ts', + 'export class RouterService {}' + ); + const searcher = new CodebaseSearcher('C:/repo') as any; + + searcher.initialized = true; + searcher.embeddingProvider = {}; + searcher.storageProvider = {}; + searcher.fuseIndex = null; + searcher.patternIntelligence = null; + searcher.semanticSearch = vi.fn(async () => [{ chunk: implChunk, score: 0.65 }]); + searcher.keywordSearch = vi.fn(async () => []); + + await searcher.search('authentication login', 3, undefined, { + enableQueryExpansion: true, + enableLowConfidenceRescue: false + }); + + const semanticQueries = searcher.semanticSearch.mock.calls.map((call: any[]) => call[0]); + expect(semanticQueries[0]).toBe('authentication login'); + 
expect(semanticQueries.length).toBeLessThanOrEqual(2); + }); + + it('runs low-confidence rescue and can replace poor primary ranking', async () => { + const specChunk = createChunk( + 'spec', + 'src/core/auth/auth-callback.component.spec.ts', + "describe('auth', () => {})" + ); + const implChunk = createChunk( + 'impl', + 'src/core/auth/auth-callback.component.ts', + 'export class AuthCallbackComponent {}' + ); + + const searcher = new CodebaseSearcher('C:/repo') as any; + searcher.initialized = true; + searcher.embeddingProvider = {}; + searcher.storageProvider = {}; + searcher.fuseIndex = null; + searcher.patternIntelligence = null; + + searcher.semanticSearch = vi.fn(async (query: string) => { + if (query.includes('router') || query.includes('navigation')) { + return [ + { chunk: implChunk, score: 0.8 }, + { chunk: specChunk, score: 0.2 } + ]; + } + + return [ + { chunk: specChunk, score: 0.5 }, + { chunk: implChunk, score: 0.35 } + ]; + }); + searcher.keywordSearch = vi.fn(async () => []); + + const results = await searcher.search('navigate to page after login redirect', 2, undefined, { + enableQueryExpansion: false, + enableLowConfidenceRescue: true, + profile: 'edit' + }); + + expect(results[0].filePath).toContain('auth-callback.component.ts'); + expect(results[0].filePath).not.toContain('.spec.ts'); + }); +}); From 833e759cd0d1fa26395b0094fc45b24a82001c5f Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Sun, 8 Feb 2026 22:34:57 +0100 Subject: [PATCH 2/3] chore: format indexer for quality checks --- src/core/indexer.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/indexer.ts b/src/core/indexer.ts index eb5289d..50454c5 100644 --- a/src/core/indexer.ts +++ b/src/core/indexer.ts @@ -217,7 +217,9 @@ export class CodebaseIndexer { const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8')); if (Array.isArray(existingChunks)) { stats.totalChunks = existingChunks.length; - const uniqueFiles = new 
Set(existingChunks.map((c: { filePath?: string }) => c.filePath)); + const uniqueFiles = new Set( + existingChunks.map((c: { filePath?: string }) => c.filePath) + ); stats.indexedFiles = uniqueFiles.size; } } catch { From fd5e617ac0abb5fb074465ad38c819cb17d0366f Mon Sep 17 00:00:00 2001 From: PatrickSys Date: Sun, 8 Feb 2026 22:40:01 +0100 Subject: [PATCH 3/3] fix: restore accurate stats for no-op incremental indexing --- src/constants/codebase-context.ts | 1 + src/core/indexer.ts | 72 +++++++++++++++--- tests/incremental-indexing.test.ts | 113 +++++++++++++++++++++-------- 3 files changed, 143 insertions(+), 43 deletions(-) diff --git a/src/constants/codebase-context.ts b/src/constants/codebase-context.ts index e3cf2af..ef92758 100644 --- a/src/constants/codebase-context.ts +++ b/src/constants/codebase-context.ts @@ -6,5 +6,6 @@ export const CODEBASE_CONTEXT_DIRNAME = '.codebase-context' as const; export const MEMORY_FILENAME = 'memory.json' as const; export const INTELLIGENCE_FILENAME = 'intelligence.json' as const; export const KEYWORD_INDEX_FILENAME = 'index.json' as const; +export const INDEXING_STATS_FILENAME = 'indexing-stats.json' as const; export const VECTOR_DB_DIRNAME = 'index' as const; export const MANIFEST_FILENAME = 'manifest.json' as const; diff --git a/src/core/indexer.ts b/src/core/indexer.ts index 50454c5..1016843 100644 --- a/src/core/indexer.ts +++ b/src/core/indexer.ts @@ -30,6 +30,7 @@ import { import { getFileCommitDates } from '../utils/git-dates.js'; import { CODEBASE_CONTEXT_DIRNAME, + INDEXING_STATS_FILENAME, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, MANIFEST_FILENAME, @@ -51,6 +52,13 @@ export interface IndexerOptions { incrementalOnly?: boolean; } +interface PersistedIndexingStats { + indexedFiles: number; + totalChunks: number; + totalFiles: number; + generatedAt: string; +} + export class CodebaseIndexer { private rootPath: string; private config: CodebaseConfig; @@ -181,16 +189,18 @@ export class CodebaseIndexer { // Phase 1b: 
Incremental diff (if incremental mode) const contextDir = path.join(this.rootPath, CODEBASE_CONTEXT_DIRNAME); const manifestPath = path.join(contextDir, MANIFEST_FILENAME); + const indexingStatsPath = path.join(contextDir, INDEXING_STATS_FILENAME); let diff: ManifestDiff | null = null; let currentHashes: Record<string, string> | null = null; + let previousManifest: FileManifest | null = null; if (this.incrementalOnly) { this.updateProgress('scanning', 10); console.error('Computing file hashes for incremental diff...'); currentHashes = await computeFileHashes(files, this.rootPath); - const oldManifest = await readManifest(manifestPath); - diff = diffManifest(oldManifest, currentHashes); + previousManifest = await readManifest(manifestPath); + diff = diffManifest(previousManifest, currentHashes); console.error( `Incremental diff: ${diff.added.length} added, ${diff.changed.length} changed, ` + @@ -211,21 +221,51 @@ export class CodebaseIndexer { stats.duration = Date.now() - startTime; stats.completedAt = new Date(); - // Preserve accurate counts from the existing index (nothing changed, index is intact) + let restoredFromPersistedStats = false; + try { - const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME); - const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8')); - if (Array.isArray(existingChunks)) { - stats.totalChunks = existingChunks.length; - const uniqueFiles = new Set( - existingChunks.map((c: { filePath?: string }) => c.filePath) - ); - stats.indexedFiles = uniqueFiles.size; + const persisted = JSON.parse( + await fs.readFile(indexingStatsPath, 'utf-8') + ) as Partial<PersistedIndexingStats>; + + if ( + typeof persisted.indexedFiles === 'number' && + typeof persisted.totalChunks === 'number' && + typeof persisted.totalFiles === 'number' + ) { + stats.indexedFiles = persisted.indexedFiles; + stats.totalChunks = persisted.totalChunks; + stats.totalFiles = persisted.totalFiles; + restoredFromPersistedStats = true; } } catch { - // Keyword index doesn't
exist yet — keep counts as 0 + // No persisted stats yet — fall back below } + if (!restoredFromPersistedStats) { + if (previousManifest) { + stats.indexedFiles = Object.keys(previousManifest.files).length; + } + + try { + const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME); + const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8')); + if (Array.isArray(existingChunks)) { + stats.totalChunks = existingChunks.length; + if (stats.indexedFiles === 0) { + const uniqueFiles = new Set( + existingChunks.map((c: { filePath?: string }) => c.filePath) + ); + stats.indexedFiles = uniqueFiles.size; + } + } + } catch { + // Keyword index doesn't exist yet — keep best-known counts + } + } + + stats.totalFiles = files.length; + return stats; } } @@ -575,6 +615,14 @@ export class CodebaseIndexer { }; await writeManifest(manifestPath, manifest); + const persistedStats: PersistedIndexingStats = { + indexedFiles: stats.indexedFiles, + totalChunks: stats.totalChunks, + totalFiles: stats.totalFiles, + generatedAt: new Date().toISOString() + }; + await fs.writeFile(indexingStatsPath, JSON.stringify(persistedStats, null, 2)); + // Phase 5: Complete this.updateProgress('complete', 100); diff --git a/tests/incremental-indexing.test.ts b/tests/incremental-indexing.test.ts index 4e706df..ee8f951 100644 --- a/tests/incremental-indexing.test.ts +++ b/tests/incremental-indexing.test.ts @@ -4,7 +4,12 @@ import path from 'path'; import os from 'os'; import { CodebaseIndexer } from '../src/core/indexer.js'; import { readManifest } from '../src/core/manifest.js'; -import { CODEBASE_CONTEXT_DIRNAME, MANIFEST_FILENAME, KEYWORD_INDEX_FILENAME } from '../src/constants/codebase-context.js'; +import { + CODEBASE_CONTEXT_DIRNAME, + MANIFEST_FILENAME, + KEYWORD_INDEX_FILENAME, + INDEXING_STATS_FILENAME +} from '../src/constants/codebase-context.js'; describe('Incremental Indexing', () => { let tempDir: string; @@ -68,35 +73,41 @@ describe('Incremental Indexing', 
() => { it('should preserve indexedFiles and totalChunks in short-circuit (nothing changed)', async () => { // Use files substantial enough to produce chunks - await fs.writeFile(path.join(tempDir, 'service.ts'), [ - 'import { Injectable } from "@angular/core";', - '', - '@Injectable({ providedIn: "root" })', - 'export class UserService {', - ' private users: string[] = [];', - '', - ' getUsers(): string[] {', - ' return this.users;', - ' }', - '', - ' addUser(name: string): void {', - ' this.users.push(name);', - ' }', - '}' - ].join('\n')); - await fs.writeFile(path.join(tempDir, 'utils.ts'), [ - 'export function formatDate(date: Date): string {', - ' return date.toISOString().split("T")[0];', - '}', - '', - 'export function capitalize(str: string): string {', - ' return str.charAt(0).toUpperCase() + str.slice(1);', - '}', - '', - 'export function range(n: number): number[] {', - ' return Array.from({ length: n }, (_, i) => i);', - '}' - ].join('\n')); + await fs.writeFile( + path.join(tempDir, 'service.ts'), + [ + 'import { Injectable } from "@angular/core";', + '', + '@Injectable({ providedIn: "root" })', + 'export class UserService {', + ' private users: string[] = [];', + '', + ' getUsers(): string[] {', + ' return this.users;', + ' }', + '', + ' addUser(name: string): void {', + ' this.users.push(name);', + ' }', + '}' + ].join('\n') + ); + await fs.writeFile( + path.join(tempDir, 'utils.ts'), + [ + 'export function formatDate(date: Date): string {', + ' return date.toISOString().split("T")[0];', + '}', + '', + 'export function capitalize(str: string): string {', + ' return str.charAt(0).toUpperCase() + str.slice(1);', + '}', + '', + 'export function range(n: number): number[] {', + ' return Array.from({ length: n }, (_, i) => i);', + '}' + ].join('\n') + ); // Full index first const indexer1 = new CodebaseIndexer({ @@ -119,6 +130,43 @@ describe('Incremental Indexing', () => { expect(incStats.totalFiles).toBe(fullStats.totalFiles); }); + it('should prefer 
persisted stats over keyword index in no-op incremental runs', async () => { + await fs.writeFile(path.join(tempDir, 'index.ts'), 'export const x = 1;'); + + const fullIndexer = new CodebaseIndexer({ + rootPath: tempDir, + config: { skipEmbedding: true } + }); + await fullIndexer.index(); + + const contextDir = path.join(tempDir, CODEBASE_CONTEXT_DIRNAME); + await fs.writeFile( + path.join(contextDir, INDEXING_STATS_FILENAME), + JSON.stringify( + { + indexedFiles: 77, + totalChunks: 1234, + totalFiles: 88, + generatedAt: new Date().toISOString() + }, + null, + 2 + ) + ); + await fs.writeFile(path.join(contextDir, KEYWORD_INDEX_FILENAME), JSON.stringify([])); + + const incIndexer = new CodebaseIndexer({ + rootPath: tempDir, + config: { skipEmbedding: true }, + incrementalOnly: true + }); + const stats = await incIndexer.index(); + + expect(stats.indexedFiles).toBe(77); + expect(stats.totalChunks).toBe(1234); + expect(stats.totalFiles).toBe(1); + }); + it('should detect changed files in incremental mode', async () => { await fs.writeFile(path.join(tempDir, 'index.ts'), 'export const x = 1;'); @@ -155,7 +203,10 @@ describe('Incremental Indexing', () => { await indexer1.index(); // Add a new file - await fs.writeFile(path.join(tempDir, 'utils.ts'), 'export function add(a: number, b: number) { return a + b; }'); + await fs.writeFile( + path.join(tempDir, 'utils.ts'), + 'export function add(a: number, b: number) { return a + b; }' + ); // Incremental index const indexer2 = new CodebaseIndexer({