Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ dist/
*.log
.DS_Store
.env
opencode.jsonc
nul
.vscode/
*.swp
*.swo
Expand Down
1 change: 1 addition & 0 deletions src/constants/codebase-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ export const CODEBASE_CONTEXT_DIRNAME = '.codebase-context' as const;
export const MEMORY_FILENAME = 'memory.json' as const;
export const INTELLIGENCE_FILENAME = 'intelligence.json' as const;
/** Keyword index file; the indexer reads it back as a JSON array of chunk records. */
export const KEYWORD_INDEX_FILENAME = 'index.json' as const;
/** File the indexer uses to persist its file/chunk counters between runs. */
export const INDEXING_STATS_FILENAME = 'indexing-stats.json' as const;
export const VECTOR_DB_DIRNAME = 'index' as const;
/** Manifest file the indexer reads and writes for incremental diffs. */
export const MANIFEST_FILENAME = 'manifest.json' as const;
4 changes: 3 additions & 1 deletion src/core/analyzer-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ export class AnalyzerRegistry {
const analyzer = this.findAnalyzer(filePath, content);

if (!analyzer) {
console.warn(`No analyzer found for file: ${filePath}`);
if (process.env.CODEBASE_CONTEXT_DEBUG) {
console.error(`[DEBUG] No analyzer found for file: ${filePath}`);
}
return null;
}

Expand Down
75 changes: 73 additions & 2 deletions src/core/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import {
import { getFileCommitDates } from '../utils/git-dates.js';
import {
CODEBASE_CONTEXT_DIRNAME,
INDEXING_STATS_FILENAME,
INTELLIGENCE_FILENAME,
KEYWORD_INDEX_FILENAME,
MANIFEST_FILENAME,
Expand All @@ -51,6 +52,13 @@ export interface IndexerOptions {
incrementalOnly?: boolean;
}

/**
 * Shape of the JSON snapshot stored in the indexing-stats file.
 *
 * The indexer writes it right after the manifest and reads it back when it
 * returns early, so reported counts come from the last run that wrote a
 * snapshot. On read the three numeric fields are type-checked before use,
 * because the file content is untrusted JSON.
 */
interface PersistedIndexingStats {
/** Copy of `stats.indexedFiles` at the time the snapshot was written. */
indexedFiles: number;
/** Copy of `stats.totalChunks` at the time the snapshot was written. */
totalChunks: number;
/** Copy of `stats.totalFiles` at the time the snapshot was written. */
totalFiles: number;
/** ISO-8601 timestamp (`Date#toISOString`) of when the snapshot was written. */
generatedAt: string;
}

export class CodebaseIndexer {
private rootPath: string;
private config: CodebaseConfig;
Expand Down Expand Up @@ -181,16 +189,18 @@ export class CodebaseIndexer {
// Phase 1b: Incremental diff (if incremental mode)
const contextDir = path.join(this.rootPath, CODEBASE_CONTEXT_DIRNAME);
const manifestPath = path.join(contextDir, MANIFEST_FILENAME);
const indexingStatsPath = path.join(contextDir, INDEXING_STATS_FILENAME);
let diff: ManifestDiff | null = null;
let currentHashes: Record<string, string> | null = null;
let previousManifest: FileManifest | null = null;

if (this.incrementalOnly) {
this.updateProgress('scanning', 10);
console.error('Computing file hashes for incremental diff...');
currentHashes = await computeFileHashes(files, this.rootPath);

const oldManifest = await readManifest(manifestPath);
diff = diffManifest(oldManifest, currentHashes);
previousManifest = await readManifest(manifestPath);
diff = diffManifest(previousManifest, currentHashes);

console.error(
`Incremental diff: ${diff.added.length} added, ${diff.changed.length} changed, ` +
Expand All @@ -210,6 +220,52 @@ export class CodebaseIndexer {
this.updateProgress('complete', 100);
stats.duration = Date.now() - startTime;
stats.completedAt = new Date();

let restoredFromPersistedStats = false;

try {
const persisted = JSON.parse(
await fs.readFile(indexingStatsPath, 'utf-8')
) as Partial<PersistedIndexingStats>;

if (
typeof persisted.indexedFiles === 'number' &&
typeof persisted.totalChunks === 'number' &&
typeof persisted.totalFiles === 'number'
) {
stats.indexedFiles = persisted.indexedFiles;
stats.totalChunks = persisted.totalChunks;
stats.totalFiles = persisted.totalFiles;
restoredFromPersistedStats = true;
}
} catch {
// No persisted stats yet — fall back below
}

if (!restoredFromPersistedStats) {
if (previousManifest) {
stats.indexedFiles = Object.keys(previousManifest.files).length;
}

try {
const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8'));
if (Array.isArray(existingChunks)) {
stats.totalChunks = existingChunks.length;
if (stats.indexedFiles === 0) {
const uniqueFiles = new Set(
existingChunks.map((c: { filePath?: string }) => c.filePath)
);
stats.indexedFiles = uniqueFiles.size;
}
}
} catch {
// Keyword index doesn't exist yet — keep best-known counts
}
}

stats.totalFiles = files.length;

return stats;
}
}
Expand Down Expand Up @@ -559,6 +615,14 @@ export class CodebaseIndexer {
};
await writeManifest(manifestPath, manifest);

const persistedStats: PersistedIndexingStats = {
indexedFiles: stats.indexedFiles,
totalChunks: stats.totalChunks,
totalFiles: stats.totalFiles,
generatedAt: new Date().toISOString()
};
await fs.writeFile(indexingStatsPath, JSON.stringify(persistedStats, null, 2));

// Phase 5: Complete
this.updateProgress('complete', 100);

Expand Down Expand Up @@ -591,6 +655,7 @@ export class CodebaseIndexer {

private async scanFiles(): Promise<string[]> {
const files: string[] = [];
const seen = new Set<string>();

// Read .gitignore if respecting it
let ig: ReturnType<typeof ignore.default> | null = null;
Expand All @@ -617,6 +682,12 @@ export class CodebaseIndexer {
});

for (const file of matches) {
const normalizedFile = file.replace(/\\/g, '/');
if (seen.has(normalizedFile)) {
continue;
}
seen.add(normalizedFile);

const relativePath = path.relative(this.rootPath, file);

// Check gitignore
Expand Down
83 changes: 83 additions & 0 deletions src/core/search-quality.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import type { SearchResult } from '../types/index.js';
import { isTestingRelatedQuery } from '../preflight/query-scope.js';

/**
 * Verdict returned by `assessSearchQuality` describing how far a ranked
 * result list can be trusted.
 */
export interface SearchQualityAssessment {
/** `'low_confidence'` when the heuristics flag the result list, otherwise `'ok'`. */
status: 'ok' | 'low_confidence';
/** Score in the range 0..1, rounded to two decimals; 0 when there are no results. */
confidence: number;
/** Human-readable descriptions of each heuristic that fired; may be empty. */
signals: string[];
/** Suggested follow-up actions; only present when `status` is `'low_confidence'`. */
nextSteps?: string[];
}

/**
 * Reports whether a file path looks like a test artifact.
 *
 * The path is lower-cased and backslashes are converted to forward slashes
 * before matching, so Windows-style and mixed-case paths behave the same.
 * A path counts as a test artifact when it contains `.spec.` or `.test.`
 * anywhere, or has an `e2e` or `__tests__` directory segment.
 *
 * @param filePath - Absolute or relative path, with either separator style.
 * @returns `true` when the path matches one of the test-artifact patterns.
 */
export function isTestArtifactPath(filePath: string): boolean {
  const normalized = filePath.toLowerCase().replace(/\\/g, '/');

  // Anchor with a leading slash so a relative path that *starts* with the
  // directory (e.g. "e2e/login.ts") matches the same way as a nested one.
  const anchored = normalized.startsWith('/') ? normalized : `/${normalized}`;

  return (
    normalized.includes('.spec.') ||
    normalized.includes('.test.') ||
    anchored.includes('/e2e/') ||
    anchored.includes('/__tests__/')
  );
}

/**
 * Judges how trustworthy a ranked result list looks for a query.
 *
 * Only the first three results are inspected. Four heuristics are evaluated:
 * a low best score, a weak average over the inspected results, a small gap
 * between the best and second-best score, and test files dominating the
 * inspected results for a query that is not about testing. Each heuristic
 * that fires adds a signal; all but the first also subtract a fixed penalty
 * from the best score to form the confidence value.
 *
 * @param query - The search query; used only to decide whether test files
 *   are expected in the results.
 * @param results - Ranked results, best match first.
 * @returns An assessment whose status is `'low_confidence'` when there are no
 *   results, when at least two signals fired, or when confidence is below
 *   0.35. `nextSteps` is included only in the low-confidence case.
 */
export function assessSearchQuality(
  query: string,
  results: SearchResult[]
): SearchQualityAssessment {
  // An empty result list is the strongest possible low-confidence case.
  if (results.length === 0) {
    return {
      status: 'low_confidence',
      confidence: 0,
      signals: ['no results returned'],
      nextSteps: [
        'Try a narrower query with one concrete symbol, route, or file hint.',
        'Apply search filters (framework/language/componentType/layer).',
        'Use get_component_usage for dependency or wiring lookups.'
      ]
    };
  }

  const head = results.slice(0, 3);
  const best = results[0].score;
  const runnerUp = results[1]?.score ?? best;

  // Single pass over the inspected results: score total and test-file count.
  let scoreTotal = 0;
  let testHits = 0;
  for (const entry of head) {
    scoreTotal += entry.score;
    if (isTestArtifactPath(entry.filePath)) {
      testHits += 1;
    }
  }

  const mean = scoreTotal / head.length;
  const gap = Math.max(0, best - runnerUp);
  const testShare = testHits / head.length;
  const testingQuery = isTestingRelatedQuery(query);

  // Evaluate each heuristic once; signals and penalties share the same flags.
  const weakBest = best < 0.3;
  const weakMean = mean < 0.32;
  const crowdedTop = head.length > 1 && gap < 0.03;
  const testHeavy = !testingQuery && testShare >= 0.67;

  const signals: string[] = [];
  if (weakBest) {
    signals.push(`low top score (${best.toFixed(2)})`);
  }
  if (weakMean) {
    signals.push(`weak top-${head.length} average (${mean.toFixed(2)})`);
  }
  if (crowdedTop) {
    signals.push(`tight top spread (${gap.toFixed(2)})`);
  }
  if (testHeavy) {
    signals.push(`test artifacts dominate top-${head.length} (${Math.round(testShare * 100)}%)`);
  }

  // Penalties are subtracted one at a time, in this order, so the
  // floating-point result before rounding stays stable.
  let rawConfidence = best;
  if (weakMean) {
    rawConfidence -= 0.08;
  }
  if (crowdedTop) {
    rawConfidence -= 0.05;
  }
  if (testHeavy) {
    rawConfidence -= 0.15;
  }
  const rounded = Number(rawConfidence.toFixed(2));
  const confidence = Math.max(0, Math.min(1, rounded));

  const lowConfidence = signals.length >= 2 || confidence < 0.35;

  const assessment: SearchQualityAssessment = {
    status: lowConfidence ? 'low_confidence' : 'ok',
    confidence,
    signals
  };

  // The key is added only when needed so it is absent (not undefined) otherwise.
  if (lowConfidence) {
    assessment.nextSteps = [
      'Add one or two concrete symbols, routes, or file hints to the query.',
      'Apply filters (framework/language/componentType/layer) to narrow candidates.',
      'Use get_component_usage when the question is about wiring or usages.'
    ];
  }

  return assessment;
}
Loading