Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Changelog


## [1.3.1] - 2026-01-05

### Fixed
- **Auto-Heal Semantic Search**: Detects LanceDB schema corruption (missing `vector` column), triggers re-indexing, and retries search instead of silently falling back to keyword-only results.

## [1.3.0] - 2026-01-01

### Added
Expand Down
2 changes: 1 addition & 1 deletion internal-docs
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "codebase-context",
"version": "1.3.0",
"version": "1.3.1",
"description": "MCP server that helps AI agents understand your codebase - patterns, libraries, architecture, monorepo support",
"type": "module",
"main": "./dist/lib.js",
Expand Down Expand Up @@ -120,4 +120,4 @@
"sharp"
]
}
}
}
5 changes: 2 additions & 3 deletions src/core/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ export class CodebaseIndexer {
},
embedding: {
provider: 'transformers',
model: 'Xenova/bge-base-en-v1.5',
model: 'Xenova/bge-small-en-v1.5',
batchSize: 100
},
skipEmbedding: false,
Expand Down Expand Up @@ -376,8 +376,7 @@ export class CodebaseIndexer {

if ((i + batchSize) % 100 === 0 || i + batchSize >= chunksToEmbed.length) {
console.error(
`Embedded ${Math.min(i + batchSize, chunksToEmbed.length)}/${
chunksToEmbed.length
`Embedded ${Math.min(i + batchSize, chunksToEmbed.length)}/${chunksToEmbed.length
} chunks`
);
}
Expand Down
12 changes: 9 additions & 3 deletions src/core/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { CodeChunk, SearchResult, SearchFilters } from '../types/index.js';
import { EmbeddingProvider, getEmbeddingProvider } from '../embeddings/index.js';
import { VectorStorageProvider, getStorageProvider } from '../storage/index.js';
import { analyzerRegistry } from './analyzer-registry.js';
import { IndexCorruptedError } from '../errors/index.js';

export interface SearchOptions {
useSemanticSearch?: boolean;
Expand Down Expand Up @@ -62,6 +63,9 @@ export class CodebaseSearcher {

this.initialized = true;
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error; // Propagate to handler for auto-heal
}
console.warn('Partial initialization (keyword search only):', error);
this.initialized = true;
}
Expand Down Expand Up @@ -217,6 +221,9 @@ export class CodebaseSearcher {
}
});
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error; // Propagate to handler for auto-heal
}
console.warn('Semantic search failed:', error);
}
}
Expand Down Expand Up @@ -324,9 +331,8 @@ export class CodebaseSearcher {
const name = componentName || (classMatch ? classMatch[1] : null);

if (name && componentType) {
return `${
componentType.charAt(0).toUpperCase() + componentType.slice(1)
} '${name}' in ${fileName}.`;
return `${componentType.charAt(0).toUpperCase() + componentType.slice(1)
} '${name}' in ${fileName}.`;
} else if (name) {
return `'${name}' defined in ${fileName}.`;
} else if (componentType) {
Expand Down
11 changes: 11 additions & 0 deletions src/errors/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Signals that the LanceDB index is corrupted or its schema does not match
 * what the search code expects.
 *
 * Callers can test for this type with `instanceof` to tell "the index must be
 * rebuilt before semantic search can work" apart from other failures.
 */
export class IndexCorruptedError extends Error {
  /** Fixed error name, so logs and serialized errors identify this type. */
  override name = 'IndexCorruptedError';

  /**
   * @param message Human-readable description of what is wrong with the index.
   */
  constructor(message: string) {
    super(message);
  }
}

62 changes: 59 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ import {
Resource
} from '@modelcontextprotocol/sdk/types.js';
import { CodebaseIndexer } from './core/indexer.js';
import { IndexingStats } from './types/index.js';
import { IndexingStats, SearchResult } from './types/index.js';
import { CodebaseSearcher } from './core/search.js';
import { analyzerRegistry } from './core/analyzer-registry.js';
import { AngularAnalyzer } from './analyzers/angular/index.js';
import { GenericAnalyzer } from './analyzers/generic/index.js';
import { InternalFileGraph } from './utils/usage-tracker.js';
import { IndexCorruptedError } from './errors/index.js';

analyzerRegistry.register(new AngularAnalyzer());
analyzerRegistry.register(new GenericAnalyzer());
Expand Down Expand Up @@ -66,7 +67,7 @@ const indexState: IndexState = {
const server: Server = new Server(
{
name: 'codebase-context',
version: '1.3.0'
version: '1.3.1'
},
{
capabilities: {
Expand Down Expand Up @@ -492,7 +493,62 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
}

const searcher = new CodebaseSearcher(ROOT_PATH);
const results = await searcher.search(query, limit || 5, filters);
let results: SearchResult[];

try {
results = await searcher.search(query, limit || 5, filters);
} catch (error) {
if (error instanceof IndexCorruptedError) {
console.error('[Auto-Heal] Index corrupted. Triggering full re-index...');

await performIndexing();

if (indexState.status === 'ready') {
console.error('[Auto-Heal] Success. Retrying search...');
const freshSearcher = new CodebaseSearcher(ROOT_PATH);
try {
results = await freshSearcher.search(query, limit || 5, filters);
} catch (retryError) {
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
status: 'error',
message: `Auto-heal retry failed: ${
retryError instanceof Error ? retryError.message : String(retryError)
}`
},
null,
2
)
}
]
};
}
} else {
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
status: 'error',
message: `Auto-heal failed: Indexing ended with status '${indexState.status}'`,
error: indexState.error
},
null,
2
)
}
]
};
}
} else {
throw error; // Propagate unexpected errors
}
}

return {
content: [
Expand Down
25 changes: 23 additions & 2 deletions src/storage/lancedb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import { promises as fs } from 'fs';
import { VectorStorageProvider, CodeChunkWithEmbedding, VectorSearchResult } from './types.js';
import { CodeChunk, SearchFilters } from '../types/index.js';
import { IndexCorruptedError } from '../errors/index.js';

export class LanceDBStorageProvider implements VectorStorageProvider {
readonly name = 'lancedb';
Expand Down Expand Up @@ -44,20 +45,31 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
console.error('Stale index detected (missing vector column). Rebuilding...');
await this.db.dropTable('code_chunks');
this.table = null;
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
} else {
console.error('Opened existing LanceDB table');
}
} catch (_schemaError) {
} catch (schemaError) {
if (schemaError instanceof IndexCorruptedError) {
throw schemaError;
}
// If schema check fails, table is likely corrupted - drop and rebuild
console.error('Failed to validate table schema, rebuilding index...');
await this.db.dropTable('code_chunks');
this.table = null;
throw new IndexCorruptedError('LanceDB index corrupted: schema validation failed');
}
} else {
// Table missing entirely - not necessarily an error during initialization
this.table = null;
}

this.initialized = true;
console.error(`LanceDB initialized at: ${storagePath}`);
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error;
}
console.error('Failed to initialize LanceDB:', error);
throw error;
}
Expand Down Expand Up @@ -115,7 +127,8 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
filters?: SearchFilters
): Promise<VectorSearchResult[]> {
if (!this.initialized || !this.table) {
return [];
// If table is missing, throw so auto-heal can fix it
throw new IndexCorruptedError('LanceDB index corrupted: no table available for search');
}

try {
Expand Down Expand Up @@ -170,7 +183,15 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
distance: result._distance || 0
}));
} catch (error) {
if (error instanceof Error && error.message.includes('No vector column')) {
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
}
console.error('Failed to search:', error);
// For other errors, we throw IndexCorruptedError to be safe and trigger auto-heal
// if it looks like a database issue
if (error instanceof Error && (error.message.includes('LanceDB') || error.message.includes('Arrow'))) {
throw new IndexCorruptedError(`LanceDB runtime error: ${error.message}`);
Comment on lines +192 to +193
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logic: Overly broad error catching - any error matching "LanceDB" or "Arrow" triggers auto-heal

This catch-all may cause unnecessary re-indexing for transient network issues, OOM errors, or other non-corruption problems. Consider narrowing to specific error types or adding additional validation before throwing IndexCorruptedError.

Suggested change
if (error instanceof Error && (error.message.includes('LanceDB') || error.message.includes('Arrow'))) {
throw new IndexCorruptedError(`LanceDB runtime error: ${error.message}`);
if (error instanceof Error && error.message.includes('No vector column')) {
throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
}
Prompt To Fix With AI
This is a comment left during a code review.
Path: src/storage/lancedb.ts
Line: 192:193

Comment:
**logic:** Overly broad error catching - any error matching "LanceDB" or "Arrow" triggers auto-heal

This catch-all may cause unnecessary re-indexing for transient network issues, OOM errors, or other non-corruption problems. Consider narrowing to specific error types or adding additional validation before throwing `IndexCorruptedError`.

```suggestion
      if (error instanceof Error && error.message.includes('No vector column')) {
        throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
      }
```

How can I resolve this? If you propose a fix, please make it concise.

}
return [];
}
}
Expand Down
90 changes: 90 additions & 0 deletions tests/lancedb-corruption.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { promises as fs } from 'fs';
import os from 'os';
import path from 'path';
import { IndexCorruptedError } from '../src/errors/index.js';

// Shared handle to the mocked `connect` function. It is created through
// `vi.hoisted` so that it already exists when the `vi.mock` factory below
// (which vitest hoists above the imports) is evaluated.
const lancedb = vi.hoisted(() => ({
connect: vi.fn()
}));

// Replace the real '@lancedb/lancedb' module with a stub that exposes only
// `connect`, letting each test decide what database object it resolves to.
vi.mock('@lancedb/lancedb', () => ({
connect: lancedb.connect
}));

/**
 * Verifies that LanceDBStorageProvider reports index corruption by throwing
 * IndexCorruptedError, both while initializing and while searching.
 */
describe('LanceDBStorageProvider corruption detection', () => {
  let tempDir: string;
  let consoleErrorSpy: ReturnType<typeof vi.spyOn>;

  /**
   * Builds a stub database that reports a single 'code_chunks' table whose
   * `schema()` call behaves as given, plus the `dropTable` spy to assert on.
   */
  const stubDatabase = (schemaImpl: () => Promise<unknown>) => {
    const dropTable = vi.fn(async () => {});
    const db = {
      tableNames: vi.fn(async () => ['code_chunks']),
      openTable: vi.fn(async () => ({
        schema: vi.fn(schemaImpl)
      })),
      dropTable
    };
    return { db, dropTable };
  };

  /** Imports the provider module (the lancedb mock is already in place) and instantiates it. */
  const createProvider = async () => {
    const { LanceDBStorageProvider } = await import('../src/storage/lancedb.js');
    return new LanceDBStorageProvider();
  };

  beforeEach(async () => {
    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'lancedb-test-'));
    lancedb.connect.mockReset();
    // Silence console.error output for the duration of each test.
    consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
  });

  afterEach(async () => {
    consoleErrorSpy.mockRestore();
    await fs.rm(tempDir, { recursive: true, force: true });
  });

  it('throws IndexCorruptedError when vector column missing during initialize()', async () => {
    // The schema lists only an 'id' field, i.e. there is no 'vector' column.
    const { db, dropTable } = stubDatabase(async () => ({ fields: [{ name: 'id' }] }));
    lancedb.connect.mockResolvedValue(db);

    const provider = await createProvider();

    await expect(provider.initialize(tempDir)).rejects.toBeInstanceOf(IndexCorruptedError);
    expect(dropTable).toHaveBeenCalledWith('code_chunks');
  });

  it('throws IndexCorruptedError when schema validation fails during initialize()', async () => {
    // Reading the schema itself fails.
    const { db, dropTable } = stubDatabase(async () => {
      throw new Error('schema error');
    });
    lancedb.connect.mockResolvedValue(db);

    const provider = await createProvider();

    await expect(provider.initialize(tempDir)).rejects.toBeInstanceOf(IndexCorruptedError);
    expect(dropTable).toHaveBeenCalledWith('code_chunks');
  });

  it('throws IndexCorruptedError when vector search fails with "No vector column"', async () => {
    // Cast to `any` so the test can set the provider's internal state directly
    // instead of going through initialize().
    const provider = (await createProvider()) as any;

    // Chainable query stub: limit()/where() return the stub itself and the
    // final toArray() rejects with the message under test.
    const query = {
      limit: vi.fn(() => query),
      where: vi.fn(() => query),
      toArray: vi.fn(async () => {
        throw new Error('Schema Error: No vector column found to create index');
      })
    };

    provider.initialized = true;
    provider.table = {
      vectorSearch: vi.fn(() => query)
    };

    await expect(provider.search([0.1, 0.2], 5)).rejects.toBeInstanceOf(IndexCorruptedError);
  });
});

Loading
Loading