Skip to content

Commit 5de0ed6

Browse files
committed
Code parser audit Phase 6: fix embedder test, add search quality e2e
- Fix embedder.test.ts: match the actual loadModel(ModelConfig, EmbeddingConfig, modelsDir) signature instead of the broken single-object call
- Add search-quality.test.ts: e2e test with real embeddings — 6 symbols, 6 queries, validates top-1 relevance (run with npx tsx, excluded from Jest)
- Exclude search-quality.test.ts from Jest (requires real model download)
1 parent 13fecea commit 5de0ed6

File tree

3 files changed

+138
-8
lines changed

3 files changed

+138
-8
lines changed

jest.config.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ const config: Config = {
44
preset: 'ts-jest',
55
testEnvironment: 'node',
66
testMatch: ['<rootDir>/src/tests/**/*.test.ts'],
7-
testPathIgnorePatterns: ['/node_modules/', '/dist/', 'embedder\\.test\\.ts$'],
7+
testPathIgnorePatterns: ['/node_modules/', '/dist/', 'embedder\\.test\\.ts$', 'search-quality\\.test\\.ts$'],
88
transform: {
99
'^.+\\.tsx?$': 'ts-jest',
1010
},

src/tests/embedder.test.ts

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
import { loadModel, embed, embedQuery, cosineSimilarity } from '@/lib/embedder';
2+
import type { ModelConfig, EmbeddingConfig } from '@/lib/multi-config';
3+
4+
const MODEL: ModelConfig = {
5+
name: 'Xenova/bge-m3',
6+
pooling: 'cls',
7+
normalize: true,
8+
queryPrefix: '',
9+
documentPrefix: '',
10+
};
11+
12+
const EMBEDDING: EmbeddingConfig = {
13+
batchSize: 32,
14+
maxChars: 4000,
15+
cacheSize: 10_000,
16+
};
217

318
describe('embedder', () => {
419
beforeAll(async () => {
5-
await loadModel({
6-
model: 'Xenova/bge-m3',
7-
pooling: 'cls',
8-
normalize: true,
9-
queryPrefix: '',
10-
documentPrefix: '',
11-
}, './models', 4000);
20+
await loadModel(MODEL, EMBEDDING, './models');
1221
}, 60_000);
1322

1423
it.each([

src/tests/search-quality.test.ts

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/**
2+
* E2e search quality tests — uses real embedding model.
3+
* Excluded from Jest (slow). Run with: npx tsx src/tests/search-quality.test.ts
4+
*
5+
 * Tests that search_code returns relevant results (docs search is not covered here yet)
6+
* with real embeddings, not fake [0,0,...] vectors.
7+
*/
8+
import { loadModel, embed, embedBatch, embedQuery, cosineSimilarity } from '@/lib/embedder';
9+
import { createCodeGraph } from '@/graphs/code-types';
10+
import type { CodeNodeAttributes } from '@/graphs/code-types';
11+
import { updateCodeFile } from '@/graphs/code';
12+
import { searchCode } from '@/lib/search/code';
13+
import { BM25Index } from '@/lib/search/bm25';
14+
import type { ModelConfig, EmbeddingConfig } from '@/lib/multi-config';
15+
16+
/**
 * Model settings handed to `loadModel` as its first argument.
 * Both prefixes are left empty, so query and document text are embedded as-is
 * (presumably — confirm against the embedder's prefix handling).
 */
const MODEL: ModelConfig = {
  name: 'Xenova/bge-m3',
  pooling: 'cls',
  normalize: true,
  queryPrefix: '',
  documentPrefix: '',
};

/**
 * Embedding settings handed to `loadModel` as its second argument.
 * NOTE(review): the exact meaning and units of each field are defined by
 * `EmbeddingConfig` in '@/lib/multi-config', which is not visible here.
 */
const EMBEDDING: EmbeddingConfig = {
  batchSize: 32,
  maxChars: 4000,
  cacheSize: 10_000,
};
29+
30+
// ---------------------------------------------------------------------------
31+
// Helpers
32+
// ---------------------------------------------------------------------------
33+
34+
/**
 * Build the `{ id, attrs }` pair for one test symbol.
 *
 * The id is `<fileId>::<name>`. Position and bookkeeping fields are fixed
 * placeholders (lines 1-10, exported, empty file embedding, mtime 1000);
 * only the textual fields and the symbol embedding come from the caller.
 *
 * @param fileId - File the symbol belongs to; also the id prefix.
 * @param name - Symbol name; also the id suffix.
 * @param kind - Symbol kind stored on the node (e.g. 'function').
 * @param signature - Symbol signature text.
 * @param docComment - Doc comment text.
 * @param body - Symbol body text.
 * @param embedding - Embedding vector stored on the node.
 * @returns The node id together with its attributes.
 */
function makeNode(fileId: string, name: string, kind: string, signature: string, docComment: string, body: string, embedding: number[]) {
  // The cast is kept because the literal is not checked against the full
  // CodeNodeAttributes shape here (e.g. `kind` arrives as a plain string).
  const attrs = {
    kind,
    fileId,
    name,
    signature,
    docComment,
    body,
    startLine: 1,
    endLine: 10,
    isExported: true,
    embedding,
    fileEmbedding: [],
    mtime: 1000,
  } as CodeNodeAttributes;

  return { id: `${fileId}::${name}`, attrs };
}
44+
45+
// ---------------------------------------------------------------------------
46+
// Tests
47+
// ---------------------------------------------------------------------------
48+
49+
/**
 * End-to-end search quality check using the real embedding model.
 *
 * Steps:
 *  1. Load the model from `./models`.
 *  2. Embed six hand-written symbols (signature as title, doc comment as content).
 *  3. Insert each symbol into the code graph under its file node and into a BM25 index.
 *  4. Run six queries through `searchCode` and compare the top-ranked result's
 *     name with the expected symbol.
 *
 * Prints one line per query plus a summary, and exits the process with code 1
 * when at least one query does not rank the expected symbol first.
 *
 * @throws Error if no embedding was returned for one of the symbols.
 */
async function run(): Promise<void> {
  console.log('Loading model...');
  await loadModel(MODEL, EMBEDDING, './models');
  console.log('Model loaded.\n');

  const graph = createCodeGraph();
  const bm25 = new BM25Index<CodeNodeAttributes>((a) => `${a.name} ${a.signature} ${a.docComment} ${a.body}`);

  // Build test symbols with real embeddings
  const symbols = [
    { file: 'auth.ts', name: 'hashPassword', sig: 'async function hashPassword(password: string): Promise<string>', doc: '/** Hash a password using scrypt. */', body: 'return scrypt(password, salt, 64)' },
    { file: 'auth.ts', name: 'verifyToken', sig: 'function verifyToken(token: string): Claims', doc: '/** Verify a JWT token and return claims. */', body: 'return jwt.verify(token, secret)' },
    { file: 'db.ts', name: 'connect', sig: 'async function connect(url: string): Promise<Connection>', doc: '/** Connect to PostgreSQL database. */', body: 'return new Pool({ connectionString: url })' },
    { file: 'db.ts', name: 'migrate', sig: 'async function migrate(dir: string): Promise<void>', doc: '/** Run database migration scripts from directory. */', body: 'for (const file of files) await exec(file)' },
    { file: 'search.ts', name: 'searchUsers', sig: 'function searchUsers(query: string): User[]', doc: '/** Search users by name or email. */', body: 'return users.filter(u => u.name.includes(query))' },
    { file: 'config.ts', name: 'loadConfig', sig: 'function loadConfig(path: string): Config', doc: '/** Load YAML configuration file. */', body: 'return yaml.parse(fs.readFileSync(path))' },
  ];

  console.log('Embedding symbols...');
  const inputs = symbols.map(s => ({ title: s.sig, content: s.doc }));
  const embeddings = await embedBatch(inputs);

  for (let i = 0; i < symbols.length; i++) {
    const s = symbols[i];
    const embedding = embeddings[i];
    if (embedding === undefined) {
      // Fail loudly instead of indexing a symbol without a vector.
      throw new Error(`Missing embedding for symbol ${s.name}`);
    }

    const node = makeNode(s.file, s.name, 'function', s.sig, s.doc, s.body, embedding);
    graph.addNode(node.id, node.attrs);

    // auth.ts and db.ts each hold two symbols, so the file node must only be
    // created the first time it is seen. NOTE(review): assumes the code graph
    // is a graphology graph, where addNode throws on an existing key.
    if (!graph.hasNode(s.file)) {
      graph.addNode(s.file, { kind: 'file', fileId: s.file, name: s.file, signature: s.file, docComment: '', body: '', startLine: 1, endLine: 1, isExported: false, embedding: [], fileEmbedding: [], mtime: 1000 } as CodeNodeAttributes);
    }
    if (!graph.hasEdge(s.file, node.id)) {
      graph.addEdgeWithKey(`${s.file}${node.id}`, s.file, node.id, { kind: 'contains' });
    }
    bm25.addDocument(node.id, node.attrs);
  }

  // Test cases: query → expected top result name
  const tests = [
    { query: 'password hashing', expected: 'hashPassword' },
    { query: 'JWT token verification', expected: 'verifyToken' },
    { query: 'database connection', expected: 'connect' },
    { query: 'run migrations', expected: 'migrate' },
    { query: 'find users', expected: 'searchUsers' },
    { query: 'load yaml config', expected: 'loadConfig' },
  ];

  let passed = 0;
  let failed = 0;

  for (const t of tests) {
    const queryEmb = await embedQuery(t.query);
    const results = searchCode(graph, queryEmb, {
      topK: 3,
      bfsDepth: 0,
      minScore: 0.1,
      queryText: t.query,
      bm25Index: bm25,
    });

    // Only the top-ranked hit counts; an empty result list is reported as "(none)".
    const topName = results[0]?.name ?? '(none)';
    const ok = topName === t.expected;
    const icon = ok ? '✓' : '✗';
    console.log(` ${icon} "${t.query}" → ${topName} (expected ${t.expected}, score=${results[0]?.score.toFixed(3) ?? 'n/a'})`);

    if (ok) passed++;
    else failed++;
  }

  console.log(`\n${passed} passed, ${failed} failed out of ${tests.length}`);
  if (failed > 0) process.exit(1);
}
117+
118+
// Script entry point: any rejection from run() is logged and turned into a
// non-zero exit code so callers can detect the failure.
run().catch((error) => {
  console.error('Fatal:', error);
  process.exit(1);
});

0 commit comments

Comments
 (0)