Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,16 @@ npm run build
npm test
```

### Benchmarks

Measure server-side processing overhead with mocked Pinecone responses (no live API calls, no API key required):

```bash
npm run benchmark
```

The script prints a table of p50, p95, p99, min, and max latencies (in milliseconds) for each scenario and overwrites [`benchmarks/baseline.json`](benchmarks/baseline.json) with the results. To spot regressions, re-run the command and compare the new numbers against the committed baseline (for example with `git diff benchmarks/baseline.json`).

### Testing the keyword_search tool

1. **Connectivity and keyword search (script):**
Expand Down
325 changes: 325 additions & 0 deletions benchmarks/latency.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,325 @@
#!/usr/bin/env tsx
/**
* Local benchmark harness: mocked Pinecone I/O, measures server-side latency (p50/p95/p99).
*
* Usage: npm run benchmark
*/

import { writeFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { performance } from 'node:perf_hooks';
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { PineconeClient } from '../src/pinecone-client.js';
import { setLogLevel } from '../src/logger.js';
import { setPineconeClient } from '../src/server/client-context.js';
import { invalidateNamespacesCache, getNamespacesWithCache } from '../src/server/namespaces-cache.js';
import { registerGuidedQueryTool } from '../src/server/tools/guided-query-tool.js';
import type { MergedHit, PineconeHit, SearchResult, SearchableIndex } from '../src/types.js';

/** Number of un-timed calls made to each scenario before sampling starts. */
const WARMUP = 10;
/** Default number of timed calls per scenario. */
const ITERATIONS = 200;
/** Length of each synthetic dense/sparse hit list and the top-K passed to the benchmarked calls. */
const TOP_K = 20;

/**
 * Latency summary for one benchmark scenario, as produced by `runBenchmark`.
 * All timing fields are milliseconds rounded to four decimal places.
 */
type BenchmarkResult = {
/** Scenario label shown in the printed table and stored in the JSON payload. */
name: string;
/** Median sample. */
p50: number;
/** 95th-percentile sample. */
p95: number;
/** 99th-percentile sample. */
p99: number;
/** Fastest sample. */
min: number;
/** Slowest sample. */
max: number;
/** Number of timed calls the statistics were computed from (warm-up calls excluded). */
iterations: number;
};

/**
 * Test double: a `PineconeClient` whose `ensureIndexes`, `searchIndex` and
 * `rerankResults` members are typed as plain assignable functions, so the
 * benchmark can overwrite them with in-memory stubs (no network).
 */
type PineconeClientBenchDouble = PineconeClient & {
ensureIndexes: () => Promise<{ denseIndex: SearchableIndex; sparseIndex: SearchableIndex }>;
searchIndex: (
_index: SearchableIndex,
_query: string,
_topK: number,
_namespace?: string,
_metadataFilter?: Record<string, unknown>,
_options?: { fields?: string[] }
) => Promise<PineconeHit[]>;
rerankResults: (_q: string, results: MergedHit[], topN: number) => Promise<SearchResult[]>;
};

/**
 * Produces a deterministic list of fake search hits for the benchmark stubs.
 *
 * @param prefix    Tag embedded in every id, text, document number and URL.
 * @param count     How many hits to generate (none when `count <= 0`).
 * @param scoreBase Score of the first hit; each later hit scores 0.01 lower.
 * @returns Hits in generation order, i.e. descending score.
 */
function syntheticHits(prefix: string, count: number, scoreBase: number): PineconeHit[] {
  // Builds the hit at position `n`; everything is derived from prefix + position.
  const makeHit = (n: number): PineconeHit => ({
    _id: `${prefix}-${n}`,
    _score: scoreBase - n * 0.01,
    fields: {
      chunk_text: `Content ${prefix} ${n} lorem ipsum dolor sit amet.`,
      document_number: `DOC-${prefix}-${n}`,
      title: `Title ${n}`,
      url: `https://example.com/${prefix}/${n}`,
      author: 'bench',
    },
  });

  const generated: PineconeHit[] = [];
  let n = 0;
  while (n < count) {
    generated.push(makeHit(n));
    n += 1;
  }
  return generated;
}

/**
 * Nearest-rank percentile of an already ascending-sorted sample array.
 *
 * The rank is clamped to the valid index range on both sides, so `p <= 0`
 * yields the smallest sample and `p >= 100` yields the largest. (Previously
 * only the lower bound was clamped and `p > 100` silently returned 0.)
 *
 * @param sorted Samples sorted in ascending order; not modified.
 * @param p      Percentile, normally in the range 0..100.
 * @returns The selected sample, or 0 when `sorted` is empty or `p` is NaN.
 */
function percentile(sorted: number[], p: number): number {
  if (sorted.length === 0) return 0;
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  const idx = Math.min(sorted.length - 1, Math.max(0, rank));
  // A NaN `p` propagates to a NaN index; the lookup is then undefined and falls back to 0.
  return sorted[idx] ?? 0;
}

/**
 * Times repeated sequential calls to `fn` and summarises the latencies.
 *
 * `fn` is first awaited `WARMUP` times without timing, then awaited
 * `iterations` times with each call measured via `performance.now()`.
 *
 * @param name       Scenario label copied into the result.
 * @param fn         Async operation under test; awaited once per sample.
 * @param iterations Number of timed calls (defaults to `ITERATIONS`).
 * @returns p50/p95/p99/min/max in milliseconds, rounded to four decimals.
 *          When no samples are taken (`iterations <= 0`) every statistic is 0,
 *          rather than `min`/`max` being left at ±Infinity (which JSON
 *          serialises as `null`).
 * @throws Whatever `fn` rejects with; the run is aborted at that call.
 */
async function runBenchmark(
  name: string,
  fn: () => Promise<void>,
  iterations = ITERATIONS
): Promise<BenchmarkResult> {
  // Un-timed warm-up calls.
  for (let w = 0; w < WARMUP; w++) {
    await fn();
  }

  const samples: number[] = [];
  for (let i = 0; i < iterations; i++) {
    const t0 = performance.now();
    await fn();
    samples.push(performance.now() - t0);
  }

  // Ascending order: required by percentile(), and puts min/max at the two ends.
  samples.sort((a, b) => a - b);
  const round4 = (n: number) => Math.round(n * 10000) / 10000;
  return {
    name,
    p50: round4(percentile(samples, 50)),
    p95: round4(percentile(samples, 95)),
    p99: round4(percentile(samples, 99)),
    min: round4(samples[0] ?? 0),
    max: round4(samples[samples.length - 1] ?? 0),
    iterations,
  };
}

/**
 * Renders benchmark results as a fixed-width, pipe-separated text table.
 *
 * The output has a header row, a dashed separator row, and one row per result,
 * joined with newlines (no trailing newline). Scenario names longer than the
 * first column (28 characters) are truncated; numbers are printed with four
 * decimals. The `iterations` field is not printed.
 *
 * @param rows Results to print, in display order.
 * @returns The formatted table.
 */
function formatTable(rows: BenchmarkResult[]): string {
  const headers = ['Scenario', 'p50 (ms)', 'p95 (ms)', 'p99 (ms)', 'min (ms)', 'max (ms)'];
  const colWidths = [28, 12, 12, 12, 12, 12];
  // Pads each cell to its column width; a cell beyond the known columns is left unpadded.
  const line = (cells: string[]) =>
    cells.map((c, i) => c.padEnd(colWidths[i] ?? 0)).join(' | ');
  const out: string[] = [line(headers), line(colWidths.map((w) => '-'.repeat(w)))];
  for (const r of rows) {
    out.push(
      line([
        r.name.slice(0, colWidths[0] ?? 28),
        r.p50.toFixed(4),
        r.p95.toFixed(4),
        r.p99.toFixed(4),
        r.min.toFixed(4),
        r.max.toFixed(4),
      ])
    );
  }
  return out.join('\n');
}

/**
 * Creates a `PineconeClient` instance whose `ensureIndexes`, `searchIndex` and
 * `rerankResults` members are overwritten with in-memory stubs.
 *
 * - `ensureIndexes` resolves to two placeholder index objects.
 * - `searchIndex` returns the pre-built dense or sparse hit list depending on
 *   which placeholder it receives, and an empty list for anything else.
 * - `rerankResults` keeps the first `topN` inputs and assigns scores that
 *   fall by 0.01 per position, starting at 1.
 *
 * @returns The stubbed client.
 */
function buildQueryBenchClient(): PineconeClientBenchDouble {
  const cannedDense = syntheticHits('dense', TOP_K, 0.95);
  const cannedSparse = syntheticHits('sparse', TOP_K, 0.9);

  // Empty placeholders: the stubs only ever compare them by identity.
  const denseHandle = {} as SearchableIndex;
  const sparseHandle = {} as SearchableIndex;

  const double = new PineconeClient({
    apiKey: 'bench-key',
    indexName: 'bench-index',
    rerankModel: 'bench-rerank',
  }) as PineconeClientBenchDouble;

  double.ensureIndexes = async () => {
    return { denseIndex: denseHandle, sparseIndex: sparseHandle };
  };

  double.searchIndex = async (index) =>
    index === denseHandle ? cannedDense : index === sparseHandle ? cannedSparse : [];

  double.rerankResults = async (_q, results, topN) => {
    const kept = results.slice(0, topN);
    return kept.map((hit, rank) => ({
      id: hit._id,
      content: hit.chunk_text,
      score: 1 - rank * 0.01,
      metadata: hit.metadata,
      reranked: true,
    }));
  };

  return double;
}

/**
 * Obtains the `guided_query` tool handler without starting a real MCP server.
 *
 * A minimal stand-in server that only implements `registerTool` records every
 * handler passed to it; `registerGuidedQueryTool` is run against that stand-in
 * and the handler stored under the name `guided_query` is returned.
 *
 * @returns The captured handler, typed with the parameters the benchmark passes.
 * @throws Error if nothing was registered under `guided_query`.
 */
function captureGuidedQueryHandler(): (params: {
  user_query: string;
  namespace?: string;
  metadata_filter?: Record<string, unknown>;
  top_k: number;
  preferred_tool: 'auto' | 'count' | 'query_fast' | 'query_detailed';
  enrich_urls: boolean;
}) => Promise<unknown> {
  type RawHandler = (params: unknown) => Promise<unknown>;

  const registered = new Map<string, RawHandler>();
  const recordingServer = {
    registerTool(name: string, _config: unknown, handler: RawHandler) {
      registered.set(name, handler);
    },
  } as unknown as McpServer;

  registerGuidedQueryTool(recordingServer);

  const guided = registered.get('guided_query');
  if (!guided) {
    throw new Error('guided_query handler not registered');
  }
  // Narrow the untyped handler to this function's declared return type.
  return guided as ReturnType<typeof captureGuidedQueryHandler>;
}

/**
 * Metadata description (field name -> type name) attached to the single mocked
 * namespace. The keys are the same field names that `syntheticHits` populates.
 */
const benchNamespaceMetadata = {
document_number: 'string',
title: 'string',
url: 'string',
author: 'string',
chunk_text: 'string',
} as const;

/**
 * Builds a plain-object stand-in for `PineconeClient` that answers from
 * in-memory data only.
 *
 * It exposes one namespace (`docs`, 1000 records), ten pre-converted search
 * results shared by `query` and `keywordSearch`, a fixed count of 42, and a
 * fixed sparse index name. Only the members listed here exist on the object;
 * it is cast to `PineconeClient` for the benchmark's purposes.
 *
 * @returns The mock, typed as `PineconeClient`.
 */
function createBenchPineconeMock(): PineconeClient {
  const docsNamespace = {
    namespace: 'docs',
    recordCount: 1000,
    metadata: { ...benchNamespaceMetadata },
  };
  const namespaces = [docsNamespace];

  // Converts a raw synthetic hit into the SearchResult shape returned by the mock.
  const toSearchResult = (hit: PineconeHit): SearchResult => ({
    id: hit._id,
    content: String(hit.fields['chunk_text'] ?? ''),
    score: hit._score,
    metadata: {
      document_number: hit.fields['document_number'],
      title: hit.fields['title'],
      url: hit.fields['url'],
      author: hit.fields['author'],
    },
    reranked: false,
  });
  const cannedResults: SearchResult[] = syntheticHits('mock', 10, 0.9).map((hit) =>
    toSearchResult(hit)
  );

  const mock = {
    query: async () => cannedResults,
    count: async () => ({ count: 42, truncated: false }),
    listNamespacesWithMetadata: async () => namespaces,
    listNamespacesFromKeywordIndex: async () =>
      namespaces.map(({ namespace, recordCount }) => ({ namespace, recordCount })),
    getSparseIndexName: () => 'bench-index-sparse',
    keywordSearch: async () => cannedResults,
  };
  return mock as unknown as PineconeClient;
}

/**
 * Runs every benchmark scenario in sequence, prints the results table, and
 * writes the results to `baseline.json` next to this script.
 *
 * Scenario order:
 *  1. `query_no_rerank` / `query_with_rerank` — `query()` on the stubbed client.
 *  2. `guided_query_end_to_end` — the captured `guided_query` handler, after
 *     the plain-object mock is installed and the namespaces cache is primed.
 *  3. `list_namespaces_cache_miss` — cache invalidated before every lookup.
 *  4. `list_namespaces_cache_hit` — lookups with no invalidation in between.
 */
async function main(): Promise<void> {
  setLogLevel('ERROR');
  const results: BenchmarkResult[] = [];

  // Runs one scenario and appends its summary to `results`.
  const record = async (scenario: string, body: () => Promise<void>): Promise<void> => {
    results.push(await runBenchmark(scenario, body));
  };

  const queryClient = buildQueryBenchClient();
  const rerankScenarios = [
    ['query_no_rerank', false],
    ['query_with_rerank', true],
  ] as const;
  for (const [scenario, useReranking] of rerankScenarios) {
    await record(scenario, async () => {
      await queryClient.query({
        query: 'benchmark hybrid query text',
        namespace: 'docs',
        topK: TOP_K,
        useReranking,
      });
    });
  }

  // Install the mock and prime the namespaces cache before the remaining scenarios.
  setPineconeClient(createBenchPineconeMock());
  invalidateNamespacesCache();
  await getNamespacesWithCache();

  const guidedHandler = captureGuidedQueryHandler();
  const guidedParams = {
    user_query: 'list papers about machine learning',
    top_k: TOP_K,
    preferred_tool: 'query_fast' as const,
    enrich_urls: false,
  };
  await record('guided_query_end_to_end', async () => {
    await guidedHandler(guidedParams);
  });

  await record('list_namespaces_cache_miss', async () => {
    invalidateNamespacesCache();
    await getNamespacesWithCache();
  });

  await record('list_namespaces_cache_hit', async () => {
    await getNamespacesWithCache();
  });

  console.log(formatTable(results));
  console.log('');

  const payload = {
    generated_at: new Date().toISOString(),
    node: process.version,
    warmup_iterations: WARMUP,
    measured_iterations: ITERATIONS,
    results,
  };

  // Resolve the output path relative to this module, not the working directory.
  const scriptDir = dirname(fileURLToPath(import.meta.url));
  const baselinePath = join(scriptDir, 'baseline.json');
  writeFileSync(baselinePath, `${JSON.stringify(payload, null, 2)}\n`, 'utf8');
  console.log(`Wrote ${baselinePath}`);
}

// Script entry point: any failure is printed and turned into exit code 1.
main().catch((failure: unknown) => {
  console.error(failure);
  process.exit(1);
});
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"smoke": "npm run build && node dist/index.js --help",
"test": "vitest run",
"test:watch": "vitest",
"benchmark": "tsx benchmarks/latency.ts",
"test:search": "tsx scripts/test-search.ts",
"test:mcp": "node test-mcp-server.js",
"lint": "eslint src/",
Expand Down
Loading