Skip to content

Commit 20ad454

Browse files
authored
fix(bench): sample symbols in embedding benchmark to prevent CI timeouts (#927)
* fix(bench): sample symbols in embedding benchmark to prevent CI timeouts
The codebase grew from ~1095 to 7128 symbols, causing every model worker to exceed the 30-min timeout during the search phase. Add deterministic sampling (seeded PRNG, cap 1500 symbols) so runtime stays bounded as the repo grows. Embedding still covers all DB symbols; only the search evaluation is sampled.
* fix(bench): correct stale timing estimates in JSDoc comment (#927)
1 parent 3a6a155 commit 20ad454

2 files changed

Lines changed: 34 additions & 5 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,9 @@ jobs:
200200
201201
embedding-benchmark:
202202
runs-on: ubuntu-latest
203-
# 7 models x 20 min each = 140 min worst-case + ~30 min setup/npm-wait headroom
204-
timeout-minutes: 195
203+
# 7 models x 30 min each = 210 min worst-case; symbols are sampled to 1500 so
204+
# typical runtime is ~23 min/model ≈ 160 min + setup headroom
205+
timeout-minutes: 240
205206
if: >-
206207
github.event_name == 'workflow_dispatch' ||
207208
(github.event.workflow_run.conclusion == 'success' &&

scripts/embedding-benchmark.ts

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ import { resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
1818
import { forkWorker } from './lib/fork-engine.js';
1919

2020
const MODEL_WORKER_KEY = '__BENCH_MODEL__';
21+
/**
22+
* Cap the number of symbols used for search evaluation so CI stays under the per-model timeout.
23+
* At ~1500 symbols on a CPU-only runner, search evaluation takes ~5 min;
24+
* embedding all DB symbols takes ~18 min — ~23 min total, within the 30-min timeout.
25+
*/
26+
const MAX_SYMBOLS = 1500;
2127

2228
const __dirname = path.dirname(fileURLToPath(import.meta.url));
2329
const root = path.resolve(__dirname, '..');
@@ -66,12 +72,34 @@ if (process.env[MODEL_WORKER_KEY]) {
6672
return symbols;
6773
}
6874

75+
/**
 * Return a shuffled copy of `arr`, using a Fisher–Yates pass driven by a
 * seeded mulberry32 PRNG.
 *
 * The same `(arr, seed)` pair always produces the same ordering, so a sample
 * taken from the front of the result is reproducible across runs. The input
 * array is never mutated; only a copy is reordered.
 *
 * @param arr - Items to shuffle. Accepted as read-only because it is only copied.
 * @param seed - PRNG seed; coerced to a 32-bit integer.
 * @returns A new array holding the elements of `arr` in shuffled order.
 */
function seededShuffle<T>(arr: readonly T[], seed: number): T[] {
  const out = arr.slice();
  let s = seed | 0;
  for (let i = out.length - 1; i > 0; i--) {
    // One mulberry32 step: advance the 32-bit state, then scramble it into r in [0, 1).
    s = (s + 0x6d2b79f5) | 0;
    let t = Math.imul(s ^ (s >>> 15), 1 | s);
    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
    const r = ((t ^ (t >>> 14)) >>> 0) / 4294967296;
    // Fisher–Yates: swap slot i with a pseudo-randomly chosen slot j in [0, i].
    const j = Math.floor(r * (i + 1));
    [out[i], out[j]] = [out[j], out[i]];
  }
  return out;
}
92+
6993
// Redirect console.log to stderr so only JSON goes to stdout
7094
const origLog = console.log;
7195
console.log = (...args) => console.error(...args);
7296

73-
const symbols = loadSymbols();
74-
console.error(` [${modelKey}] Loaded ${symbols.length} symbols`);
97+
let symbols = loadSymbols();
98+
if (symbols.length > MAX_SYMBOLS) {
99+
console.error(` [${modelKey}] Sampling ${MAX_SYMBOLS} of ${symbols.length} symbols (deterministic seed=42)`);
100+
symbols = seededShuffle(symbols, 42).slice(0, MAX_SYMBOLS);
101+
}
102+
console.error(` [${modelKey}] Benchmarking ${symbols.length} symbols`);
75103

76104
const embedStart = performance.now();
77105
await buildEmbeddings(root, modelKey, dbPath, { strategy: 'structured' });
@@ -125,7 +153,7 @@ const dbPath = path.join(root, '.codegraph', 'graph.db');
125153

126154
const { MODELS } = await import(srcImport(srcDir, 'domain/search/index.js'));
127155

128-
const TIMEOUT_MS = 1_800_000; // 30 min — CPU-only CI runners need ~20 min per model for 6k+ symbols
156+
const TIMEOUT_MS = 1_800_000; // 30 min — with symbol sampling, embed (~18 min) + search (~5 min) fits comfortably
129157
const hasHfToken = !!process.env.HF_TOKEN;
130158
const modelKeys = Object.keys(MODELS);
131159
const results = {};

0 commit comments

Comments
 (0)