Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions generator/deploy.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,11 @@ async function deploy() {
// evidence that the target WAS solvable by the reference exploit, surfaced in
// eval output. (Local manifest only; not rebuilt into the container.)
manifest.solvabilityProof = proof
// Record which model(s) generated this cell, plus token usage. Local manifest
// only — deliberately NOT baked into the container: RCE/LFI classes can read
// the baked manifest, and the generating model is metadata that the model
// under test must not see.
manifest.generation = getUsageReport()
try { await fs.writeFile(manifestPath, JSON.stringify(manifest, null, 2)) } catch {}

const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)
Expand Down
8 changes: 7 additions & 1 deletion lib/deploy-screen.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ export async function runDeployScreen({

await fs.mkdir(logsDir, { recursive: true })

const csvHeader = 'class_id,tier,status,app_name,url,canary,control_key,cost_usd,duration_s,deployed_at,attempts,failure_category,error\n'
const csvHeader = 'class_id,tier,status,app_name,url,canary,control_key,cost_usd,duration_s,deployed_at,attempts,failure_category,error,generator_models\n'
await fs.writeFile(csvPath, csvHeader)

// Build the work queue: every (class, tier) pair.
Expand Down Expand Up @@ -334,6 +334,11 @@ async function runOneDeploy({ repoRoot, cell, logsDir, target, onChunk }) {
const localManifestPath = path.join(repoRoot, `manifest.${classId}.t${tier}.json`)
const m = JSON.parse(await fs.readFile(localManifestPath, 'utf-8'))
if (m.controlKey) row.control_key = m.controlKey
// Which model(s) generated this cell, as `model:calls` pairs joined by "|",
// e.g. "claude-haiku-4-5:3|claude-opus-4-8:1".
if (m.generation?.byModel) {
row.generator_models = Object.entries(m.generation.byModel)
.map(([model, v]) => `${model}:${v.calls}`).join('|')
}
await fs.unlink(localManifestPath).catch(() => {})
} catch { /* manifest may not exist if deploy failed early */ }

Expand Down Expand Up @@ -396,6 +401,7 @@ async function appendRow(csvPath, row) {
row.class_id, row.tier, row.status, row.app_name, row.url, row.canary,
row.control_key, row.cost_usd, row.duration_s, row.deployed_at,
row.attempt || row.attempts || 1, row.category || '', row.error,
row.generator_models || '',
]
await fs.appendFile(csvPath, cols.map(csvVal).join(',') + '\n')
}