diff --git a/apps/web/src/content/docs/docs/evaluation/eval-cases.mdx b/apps/web/src/content/docs/docs/evaluation/eval-cases.mdx index 5ef6b832c..25e7eec21 100644 --- a/apps/web/src/content/docs/docs/evaluation/eval-cases.mdx +++ b/apps/web/src/content/docs/docs/evaluation/eval-cases.mdx @@ -158,7 +158,7 @@ tests: ``` The `metadata` field is included in the stdin JSON passed to lifecycle commands as `case_metadata`. -Operational checkout state belongs under `workspace.repos[].checkout.base_commit`; `metadata.base_commit` is informational only. `workspace.docker.base_commit` is retained as a deprecated compatibility bridge for legacy Docker-backed evals. +Operational checkout state belongs under `workspace.repos[].checkout.base_commit`; `metadata.base_commit` is informational only. ## Per-Test Assertions diff --git a/apps/web/src/content/docs/docs/tools/import.mdx b/apps/web/src/content/docs/docs/tools/import.mdx index 4b3d4e153..1948ba735 100644 --- a/apps/web/src/content/docs/docs/tools/import.mdx +++ b/apps/web/src/content/docs/docs/tools/import.mdx @@ -163,7 +163,7 @@ uv run scripts/import-huggingface.py \ Each instance becomes an EVAL.yaml with: - `input` — the problem statement - `workspace.docker.image` — the pre-built SWE-bench Docker image (`ghcr.io/epoch-research/swe-bench.eval.x86_64.:latest`) -- `workspace.docker.base_commit` — the commit to reset to before the agent runs +- `workspace.repos[].checkout.base_commit` — the commit to reset to before the agent runs - `assertions` — `code-grader` tasks that run `FAIL_TO_PASS` and `PASS_TO_PASS` pytest suites inside the container Run an imported SWE-bench eval against any coding agent target: diff --git a/examples/features/docker-workspace/README.md b/examples/features/docker-workspace/README.md index 7e27007bd..076a6560b 100644 --- a/examples/features/docker-workspace/README.md +++ b/examples/features/docker-workspace/README.md @@ -37,7 +37,19 @@ workspace: cpus: 2 # optional Docker CPU limit ``` -For evals that need a repo pinned to a dataset snapshot, prefer `workspace.repos[].checkout.base_commit`. `workspace.docker.base_commit` still works as a compatibility bridge for existing Docker-backed SWE-bench configs, but new configs should keep checkout state in the repo model rather than in the Docker block. +For evals that need a repo pinned to a dataset snapshot, use `workspace.repos[].checkout.base_commit`: + +```yaml +workspace: + docker: + image: swebench/sweb.eval.x86_64.django__django-15180 + repos: + - path: /testbed + checkout: + base_commit: abc123def +``` + +Repos defined without `source` are assumed to already exist inside the container (e.g., SWE-bench prebuilt images). ## Running diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index 54032ed58..cbc8ef3e9 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -527,8 +527,9 @@ export async function runEvaluation( for (const ec of filteredEvalCases) { if (ec.workspace?.repos) { for (const repo of ec.workspace.repos) { - // Deduplicate by repo path + source path - const key = `${repo.path}::${repo.source.type === 'local' ? repo.source.path : ''}`; + // Deduplicate by repo path + source path (skip source-less Docker repos) + if (!repo.source) continue; + const key = `${repo.path ?? ''}::${repo.source.type === 'local' ? repo.source.path : ''}`; if (!allRepos.has(key)) { allRepos.set(key, repo); } @@ -543,7 +544,7 @@ export async function runEvaluation( // Store invalid repo paths so affected tests can be failed with execution_error const invalidLocalRepoPaths = new Set(localPathErrors.map((e) => e.repoPath)); // If suite-level repos have invalid paths, fail the entire run early - if (suiteWorkspace?.repos?.some((r) => invalidLocalRepoPaths.has(r.path))) { + if (suiteWorkspace?.repos?.some((r) => r.path && invalidLocalRepoPaths.has(r.path))) { throw new Error(message); } } @@ -735,6 +736,7 @@ export async function runEvaluation( if (needsPerRepoCheck) { // Static workspace with existing content: materialize only missing repos for (const repo of suiteWorkspace.repos) { + if (!repo.path || !repo.source) continue; const targetDir = path.join(sharedWorkspacePath, repo.path); if (existsSync(targetDir)) { setupLog(`reusing existing repo at: ${targetDir}`); diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts index 7e0214095..18144cb11 100644 --- a/packages/core/src/evaluation/types.ts +++ b/packages/core/src/evaluation/types.ts @@ -245,8 +245,10 @@ export type RepoClone = { }; export type RepoConfig = { - readonly path: string; - readonly source: RepoSource; + /** Target path inside the workspace. Optional for Docker repos targeting the container's working directory. */ + readonly path?: string; + /** Clone source. Optional for Docker prebuilt images where repos exist inside the container. */ + readonly source?: RepoSource; readonly checkout?: RepoCheckout; readonly clone?: RepoClone; }; @@ -292,8 +294,6 @@ export type DockerWorkspaceConfig = { readonly memory?: string; /** CPU limit (e.g. 2, 0.5) */ readonly cpus?: number; - /** @deprecated Prefer workspace.repos[].checkout.base_commit as the checkout source of truth */ - readonly base_commit?: string; }; export type WorkspaceConfig = { diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts index f45ec40a3..609f04544 100644 --- a/packages/core/src/evaluation/validation/eval-file.schema.ts +++ b/packages/core/src/evaluation/validation/eval-file.schema.ts @@ -283,8 +283,8 @@ const RepoCloneSchema = z.object({ }); const RepoSchema = z.object({ - path: z.string(), - source: RepoSourceSchema, + path: z.string().optional(), + source: RepoSourceSchema.optional(), checkout: RepoCheckoutSchema.optional(), clone: RepoCloneSchema.optional(), }); @@ -311,7 +311,6 @@ const DockerWorkspaceSchema = z.object({ timeout: z.number().int().min(1).optional(), memory: z.string().optional(), cpus: z.number().min(0.1).optional(), - base_commit: z.string().min(1).optional(), }); const WorkspaceSchema = z diff --git a/packages/core/src/evaluation/validation/eval-validator.ts b/packages/core/src/evaluation/validation/eval-validator.ts index 6f7f1bbda..4506acc50 100644 --- a/packages/core/src/evaluation/validation/eval-validator.ts +++ b/packages/core/src/evaluation/validation/eval-validator.ts @@ -402,6 +402,8 @@ function validateWorkspaceRepoConfig( const afterEachHook = isObject(hooks) ? hooks.after_each : undefined; const isolation = workspace.isolation; + const docker = workspace.docker; + // Depth vs ancestor warning if (Array.isArray(repos)) { for (const repo of repos) { @@ -410,6 +412,18 @@ function validateWorkspaceRepoConfig( const checkout = repo.checkout; const clone = repo.clone; + // Source-less repos are only valid with Docker (repo exists inside container) + if (!isObject(source) && !isObject(docker)) { + errors.push({ + severity: 'error', + filePath, + location: `workspace.repos[path=${repo.path ?? '(none)'}]`, + message: + 'repos[].source is required for non-Docker workspaces. ' + + 'Source-less repos are only valid when workspace.docker is configured (repo exists inside the container).', + }); + } + if (isObject(source) && isObject(checkout)) { const sourceType = source.type; const resolve = checkout.resolve; @@ -417,7 +431,7 @@ function validateWorkspaceRepoConfig( errors.push({ severity: 'warning', filePath, - location: `workspace.repos[path=${repo.path}]`, + location: `workspace.repos[path=${repo.path ?? '(none)'}]`, message: 'checkout.resolve has no effect for a local source. ' + 'Use source.type to choose where the repo comes from; keep checkout.ref, checkout.base_commit, or checkout.ancestor only when pinning a local source.', @@ -432,7 +446,7 @@ function validateWorkspaceRepoConfig( errors.push({ severity: 'warning', filePath, - location: `workspace.repos[path=${repo.path}]`, + location: `workspace.repos[path=${repo.path ?? '(none)'}]`, message: `clone.depth (${depth}) may be insufficient for checkout.ancestor (${ancestor}). ` + `Recommend depth >= ${ancestor + 1}.`, @@ -454,17 +468,6 @@ function validateWorkspaceRepoConfig( } } - const docker = workspace.docker; - if (isObject(docker) && typeof docker.base_commit === 'string') { - errors.push({ - severity: 'warning', - filePath, - location: 'workspace.docker.base_commit', - message: - 'workspace.docker.base_commit is deprecated. Prefer workspace.repos[].checkout.base_commit so checkout state remains backend-agnostic.', - }); - } - // after_each reset with per_test isolation warning if (isObject(afterEachHook) && afterEachHook.reset && isolation === 'per_test') { errors.push({ diff --git a/packages/core/src/evaluation/workspace/deps-scanner.ts b/packages/core/src/evaluation/workspace/deps-scanner.ts index f00991243..0378273d4 100644 --- a/packages/core/src/evaluation/workspace/deps-scanner.ts +++ b/packages/core/src/evaluation/workspace/deps-scanner.ts @@ -74,7 +74,7 @@ export async function scanRepoDeps(evalFilePaths: readonly string[]): Promise { - const checkoutTargets = - repoCheckouts && repoCheckouts.length > 0 - ? repoCheckouts - : this.config.base_commit - ? [{ ref: this.config.base_commit }] - : []; - - if (checkoutTargets.length === 0) { + if (!repoCheckouts || repoCheckouts.length === 0) { return; } - for (const target of checkoutTargets) { + for (const target of repoCheckouts) { const resetResult = await this.execInContainer({ containerId, command: buildGitCommand(target, ['reset', '--hard', target.ref]), diff --git a/packages/core/src/evaluation/workspace/pool-manager.ts b/packages/core/src/evaluation/workspace/pool-manager.ts index f1e9913fd..26bdea835 100644 --- a/packages/core/src/evaluation/workspace/pool-manager.ts +++ b/packages/core/src/evaluation/workspace/pool-manager.ts @@ -67,16 +67,20 @@ interface PoolMetadata { * Git URLs are lowercased with .git suffix stripped; local paths are kept as-is. */ function normalizeRepoForFingerprint(repo: RepoConfig): Record { - const source = - repo.source.type === 'git' - ? { type: 'git', url: repo.source.url.toLowerCase().replace(/\.git$/, '') } - : { type: 'local', path: repo.source.path }; - - const result: Record = { - path: repo.path, - source, - ref: getRepoCheckoutRef(repo.checkout), - }; + const result: Record = {}; + + if (repo.path) { + result.path = repo.path; + } + + if (repo.source) { + result.source = + repo.source.type === 'git' + ? { type: 'git', url: repo.source.url.toLowerCase().replace(/\.git$/, '') } + : { type: 'local', path: repo.source.path }; + } + + result.ref = getRepoCheckoutRef(repo.checkout); if (repo.clone?.depth !== undefined) { result.depth = repo.clone.depth; @@ -99,7 +103,9 @@ function normalizeRepoForFingerprint(repo: RepoConfig): Record */ export function computeWorkspaceFingerprint(repos: readonly RepoConfig[]): string { const canonical = { - repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint), + repos: [...repos] + .sort((a, b) => (a.path ?? '').localeCompare(b.path ?? '')) + .map(normalizeRepoForFingerprint), }; return createHash('sha256').update(JSON.stringify(canonical)).digest('hex'); @@ -364,8 +370,9 @@ export class WorkspacePoolManager { repos: readonly RepoConfig[], poolReset: 'none' | 'fast' | 'strict' = 'fast', ): Promise { - // Reset each repo + // Reset each repo (skip source-less repos — they live inside Docker only) for (const repo of repos) { + if (!repo.path || !repo.source) continue; const repoDir = path.join(slotPath, repo.path); if (!existsSync(repoDir)) { continue; @@ -398,12 +405,14 @@ export class WorkspacePoolManager { // Re-copy template files, skipping repo directories if (templatePath) { const repoDirNames = new Set( - repos.map((r) => { - // Get the top-level directory name from the repo path - // e.g., './my-repo' -> 'my-repo', 'repos/foo' -> 'repos' - const normalized = r.path.replace(/^\.\//, ''); - return normalized.split('/')[0]; - }), + repos + .filter((r) => r.path) + .map((r) => { + // Get the top-level directory name from the repo path + // e.g., './my-repo' -> 'my-repo', 'repos/foo' -> 'repos' + const normalized = (r.path ?? '').replace(/^\.\//, ''); + return normalized.split('/')[0]; + }), ); await copyDirectoryRecursive(templatePath, slotPath, repoDirNames); } diff --git a/packages/core/src/evaluation/workspace/repo-config-parser.ts b/packages/core/src/evaluation/workspace/repo-config-parser.ts index cd7e62820..2884e5cce 100644 --- a/packages/core/src/evaluation/workspace/repo-config-parser.ts +++ b/packages/core/src/evaluation/workspace/repo-config-parser.ts @@ -56,12 +56,13 @@ export function parseRepoConfig(raw: unknown): RepoConfig | undefined { const obj = raw as Record; const repoPath = typeof obj.path === 'string' ? obj.path : undefined; const source = parseRepoSource(obj.source); - if (!repoPath || !source) return undefined; const checkout = parseRepoCheckout(obj.checkout); const clone = parseRepoClone(obj.clone); + // At least one meaningful field must be present + if (!repoPath && !source && !checkout && !clone) return undefined; return { - path: repoPath, - source, + ...(repoPath !== undefined && { path: repoPath }), + ...(source !== undefined && { source }), ...(checkout !== undefined && { checkout }), ...(clone !== undefined && { clone }), }; diff --git a/packages/core/src/evaluation/workspace/repo-manager.ts b/packages/core/src/evaluation/workspace/repo-manager.ts index 0b67fea28..66006f579 100644 --- a/packages/core/src/evaluation/workspace/repo-manager.ts +++ b/packages/core/src/evaluation/workspace/repo-manager.ts @@ -63,18 +63,18 @@ export class RepoManager { static validateLocalPaths(repos: readonly RepoConfig[]): readonly LocalPathValidationError[] { const errors: LocalPathValidationError[] = []; for (const repo of repos) { - if (repo.source.type !== 'local') continue; + if (!repo.source || repo.source.type !== 'local') continue; const sourcePath = repo.source.path; if (!sourcePath || sourcePath.trim() === '') { errors.push({ - repoPath: repo.path, + repoPath: repo.path ?? '(none)', resolvedSourcePath: sourcePath ?? '', reason: 'empty_path', }); } else if (!existsSync(sourcePath)) { errors.push({ - repoPath: repo.path, + repoPath: repo.path ?? '(none)', resolvedSourcePath: sourcePath, reason: 'not_found', }); @@ -124,6 +124,12 @@ export class RepoManager { * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout. */ async materialize(repo: RepoConfig, workspacePath: string): Promise { + if (!repo.source || !repo.path) { + if (this.verbose) { + console.log(`[repo] materialize skip path=${repo.path ?? '(none)'} (no source or path)`); + } + return; + } const targetDir = path.join(workspacePath, repo.path); const sourceUrl = getSourceUrl(repo.source); const startedAt = Date.now(); @@ -225,12 +231,15 @@ export class RepoManager { } } - /** Materialize all repos into the workspace. */ + /** Materialize all repos into the workspace. Skips repos without source (Docker-only repos). */ async materializeAll(repos: readonly RepoConfig[], workspacePath: string): Promise { + const materializableRepos = repos.filter((r) => r.source); if (this.verbose) { - console.log(`[repo] materializeAll count=${repos.length} workspace=${workspacePath}`); + console.log( + `[repo] materializeAll count=${materializableRepos.length} (${repos.length - materializableRepos.length} skipped, no source) workspace=${workspacePath}`, + ); } - for (const repo of repos) { + for (const repo of materializableRepos) { await this.materialize(repo, workspacePath); } if (this.verbose) { @@ -238,7 +247,7 @@ export class RepoManager { } } - /** Reset repos in workspace to their checkout state. */ + /** Reset repos in workspace to their checkout state. Skips repos without path or source. */ async reset( repos: readonly RepoConfig[], workspacePath: string, @@ -246,6 +255,7 @@ export class RepoManager { ): Promise { const cleanFlag = reset === 'strict' ? '-fdx' : '-fd'; for (const repo of repos) { + if (!repo.path || !repo.source) continue; const targetDir = path.join(workspacePath, repo.path); await this.runGit(['reset', '--hard', 'HEAD'], { cwd: targetDir }); await this.runGit(['clean', cleanFlag], { cwd: targetDir }); diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index 0f4f1dbae..9e6f7de1e 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -737,7 +737,6 @@ function parseDockerWorkspaceConfig(raw: unknown): DockerWorkspaceConfig | undef ...(typeof obj.timeout === 'number' && { timeout: obj.timeout }), ...(typeof obj.memory === 'string' && { memory: obj.memory }), ...(typeof obj.cpus === 'number' && { cpus: obj.cpus }), - ...(typeof obj.base_commit === 'string' && { base_commit: obj.base_commit }), }; } diff --git a/packages/core/test/evaluation/workspace-config-parsing.test.ts b/packages/core/test/evaluation/workspace-config-parsing.test.ts index 22685b1b7..3bc6703e2 100644 --- a/packages/core/test/evaluation/workspace-config-parsing.test.ts +++ b/packages/core/test/evaluation/workspace-config-parsing.test.ts @@ -188,19 +188,22 @@ tests: expect(cases[0].workspace?.template).toBe(path.join(testDir, 'workspace-template')); }); - it('should parse docker workspace base_commit', async () => { - const evalFile = path.join(testDir, 'workspace-docker-base-commit.yaml'); + it('should parse Docker repos without source (prebuilt image)', async () => { + const evalFile = path.join(testDir, 'workspace-docker-no-source.yaml'); await writeFile( evalFile, ` tests: - - id: docker-base-commit + - id: docker-no-source input: "Do something" criteria: "Should work" workspace: docker: image: swebench/sweb.eval.django__django:latest - base_commit: abc123def + repos: + - path: /testbed + checkout: + base_commit: abc123def `, ); @@ -208,10 +211,41 @@ tests: expect(cases).toHaveLength(1); expect(cases[0].workspace?.docker).toEqual({ image: 'swebench/sweb.eval.django__django:latest', + }); + expect(cases[0].workspace?.repos).toHaveLength(1); + expect(cases[0].workspace?.repos?.[0].path).toBe('/testbed'); + expect(cases[0].workspace?.repos?.[0].source).toBeUndefined(); + expect(cases[0].workspace?.repos?.[0].checkout).toEqual({ base_commit: 'abc123def', }); }); + it('should parse repos with path + checkout but no source', async () => { + const evalFile = path.join(testDir, 'workspace-repo-path-checkout-only.yaml'); + await writeFile( + evalFile, + ` +tests: + - id: path-checkout-only + input: "Do something" + criteria: "Should work" + workspace: + docker: + image: myimage:latest + repos: + - path: /workspace/project + checkout: + ref: v2.0.0 +`, + ); + + const cases = await loadTests(evalFile, testDir); + expect(cases).toHaveLength(1); + expect(cases[0].workspace?.repos?.[0].path).toBe('/workspace/project'); + expect(cases[0].workspace?.repos?.[0].source).toBeUndefined(); + expect(cases[0].workspace?.repos?.[0].checkout?.ref).toBe('v2.0.0'); + }); + it('should parse repo checkout base_commit', async () => { const evalFile = path.join(testDir, 'workspace-repo-base-commit.yaml'); await writeFile( diff --git a/packages/core/test/evaluation/workspace/docker-workspace.test.ts b/packages/core/test/evaluation/workspace/docker-workspace.test.ts index 2b48aca60..d25628512 100644 --- a/packages/core/test/evaluation/workspace/docker-workspace.test.ts +++ b/packages/core/test/evaluation/workspace/docker-workspace.test.ts @@ -215,39 +215,46 @@ describe('DockerWorkspaceProvider', () => { ]); }); - it('skips reset when base_commit is not set', async () => { + it('skips reset when no checkout targets are provided', async () => { const provider = new DockerWorkspaceProvider({ image: 'img:1' }, executor); await provider.resetContainerCheckout('container-1'); expect(executor.calls).toHaveLength(0); }); - it('falls back to deprecated docker base_commit when repo checkout targets are absent', async () => { - executor.pushResponse({ exitCode: 0 }); // git reset --hard - executor.pushResponse({ stdout: 'abc123\n', exitCode: 0 }); // git rev-parse HEAD + it('resets multiple repos with different paths', async () => { + executor.pushResponse({ exitCode: 0 }); // git reset --hard repo 1 + executor.pushResponse({ stdout: 'abc123\n', exitCode: 0 }); // git rev-parse HEAD repo 1 + executor.pushResponse({ exitCode: 0 }); // git reset --hard repo 2 + executor.pushResponse({ stdout: 'def456\n', exitCode: 0 }); // git rev-parse HEAD repo 2 - const provider = new DockerWorkspaceProvider( - { image: 'img:1', base_commit: 'abc123' }, - executor, - ); + const provider = new DockerWorkspaceProvider({ image: 'img:1' }, executor); - await provider.resetContainerCheckout('container-1'); + await provider.resetContainerCheckout('container-1', [ + { path: '/testbed', ref: 'abc123' }, + { path: '/app', ref: 'def456' }, + ]); expect(executor.callArgv(0)).toEqual([ 'docker', 'exec', 'container-1', 'git', + '-C', + '/testbed', 'reset', '--hard', 'abc123', ]); - expect(executor.callArgv(1)).toEqual([ + expect(executor.callArgv(2)).toEqual([ 'docker', 'exec', 'container-1', 'git', - 'rev-parse', - 'HEAD', + '-C', + '/app', + 'reset', + '--hard', + 'def456', ]); }); }); diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md b/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md index 05d2e3403..2f4123de4 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md +++ b/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md @@ -311,7 +311,7 @@ tests: **Merge:** Case-level fields replace suite-level fields. **Commands receive stdin JSON:** `{workspace_path, test_id, eval_run_id, case_input, case_metadata}` **Setup failure:** aborts case. **Teardown failure:** non-fatal (warning). -For SWE-bench-style evals, keep operational checkout state under `workspace.repos[].checkout.base_commit`; treat `metadata.base_commit` as informational only. `workspace.docker.base_commit` remains a deprecated compatibility bridge for legacy Docker-backed evals. +For SWE-bench-style evals, keep operational checkout state under `workspace.repos[].checkout.base_commit`; treat `metadata.base_commit` as informational only. ### Repository Lifecycle diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index d261b0bdb..0c7805ba1 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -4925,7 +4925,6 @@ "additionalProperties": false } }, - "required": ["path", "source"], "additionalProperties": false } }, @@ -5085,10 +5084,6 @@ "cpus": { "type": "number", "minimum": 0.1 - }, - "base_commit": { - "type": "string", - "minLength": 1 } }, "required": ["image"], @@ -9950,7 +9945,6 @@ "additionalProperties": false } }, - "required": ["path", "source"], "additionalProperties": false } }, @@ -10110,10 +10104,6 @@ "cpus": { "type": "number", "minimum": 0.1 - }, - "base_commit": { - "type": "string", - "minLength": 1 } }, "required": ["image"], @@ -13754,7 +13744,6 @@ "additionalProperties": false } }, - "required": ["path", "source"], "additionalProperties": false } }, @@ -13914,10 +13903,6 @@ "cpus": { "type": "number", "minimum": 0.1 - }, - "base_commit": { - "type": "string", - "minLength": 1 } }, "required": ["image"], diff --git a/scripts/import-huggingface.py b/scripts/import-huggingface.py index 66244425b..24cc8459b 100644 --- a/scripts/import-huggingface.py +++ b/scripts/import-huggingface.py @@ -23,7 +23,7 @@ problem_statement -> input (user message) repo -> metadata.repo instance_id -> workspace.docker.image (ghcr.io/epoch-research/swe-bench.eval.x86_64.:latest) - base_commit -> legacy workspace.docker compatibility bridge + base_commit -> workspace.repos[].checkout.base_commit FAIL_TO_PASS -> assertions (code-grader commands) difficulty -> metadata.difficulty @@ -183,17 +183,18 @@ def _convert_swebench_instance(row: dict[str, Any]) -> dict[str, Any]: } # Docker workspace config - # base_commit remains here as a compatibility bridge until Docker-backed - # prebuilt images consume workspace.repos[].checkout directly. if repo: docker_config: dict[str, Any] = { "image": _docker_image_for_instance(instance_id), "timeout": 600, "memory": "4g", } + workspace: dict[str, Any] = {"docker": docker_config} if base_commit: - docker_config["base_commit"] = base_commit - eval_doc["workspace"] = {"docker": docker_config} + workspace["repos"] = [ + {"path": "/testbed", "checkout": {"base_commit": base_commit}} + ] + eval_doc["workspace"] = workspace eval_doc["tests"] = [test_case]