Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions apps/web/src/content/docs/docs/guides/workspace-pool.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,11 @@ For workspaces you manage outside AgentV, use static mode:
agentv eval evals/my-eval.yaml --workspace-mode static --workspace-path /path/to/my-workspace
```

**Auto-materialisation:** When `workspace.path` points to an empty or missing directory, AgentV automatically copies the template and clones repos into it. If the directory already exists and is populated, it is reused as-is. This makes static mode convenient for first-run bootstrap without manual workspace preparation.
**Auto-materialisation:** When `workspace.path` points to an empty or missing directory, AgentV automatically copies the template and clones repos into it. If the directory already exists and is populated, AgentV checks each repo individually — existing repos are reused as-is, and only missing repos are cloned. A repo counts as existing when its configured `path` is already present inside the workspace directory. This makes static mode convenient for both first-run bootstrap and incremental setup.

When the directory is already populated, clone, copy, and pool are bypassed entirely. AgentV never deletes a user-provided workspace. Lifecycle hooks still execute (unless `hooks.enabled: false`). This is useful for local development where you already have the repo checked out.
AgentV never deletes a user-provided workspace. Lifecycle hooks still execute (unless `hooks.enabled: false`). This is useful for local development where you already have repos checked out.

**Note:** When using `--workspace-path` (CLI flag) instead of `workspace.path` (YAML), the directory is always used as-is with no auto-materialisation or repo cloning.

**Precedence:** `workspace.mode` / `--workspace-mode` first, then default pooled behavior for shared repo workspaces.

Expand All @@ -199,7 +201,7 @@ CLI flags `--retain-on-success` / `--retain-on-failure` control temporary eval-r
|------|-----------|-----------|--------------------------|-------------------|
| **Pooled** (default) | First run only; reset on reuse | Yes | Yes (`.gitignore`d files) | Yes (slot per worker) |
| **Temp** (`mode: temp`) | Full clone + checkout every run | No | No | Sequential only |
| **Static** (`mode: static`) | None if populated; auto-materialised if empty/missing | Yes | User-managed | Sequential only |
| **Static** (`mode: static`) | Per-repo: clones only missing repos; auto-materialises if empty/missing | Yes | User-managed | Sequential only |

## When to disable pooling

Expand Down
42 changes: 33 additions & 9 deletions packages/core/src/evaluation/orchestrator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { createHash, randomUUID } from 'node:crypto';
import { existsSync } from 'node:fs';
import { copyFile, mkdir, readdir, stat } from 'node:fs/promises';
import path from 'node:path';
import micromatch from 'micromatch';
Expand Down Expand Up @@ -629,12 +630,14 @@ export async function runEvaluation(

// Track whether a static workspace was freshly materialised (needs repo clone + hooks)
let staticMaterialised = false;
// YAML-configured static paths support auto-materialisation and per-repo checks.
// CLI-provided paths (--workspace-path) always reuse the directory as-is.
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;

// Static workspace: auto-materialise if path is empty or missing, reuse if populated.
// Auto-materialisation only applies to YAML-configured paths (workspace.path), not CLI flags
// (--workspace / --workspace-path), which always reuse the directory as-is.
if (useStaticWorkspace && configuredStaticPath) {
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
const dirExists = await stat(configuredStaticPath).then(
(s) => s.isDirectory(),
() => false,
Expand Down Expand Up @@ -714,16 +717,37 @@ export async function runEvaluation(
}
}

// Materialize repos into shared workspace (skip for per_test, pool, and existing static workspace)
// Materialize repos into shared workspace (skip for per_test and pool modes).
// For static workspaces: materialize only repos whose target path is missing (per-repo reuse).
// For non-static workspaces: materialize all repos when freshly created.
const hasReposToMaterialize =
!!suiteWorkspace?.repos?.length && !usePool && !isPerTestIsolation;
const needsRepoMaterialisation =
!!suiteWorkspace?.repos?.length && !usePool && (!useStaticWorkspace || staticMaterialised);
const repoManager = needsRepoMaterialisation ? new RepoManager(verbose) : undefined;
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
setupLog(
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`,
);
hasReposToMaterialize && (!useStaticWorkspace || staticMaterialised);
const needsPerRepoCheck =
hasReposToMaterialize && useStaticWorkspace && !staticMaterialised && isYamlConfiguredPath;
const repoManager =
needsRepoMaterialisation || needsPerRepoCheck ? new RepoManager(verbose) : undefined;

if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos) {
try {
await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
if (needsPerRepoCheck) {
// Static workspace with existing content: materialize only missing repos
for (const repo of suiteWorkspace.repos) {
const targetDir = path.join(sharedWorkspacePath, repo.path);
if (existsSync(targetDir)) {
setupLog(`reusing existing repo at: ${targetDir}`);
continue;
}
setupLog(`materializing missing repo: ${repo.path}`);
await repoManager.materialize(repo, sharedWorkspacePath);
}
} else {
setupLog(
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`,
);
await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
}
setupLog('shared repo materialization complete');
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
Expand Down
108 changes: 108 additions & 0 deletions packages/core/test/evaluation/orchestrator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2755,6 +2755,114 @@ describe('--workspace flag', () => {
expect(results[0].error).toBeUndefined();
});

it('materializes only missing repos in YAML-configured static workspace', async () => {
  const fsp = await import('node:fs/promises');
  testDir = await fsp.mkdtemp(path.join(tmpdir(), 'agentv-ws-static-'));

  // Seed the workspace with a populated repo-a directory, standing in for a
  // checkout the user already has on disk. repo-b is deliberately left absent.
  const existingRepoDir = path.join(testDir, 'repo-a');
  const markerPath = path.join(existingRepoDir, 'marker.txt');
  await fsp.mkdir(existingRepoDir, { recursive: true });
  await fsp.writeFile(markerPath, 'pre-existing');

  const provider = new SequenceProvider('mock', {
    responses: [{ output: [{ role: 'assistant', content: [{ type: 'text', text: 'answer' }] }] }],
  });

  // Both repos point at placeholder git URLs under github.com/example.
  const gitRepo = (name: string) => ({
    path: name,
    source: { type: 'git' as const, url: `https://github.com/example/${name}.git` },
    checkout: { ref: 'main' },
  });

  // The static path comes from the eval's own workspace config (the YAML route),
  // not from a CLI workspace flag.
  const evalCase: EvalTest = {
    ...baseTestCase,
    workspace: {
      mode: 'static',
      path: testDir,
      repos: [gitRepo('repo-a'), gitRepo('repo-b')],
    },
  };

  // Expected flow: repo-a is present and gets reused, repo-b is absent so a clone
  // is attempted, and that clone cannot succeed against the placeholder URL.
  // NOTE(review): the rejection message alone does not identify which repo was
  // attempted; the marker check below is what shows repo-a was left in place.
  const evaluation = runEvaluation({
    testFilePath: 'in-memory.yaml',
    repoRoot: 'in-memory',
    target: baseTarget,
    providerFactory: () => provider,
    evaluators: evaluatorRegistry,
    evalCases: [evalCase],
    keepWorkspaces: true,
  });
  await expect(evaluation).rejects.toThrow('Failed to materialize repos');

  // access() rejects if the marker is gone, so resolving here means the
  // pre-existing repo-a content survived the failed run.
  await fsp.access(markerPath);
});

it('skips all repos when all exist in YAML-configured static workspace', async () => {
  const fsp = await import('node:fs/promises');
  testDir = await fsp.mkdtemp(path.join(tmpdir(), 'agentv-ws-static-'));

  // Populate every configured repo directory up front so nothing is missing.
  const seededRepos: Array<[string, string]> = [
    ['repo-a', 'a'],
    ['repo-b', 'b'],
  ];
  for (const [repoName, fileBody] of seededRepos) {
    const repoDir = path.join(testDir, repoName);
    await fsp.mkdir(repoDir, { recursive: true });
    await fsp.writeFile(path.join(repoDir, 'file.txt'), fileBody);
  }

  const provider = new SequenceProvider('mock', {
    responses: [{ output: [{ role: 'assistant', content: [{ type: 'text', text: 'answer' }] }] }],
  });

  // Repo entries use placeholder git URLs; with both target paths already
  // present, the run is expected to finish without attempting any clone.
  const gitRepo = (name: string) => ({
    path: name,
    source: { type: 'git' as const, url: `https://github.com/example/${name}.git` },
    checkout: { ref: 'main' },
  });

  const evalCase: EvalTest = {
    ...baseTestCase,
    workspace: {
      mode: 'static',
      path: testDir,
      repos: [gitRepo('repo-a'), gitRepo('repo-b')],
    },
  };

  const results = await runEvaluation({
    testFilePath: 'in-memory.yaml',
    repoRoot: 'in-memory',
    target: baseTarget,
    providerFactory: () => provider,
    evaluators: evaluatorRegistry,
    evalCases: [evalCase],
    keepWorkspaces: true,
  });

  // One eval case in, one result out, and no error recorded on it.
  expect(results).toHaveLength(1);
  expect(results[0].error).toBeUndefined();
});

it('errors when workspaceMode is static without workspace path', async () => {
const provider = new SequenceProvider('mock', {
responses: [{ output: [{ role: 'assistant', content: [{ type: 'text', text: 'answer' }] }] }],
Expand Down
Loading