diff --git a/apps/cli/src/commands/results/eval-runner.ts b/apps/cli/src/commands/results/eval-runner.ts
index 47556f7eb..9d5053cdd 100644
--- a/apps/cli/src/commands/results/eval-runner.ts
+++ b/apps/cli/src/commands/results/eval-runner.ts
@@ -376,7 +376,7 @@ export function registerEvalRoutes(
   });
 
-  // ── Project-scoped variants ────────────────────────────────────────────
+  // ── Benchmark-scoped variants ──────────────────────────────────────────
-  app.get('/api/benchmarks/:projectId/eval/discover', async (c) => {
+  app.get('/api/benchmarks/:benchmarkId/eval/discover', async (c) => {
     const cwd = getCwd(c);
     try {
       const files = await discoverEvalFiles(cwd);
@@ -392,7 +392,7 @@ export function registerEvalRoutes(
     }
   });
 
-  app.get('/api/benchmarks/:projectId/eval/targets', async (c) => {
+  app.get('/api/benchmarks/:benchmarkId/eval/targets', async (c) => {
     const cwd = getCwd(c);
     try {
       const names = await discoverTargetsInProject(cwd);
@@ -402,7 +402,7 @@ export function registerEvalRoutes(
     }
   });
 
-  app.post('/api/benchmarks/:projectId/eval/run', async (c) => {
+  app.post('/api/benchmarks/:benchmarkId/eval/run', async (c) => {
     const cwd = getCwd(c);
 
     let body: RunEvalRequest;
@@ -476,7 +476,7 @@ export function registerEvalRoutes(
     }
   });
 
-  app.get('/api/benchmarks/:projectId/eval/status/:id', (c) => {
+  app.get('/api/benchmarks/:benchmarkId/eval/status/:id', (c) => {
     const id = c.req.param('id');
     const run = activeRuns.get(id ?? '');
     if (!run) return c.json({ error: 'Run not found' }, 404);
@@ -492,7 +492,7 @@ export function registerEvalRoutes(
     });
   });
 
-  app.get('/api/benchmarks/:projectId/eval/runs', (c) => {
+  app.get('/api/benchmarks/:benchmarkId/eval/runs', (c) => {
     const runs = [...activeRuns.values()].map((r) => ({
       id: r.id,
       status: r.status,
@@ -505,7 +505,7 @@ export function registerEvalRoutes(
     return c.json({ runs });
   });
 
-  app.post('/api/benchmarks/:projectId/eval/preview', async (c) => {
+  app.post('/api/benchmarks/:benchmarkId/eval/preview', async (c) => {
     let body: RunEvalRequest;
     try {
       body = await c.req.json<RunEvalRequest>();
diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
index acb658c84..de67aa536 100644
--- a/apps/cli/src/commands/results/serve.ts
+++ b/apps/cli/src/commands/results/serve.ts
@@ -12,10 +12,10 @@
  *   - GET /api/feedback  — read feedback reviews
  *   - POST /api/feedback — write feedback reviews
  *   - GET /api/benchmarks  — list registered benchmarks
- *   - GET /api/benchmarks/:projectId/runs — benchmark-scoped run list
+ *   - GET /api/benchmarks/:benchmarkId/runs — benchmark-scoped run list
  *
  * All data routes (runs, suites, categories, evals, experiments, targets)
- * exist in both unscoped (/api/...) and benchmark-scoped (/api/benchmarks/:projectId/...)
+ * exist in both unscoped (/api/...) and benchmark-scoped (/api/benchmarks/:benchmarkId/...)
  * variants. They share handler functions via DataContext, differing only in
  * how searchDir is resolved.
  *
@@ -33,11 +33,11 @@ import { command, flag, number, option, optional, positional, string } from 'cmd
 import {
   DEFAULT_CATEGORY,
   type EvaluationResult,
-  addProject,
-  discoverProjects,
-  getProject,
-  loadProjectRegistry,
-  removeProject,
+  addBenchmark,
+  discoverBenchmarks,
+  getBenchmark,
+  loadBenchmarkRegistry,
+  removeBenchmark,
 } from '@agentv/core';
 import type { Context } from 'hono';
 import { Hono } from 'hono';
@@ -718,19 +718,19 @@ export function createApp(
   const readOnly = options?.readOnly === true;
   const app = new Hono();
 
-  // ── Project resolution wrapper ────────────────────────────────────────
-  // Resolves projectId → DataContext, returning 404 if not found.
-  function withProject(
+  // ── Benchmark resolution wrapper ──────────────────────────────────────
+  // Resolves benchmarkId → DataContext; 404 if the benchmark is unknown or its path is missing.
+  function withBenchmark(
     c: C,
     handler: (c: C, ctx: DataContext) => Response | Promise<Response>,
   ): Response | Promise<Response> {
-    const project = getProject(c.req.param('projectId') ?? '');
-    if (!project || !existsSync(project.path)) {
+    const benchmark = getBenchmark(c.req.param('benchmarkId') ?? '');
+    if (!benchmark || !existsSync(benchmark.path)) {
       return c.json({ error: 'Project not found' }, 404);
     }
     return handler(c, {
-      searchDir: project.path,
-      agentvDir: path.join(project.path, '.agentv'),
+      searchDir: benchmark.path,
+      agentvDir: path.join(benchmark.path, '.agentv'),
     });
   }
 
@@ -754,10 +754,10 @@ export function createApp(
     }
   });
 
-  // ── Project management endpoints ─────────────────────────────────────
+  // ── Benchmark management endpoints ───────────────────────────────────
 
-  /** Convert a ProjectEntry to snake_case wire format. */
-  function projectEntryToWire(entry: {
+  /** Convert a BenchmarkEntry to snake_case wire format. */
+  function benchmarkEntryToWire(entry: {
     id: string;
     name: string;
     path: string;
@@ -774,9 +774,9 @@ export function createApp(
   }
 
   app.get('/api/benchmarks', async (c) => {
-    const registry = loadProjectRegistry();
-    const projects = await Promise.all(
-      registry.projects.map(async (p) => {
+    const registry = loadBenchmarkRegistry();
+    const benchmarks = await Promise.all(
+      registry.benchmarks.map(async (p) => {
         let runCount = 0;
         let passRate = 0;
         let lastRun: string | null = null;
@@ -789,17 +789,17 @@ export function createApp(
             lastRun = metas[0].timestamp;
           }
         } catch {
-          // Project path may be missing or inaccessible
+          // Benchmark path may be missing or inaccessible
         }
         return {
-          ...projectEntryToWire(p),
+          ...benchmarkEntryToWire(p),
           run_count: runCount,
           pass_rate: passRate,
           last_run: lastRun,
         };
       }),
     );
-    return c.json({ projects });
+    return c.json({ projects: benchmarks });
   });
 
   app.post('/api/benchmarks', async (c) => {
@@ -809,34 +809,34 @@ export function createApp(
     try {
       const body = await c.req.json<{ path: string }>();
       if (!body.path) return c.json({ error: 'Missing path' }, 400);
-      const entry = addProject(body.path);
-      return c.json(projectEntryToWire(entry), 201);
+      const entry = addBenchmark(body.path);
+      return c.json(benchmarkEntryToWire(entry), 201);
     } catch (err) {
       return c.json({ error: (err as Error).message }, 400);
     }
   });
 
-  app.delete('/api/benchmarks/:projectId', (c) => {
+  app.delete('/api/benchmarks/:benchmarkId', (c) => {
     if (readOnly) {
       return c.json({ error: 'Studio is running in read-only mode' }, 403);
     }
-    const removed = removeProject(c.req.param('projectId') ?? '');
+    const removed = removeBenchmark(c.req.param('benchmarkId') ?? '');
     if (!removed) return c.json({ error: 'Project not found' }, 404);
     return c.json({ ok: true });
   });
 
-  app.get('/api/benchmarks/:projectId/summary', async (c) => {
-    const project = getProject(c.req.param('projectId') ?? '');
-    if (!project) return c.json({ error: 'Project not found' }, 404);
+  app.get('/api/benchmarks/:benchmarkId/summary', async (c) => {
+    const benchmark = getBenchmark(c.req.param('benchmarkId') ?? '');
+    if (!benchmark) return c.json({ error: 'Project not found' }, 404);
     try {
-      const { runs: metas } = await listMergedResultFiles(project.path);
+      const { runs: metas } = await listMergedResultFiles(benchmark.path);
       const runCount = metas.length;
       const passRate = runCount > 0 ? metas.reduce((s, m) => s + m.passRate, 0) / runCount : 0;
       const lastRun = metas.length > 0 ? metas[0].timestamp : null;
       return c.json({
-        id: project.id,
-        name: project.name,
-        path: project.path,
+        id: benchmark.id,
+        name: benchmark.name,
+        path: benchmark.path,
         run_count: runCount,
         pass_rate: passRate,
         last_run: lastRun,
@@ -853,17 +853,17 @@ export function createApp(
     try {
       const body = await c.req.json<{ path: string }>();
       if (!body.path) return c.json({ error: 'Missing path' }, 400);
-      const discovered = discoverProjects(body.path);
-      const registered = discovered.map((p) => projectEntryToWire(addProject(p)));
+      const discovered = discoverBenchmarks(body.path);
+      const registered = discovered.map((p) => benchmarkEntryToWire(addBenchmark(p)));
       return c.json({ discovered: registered });
     } catch (err) {
       return c.json({ error: (err as Error).message }, 400);
     }
   });
 
-  /** Aggregate runs from all registered projects, sorted by timestamp descending. */
+  /** Aggregate runs from all registered benchmarks, sorted by timestamp descending. */
   app.get('/api/benchmarks/all-runs', async (c) => {
-    const registry = loadProjectRegistry();
+    const registry = loadBenchmarkRegistry();
     const allRuns: Array<{
       filename: string;
       display_name: string;
@@ -880,7 +880,7 @@ export function createApp(
       project_name: string;
     }> = [];
 
-    for (const p of registry.projects) {
+    for (const p of registry.benchmarks) {
       try {
         const { runs: metas } = await listMergedResultFiles(p.path);
         for (const m of metas) {
@@ -1023,60 +1023,60 @@ export function createApp(
     return c.json({ entries });
   });
 
-  // ── Data routes (project-scoped) ──────────────────────────────────────
-  // Same handlers as above, with project-resolved DataContext via withProject.
+  // ── Data routes (benchmark-scoped) ───────────────────────────────────
+  // Same handlers as above, with benchmark-resolved DataContext via withBenchmark.
 
-  app.get('/api/benchmarks/:projectId/config', (c) =>
-    withProject(c, (ctx, dataCtx) =>
+  app.get('/api/benchmarks/:benchmarkId/config', (c) =>
+    withBenchmark(c, (ctx, dataCtx) =>
       handleConfig(ctx, dataCtx, {
         readOnly,
         multiProjectDashboard: options?.multiProjectDashboard,
       }),
     ),
   );
-  app.get('/api/benchmarks/:projectId/remote/status', (c) =>
-    withProject(c, async (ctx, dataCtx) =>
+  app.get('/api/benchmarks/:benchmarkId/remote/status', (c) =>
+    withBenchmark(c, async (ctx, dataCtx) =>
       ctx.json(await getRemoteResultsStatus(dataCtx.searchDir)),
     ),
   );
-  app.post('/api/benchmarks/:projectId/remote/sync', (c) =>
-    withProject(c, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir))),
+  app.post('/api/benchmarks/:benchmarkId/remote/sync', (c) =>
+    withBenchmark(c, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir))),
   );
-  app.get('/api/benchmarks/:projectId/runs', (c) => withProject(c, handleRuns));
-  app.get('/api/benchmarks/:projectId/runs/:filename', (c) => withProject(c, handleRunDetail));
-  app.get('/api/benchmarks/:projectId/runs/:filename/suites', (c) =>
-    withProject(c, handleRunSuites),
+  app.get('/api/benchmarks/:benchmarkId/runs', (c) => withBenchmark(c, handleRuns));
+  app.get('/api/benchmarks/:benchmarkId/runs/:filename', (c) => withBenchmark(c, handleRunDetail));
+  app.get('/api/benchmarks/:benchmarkId/runs/:filename/suites', (c) =>
+    withBenchmark(c, handleRunSuites),
   );
-  app.get('/api/benchmarks/:projectId/runs/:filename/categories', (c) =>
-    withProject(c, handleRunCategories),
+  app.get('/api/benchmarks/:benchmarkId/runs/:filename/categories', (c) =>
+    withBenchmark(c, handleRunCategories),
   );
-  app.get('/api/benchmarks/:projectId/runs/:filename/categories/:category/suites', (c) =>
-    withProject(c, handleCategorySuites),
+  app.get('/api/benchmarks/:benchmarkId/runs/:filename/categories/:category/suites', (c) =>
+    withBenchmark(c, handleCategorySuites),
   );
-  app.get('/api/benchmarks/:projectId/runs/:filename/evals/:evalId', (c) =>
-    withProject(c, handleEvalDetail),
+  app.get('/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId', (c) =>
+    withBenchmark(c, handleEvalDetail),
   );
-  app.get('/api/benchmarks/:projectId/runs/:filename/evals/:evalId/files', (c) =>
-    withProject(c, handleEvalFiles),
+  app.get('/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId/files', (c) =>
+    withBenchmark(c, handleEvalFiles),
   );
-  app.get('/api/benchmarks/:projectId/runs/:filename/evals/:evalId/files/*', (c) =>
-    withProject(c, handleEvalFileContent),
+  app.get('/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId/files/*', (c) =>
+    withBenchmark(c, handleEvalFileContent),
   );
-  app.get('/api/benchmarks/:projectId/experiments', (c) => withProject(c, handleExperiments));
-  app.get('/api/benchmarks/:projectId/compare', (c) => withProject(c, handleCompare));
-  app.get('/api/benchmarks/:projectId/targets', (c) => withProject(c, handleTargets));
-  app.get('/api/benchmarks/:projectId/feedback', (c) => withProject(c, handleFeedbackRead));
+  app.get('/api/benchmarks/:benchmarkId/experiments', (c) => withBenchmark(c, handleExperiments));
+  app.get('/api/benchmarks/:benchmarkId/compare', (c) => withBenchmark(c, handleCompare));
+  app.get('/api/benchmarks/:benchmarkId/targets', (c) => withBenchmark(c, handleTargets));
+  app.get('/api/benchmarks/:benchmarkId/feedback', (c) => withBenchmark(c, handleFeedbackRead));
 
   // ── Eval runner routes (discovery, launch, status) ────────────────────
 
   registerEvalRoutes(
     app,
     (c) => {
-      // For project-scoped routes, resolve to project path; otherwise use searchDir
-      const projectId = c.req.param('projectId');
-      if (projectId) {
-        const project = getProject(projectId);
-        if (project) return project.path;
+      // Benchmark-scoped routes use the benchmark's path; unscoped or unknown ids use searchDir
+      const benchmarkId = c.req.param('benchmarkId');
+      if (benchmarkId) {
+        const benchmark = getBenchmark(benchmarkId);
+        if (benchmark) return benchmark.path;
       }
       return searchDir;
     },
@@ -1218,10 +1218,10 @@ export const resultsServeCommand = command({
     const cwd = dir ?? process.cwd();
     const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
 
-    // ── Project management commands (non-server) ─────────────────────
+    // ── Benchmark management commands (non-server) ───────────────────
     if (add) {
       try {
-        const entry = addProject(add);
+        const entry = addBenchmark(add);
         console.log(`Registered project: ${entry.name} (${entry.id}) at ${entry.path}`);
       } catch (err) {
         console.error(`Error: ${(err as Error).message}`);
@@ -1231,7 +1231,7 @@ export const resultsServeCommand = command({
     }
 
     if (remove) {
-      const removed = removeProject(remove);
+      const removed = removeBenchmark(remove);
       if (removed) {
         console.log(`Unregistered project: ${remove}`);
       } else {
@@ -1242,13 +1242,13 @@ export const resultsServeCommand = command({
     }
 
     if (discover) {
-      const discovered = discoverProjects(discover);
+      const discovered = discoverBenchmarks(discover);
       if (discovered.length === 0) {
         console.log(`No projects with .agentv/ found under ${discover}`);
         return;
       }
       for (const p of discovered) {
-        const entry = addProject(p);
+        const entry = addBenchmark(p);
         console.log(`Registered: ${entry.name} (${entry.id}) at ${entry.path}`);
       }
       console.log(`\nDiscovered ${discovered.length} project(s).`);
@@ -1256,8 +1256,8 @@ export const resultsServeCommand = command({
     }
 
     // ── Determine multi-project mode ────────────────────────────────
-    const registry = loadProjectRegistry();
-    const { isMultiProject, showMultiWarning } = resolveDashboardMode(registry.projects.length, {
+    const registry = loadBenchmarkRegistry();
+    const { isMultiProject, showMultiWarning } = resolveDashboardMode(registry.benchmarks.length, {
       multi,
       single,
     });
@@ -1302,7 +1302,7 @@ export const resultsServeCommand = command({
       }
 
       if (isMultiProject) {
-        console.log(`Multi-project mode: ${registry.projects.length} project(s) registered`);
+        console.log(`Multi-project mode: ${registry.benchmarks.length} project(s) registered`);
       } else if (results.length > 0 && sourceFile) {
         console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
       } else {
diff --git a/apps/studio/src/components/EvalDetail.tsx b/apps/studio/src/components/EvalDetail.tsx
index 4b3414751..a0a2ddfa9 100644
--- a/apps/studio/src/components/EvalDetail.tsx
+++ b/apps/studio/src/components/EvalDetail.tsx
@@ -10,8 +10,14 @@
 import { useState } from 'react';
 
 import { useQuery } from '@tanstack/react-query';
-import { isPassing, useEvalFileContent, useEvalFiles, useStudioConfig } from '~/lib/api';
-import { projectEvalFileContentOptions, projectEvalFilesOptions } from '~/lib/api';
+import {
+  benchmarkEvalFileContentOptions,
+  benchmarkEvalFilesOptions,
+  isPassing,
+  useEvalFileContent,
+  useEvalFiles,
+  useStudioConfig,
+} from '~/lib/api';
 import type { AssertionEntry, EvalResult, ScoreEntry } from '~/lib/types';
 
 import { FeedbackPanel } from './FeedbackPanel';
@@ -23,7 +29,7 @@ import { ScoreBar } from './ScoreBar';
 interface EvalDetailProps {
   eval: EvalResult;
   runId: string;
-  projectId?: string;
+  benchmarkId?: string;
 }
 
 type Tab = 'checks' | 'files' | 'feedback';
@@ -40,7 +46,7 @@ function findFirstFile(nodes: FileNode[]): string | null {
   return null;
 }
 
-export function EvalDetail({ eval: result, runId, projectId }: EvalDetailProps) {
+export function EvalDetail({ eval: result, runId, benchmarkId }: EvalDetailProps) {
   const [activeTab, setActiveTab] = useState<Tab>('checks');
   const { data: config } = useStudioConfig();
   const isReadOnly = config?.read_only === true;
@@ -96,7 +102,7 @@ export function EvalDetail({ eval: result, runId, projectId }: EvalDetailProps)
         )}
         {activeTab === 'files' && (
           <div className="h-full p-4">
-            <FilesTab result={result} runId={runId} projectId={projectId} />
+            <FilesTab result={result} runId={runId} benchmarkId={benchmarkId} />
           </div>
         )}
         {!isReadOnly && activeTab === 'feedback' && (
@@ -280,13 +286,13 @@ function ChecksTab({ result }: { result: EvalResult }) {
 function FilesTab({
   result,
   runId,
-  projectId,
-}: { result: EvalResult; runId: string; projectId?: string }) {
+  benchmarkId,
+}: { result: EvalResult; runId: string; benchmarkId?: string }) {
   const evalId = result.testId;
 
-  // Use project-scoped API hooks when projectId is present
-  const { data: filesData } = projectId
-    ? useQuery(projectEvalFilesOptions(projectId, runId, evalId))
+  // Benchmark-scoped hooks if benchmarkId is set. NOTE(review): conditional hook call — confirm safe.
+  const { data: filesData } = benchmarkId
+    ? useQuery(benchmarkEvalFilesOptions(benchmarkId, runId, evalId))
     : useEvalFiles(runId, evalId);
   const files = filesData?.files ?? [];
 
@@ -294,8 +300,8 @@ function FilesTab({
 
   const effectivePath = selectedPath ?? (files.length > 0 ? findFirstFile(files) : null);
 
-  const { data: fileContentData, isLoading: isLoadingContent } = projectId
-    ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, effectivePath ?? ''))
+  const { data: fileContentData, isLoading: isLoadingContent } = benchmarkId
+    ? useQuery(benchmarkEvalFileContentOptions(benchmarkId, runId, evalId, effectivePath ?? ''))
     : useEvalFileContent(runId, evalId, effectivePath ?? '');
 
   if (files.length === 0) {
diff --git a/apps/studio/src/components/ProjectCard.tsx b/apps/studio/src/components/ProjectCard.tsx
index ff0642dc8..a7af8cebe 100644
--- a/apps/studio/src/components/ProjectCard.tsx
+++ b/apps/studio/src/components/ProjectCard.tsx
@@ -7,7 +7,7 @@
 
 import { Link } from '@tanstack/react-router';
 
-import type { ProjectSummary } from '~/lib/types';
+import type { BenchmarkSummary } from '~/lib/types';
 
 function formatTimeAgo(timestamp: string | null): string {
   if (!timestamp) return 'No runs';
@@ -23,13 +23,13 @@ function formatTimeAgo(timestamp: string | null): string {
   return `${days}d ago`;
 }
 
-export function ProjectCard({ project }: { project: ProjectSummary }) {
+export function ProjectCard({ project }: { project: BenchmarkSummary }) {
   const passPercent = Math.round(project.pass_rate * 100);
 
   return (
     <Link
-      to="/projects/$projectId"
-      params={{ projectId: project.id }}
+      to="/projects/$benchmarkId"
+      params={{ benchmarkId: project.id }}
       className="group block rounded-lg border border-gray-800 bg-gray-900/50 p-5 transition-colors hover:border-cyan-800 hover:bg-gray-900"
     >
       <div className="flex items-start justify-between">
diff --git a/apps/studio/src/components/RunDetail.tsx b/apps/studio/src/components/RunDetail.tsx
index a380b6e88..7e78431a7 100644
--- a/apps/studio/src/components/RunDetail.tsx
+++ b/apps/studio/src/components/RunDetail.tsx
@@ -18,7 +18,7 @@ import { StatsCards } from './StatsCards';
 interface RunDetailProps {
   results: EvalResult[];
   runId: string;
-  projectId?: string;
+  benchmarkId?: string;
 }
 
 interface SuiteStats {
@@ -85,7 +85,7 @@ function buildCategoryGroups(results: EvalResult[], passThreshold: number): Cate
     .sort((a, b) => a.name.localeCompare(b.name));
 }
 
-export function RunDetail({ results, runId, projectId }: RunDetailProps) {
+export function RunDetail({ results, runId, benchmarkId }: RunDetailProps) {
   const { data: config } = useStudioConfig();
   const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
 
@@ -191,10 +191,10 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
                       )}
                     </td>
                     <td className="px-4 py-3">
-                      {projectId ? (
+                      {benchmarkId ? (
                         <Link
-                          to="/projects/$projectId/evals/$runId/$evalId"
-                          params={{ projectId, runId, evalId: result.testId }}
+                          to="/projects/$benchmarkId/evals/$runId/$evalId"
+                          params={{ benchmarkId, runId, evalId: result.testId }}
                           className="font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
                         >
                           {result.testId}
diff --git a/apps/studio/src/components/RunEvalModal.tsx b/apps/studio/src/components/RunEvalModal.tsx
index 886f53eb2..5a17fc85e 100644
--- a/apps/studio/src/components/RunEvalModal.tsx
+++ b/apps/studio/src/components/RunEvalModal.tsx
@@ -28,7 +28,7 @@ import type { RunEvalRequest } from '~/lib/types';
 export interface RunEvalModalProps {
   open: boolean;
   onClose: () => void;
-  projectId?: string;
+  benchmarkId?: string;
   prefill?: {
     suiteFilter?: string;
     testIds?: string[];
@@ -38,7 +38,7 @@ export interface RunEvalModalProps {
 
 // ── Component ────────────────────────────────────────────────────────────
 
-export function RunEvalModal({ open, onClose, projectId, prefill }: RunEvalModalProps) {
+export function RunEvalModal({ open, onClose, benchmarkId, prefill }: RunEvalModalProps) {
   const queryClient = useQueryClient();
 
   // Form state
@@ -58,8 +58,8 @@ export function RunEvalModal({ open, onClose, projectId, prefill }: RunEvalModal
   const [cliPreview, setCliPreview] = useState<string | null>(null);
 
   // Data
-  const { data: discoverData } = useEvalDiscover(projectId);
-  const { data: targetsData } = useEvalTargets(projectId);
+  const { data: discoverData } = useEvalDiscover(benchmarkId);
+  const { data: targetsData } = useEvalTargets(benchmarkId);
   const { data: runStatus } = useEvalRunStatus(activeRunId);
 
   const evalFiles = useMemo(() => discoverData?.eval_files ?? [], [discoverData]);
@@ -110,10 +110,10 @@ export function RunEvalModal({ open, onClose, projectId, prefill }: RunEvalModal
       setCliPreview(null);
       return;
     }
-    previewEvalCommand(req, projectId)
+    previewEvalCommand(req, benchmarkId)
       .then((r) => setCliPreview(r.command))
       .catch(() => setCliPreview(null));
-  }, [buildRequest, projectId]);
+  }, [buildRequest, benchmarkId]);
 
   // Add a test ID pill
   function addTestId() {
@@ -134,7 +134,7 @@ export function RunEvalModal({ open, onClose, projectId, prefill }: RunEvalModal
     setLaunching(true);
     try {
       const req = buildRequest();
-      const result = await launchEvalRun(req, projectId);
+      const result = await launchEvalRun(req, benchmarkId);
       setActiveRunId(result.id);
     } catch (err) {
       setError((err as Error).message);
diff --git a/apps/studio/src/components/RunList.tsx b/apps/studio/src/components/RunList.tsx
index 29ee3a8fe..36b5d9be7 100644
--- a/apps/studio/src/components/RunList.tsx
+++ b/apps/studio/src/components/RunList.tsx
@@ -16,7 +16,7 @@ import { PassRatePill } from './PassRatePill';
 
 interface RunListProps {
   runs: RunMeta[];
-  projectId?: string;
+  benchmarkId?: string;
   emptyMessage?: React.ReactNode;
 }
 
@@ -48,7 +48,7 @@ function runLabel(run: RunMeta): string {
   return run.display_name ?? run.filename;
 }
 
-export function RunList({ runs, projectId, emptyMessage }: RunListProps) {
+export function RunList({ runs, benchmarkId, emptyMessage }: RunListProps) {
   if (runs.length === 0) {
     return (
       <div className="rounded-lg border border-gray-800 bg-gray-900 p-8 text-center">
@@ -101,10 +101,10 @@ export function RunList({ runs, projectId, emptyMessage }: RunListProps) {
 
                 {/* Run name */}
                 <td className="px-4 py-3">
-                  {projectId ? (
+                  {benchmarkId ? (
                     <Link
-                      to="/projects/$projectId/runs/$runId"
-                      params={{ projectId, runId: run.filename }}
+                      to="/projects/$benchmarkId/runs/$runId"
+                      params={{ benchmarkId, runId: run.filename }}
                       className="font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
                     >
                       {label}
diff --git a/apps/studio/src/components/Sidebar.tsx b/apps/studio/src/components/Sidebar.tsx
index 64067076f..a0886eba7 100644
--- a/apps/studio/src/components/Sidebar.tsx
+++ b/apps/studio/src/components/Sidebar.tsx
@@ -18,12 +18,12 @@ import { Link, useLocation, useMatchRoute } from '@tanstack/react-router';
 
 import {
   isPassing,
-  useAllProjectRuns,
+  useAllBenchmarkRuns,
+  useBenchmarkList,
+  useBenchmarkRunDetail,
+  useBenchmarkRunList,
   useCategorySuites,
   useExperiments,
-  useProjectList,
-  useProjectRunDetail,
-  useProjectRunList,
   useRunDetail,
   useRunList,
   useStudioConfig,
@@ -71,38 +71,42 @@ export function Sidebar() {
 
   // ── Project-scoped route matching ────────────────────────────────────
   const projectEvalMatch = matchRoute({
-    to: '/projects/$projectId/evals/$runId/$evalId',
+    to: '/projects/$benchmarkId/evals/$runId/$evalId',
     fuzzy: true,
   });
   const projectRunMatch = matchRoute({
-    to: '/projects/$projectId/runs/$runId',
+    to: '/projects/$benchmarkId/runs/$runId',
     fuzzy: true,
   });
   const projectMatch = matchRoute({
-    to: '/projects/$projectId',
+    to: '/projects/$benchmarkId',
     fuzzy: true,
   });
 
   // Project-scoped eval detail
-  if (projectEvalMatch && typeof projectEvalMatch === 'object' && 'projectId' in projectEvalMatch) {
-    const { projectId, runId, evalId } = projectEvalMatch as {
-      projectId: string;
+  if (
+    projectEvalMatch &&
+    typeof projectEvalMatch === 'object' &&
+    'benchmarkId' in projectEvalMatch
+  ) {
+    const { benchmarkId, runId, evalId } = projectEvalMatch as {
+      benchmarkId: string;
       runId: string;
       evalId: string;
     };
-    return <ProjectEvalSidebar projectId={projectId} runId={runId} currentEvalId={evalId} />;
+    return <ProjectEvalSidebar benchmarkId={benchmarkId} runId={runId} currentEvalId={evalId} />;
   }
 
   // Project-scoped run detail
-  if (projectRunMatch && typeof projectRunMatch === 'object' && 'projectId' in projectRunMatch) {
-    const { projectId, runId } = projectRunMatch as { projectId: string; runId: string };
-    return <ProjectRunDetailSidebar projectId={projectId} currentRunId={runId} />;
+  if (projectRunMatch && typeof projectRunMatch === 'object' && 'benchmarkId' in projectRunMatch) {
+    const { benchmarkId, runId } = projectRunMatch as { benchmarkId: string; runId: string };
+    return <ProjectRunDetailSidebar benchmarkId={benchmarkId} currentRunId={runId} />;
   }
 
   // Project home (runs/experiments/targets)
-  if (projectMatch && typeof projectMatch === 'object' && 'projectId' in projectMatch) {
-    const { projectId } = projectMatch as { projectId: string };
-    return <ProjectRunDetailSidebar projectId={projectId} />;
+  if (projectMatch && typeof projectMatch === 'object' && 'benchmarkId' in projectMatch) {
+    const { benchmarkId } = projectMatch as { benchmarkId: string };
+    return <ProjectRunDetailSidebar benchmarkId={benchmarkId} />;
   }
 
   // ── Unscoped route matching ──────────────────────────────────────────
@@ -149,7 +153,7 @@ export function Sidebar() {
 
 function RunSidebar() {
   const matchRoute = useMatchRoute();
-  const { data: projectData } = useProjectList();
+  const { data: projectData } = useBenchmarkList();
   const hasProjects = (projectData?.projects.length ?? 0) > 0;
 
   const isHome = matchRoute({ to: '/' });
@@ -159,7 +163,7 @@ function RunSidebar() {
   const useAggregated = hasProjects && isHome !== false;
 
   const { data: localData } = useRunList();
-  const { data: aggregatedData } = useAllProjectRuns();
+  const { data: aggregatedData } = useAllBenchmarkRuns();
   const data = useAggregated ? aggregatedData : localData;
 
   return (
@@ -188,8 +192,8 @@ function RunSidebar() {
             return (
               <Link
                 key={`${run.project_id}/${run.filename}`}
-                to="/projects/$projectId/runs/$runId"
-                params={{ projectId: run.project_id, runId: run.filename }}
+                to="/projects/$benchmarkId/runs/$runId"
+                params={{ benchmarkId: run.project_id, runId: run.filename }}
                 className="mb-0.5 block truncate rounded-md px-2 py-1.5 text-sm text-gray-400 transition-colors hover:bg-gray-800/50 hover:text-gray-200"
                 title={run.project_name}
               >
@@ -391,13 +395,13 @@ function CategorySidebar({ runId, category }: { runId: string; category: string
 // ── Project-scoped sidebars ──────────────────────────────────────────────
 
 function ProjectRunDetailSidebar({
-  projectId,
+  benchmarkId,
   currentRunId,
 }: {
-  projectId: string;
+  benchmarkId: string;
   currentRunId?: string;
 }) {
-  const { data } = useProjectRunList(projectId);
+  const { data } = useBenchmarkRunList(benchmarkId);
 
   return (
     <SidebarShell>
@@ -411,7 +415,7 @@ function ProjectRunDetailSidebar({
         <Link to="/" className="text-xs text-gray-400 hover:text-cyan-400">
           &larr; All Benchmarks
         </Link>
-        <p className="mt-1 truncate text-sm font-medium text-gray-300">{projectId}</p>
+        <p className="mt-1 truncate text-sm font-medium text-gray-300">{benchmarkId}</p>
       </div>
 
       <nav className="flex-1 overflow-y-auto px-2 py-3">
@@ -423,8 +427,8 @@ function ProjectRunDetailSidebar({
           return (
             <Link
               key={run.filename}
-              to="/projects/$projectId/runs/$runId"
-              params={{ projectId, runId: run.filename }}
+              to="/projects/$benchmarkId/runs/$runId"
+              params={{ benchmarkId, runId: run.filename }}
               className={`mb-0.5 block truncate rounded-md px-2 py-1.5 text-sm transition-colors ${
                 isActive
                   ? 'bg-gray-800 text-cyan-400'
@@ -441,15 +445,15 @@ function ProjectRunDetailSidebar({
 }
 
 function ProjectEvalSidebar({
-  projectId,
+  benchmarkId,
   runId,
   currentEvalId,
 }: {
-  projectId: string;
+  benchmarkId: string;
   runId: string;
   currentEvalId: string;
 }) {
-  const { data } = useProjectRunDetail(projectId, runId);
+  const { data } = useBenchmarkRunDetail(benchmarkId, runId);
   const { data: config } = useStudioConfig();
   const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
 
@@ -463,8 +467,8 @@ function ProjectEvalSidebar({
 
       <div className="border-b border-gray-800 px-4 py-2">
         <Link
-          to="/projects/$projectId/runs/$runId"
-          params={{ projectId, runId }}
+          to="/projects/$benchmarkId/runs/$runId"
+          params={{ benchmarkId, runId }}
           className="text-xs text-gray-400 hover:text-cyan-400"
         >
           &larr; Back to run
@@ -482,8 +486,8 @@ function ProjectEvalSidebar({
           return (
             <Link
               key={result.testId}
-              to="/projects/$projectId/evals/$runId/$evalId"
-              params={{ projectId, runId, evalId: result.testId }}
+              to="/projects/$benchmarkId/evals/$runId/$evalId"
+              params={{ benchmarkId, runId, evalId: result.testId }}
               className={`mb-0.5 flex items-center gap-2 rounded-md px-2 py-1.5 text-sm transition-colors ${
                 isActive
                   ? 'bg-gray-800 text-cyan-400'
diff --git a/apps/studio/src/lib/api.ts b/apps/studio/src/lib/api.ts
index c114078ec..ddfe8ce1a 100644
--- a/apps/studio/src/lib/api.ts
+++ b/apps/studio/src/lib/api.ts
@@ -8,6 +8,8 @@
 import { queryOptions, useQuery } from '@tanstack/react-query';
 
 import type {
+  BenchmarkEntry,
+  BenchmarkListResponse,
   CategoriesResponse,
   CompareResponse,
   EvalDetailResponse,
@@ -21,8 +23,6 @@ import type {
   FileContentResponse,
   FileTreeResponse,
   IndexResponse,
-  ProjectEntry,
-  ProjectListResponse,
   RemoteStatusResponse,
   RunDetailResponse,
   RunEvalRequest,
@@ -148,10 +148,10 @@ export const studioConfigOptions = queryOptions({
   staleTime: 5_000,
 });
 
-export function remoteStatusOptions(projectId?: string) {
-  const url = projectId ? `${projectApiBase(projectId)}/remote/status` : '/api/remote/status';
+export function remoteStatusOptions(benchmarkId?: string) {
+  const url = benchmarkId ? `${benchmarkApiBase(benchmarkId)}/remote/status` : '/api/remote/status';
   return queryOptions({
-    queryKey: ['remote-status', projectId ?? ''],
+    queryKey: ['remote-status', benchmarkId ?? ''],
     queryFn: () => fetchJson<RemoteStatusResponse>(url),
     staleTime: 5_000,
   });
@@ -215,8 +215,8 @@ export function useStudioConfig() {
   return useQuery(studioConfigOptions);
 }
 
-export function useRemoteStatus(projectId?: string) {
-  return useQuery(remoteStatusOptions(projectId));
+export function useRemoteStatus(benchmarkId?: string) {
+  return useQuery(remoteStatusOptions(benchmarkId));
 }
 
 /** Default pass threshold matching @agentv/core DEFAULT_THRESHOLD */
@@ -226,43 +226,43 @@ export function isPassing(score: number, passThreshold: number = DEFAULT_PASS_TH
   return score >= passThreshold;
 }
 
-// ── Project API ─────────────────────────────────────────────────────────
+// ── Benchmark API ────────────────────────────────────────────────────────
 
-export const projectListOptions = queryOptions({
-  queryKey: ['projects'],
-  queryFn: () => fetchJson<ProjectListResponse>('/api/benchmarks'),
+export const benchmarkListOptions = queryOptions({
+  queryKey: ['benchmarks'],
+  queryFn: () => fetchJson<BenchmarkListResponse>('/api/benchmarks'),
   refetchInterval: 10_000,
 });
 
-export function useProjectList() {
-  return useQuery(projectListOptions);
+export function useBenchmarkList() {
+  return useQuery(benchmarkListOptions);
 }
 
-export const allProjectRunsOptions = queryOptions({
-  queryKey: ['projects', 'all-runs'],
+export const allBenchmarkRunsOptions = queryOptions({
+  queryKey: ['benchmarks', 'all-runs'],
   queryFn: () => fetchJson<RunListResponse>('/api/benchmarks/all-runs'),
   refetchInterval: 5_000,
 });
 
-export function useAllProjectRuns() {
-  return useQuery(allProjectRunsOptions);
+export function useAllBenchmarkRuns() {
+  return useQuery(allBenchmarkRunsOptions);
 }
 
-export async function addProjectApi(projectPath: string): Promise<ProjectEntry> {
+export async function addBenchmarkApi(benchmarkPath: string): Promise<BenchmarkEntry> {
   const res = await fetch('/api/benchmarks', {
     method: 'POST',
     headers: { 'Content-Type': 'application/json' },
-    body: JSON.stringify({ path: projectPath }),
+    body: JSON.stringify({ path: benchmarkPath }),
   });
   if (!res.ok) {
     const err = (await res.json()) as { error: string };
     throw new Error(err.error || `Failed to add project: ${res.status}`);
   }
-  return res.json() as Promise<ProjectEntry>;
+  return res.json() as Promise<BenchmarkEntry>;
 }
 
-export async function removeProjectApi(projectId: string): Promise<void> {
-  const res = await fetch(`/api/benchmarks/${encodeURIComponent(projectId)}`, {
+export async function removeBenchmarkApi(benchmarkId: string): Promise<void> {
+  const res = await fetch(`/api/benchmarks/${encodeURIComponent(benchmarkId)}`, {
     method: 'DELETE',
   });
   if (!res.ok) {
@@ -270,7 +270,7 @@ export async function removeProjectApi(projectId: string): Promise<void> {
   }
 }
 
-export async function discoverProjectsApi(dirPath: string): Promise<ProjectEntry[]> {
+export async function discoverBenchmarksApi(dirPath: string): Promise<BenchmarkEntry[]> {
   const res = await fetch('/api/benchmarks/discover', {
     method: 'POST',
     headers: { 'Content-Type': 'application/json' },
@@ -280,149 +280,153 @@ export async function discoverProjectsApi(dirPath: string): Promise<ProjectEntry
     const err = (await res.json()) as { error: string };
     throw new Error(err.error || `Failed to discover: ${res.status}`);
   }
-  const data = (await res.json()) as { discovered: ProjectEntry[] };
+  const data = (await res.json()) as { discovered: BenchmarkEntry[] };
   return data.discovered;
 }
 
-/** Build the API base URL for a project-scoped request. */
-function projectApiBase(projectId: string): string {
-  return `/api/benchmarks/${encodeURIComponent(projectId)}`;
+/** Build the API base URL for a benchmark-scoped request. */
+function benchmarkApiBase(benchmarkId: string): string {
+  return `/api/benchmarks/${encodeURIComponent(benchmarkId)}`;
 }
 
-export function projectRunListOptions(projectId: string) {
+export function benchmarkRunListOptions(benchmarkId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'runs'],
-    queryFn: () => fetchJson<RunListResponse>(`${projectApiBase(projectId)}/runs`),
-    enabled: !!projectId,
+    queryKey: ['benchmarks', benchmarkId, 'runs'],
+    queryFn: () => fetchJson<RunListResponse>(`${benchmarkApiBase(benchmarkId)}/runs`),
+    enabled: !!benchmarkId,
     refetchInterval: 5_000,
   });
 }
 
-export function useProjectRunList(projectId: string) {
-  return useQuery(projectRunListOptions(projectId));
+export function useBenchmarkRunList(benchmarkId: string) {
+  return useQuery(benchmarkRunListOptions(benchmarkId));
 }
 
-export function projectRunDetailOptions(projectId: string, filename: string) {
+export function benchmarkRunDetailOptions(benchmarkId: string, filename: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'runs', filename],
+    queryKey: ['benchmarks', benchmarkId, 'runs', filename],
     queryFn: () =>
       fetchJson<RunDetailResponse>(
-        `${projectApiBase(projectId)}/runs/${encodeURIComponent(filename)}`,
+        `${benchmarkApiBase(benchmarkId)}/runs/${encodeURIComponent(filename)}`,
       ),
-    enabled: !!projectId && !!filename,
+    enabled: !!benchmarkId && !!filename,
   });
 }
 
-export function useProjectRunDetail(projectId: string, filename: string) {
-  return useQuery(projectRunDetailOptions(projectId, filename));
+export function useBenchmarkRunDetail(benchmarkId: string, filename: string) {
+  return useQuery(benchmarkRunDetailOptions(benchmarkId, filename));
 }
 
-export function projectRunSuitesOptions(projectId: string, runId: string) {
+export function benchmarkRunSuitesOptions(benchmarkId: string, runId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'runs', runId, 'suites'],
+    queryKey: ['benchmarks', benchmarkId, 'runs', runId, 'suites'],
     queryFn: () =>
       fetchJson<SuitesResponse>(
-        `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/suites`,
+        `${benchmarkApiBase(benchmarkId)}/runs/${encodeURIComponent(runId)}/suites`,
       ),
-    enabled: !!projectId && !!runId,
+    enabled: !!benchmarkId && !!runId,
   });
 }
 
-export function projectRunCategoriesOptions(projectId: string, runId: string) {
+export function benchmarkRunCategoriesOptions(benchmarkId: string, runId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'runs', runId, 'categories'],
+    queryKey: ['benchmarks', benchmarkId, 'runs', runId, 'categories'],
     queryFn: () =>
       fetchJson<CategoriesResponse>(
-        `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/categories`,
+        `${benchmarkApiBase(benchmarkId)}/runs/${encodeURIComponent(runId)}/categories`,
       ),
-    enabled: !!projectId && !!runId,
+    enabled: !!benchmarkId && !!runId,
   });
 }
 
-export function projectCategorySuitesOptions(projectId: string, runId: string, category: string) {
+export function benchmarkCategorySuitesOptions(
+  benchmarkId: string,
+  runId: string,
+  category: string,
+) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'runs', runId, 'categories', category, 'suites'],
+    queryKey: ['benchmarks', benchmarkId, 'runs', runId, 'categories', category, 'suites'],
     queryFn: () =>
       fetchJson<SuitesResponse>(
-        `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/categories/${encodeURIComponent(category)}/suites`,
+        `${benchmarkApiBase(benchmarkId)}/runs/${encodeURIComponent(runId)}/categories/${encodeURIComponent(category)}/suites`,
       ),
-    enabled: !!projectId && !!runId && !!category,
+    enabled: !!benchmarkId && !!runId && !!category,
   });
 }
 
-export function projectEvalDetailOptions(projectId: string, runId: string, evalId: string) {
+export function benchmarkEvalDetailOptions(benchmarkId: string, runId: string, evalId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'runs', runId, 'evals', evalId],
+    queryKey: ['benchmarks', benchmarkId, 'runs', runId, 'evals', evalId],
     queryFn: () =>
       fetchJson<EvalDetailResponse>(
-        `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}`,
+        `${benchmarkApiBase(benchmarkId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}`,
       ),
-    enabled: !!projectId && !!runId && !!evalId,
+    enabled: !!benchmarkId && !!runId && !!evalId,
   });
 }
 
-export function projectEvalFilesOptions(projectId: string, runId: string, evalId: string) {
+export function benchmarkEvalFilesOptions(benchmarkId: string, runId: string, evalId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'runs', runId, 'evals', evalId, 'files'],
+    queryKey: ['benchmarks', benchmarkId, 'runs', runId, 'evals', evalId, 'files'],
     queryFn: () =>
       fetchJson<FileTreeResponse>(
-        `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/files`,
+        `${benchmarkApiBase(benchmarkId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/files`,
       ),
-    enabled: !!projectId && !!runId && !!evalId,
+    enabled: !!benchmarkId && !!runId && !!evalId,
   });
 }
 
-export function projectEvalFileContentOptions(
-  projectId: string,
+export function benchmarkEvalFileContentOptions(
+  benchmarkId: string,
   runId: string,
   evalId: string,
   filePath: string,
 ) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'runs', runId, 'evals', evalId, 'files', filePath],
+    queryKey: ['benchmarks', benchmarkId, 'runs', runId, 'evals', evalId, 'files', filePath],
     queryFn: () =>
       fetchJson<FileContentResponse>(
-        `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/files/${filePath}`,
+        `${benchmarkApiBase(benchmarkId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/files/${filePath}`,
       ),
-    enabled: !!projectId && !!runId && !!evalId && !!filePath,
+    enabled: !!benchmarkId && !!runId && !!evalId && !!filePath,
   });
 }
 
-export function projectExperimentsOptions(projectId: string) {
+export function benchmarkExperimentsOptions(benchmarkId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'experiments'],
-    queryFn: () => fetchJson<ExperimentsResponse>(`${projectApiBase(projectId)}/experiments`),
-    enabled: !!projectId,
+    queryKey: ['benchmarks', benchmarkId, 'experiments'],
+    queryFn: () => fetchJson<ExperimentsResponse>(`${benchmarkApiBase(benchmarkId)}/experiments`),
+    enabled: !!benchmarkId,
   });
 }
 
-export function projectCompareOptions(projectId: string) {
+export function benchmarkCompareOptions(benchmarkId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'compare'],
-    queryFn: () => fetchJson<CompareResponse>(`${projectApiBase(projectId)}/compare`),
-    enabled: !!projectId,
+    queryKey: ['benchmarks', benchmarkId, 'compare'],
+    queryFn: () => fetchJson<CompareResponse>(`${benchmarkApiBase(benchmarkId)}/compare`),
+    enabled: !!benchmarkId,
   });
 }
 
-export function projectTargetsOptions(projectId: string) {
+export function benchmarkTargetsOptions(benchmarkId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'targets'],
-    queryFn: () => fetchJson<TargetsResponse>(`${projectApiBase(projectId)}/targets`),
-    enabled: !!projectId,
+    queryKey: ['benchmarks', benchmarkId, 'targets'],
+    queryFn: () => fetchJson<TargetsResponse>(`${benchmarkApiBase(benchmarkId)}/targets`),
+    enabled: !!benchmarkId,
   });
 }
 
-export function projectConfigOptions(projectId: string) {
+export function benchmarkConfigOptions(benchmarkId: string) {
   return queryOptions({
-    queryKey: ['projects', projectId, 'config'],
-    queryFn: () => fetchJson<StudioConfigResponse>(`${projectApiBase(projectId)}/config`),
-    enabled: !!projectId,
+    queryKey: ['benchmarks', benchmarkId, 'config'],
+    queryFn: () => fetchJson<StudioConfigResponse>(`${benchmarkApiBase(benchmarkId)}/config`),
+    enabled: !!benchmarkId,
     staleTime: 5_000,
   });
 }
 
-export async function syncRemoteResultsApi(projectId?: string): Promise<RemoteStatusResponse> {
-  const url = projectId ? `${projectApiBase(projectId)}/remote/sync` : '/api/remote/sync';
+export async function syncRemoteResultsApi(benchmarkId?: string): Promise<RemoteStatusResponse> {
+  const url = benchmarkId ? `${benchmarkApiBase(benchmarkId)}/remote/sync` : '/api/remote/sync';
   const res = await fetch(url, {
     method: 'POST',
   });
@@ -448,37 +452,37 @@ export async function saveStudioConfig(
 
 // ── Eval runner queries & mutations ──────────────────────────────────────
 
-export function evalDiscoverOptions(projectId?: string) {
-  const url = projectId ? `${projectApiBase(projectId)}/eval/discover` : '/api/eval/discover';
+export function evalDiscoverOptions(benchmarkId?: string) {
+  const url = benchmarkId ? `${benchmarkApiBase(benchmarkId)}/eval/discover` : '/api/eval/discover';
   return queryOptions({
-    queryKey: ['eval-discover', projectId ?? ''],
+    queryKey: ['eval-discover', benchmarkId ?? ''],
     queryFn: () => fetchJson<EvalDiscoverResponse>(url),
     staleTime: 30_000,
   });
 }
 
-export function useEvalDiscover(projectId?: string) {
-  return useQuery(evalDiscoverOptions(projectId));
+export function useEvalDiscover(benchmarkId?: string) {
+  return useQuery(evalDiscoverOptions(benchmarkId));
 }
 
-export function evalTargetsOptions(projectId?: string) {
-  const url = projectId ? `${projectApiBase(projectId)}/eval/targets` : '/api/eval/targets';
+export function evalTargetsOptions(benchmarkId?: string) {
+  const url = benchmarkId ? `${benchmarkApiBase(benchmarkId)}/eval/targets` : '/api/eval/targets';
   return queryOptions({
-    queryKey: ['eval-targets', projectId ?? ''],
+    queryKey: ['eval-targets', benchmarkId ?? ''],
     queryFn: () => fetchJson<EvalTargetsResponse>(url),
     staleTime: 30_000,
   });
 }
 
-export function useEvalTargets(projectId?: string) {
-  return useQuery(evalTargetsOptions(projectId));
+export function useEvalTargets(benchmarkId?: string) {
+  return useQuery(evalTargetsOptions(benchmarkId));
 }
 
 export async function launchEvalRun(
   body: RunEvalRequest,
-  projectId?: string,
+  benchmarkId?: string,
 ): Promise<EvalRunResponse> {
-  const url = projectId ? `${projectApiBase(projectId)}/eval/run` : '/api/eval/run';
+  const url = benchmarkId ? `${benchmarkApiBase(benchmarkId)}/eval/run` : '/api/eval/run';
   const res = await fetch(url, {
     method: 'POST',
     headers: { 'Content-Type': 'application/json' },
@@ -510,9 +514,9 @@ export function useEvalRunStatus(runId: string | null) {
 
 export async function previewEvalCommand(
   body: RunEvalRequest,
-  projectId?: string,
+  benchmarkId?: string,
 ): Promise<EvalPreviewResponse> {
-  const url = projectId ? `${projectApiBase(projectId)}/eval/preview` : '/api/eval/preview';
+  const url = benchmarkId ? `${benchmarkApiBase(benchmarkId)}/eval/preview` : '/api/eval/preview';
   const res = await fetch(url, {
     method: 'POST',
     headers: { 'Content-Type': 'application/json' },
diff --git a/apps/studio/src/lib/types.ts b/apps/studio/src/lib/types.ts
index 93752599d..a395a8072 100644
--- a/apps/studio/src/lib/types.ts
+++ b/apps/studio/src/lib/types.ts
@@ -218,9 +218,9 @@ export interface RemoteStatusResponse {
   last_error?: string;
 }
 
-// ── Project types ────────────────────────────────────────────────────────
+// ── Benchmark types ──────────────────────────────────────────────────────
 
-export interface ProjectSummary {
+export interface BenchmarkSummary {
   id: string;
   name: string;
   path: string;
@@ -231,11 +231,11 @@ export interface ProjectSummary {
   last_run: string | null;
 }
 
-export interface ProjectListResponse {
-  projects: ProjectSummary[];
+export interface BenchmarkListResponse {
+  projects: BenchmarkSummary[];
 }
 
-export interface ProjectEntry {
+export interface BenchmarkEntry {
   id: string;
   name: string;
   path: string;
diff --git a/apps/studio/src/routeTree.gen.ts b/apps/studio/src/routeTree.gen.ts
index 3ae0016b9..c52359e26 100644
--- a/apps/studio/src/routeTree.gen.ts
+++ b/apps/studio/src/routeTree.gen.ts
@@ -12,13 +12,13 @@ import { Route as rootRouteImport } from './routes/__root'
 import { Route as SettingsRouteImport } from './routes/settings'
 import { Route as IndexRouteImport } from './routes/index'
 import { Route as RunsRunIdRouteImport } from './routes/runs/$runId'
-import { Route as ProjectsProjectIdRouteImport } from './routes/projects/$projectId'
+import { Route as ProjectsBenchmarkIdRouteImport } from './routes/projects/$benchmarkId'
 import { Route as ExperimentsExperimentNameRouteImport } from './routes/experiments/$experimentName'
 import { Route as EvalsRunIdEvalIdRouteImport } from './routes/evals/$runId.$evalId'
 import { Route as RunsRunIdSuiteSuiteRouteImport } from './routes/runs/$runId_.suite.$suite'
 import { Route as RunsRunIdCategoryCategoryRouteImport } from './routes/runs/$runId_.category.$category'
-import { Route as ProjectsProjectIdRunsRunIdRouteImport } from './routes/projects/$projectId_/runs/$runId'
-import { Route as ProjectsProjectIdEvalsRunIdEvalIdRouteImport } from './routes/projects/$projectId_/evals/$runId.$evalId'
+import { Route as ProjectsBenchmarkIdRunsRunIdRouteImport } from './routes/projects/$benchmarkId_/runs/$runId'
+import { Route as ProjectsBenchmarkIdEvalsRunIdEvalIdRouteImport } from './routes/projects/$benchmarkId_/evals/$runId.$evalId'
 
 const SettingsRoute = SettingsRouteImport.update({
   id: '/settings',
@@ -35,9 +35,9 @@ const RunsRunIdRoute = RunsRunIdRouteImport.update({
   path: '/runs/$runId',
   getParentRoute: () => rootRouteImport,
 } as any)
-const ProjectsProjectIdRoute = ProjectsProjectIdRouteImport.update({
-  id: '/projects/$projectId',
-  path: '/projects/$projectId',
+const ProjectsBenchmarkIdRoute = ProjectsBenchmarkIdRouteImport.update({
+  id: '/projects/$benchmarkId',
+  path: '/projects/$benchmarkId',
   getParentRoute: () => rootRouteImport,
 } as any)
 const ExperimentsExperimentNameRoute =
@@ -62,16 +62,16 @@ const RunsRunIdCategoryCategoryRoute =
     path: '/runs/$runId/category/$category',
     getParentRoute: () => rootRouteImport,
   } as any)
-const ProjectsProjectIdRunsRunIdRoute =
-  ProjectsProjectIdRunsRunIdRouteImport.update({
-    id: '/projects/$projectId_/runs/$runId',
-    path: '/projects/$projectId/runs/$runId',
+const ProjectsBenchmarkIdRunsRunIdRoute =
+  ProjectsBenchmarkIdRunsRunIdRouteImport.update({
+    id: '/projects/$benchmarkId_/runs/$runId',
+    path: '/projects/$benchmarkId/runs/$runId',
     getParentRoute: () => rootRouteImport,
   } as any)
-const ProjectsProjectIdEvalsRunIdEvalIdRoute =
-  ProjectsProjectIdEvalsRunIdEvalIdRouteImport.update({
-    id: '/projects/$projectId_/evals/$runId/$evalId',
-    path: '/projects/$projectId/evals/$runId/$evalId',
+const ProjectsBenchmarkIdEvalsRunIdEvalIdRoute =
+  ProjectsBenchmarkIdEvalsRunIdEvalIdRouteImport.update({
+    id: '/projects/$benchmarkId_/evals/$runId/$evalId',
+    path: '/projects/$benchmarkId/evals/$runId/$evalId',
     getParentRoute: () => rootRouteImport,
   } as any)
 
@@ -79,38 +79,38 @@ export interface FileRoutesByFullPath {
   '/': typeof IndexRoute
   '/settings': typeof SettingsRoute
   '/experiments/$experimentName': typeof ExperimentsExperimentNameRoute
-  '/projects/$projectId': typeof ProjectsProjectIdRoute
+  '/projects/$benchmarkId': typeof ProjectsBenchmarkIdRoute
   '/runs/$runId': typeof RunsRunIdRoute
   '/evals/$runId/$evalId': typeof EvalsRunIdEvalIdRoute
-  '/projects/$projectId/runs/$runId': typeof ProjectsProjectIdRunsRunIdRoute
+  '/projects/$benchmarkId/runs/$runId': typeof ProjectsBenchmarkIdRunsRunIdRoute
   '/runs/$runId/category/$category': typeof RunsRunIdCategoryCategoryRoute
   '/runs/$runId/suite/$suite': typeof RunsRunIdSuiteSuiteRoute
-  '/projects/$projectId/evals/$runId/$evalId': typeof ProjectsProjectIdEvalsRunIdEvalIdRoute
+  '/projects/$benchmarkId/evals/$runId/$evalId': typeof ProjectsBenchmarkIdEvalsRunIdEvalIdRoute
 }
 export interface FileRoutesByTo {
   '/': typeof IndexRoute
   '/settings': typeof SettingsRoute
   '/experiments/$experimentName': typeof ExperimentsExperimentNameRoute
-  '/projects/$projectId': typeof ProjectsProjectIdRoute
+  '/projects/$benchmarkId': typeof ProjectsBenchmarkIdRoute
   '/runs/$runId': typeof RunsRunIdRoute
   '/evals/$runId/$evalId': typeof EvalsRunIdEvalIdRoute
-  '/projects/$projectId/runs/$runId': typeof ProjectsProjectIdRunsRunIdRoute
+  '/projects/$benchmarkId/runs/$runId': typeof ProjectsBenchmarkIdRunsRunIdRoute
   '/runs/$runId/category/$category': typeof RunsRunIdCategoryCategoryRoute
   '/runs/$runId/suite/$suite': typeof RunsRunIdSuiteSuiteRoute
-  '/projects/$projectId/evals/$runId/$evalId': typeof ProjectsProjectIdEvalsRunIdEvalIdRoute
+  '/projects/$benchmarkId/evals/$runId/$evalId': typeof ProjectsBenchmarkIdEvalsRunIdEvalIdRoute
 }
 export interface FileRoutesById {
   __root__: typeof rootRouteImport
   '/': typeof IndexRoute
   '/settings': typeof SettingsRoute
   '/experiments/$experimentName': typeof ExperimentsExperimentNameRoute
-  '/projects/$projectId': typeof ProjectsProjectIdRoute
+  '/projects/$benchmarkId': typeof ProjectsBenchmarkIdRoute
   '/runs/$runId': typeof RunsRunIdRoute
   '/evals/$runId/$evalId': typeof EvalsRunIdEvalIdRoute
-  '/projects/$projectId_/runs/$runId': typeof ProjectsProjectIdRunsRunIdRoute
+  '/projects/$benchmarkId_/runs/$runId': typeof ProjectsBenchmarkIdRunsRunIdRoute
   '/runs/$runId_/category/$category': typeof RunsRunIdCategoryCategoryRoute
   '/runs/$runId_/suite/$suite': typeof RunsRunIdSuiteSuiteRoute
-  '/projects/$projectId_/evals/$runId/$evalId': typeof ProjectsProjectIdEvalsRunIdEvalIdRoute
+  '/projects/$benchmarkId_/evals/$runId/$evalId': typeof ProjectsBenchmarkIdEvalsRunIdEvalIdRoute
 }
 export interface FileRouteTypes {
   fileRoutesByFullPath: FileRoutesByFullPath
@@ -118,50 +118,50 @@ export interface FileRouteTypes {
     | '/'
     | '/settings'
     | '/experiments/$experimentName'
-    | '/projects/$projectId'
+    | '/projects/$benchmarkId'
     | '/runs/$runId'
     | '/evals/$runId/$evalId'
-    | '/projects/$projectId/runs/$runId'
+    | '/projects/$benchmarkId/runs/$runId'
     | '/runs/$runId/category/$category'
     | '/runs/$runId/suite/$suite'
-    | '/projects/$projectId/evals/$runId/$evalId'
+    | '/projects/$benchmarkId/evals/$runId/$evalId'
   fileRoutesByTo: FileRoutesByTo
   to:
     | '/'
     | '/settings'
     | '/experiments/$experimentName'
-    | '/projects/$projectId'
+    | '/projects/$benchmarkId'
     | '/runs/$runId'
     | '/evals/$runId/$evalId'
-    | '/projects/$projectId/runs/$runId'
+    | '/projects/$benchmarkId/runs/$runId'
     | '/runs/$runId/category/$category'
     | '/runs/$runId/suite/$suite'
-    | '/projects/$projectId/evals/$runId/$evalId'
+    | '/projects/$benchmarkId/evals/$runId/$evalId'
   id:
     | '__root__'
     | '/'
     | '/settings'
     | '/experiments/$experimentName'
-    | '/projects/$projectId'
+    | '/projects/$benchmarkId'
     | '/runs/$runId'
     | '/evals/$runId/$evalId'
-    | '/projects/$projectId_/runs/$runId'
+    | '/projects/$benchmarkId_/runs/$runId'
     | '/runs/$runId_/category/$category'
     | '/runs/$runId_/suite/$suite'
-    | '/projects/$projectId_/evals/$runId/$evalId'
+    | '/projects/$benchmarkId_/evals/$runId/$evalId'
   fileRoutesById: FileRoutesById
 }
 export interface RootRouteChildren {
   IndexRoute: typeof IndexRoute
   SettingsRoute: typeof SettingsRoute
   ExperimentsExperimentNameRoute: typeof ExperimentsExperimentNameRoute
-  ProjectsProjectIdRoute: typeof ProjectsProjectIdRoute
+  ProjectsBenchmarkIdRoute: typeof ProjectsBenchmarkIdRoute
   RunsRunIdRoute: typeof RunsRunIdRoute
   EvalsRunIdEvalIdRoute: typeof EvalsRunIdEvalIdRoute
-  ProjectsProjectIdRunsRunIdRoute: typeof ProjectsProjectIdRunsRunIdRoute
+  ProjectsBenchmarkIdRunsRunIdRoute: typeof ProjectsBenchmarkIdRunsRunIdRoute
   RunsRunIdCategoryCategoryRoute: typeof RunsRunIdCategoryCategoryRoute
   RunsRunIdSuiteSuiteRoute: typeof RunsRunIdSuiteSuiteRoute
-  ProjectsProjectIdEvalsRunIdEvalIdRoute: typeof ProjectsProjectIdEvalsRunIdEvalIdRoute
+  ProjectsBenchmarkIdEvalsRunIdEvalIdRoute: typeof ProjectsBenchmarkIdEvalsRunIdEvalIdRoute
 }
 
 declare module '@tanstack/react-router' {
@@ -187,11 +187,11 @@ declare module '@tanstack/react-router' {
       preLoaderRoute: typeof RunsRunIdRouteImport
       parentRoute: typeof rootRouteImport
     }
-    '/projects/$projectId': {
-      id: '/projects/$projectId'
-      path: '/projects/$projectId'
-      fullPath: '/projects/$projectId'
-      preLoaderRoute: typeof ProjectsProjectIdRouteImport
+    '/projects/$benchmarkId': {
+      id: '/projects/$benchmarkId'
+      path: '/projects/$benchmarkId'
+      fullPath: '/projects/$benchmarkId'
+      preLoaderRoute: typeof ProjectsBenchmarkIdRouteImport
       parentRoute: typeof rootRouteImport
     }
     '/experiments/$experimentName': {
@@ -222,18 +222,18 @@ declare module '@tanstack/react-router' {
       preLoaderRoute: typeof RunsRunIdCategoryCategoryRouteImport
       parentRoute: typeof rootRouteImport
     }
-    '/projects/$projectId_/runs/$runId': {
-      id: '/projects/$projectId_/runs/$runId'
-      path: '/projects/$projectId/runs/$runId'
-      fullPath: '/projects/$projectId/runs/$runId'
-      preLoaderRoute: typeof ProjectsProjectIdRunsRunIdRouteImport
+    '/projects/$benchmarkId_/runs/$runId': {
+      id: '/projects/$benchmarkId_/runs/$runId'
+      path: '/projects/$benchmarkId/runs/$runId'
+      fullPath: '/projects/$benchmarkId/runs/$runId'
+      preLoaderRoute: typeof ProjectsBenchmarkIdRunsRunIdRouteImport
       parentRoute: typeof rootRouteImport
     }
-    '/projects/$projectId_/evals/$runId/$evalId': {
-      id: '/projects/$projectId_/evals/$runId/$evalId'
-      path: '/projects/$projectId/evals/$runId/$evalId'
-      fullPath: '/projects/$projectId/evals/$runId/$evalId'
-      preLoaderRoute: typeof ProjectsProjectIdEvalsRunIdEvalIdRouteImport
+    '/projects/$benchmarkId_/evals/$runId/$evalId': {
+      id: '/projects/$benchmarkId_/evals/$runId/$evalId'
+      path: '/projects/$benchmarkId/evals/$runId/$evalId'
+      fullPath: '/projects/$benchmarkId/evals/$runId/$evalId'
+      preLoaderRoute: typeof ProjectsBenchmarkIdEvalsRunIdEvalIdRouteImport
       parentRoute: typeof rootRouteImport
     }
   }
@@ -243,14 +243,14 @@ const rootRouteChildren: RootRouteChildren = {
   IndexRoute: IndexRoute,
   SettingsRoute: SettingsRoute,
   ExperimentsExperimentNameRoute: ExperimentsExperimentNameRoute,
-  ProjectsProjectIdRoute: ProjectsProjectIdRoute,
+  ProjectsBenchmarkIdRoute: ProjectsBenchmarkIdRoute,
   RunsRunIdRoute: RunsRunIdRoute,
   EvalsRunIdEvalIdRoute: EvalsRunIdEvalIdRoute,
-  ProjectsProjectIdRunsRunIdRoute: ProjectsProjectIdRunsRunIdRoute,
+  ProjectsBenchmarkIdRunsRunIdRoute: ProjectsBenchmarkIdRunsRunIdRoute,
   RunsRunIdCategoryCategoryRoute: RunsRunIdCategoryCategoryRoute,
   RunsRunIdSuiteSuiteRoute: RunsRunIdSuiteSuiteRoute,
-  ProjectsProjectIdEvalsRunIdEvalIdRoute:
-    ProjectsProjectIdEvalsRunIdEvalIdRoute,
+  ProjectsBenchmarkIdEvalsRunIdEvalIdRoute:
+    ProjectsBenchmarkIdEvalsRunIdEvalIdRoute,
 }
 export const routeTree = rootRouteImport
   ._addFileChildren(rootRouteChildren)
diff --git a/apps/studio/src/routes/index.tsx b/apps/studio/src/routes/index.tsx
index 082518ad6..10732d68f 100644
--- a/apps/studio/src/routes/index.tsx
+++ b/apps/studio/src/routes/index.tsx
@@ -18,11 +18,11 @@ import { RunList } from '~/components/RunList';
 import { type RunSourceFilter, RunSourceToolbar } from '~/components/RunSourceToolbar';
 import { TargetsTab } from '~/components/TargetsTab';
 import {
-  addProjectApi,
-  discoverProjectsApi,
+  addBenchmarkApi,
+  discoverBenchmarksApi,
   syncRemoteResultsApi,
+  useBenchmarkList,
   useCompare,
-  useProjectList,
   useRemoteStatus,
   useRunList,
   useStudioConfig,
@@ -42,7 +42,7 @@ export const Route = createFileRoute('/')({
 });
 
 function HomePage() {
-  const { data: projectData, isLoading: projectsLoading } = useProjectList();
+  const { data: projectData, isLoading: projectsLoading } = useBenchmarkList();
   const { data: config, isLoading: configLoading } = useStudioConfig();
   const hasProjects = (projectData?.projects.length ?? 0) > 0;
   const multiProjectDashboard = config?.multi_project_dashboard;
@@ -61,7 +61,7 @@ function HomePage() {
 // ── Projects Dashboard ──────────────────────────────────────────────────
 
 function ProjectsDashboard() {
-  const { data } = useProjectList();
+  const { data } = useBenchmarkList();
   const { data: config } = useStudioConfig();
   const queryClient = useQueryClient();
   const [addPath, setAddPath] = useState('');
@@ -78,10 +78,10 @@ function ProjectsDashboard() {
     if (!addPath.trim()) return;
     setError(null);
     try {
-      await addProjectApi(addPath.trim());
+      await addBenchmarkApi(addPath.trim());
       setAddPath('');
       setShowAddForm(false);
-      queryClient.invalidateQueries({ queryKey: ['projects'] });
+      queryClient.invalidateQueries({ queryKey: ['benchmarks'] });
     } catch (err) {
       setError((err as Error).message);
     }
@@ -92,12 +92,12 @@ function ProjectsDashboard() {
     if (!discoverPath.trim()) return;
     setError(null);
     try {
-      const discovered = await discoverProjectsApi(discoverPath.trim());
+      const discovered = await discoverBenchmarksApi(discoverPath.trim());
       setDiscoverPath('');
       if (discovered.length === 0) {
         setError('No projects with .agentv/ found in that directory.');
       }
-      queryClient.invalidateQueries({ queryKey: ['projects'] });
+      queryClient.invalidateQueries({ queryKey: ['benchmarks'] });
     } catch (err) {
       setError((err as Error).message);
     }
diff --git a/apps/studio/src/routes/projects/$projectId.tsx b/apps/studio/src/routes/projects/$benchmarkId.tsx
similarity index 77%
rename from apps/studio/src/routes/projects/$projectId.tsx
rename to apps/studio/src/routes/projects/$benchmarkId.tsx
index c2de23ae0..1d0660cb4 100644
--- a/apps/studio/src/routes/projects/$projectId.tsx
+++ b/apps/studio/src/routes/projects/$benchmarkId.tsx
@@ -13,11 +13,11 @@ import { RunEvalModal } from '~/components/RunEvalModal';
 import { RunList } from '~/components/RunList';
 import { type RunSourceFilter, RunSourceToolbar } from '~/components/RunSourceToolbar';
 import {
-  projectCompareOptions,
-  projectExperimentsOptions,
-  projectTargetsOptions,
+  benchmarkCompareOptions,
+  benchmarkExperimentsOptions,
+  benchmarkTargetsOptions,
   syncRemoteResultsApi,
-  useProjectRunList,
+  useBenchmarkRunList,
   useRemoteStatus,
   useStudioConfig,
 } from '~/lib/api';
@@ -32,12 +32,12 @@ const tabs: { id: TabId; label: string }[] = [
   { id: 'targets', label: 'Targets' },
 ];
 
-export const Route = createFileRoute('/projects/$projectId')({
+export const Route = createFileRoute('/projects/$benchmarkId')({
   component: ProjectHomePage,
 });
 
 function ProjectHomePage() {
-  const { projectId } = Route.useParams();
+  const { benchmarkId } = Route.useParams();
   const routerState = useRouterState();
   const searchParams = routerState.location.search as Record<string, string>;
   const tab = searchParams.tab as TabId | undefined;
@@ -51,7 +51,7 @@ function ProjectHomePage() {
   return (
     <div className="space-y-6">
       <div className="flex items-center justify-between">
-        <h1 className="text-2xl font-semibold text-white">{projectId}</h1>
+        <h1 className="text-2xl font-semibold text-white">{benchmarkId}</h1>
         {!isReadOnly && (
           <button
             type="button"
@@ -72,8 +72,8 @@ function ProjectHomePage() {
               key={t.id}
               onClick={() =>
                 navigate({
-                  to: '/projects/$projectId',
-                  params: { projectId },
+                  to: '/projects/$benchmarkId',
+                  params: { benchmarkId },
                   search: { tab: t.id } as Record<string, string>,
                 })
               }
@@ -89,26 +89,26 @@ function ProjectHomePage() {
         </div>
       </div>
 
-      {activeTab === 'runs' && <ProjectRunsTab projectId={projectId} />}
-      {activeTab === 'experiments' && <ProjectExperimentsTab projectId={projectId} />}
-      {activeTab === 'compare' && <ProjectCompareTab projectId={projectId} />}
-      {activeTab === 'targets' && <ProjectTargetsTab projectId={projectId} />}
+      {activeTab === 'runs' && <ProjectRunsTab benchmarkId={benchmarkId} />}
+      {activeTab === 'experiments' && <ProjectExperimentsTab benchmarkId={benchmarkId} />}
+      {activeTab === 'compare' && <ProjectCompareTab benchmarkId={benchmarkId} />}
+      {activeTab === 'targets' && <ProjectTargetsTab benchmarkId={benchmarkId} />}
 
       {!isReadOnly && (
         <RunEvalModal
           open={showRunEval}
           onClose={() => setShowRunEval(false)}
-          projectId={projectId}
+          benchmarkId={benchmarkId}
         />
       )}
     </div>
   );
 }
 
-function ProjectRunsTab({ projectId }: { projectId: string }) {
+function ProjectRunsTab({ benchmarkId }: { benchmarkId: string }) {
   const queryClient = useQueryClient();
-  const { data, isLoading, error } = useProjectRunList(projectId);
-  const { data: remoteStatus } = useRemoteStatus(projectId);
+  const { data, isLoading, error } = useBenchmarkRunList(benchmarkId);
+  const { data: remoteStatus } = useRemoteStatus(benchmarkId);
   const [sourceFilter, setSourceFilter] = useState<RunSourceFilter>('all');
   const [syncInFlight, setSyncInFlight] = useState(false);
 
@@ -120,13 +120,13 @@ function ProjectRunsTab({ projectId }: { projectId: string }) {
   async function handleSyncRemote() {
     setSyncInFlight(true);
     try {
-      await syncRemoteResultsApi(projectId);
+      await syncRemoteResultsApi(benchmarkId);
       await Promise.all([
-        queryClient.invalidateQueries({ queryKey: ['projects', projectId, 'runs'] }),
-        queryClient.invalidateQueries({ queryKey: ['projects', projectId, 'experiments'] }),
-        queryClient.invalidateQueries({ queryKey: ['projects', projectId, 'compare'] }),
-        queryClient.invalidateQueries({ queryKey: ['projects', projectId, 'targets'] }),
-        queryClient.invalidateQueries({ queryKey: ['remote-status', projectId] }),
+        queryClient.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'runs'] }),
+        queryClient.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'experiments'] }),
+        queryClient.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'compare'] }),
+        queryClient.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'targets'] }),
+        queryClient.invalidateQueries({ queryKey: ['remote-status', benchmarkId] }),
       ]);
     } finally {
       setSyncInFlight(false);
@@ -160,13 +160,13 @@ function ProjectRunsTab({ projectId }: { projectId: string }) {
         syncInFlight={syncInFlight}
         onSync={handleSyncRemote}
       />
-      <RunList runs={filteredRuns} projectId={projectId} />
+      <RunList runs={filteredRuns} benchmarkId={benchmarkId} />
     </div>
   );
 }
 
-function ProjectExperimentsTab({ projectId }: { projectId: string }) {
-  const { data, isLoading } = useQuery(projectExperimentsOptions(projectId));
+function ProjectExperimentsTab({ benchmarkId }: { benchmarkId: string }) {
+  const { data, isLoading } = useQuery(benchmarkExperimentsOptions(benchmarkId));
   const experiments = (data as ExperimentsResponse | undefined)?.experiments ?? [];
 
   if (isLoading) {
@@ -209,13 +209,13 @@ function ProjectExperimentsTab({ projectId }: { projectId: string }) {
   );
 }
 
-function ProjectCompareTab({ projectId }: { projectId: string }) {
-  const { data, isLoading, isError, error } = useQuery(projectCompareOptions(projectId));
+function ProjectCompareTab({ benchmarkId }: { benchmarkId: string }) {
+  const { data, isLoading, isError, error } = useQuery(benchmarkCompareOptions(benchmarkId));
   return <CompareTab data={data} isLoading={isLoading} isError={isError} error={error} />;
 }
 
-function ProjectTargetsTab({ projectId }: { projectId: string }) {
-  const { data, isLoading } = useQuery(projectTargetsOptions(projectId));
+function ProjectTargetsTab({ benchmarkId }: { benchmarkId: string }) {
+  const { data, isLoading } = useQuery(benchmarkTargetsOptions(benchmarkId));
   const targets = (data as TargetsResponse | undefined)?.targets ?? [];
 
   if (isLoading) {
diff --git a/apps/studio/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx b/apps/studio/src/routes/projects/$benchmarkId_/evals/$runId.$evalId.tsx
similarity index 84%
rename from apps/studio/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx
rename to apps/studio/src/routes/projects/$benchmarkId_/evals/$runId.$evalId.tsx
index 62242c174..0ad6c533d 100644
--- a/apps/studio/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx
+++ b/apps/studio/src/routes/projects/$benchmarkId_/evals/$runId.$evalId.tsx
@@ -7,15 +7,15 @@ import { useState } from 'react';
 
 import { EvalDetail } from '~/components/EvalDetail';
 import { RunEvalModal } from '~/components/RunEvalModal';
-import { useProjectRunDetail, useStudioConfig } from '~/lib/api';
+import { useBenchmarkRunDetail, useStudioConfig } from '~/lib/api';
 
-export const Route = createFileRoute('/projects/$projectId_/evals/$runId/$evalId')({
+export const Route = createFileRoute('/projects/$benchmarkId_/evals/$runId/$evalId')({
   component: ProjectEvalDetailPage,
 });
 
 function ProjectEvalDetailPage() {
-  const { projectId, runId, evalId } = Route.useParams();
-  const { data, isLoading, error } = useProjectRunDetail(projectId, runId);
+  const { benchmarkId, runId, evalId } = Route.useParams();
+  const { data, isLoading, error } = useBenchmarkRunDetail(benchmarkId, runId);
   const { data: config } = useStudioConfig();
   const [showRunEval, setShowRunEval] = useState(false);
   const isReadOnly = config?.read_only === true;
@@ -69,12 +69,12 @@ function ProjectEvalDetailPage() {
           </button>
         )}
       </div>
-      <EvalDetail eval={result} runId={runId} projectId={projectId} />
+      <EvalDetail eval={result} runId={runId} benchmarkId={benchmarkId} />
       {!isReadOnly && (
         <RunEvalModal
           open={showRunEval}
           onClose={() => setShowRunEval(false)}
-          projectId={projectId}
+          benchmarkId={benchmarkId}
           prefill={{
             testIds: [evalId],
             target: result.target,
diff --git a/apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx b/apps/studio/src/routes/projects/$benchmarkId_/runs/$runId.tsx
similarity index 85%
rename from apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx
rename to apps/studio/src/routes/projects/$benchmarkId_/runs/$runId.tsx
index 2d44cb894..d9776d23a 100644
--- a/apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx
+++ b/apps/studio/src/routes/projects/$benchmarkId_/runs/$runId.tsx
@@ -7,15 +7,15 @@ import { useState } from 'react';
 
 import { RunDetail } from '~/components/RunDetail';
 import { RunEvalModal } from '~/components/RunEvalModal';
-import { useProjectRunDetail, useStudioConfig } from '~/lib/api';
+import { useBenchmarkRunDetail, useStudioConfig } from '~/lib/api';
 
-export const Route = createFileRoute('/projects/$projectId_/runs/$runId')({
+export const Route = createFileRoute('/projects/$benchmarkId_/runs/$runId')({
   component: ProjectRunDetailPage,
 });
 
 function ProjectRunDetailPage() {
-  const { projectId, runId } = Route.useParams();
-  const { data, isLoading, error } = useProjectRunDetail(projectId, runId);
+  const { benchmarkId, runId } = Route.useParams();
+  const { data, isLoading, error } = useBenchmarkRunDetail(benchmarkId, runId);
   const { data: config } = useStudioConfig();
   const [showRunEval, setShowRunEval] = useState(false);
   const isReadOnly = config?.read_only === true;
@@ -78,12 +78,12 @@ function ProjectRunDetailPage() {
           </button>
         )}
       </div>
-      <RunDetail results={data?.results ?? []} runId={runId} projectId={projectId} />
+      <RunDetail results={data?.results ?? []} runId={runId} benchmarkId={benchmarkId} />
       {!isReadOnly && (
         <RunEvalModal
           open={showRunEval}
           onClose={() => setShowRunEval(false)}
-          projectId={projectId}
+          benchmarkId={benchmarkId}
           prefill={prefill}
         />
       )}
diff --git a/packages/core/src/projects.ts b/packages/core/src/benchmarks.ts
similarity index 58%
rename from packages/core/src/projects.ts
rename to packages/core/src/benchmarks.ts
index 5fc58a541..2fb603b12 100644
--- a/packages/core/src/projects.ts
+++ b/packages/core/src/benchmarks.ts
@@ -1,19 +1,19 @@
 /**
- * Project registry for AgentV Studio multi-project support.
+ * Benchmark registry for AgentV Studio multi-benchmark support.
  *
- * A Project = any directory containing a `.agentv/` folder.
- * The registry lives at `~/.agentv/projects.yaml` and tracks registered projects.
+ * A Benchmark = any directory containing a `.agentv/` folder. Registered benchmarks are tracked
+ * in `~/.agentv/projects.yaml` (the file keeps its pre-rename name).
  *
  * YAML format:
- *   projects:
+ *   benchmarks:
  *     - id: my-app
  *       name: My App
  *       path: /home/user/projects/my-app
  *       addedAt: "2026-03-20T10:00:00Z"
  *       lastOpenedAt: "2026-03-30T14:00:00Z"
  *
- * To extend: use loadProjectRegistry() / saveProjectRegistry() for CRUD,
- * discoverProjects() to scan a directory tree for `.agentv/` directories.
+ * To extend: use loadBenchmarkRegistry() / saveBenchmarkRegistry() for CRUD,
+ * discoverBenchmarks() to scan a directory tree for `.agentv/` directories.
  */
 
 import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
@@ -25,7 +25,7 @@ import { getAgentvHome } from './paths.js';
 
 // ── Types ───────────────────────────────────────────────────────────────
 
-export interface ProjectEntry {
+export interface BenchmarkEntry {
   id: string;
   name: string;
   path: string;
@@ -33,59 +33,59 @@ export interface ProjectEntry {
   lastOpenedAt: string;
 }
 
-export interface ProjectRegistry {
-  projects: ProjectEntry[];
+export interface BenchmarkRegistry {
+  benchmarks: BenchmarkEntry[];
 }
 
 // ── Registry path ───────────────────────────────────────────────────────
 
-export function getProjectsRegistryPath(): string {
+export function getBenchmarksRegistryPath(): string {
   return path.join(getAgentvHome(), 'projects.yaml');
 }
 
 // ── Load / Save ─────────────────────────────────────────────────────────
 
-export function loadProjectRegistry(): ProjectRegistry {
-  const registryPath = getProjectsRegistryPath();
+export function loadBenchmarkRegistry(): BenchmarkRegistry {
+  const registryPath = getBenchmarksRegistryPath();
   if (!existsSync(registryPath)) {
-    return { projects: [] };
+    return { benchmarks: [] };
   }
   try {
     const raw = readFileSync(registryPath, 'utf-8');
     const parsed = parseYaml(raw);
-    if (!parsed || !Array.isArray(parsed.projects)) {
-      return { projects: [] };
+    if (!parsed || !Array.isArray(parsed.benchmarks ?? parsed.projects)) {
+      return { benchmarks: [] }; // no `benchmarks:` or legacy `projects:` list
     }
-    return { projects: parsed.projects as ProjectEntry[] };
+    return { benchmarks: (parsed.benchmarks ?? parsed.projects) as BenchmarkEntry[] };
   } catch {
-    return { projects: [] };
+    return { benchmarks: [] };
   }
 }
 
-export function saveProjectRegistry(registry: ProjectRegistry): void {
-  const registryPath = getProjectsRegistryPath();
+export function saveBenchmarkRegistry(registry: BenchmarkRegistry): void {
+  const registryPath = getBenchmarksRegistryPath();
   const dir = path.dirname(registryPath);
   if (!existsSync(dir)) {
     mkdirSync(dir, { recursive: true });
   }
-  writeFileSync(registryPath, stringifyYaml(registry), 'utf-8');
+  writeFileSync(registryPath, stringifyYaml({ benchmarks: registry.benchmarks }), 'utf-8');
 }
 
 // ── CRUD operations ─────────────────────────────────────────────────────
 
 /**
- * Derive a URL-safe project ID from a directory path.
+ * Derive a URL-safe benchmark ID from a directory path.
  * Uses the directory basename, lowercased, with non-alphanumeric chars replaced by hyphens.
  * Appends a numeric suffix if the ID already exists in the registry.
  */
-export function deriveProjectId(dirPath: string, existingIds: string[]): string {
+export function deriveBenchmarkId(dirPath: string, existingIds: string[]): string {
   const base = path
     .basename(dirPath)
     .toLowerCase()
     .replace(/[^a-z0-9-]/g, '-')
     .replace(/-+/g, '-')
     .replace(/^-|-$/g, '');
-  let candidate = base || 'project';
+  let candidate = base || 'benchmark';
   let suffix = 2;
   while (existingIds.includes(candidate)) {
     candidate = `${base}-${suffix}`;
@@ -95,11 +95,11 @@ export function deriveProjectId(dirPath: string, existingIds: string[]): string
 }
 
 /**
- * Register a project by path. Returns the new entry, or the existing one if already registered.
+ * Register a benchmark by path. Returns the new entry, or the existing one if already registered.
  * Validates that the path exists and contains a `.agentv/` directory.
  */
-export function addProject(projectPath: string): ProjectEntry {
-  const absPath = path.resolve(projectPath);
+export function addBenchmark(benchmarkPath: string): BenchmarkEntry {
+  const absPath = path.resolve(benchmarkPath);
   if (!existsSync(absPath)) {
     throw new Error(`Directory not found: ${absPath}`);
   }
@@ -107,56 +107,56 @@ export function addProject(projectPath: string): ProjectEntry {
     throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
   }
 
-  const registry = loadProjectRegistry();
-  const existing = registry.projects.find((p) => p.path === absPath);
+  const registry = loadBenchmarkRegistry();
+  const existing = registry.benchmarks.find((p) => p.path === absPath);
   if (existing) {
     return existing;
   }
 
   const now = new Date().toISOString();
-  const entry: ProjectEntry = {
-    id: deriveProjectId(
+  const entry: BenchmarkEntry = {
+    id: deriveBenchmarkId(
       absPath,
-      registry.projects.map((p) => p.id),
+      registry.benchmarks.map((p) => p.id),
     ),
     name: path.basename(absPath),
     path: absPath,
     addedAt: now,
     lastOpenedAt: now,
   };
-  registry.projects.push(entry);
-  saveProjectRegistry(registry);
+  registry.benchmarks.push(entry);
+  saveBenchmarkRegistry(registry);
   return entry;
 }
 
 /**
- * Remove a project by ID. Returns true if removed, false if not found.
+ * Remove a benchmark by ID. Returns true if removed, false if not found.
  */
-export function removeProject(projectId: string): boolean {
-  const registry = loadProjectRegistry();
-  const idx = registry.projects.findIndex((p) => p.id === projectId);
+export function removeBenchmark(benchmarkId: string): boolean {
+  const registry = loadBenchmarkRegistry();
+  const idx = registry.benchmarks.findIndex((p) => p.id === benchmarkId);
   if (idx < 0) return false;
-  registry.projects.splice(idx, 1);
-  saveProjectRegistry(registry);
+  registry.benchmarks.splice(idx, 1);
+  saveBenchmarkRegistry(registry);
   return true;
 }
 
 /**
- * Look up a project by ID. Returns undefined if not found.
+ * Look up a benchmark by ID. Returns undefined if not found.
  */
-export function getProject(projectId: string): ProjectEntry | undefined {
-  return loadProjectRegistry().projects.find((p) => p.id === projectId);
+export function getBenchmark(benchmarkId: string): BenchmarkEntry | undefined {
+  return loadBenchmarkRegistry().benchmarks.find((p) => p.id === benchmarkId);
 }
 
 /**
- * Update lastOpenedAt for a project.
+ * Update lastOpenedAt for a benchmark.
  */
-export function touchProject(projectId: string): void {
-  const registry = loadProjectRegistry();
-  const entry = registry.projects.find((p) => p.id === projectId);
+export function touchBenchmark(benchmarkId: string): void {
+  const registry = loadBenchmarkRegistry();
+  const entry = registry.benchmarks.find((p) => p.id === benchmarkId);
   if (entry) {
     entry.lastOpenedAt = new Date().toISOString();
-    saveProjectRegistry(registry);
+    saveBenchmarkRegistry(registry);
   }
 }
 
@@ -164,9 +164,9 @@ export function touchProject(projectId: string): void {
 
 /**
  * Scan a directory tree (up to maxDepth levels) for directories containing `.agentv/`.
- * Returns absolute paths of discovered project directories.
+ * Returns absolute paths of discovered benchmark directories.
  */
-export function discoverProjects(rootDir: string, maxDepth = 2): string[] {
+export function discoverBenchmarks(rootDir: string, maxDepth = 2): string[] {
   const absRoot = path.resolve(rootDir);
   if (!existsSync(absRoot) || !statSync(absRoot).isDirectory()) {
     return [];
@@ -177,10 +177,10 @@ export function discoverProjects(rootDir: string, maxDepth = 2): string[] {
   function scan(dir: string, depth: number) {
     if (depth > maxDepth) return;
 
-    // Check if this directory itself is a project
+    // Check if this directory itself is a benchmark
     if (existsSync(path.join(dir, '.agentv'))) {
       results.push(dir);
-      return; // Don't scan subdirectories of a project
+      return; // Don't scan subdirectories of a benchmark
     }
 
     if (depth === maxDepth) return;
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 64a68de23..a44e7d6e7 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -80,18 +80,18 @@ export {
   getWorkspacePoolRoot,
 } from './paths.js';
 export {
-  type ProjectEntry,
-  type ProjectRegistry,
-  loadProjectRegistry,
-  saveProjectRegistry,
-  addProject,
-  removeProject,
-  getProject,
-  touchProject,
-  discoverProjects,
-  deriveProjectId,
-  getProjectsRegistryPath,
-} from './projects.js';
+  type BenchmarkEntry,
+  type BenchmarkRegistry,
+  loadBenchmarkRegistry,
+  saveBenchmarkRegistry,
+  addBenchmark,
+  removeBenchmark,
+  getBenchmark,
+  touchBenchmark,
+  discoverBenchmarks,
+  deriveBenchmarkId,
+  getBenchmarksRegistryPath,
+} from './benchmarks.js';
 export { trimBaselineResult } from './evaluation/baseline.js';
 export { DEFAULT_CATEGORY, deriveCategory } from './evaluation/category.js';
 export * from './observability/index.js';