From f8f1d6acfb5620162e7b3d9ac379ee707f06e32f Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Wed, 8 Apr 2026 13:23:41 +0000
Subject: [PATCH 1/3] feat(studio): add experiment-scoped runs and read-only
 mode

---
 apps/cli/src/commands/eval/artifact-writer.ts | 15 ++--
 apps/cli/src/commands/eval/commands/run.ts    |  6 ++
 apps/cli/src/commands/eval/result-layout.ts   | 29 ++++++--
 apps/cli/src/commands/eval/run-eval.ts        | 16 +++--
 apps/cli/src/commands/inspect/utils.ts        | 55 ++++++++++++---
 apps/cli/src/commands/pipeline/input.ts       |  4 +-
 apps/cli/src/commands/pipeline/run.ts         |  5 +-
 apps/cli/src/commands/results/eval-runner.ts  | 10 ++-
 apps/cli/src/commands/results/serve.ts        | 65 +++++++++++++-----
 apps/cli/test/commands/results/serve.test.ts  | 35 ++++++++++
 apps/cli/test/commands/trace/trace.test.ts    | 14 ++++
 apps/studio/src/components/Breadcrumbs.tsx    | 14 +++-
 apps/studio/src/components/EvalDetail.tsx     |  6 +-
 apps/studio/src/components/RunList.tsx        |  4 +-
 apps/studio/src/components/Sidebar.tsx        |  6 +-
 apps/studio/src/lib/types.ts                  |  3 +
 .../src/routes/evals/$runId.$evalId.tsx       | 38 ++++++-----
 apps/studio/src/routes/index.tsx              | 68 ++++++++++++-------
 .../studio/src/routes/projects/$projectId.tsx | 34 ++++++----
 .../$projectId_/evals/$runId.$evalId.tsx      | 40 ++++++-----
 .../projects/$projectId_/runs/$runId.tsx      | 34 ++++++----
 apps/studio/src/routes/runs/$runId.tsx        | 24 ++++---
 apps/studio/src/routes/settings.tsx           | 21 +++---
 23 files changed, 388 insertions(+), 158 deletions(-)
diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts
index 63be38f3f..7bd1e359c 100644
--- a/apps/cli/src/commands/eval/artifact-writer.ts
+++ b/apps/cli/src/commands/eval/artifact-writer.ts
@@ -61,6 +61,7 @@ export interface BenchmarkArtifact {
     readonly timestamp: string;
     readonly targets: readonly string[];
     readonly tests_run: readonly string[];
+    readonly experiment?: string;
   };
   readonly run_summary: Record<
     string,
@@ -97,6 +98,7 @@ export interface IndexArtifactEntry {
   readonly suite?: string;
   readonly category?: string;
   readonly conversation_id?: string;
+  readonly experiment?: string;
   readonly score: number;
   readonly target: string;
   readonly scores?: readonly Record<string, unknown>[];
@@ -313,6 +315,7 @@ export function buildTimingArtifact(results: readonly EvaluationResult[]): Timin
 export function buildBenchmarkArtifact(
   results: readonly EvaluationResult[],
   evalFile = '',
+  experiment?: string,
 ): BenchmarkArtifact {
   const targetSet = new Set<string>();
   const testIdSet = new Set<string>();
@@ -405,6 +408,7 @@ export function buildBenchmarkArtifact(
       timestamp,
       targets,
       tests_run: testIds,
+      experiment,
     },
     run_summary: runSummary,
     per_grader_summary: perEvaluatorSummary,
@@ -689,7 +693,7 @@ export function parseJsonlResults(content: string): EvaluationResult[] {
 export async function writeArtifacts(
   jsonlPath: string,
   outputDir: string,
-  options?: { evalFile?: string },
+  options?: { evalFile?: string; experiment?: string },
 ): Promise<{
   testArtifactDir: string;
   timingPath: string;
@@ -705,7 +709,7 @@ export async function writeArtifacts(
 export async function writeArtifactsFromResults(
   results: readonly EvaluationResult[],
   outputDir: string,
-  options?: { evalFile?: string },
+  options?: { evalFile?: string; experiment?: string },
 ): Promise<{
   testArtifactDir: string;
   timingPath: string;
@@ -746,7 +750,10 @@ export async function writeArtifactsFromResults(
       );
     }
 
-    indexRecords.push(buildResultIndexArtifact(result));
+    indexRecords.push({
+      ...buildResultIndexArtifact(result),
+      experiment: options?.experiment,
+    });
   }
 
   // Write aggregate timing
@@ -754,7 +761,7 @@ export async function writeArtifactsFromResults(
   await writeFile(timingPath, `${JSON.stringify(timing, null, 2)}\n`, 'utf8');
 
   // Write benchmark
-  const benchmark = buildBenchmarkArtifact(results, options?.evalFile);
+  const benchmark = buildBenchmarkArtifact(results, options?.evalFile, options?.experiment);
   await writeFile(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}\n`, 'utf8');
 
   await writeJsonlFile(indexPath, indexRecords);
diff --git a/apps/cli/src/commands/eval/commands/run.ts b/apps/cli/src/commands/eval/commands/run.ts
index 098cffa5c..282d8d655 100644
--- a/apps/cli/src/commands/eval/commands/run.ts
+++ b/apps/cli/src/commands/eval/commands/run.ts
@@ -60,6 +60,11 @@ export const evalRunCommand = command({
       long: 'output-format',
       description: "[Deprecated] Output format: 'jsonl', 'yaml', or 'html' (default: jsonl)",
     }),
+    experiment: option({
+      type: optional(string),
+      long: 'experiment',
+      description: 'Experiment label for canonical run output (default: default)',
+    }),
     export: multioption({
       type: array(string),
       long: 'export',
@@ -223,6 +228,7 @@ export const evalRunCommand = command({
       out: args.out,
       output: args.output,
       outputFormat: args.outputFormat,
+      experiment: args.experiment,
       export: args.export,
       dryRun: args.dryRun,
       dryRunDelay: args.dryRunDelay,
diff --git a/apps/cli/src/commands/eval/result-layout.ts b/apps/cli/src/commands/eval/result-layout.ts
index b6e6c57b7..f9acd3e69 100644
--- a/apps/cli/src/commands/eval/result-layout.ts
+++ b/apps/cli/src/commands/eval/result-layout.ts
@@ -3,17 +3,38 @@ import path from 'node:path';
 
 export const RESULT_INDEX_FILENAME = 'index.jsonl';
 export const RESULT_RUNS_DIRNAME = 'runs';
+export const DEFAULT_EXPERIMENT_NAME = 'default';
+
+/** Trim and validate an experiment label; blank input yields "default", invalid names throw. */
+export function normalizeExperimentName(experiment?: string): string {
+  const trimmed = experiment?.trim();
+  if (!trimmed) return DEFAULT_EXPERIMENT_NAME;
+  // Dot-only names ("." / "..") are path navigation, not a directory name under runs/.
+  if (/^\.+$/.test(trimmed) || !/^[A-Za-z0-9._-]+$/.test(trimmed)) {
+    throw new Error(
+      `Invalid experiment name "${trimmed}". Use only letters, numbers, ".", "_" and "-".`,
+    );
+  }
+  return trimmed;
+}
 
 export function createRunDirName(timestamp = new Date()): string {
   return timestamp.toISOString().replace(/[:.]/g, '-');
 }
 
-export function buildDefaultRunDir(cwd: string): string {
-  return path.join(cwd, '.agentv', 'results', RESULT_RUNS_DIRNAME, createRunDirName());
+/** Default run directory: <cwd>/.agentv/results/runs/<experiment>/<run dir name>. */
+export function buildDefaultRunDir(
+  cwd: string,
+  experiment?: string,
+  timestamp = new Date(),
+): string {
+  const runsRoot = path.join(cwd, '.agentv', 'results', RESULT_RUNS_DIRNAME);
+  return path.join(runsRoot, normalizeExperimentName(experiment), createRunDirName(timestamp));
 }
 
-export function buildDefaultIndexPath(cwd: string): string {
-  return path.join(buildDefaultRunDir(cwd), RESULT_INDEX_FILENAME);
+/** Path of the index file inside a newly named default run directory for `experiment`. */
+export function buildDefaultIndexPath(cwd: string, experiment?: string): string {
+  return path.join(buildDefaultRunDir(cwd, experiment), RESULT_INDEX_FILENAME);
 }
 
 export function resolveRunIndexPath(runDir: string): string {
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 471293a52..c97d3d816 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -33,7 +33,7 @@ import { writeBenchmarkJson } from './benchmark-writer.js';
 import { loadEnvFromHierarchy } from './env.js';
 import { type OutputWriter, createOutputWriter, createWriterFromPath } from './output-writer.js';
 import { ProgressDisplay, type Verdict, type WorkerProgress } from './progress-display.js';
-import { buildDefaultRunDir } from './result-layout.js';
+import { buildDefaultRunDir, normalizeExperimentName } from './result-layout.js';
 import {
   buildExclusionFilter,
   loadErrorTestIds,
@@ -96,6 +96,7 @@ interface NormalizedOptions {
   readonly tags: readonly string[];
   readonly excludeTags: readonly string[];
   readonly transcript?: string;
+  readonly experiment?: string;
 }
 
 function normalizeBoolean(value: unknown): boolean {
@@ -363,6 +364,7 @@ function normalizeOptions(
     tags: normalizeStringArray(rawOptions.tag),
     excludeTags: normalizeStringArray(rawOptions.excludeTag),
     transcript: normalizeString(rawOptions.transcript),
+    experiment: normalizeString(rawOptions.experiment),
   } satisfies NormalizedOptions;
 }
 
@@ -374,8 +376,8 @@ async function ensureFileExists(filePath: string, description: string): Promise<
   }
 }
 
-function buildDefaultOutputPath(cwd: string): string {
-  const runDir = buildDefaultRunDir(cwd);
+function buildDefaultOutputPathForExperiment(cwd: string, experiment?: string): string {
+  const runDir = buildDefaultRunDir(cwd, experiment);
   mkdirSync(runDir, { recursive: true });
   return path.join(runDir, 'index.jsonl');
 }
@@ -868,6 +870,9 @@ export async function runEvalCommand(
       .replace(/:/g, '-')
       .replace(/\./g, '-');
   }
+  if (!process.env.AGENTV_EXPERIMENT) {
+    process.env.AGENTV_EXPERIMENT = normalizeExperimentName(options.experiment);
+  }
 
   // Load agentv.config.ts (if present) for default values
   let config: Awaited<ReturnType<typeof loadTsConfig>> = null;
@@ -987,8 +992,8 @@ export async function runEvalCommand(
     mkdirSync(runDir, { recursive: true });
     usesDefaultArtifactWorkspace = false;
   } else {
-    // Default: .agentv/results/runs/<timestamp>/
-    outputPath = buildDefaultOutputPath(cwd);
+    // Default: .agentv/results/runs/<experiment>/<timestamp>/
+    outputPath = buildDefaultOutputPathForExperiment(cwd, options.experiment);
     runDir = path.dirname(outputPath);
     usesDefaultArtifactWorkspace = true;
   }
@@ -1426,6 +1431,7 @@ export async function runEvalCommand(
         indexPath,
       } = await writeArtifactsFromResults(allResults, runDir, {
         evalFile,
+        experiment: normalizeExperimentName(options.experiment),
       });
       console.log(`Artifact workspace written to: ${runDir}`);
       console.log(`  Index: ${indexPath}`);
diff --git a/apps/cli/src/commands/inspect/utils.ts b/apps/cli/src/commands/inspect/utils.ts
index 01c6f6fc0..0d9689153 100644
--- a/apps/cli/src/commands/inspect/utils.ts
+++ b/apps/cli/src/commands/inspect/utils.ts
@@ -523,6 +523,7 @@ export function toTraceSummary(result: RawResult): TraceSummary | undefined {
 export interface ResultFileMeta {
   path: string;
   filename: string;
+  displayName: string;
   timestamp: string;
   testCount: number;
   passRate: number;
@@ -530,24 +531,57 @@ export interface ResultFileMeta {
   sizeBytes: number;
 }
 
+/**
+ * Turn a run directory path relative to runs/ into a run id: "<timestamp>" for
+ * top-level or default-experiment runs, "<experiment>::<timestamp>" otherwise.
+ */
+function buildRunId(relativeRunPath: string): string {
+  const normalized = relativeRunPath.split(path.sep).join('/');
+  const segments = normalized.split('/').filter(Boolean);
+  const timestamp = segments.at(-1);
+  // No usable segment: fall back to the normalized path so the result is always a string.
+  if (timestamp === undefined) return normalized;
+  const experiment = segments.slice(0, -1).join('/');
+  return !experiment || experiment === 'default' ? timestamp : `${experiment}::${timestamp}`;
+}
+
+/** Recursively collect runs under `currentDir`, stopping at each directory that is a run. */
+function collectRunManifestPaths(
+  runsDir: string,
+  currentDir: string,
+  files: { filePath: string; displayName: string; runId: string }[],
+): void {
+  const primaryPath = resolveExistingRunPrimaryPath(currentDir);
+  if (!primaryPath) {
+    // Not a run directory itself: descend into its subdirectories instead.
+    for (const entry of readdirSync(currentDir, { withFileTypes: true })) {
+      if (entry.isDirectory()) {
+        collectRunManifestPaths(runsDir, path.join(currentDir, entry.name), files);
+      }
+    }
+    return;
+  }
+  // A run directory is recorded once and nothing beneath it is scanned.
+  files.push({
+    filePath: primaryPath,
+    displayName: path.basename(currentDir),
+    runId: buildRunId(path.relative(runsDir, currentDir)),
+  });
+}
+
 /**
  * Enumerate canonical run manifests in `.agentv/results/runs/`.
  */
 export function listResultFiles(cwd: string, limit?: number): ResultFileMeta[] {
   const runsDir = path.join(cwd, '.agentv', 'results', RESULT_RUNS_DIRNAME);
 
-  const files: { filePath: string; displayName: string }[] = [];
+  const files: { filePath: string; displayName: string; runId: string }[] = [];
 
   try {
     const entries = readdirSync(runsDir, { withFileTypes: true });
     for (const entry of entries) {
-      if (!entry.isDirectory()) {
-        continue;
-      }
-
-      const primaryPath = resolveExistingRunPrimaryPath(path.join(runsDir, entry.name));
-      if (primaryPath) {
-        files.push({ filePath: primaryPath, displayName: entry.name });
+      if (entry.isDirectory()) {
+        collectRunManifestPaths(runsDir, path.join(runsDir, entry.name), files);
       }
     }
   } catch {
@@ -561,7 +595,7 @@ export function listResultFiles(cwd: string, limit?: number): ResultFileMeta[] {
 
   const metas: ResultFileMeta[] = [];
 
-  for (const { filePath, displayName } of limited) {
+  for (const { filePath, displayName, runId } of limited) {
     try {
       const fileStat = statSync(filePath);
       const results = loadResultFile(filePath);
@@ -576,7 +610,8 @@ export function listResultFiles(cwd: string, limit?: number): ResultFileMeta[] {
 
       metas.push({
         path: filePath,
-        filename: displayName,
+        filename: runId,
+        displayName,
         timestamp,
         testCount,
         passRate,
diff --git a/apps/cli/src/commands/pipeline/input.ts b/apps/cli/src/commands/pipeline/input.ts
index 3eb7ad0a4..c3a54e20d 100644
--- a/apps/cli/src/commands/pipeline/input.ts
+++ b/apps/cli/src/commands/pipeline/input.ts
@@ -43,7 +43,7 @@ export const evalInputCommand = command({
       type: optional(string),
       long: 'out',
       description:
-        'Output directory for extracted inputs (default: .agentv/results/runs/<timestamp>)',
+        'Output directory for extracted inputs (default: .agentv/results/runs/<experiment>/<timestamp>)',
     }),
     experiment: option({
       type: optional(string),
@@ -53,7 +53,7 @@ export const evalInputCommand = command({
   },
   handler: async ({ evalPath, out, experiment }) => {
     const resolvedEvalPath = resolve(evalPath);
-    const outDir = resolve(out ?? buildDefaultRunDir(process.cwd()));
+    const outDir = resolve(out ?? buildDefaultRunDir(process.cwd(), experiment));
     const repoRoot = await findRepoRoot(dirname(resolvedEvalPath));
     const evalDir = dirname(resolvedEvalPath);
 
diff --git a/apps/cli/src/commands/pipeline/run.ts b/apps/cli/src/commands/pipeline/run.ts
index f91db3dad..86c1b2289 100644
--- a/apps/cli/src/commands/pipeline/run.ts
+++ b/apps/cli/src/commands/pipeline/run.ts
@@ -73,7 +73,8 @@ export const evalRunCommand = command({
     out: option({
       type: optional(string),
       long: 'out',
-      description: 'Output directory for results (default: .agentv/results/runs/<timestamp>)',
+      description:
+        'Output directory for results (default: .agentv/results/runs/<experiment>/<timestamp>)',
     }),
     workers: option({
       type: optional(number),
@@ -94,7 +95,7 @@ export const evalRunCommand = command({
   },
   handler: async ({ evalPath, out, workers, experiment, graderType }) => {
     const resolvedEvalPath = resolve(evalPath);
-    const outDir = resolve(out ?? buildDefaultRunDir(process.cwd()));
+    const outDir = resolve(out ?? buildDefaultRunDir(process.cwd(), experiment));
     const repoRoot = await findRepoRoot(dirname(resolvedEvalPath));
     const evalDir = dirname(resolvedEvalPath);
 
diff --git a/apps/cli/src/commands/results/eval-runner.ts b/apps/cli/src/commands/results/eval-runner.ts
index 1ab92bf9e..0e19e10e1 100644
--- a/apps/cli/src/commands/results/eval-runner.ts
+++ b/apps/cli/src/commands/results/eval-runner.ts
@@ -185,7 +185,12 @@ function resolveCliPath(cwd: string): { bunPath: string; cliPath: string } | und
 // biome-ignore lint/suspicious/noExplicitAny: Hono Context generic varies by route
 type C = Context<any, any, any>;
 
-export function registerEvalRoutes(app: Hono, getCwd: (c: C) => string) {
+export function registerEvalRoutes(
+  app: Hono,
+  getCwd: (c: C) => string,
+  options?: { readOnly?: boolean },
+) {
+  const readOnly = options?.readOnly === true;
   // ── Discovery: eval files ──────────────────────────────────────────────
   app.get('/api/eval/discover', async (c) => {
     const cwd = getCwd(c);
@@ -216,6 +221,9 @@ export function registerEvalRoutes(app: Hono, getCwd: (c: C) => string) {
 
   // ── Launch eval run ────────────────────────────────────────────────────
   app.post('/api/eval/run', async (c) => {
+    if (readOnly) {
+      return c.json({ error: 'Studio is running in read-only mode' }, 403);
+    }
     const cwd = getCwd(c);
 
     let body: RunEvalRequest;
diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
index 6bd2cc503..925a22e6c 100644
--- a/apps/cli/src/commands/results/serve.ts
+++ b/apps/cli/src/commands/results/serve.ts
@@ -251,6 +251,7 @@ function handleRuns(c: C, { searchDir }: DataContext) {
       }
       return {
         filename: m.filename,
+        display_name: m.displayName,
         path: m.path,
         timestamp: m.timestamp,
         test_count: m.testCount,
@@ -270,7 +271,7 @@ function handleRunDetail(c: C, { searchDir }: DataContext) {
   if (!meta) return c.json({ error: 'Run not found' }, 404);
   try {
     const loaded = loadManifestResults(meta.path);
-    return c.json({ results: stripHeavyFields(loaded), source: meta.filename });
+    return c.json({ results: stripHeavyFields(loaded), source: meta.displayName });
   } catch {
     return c.json({ error: 'Failed to load run' }, 500);
   }
@@ -565,8 +566,11 @@ function handleTargets(c: C, { searchDir, agentvDir }: DataContext) {
   return c.json({ targets });
 }
 
-function handleConfig(c: C, { agentvDir }: DataContext) {
-  return c.json(loadStudioConfig(agentvDir));
+/** Respond with the loaded Studio config plus `read_only` (true only when readOnly === true). */
+function handleConfig(c: C, { agentvDir }: DataContext, options?: { readOnly?: boolean }) {
+  const studioConfig = loadStudioConfig(agentvDir);
+  const readOnly = options?.readOnly === true;
+  return c.json({ ...studioConfig, read_only: readOnly });
 }
 
 function handleFeedbackRead(c: C, { searchDir }: DataContext) {
@@ -585,11 +589,12 @@ export function createApp(
   resultDir: string,
   cwd?: string,
   sourceFile?: string,
-  options?: { studioDir?: string },
+  options?: { studioDir?: string; readOnly?: boolean },
 ): Hono {
   const searchDir = cwd ?? resultDir;
   const agentvDir = path.join(searchDir, '.agentv');
   const defaultCtx: DataContext = { searchDir, agentvDir };
+  const readOnly = options?.readOnly === true;
   const app = new Hono();
 
   // ── Project resolution wrapper ────────────────────────────────────────
@@ -611,6 +616,9 @@ export function createApp(
   // ── Studio configuration ──────────────────────────────────────────────
 
   app.post('/api/config', async (c) => {
+    if (readOnly) {
+      return c.json({ error: 'Studio is running in read-only mode' }, 403);
+    }
     try {
       const body = await c.req.json<Partial<StudioConfig>>();
       const current = loadStudioConfig(agentvDir);
@@ -672,6 +680,9 @@ export function createApp(
   });
 
   app.post('/api/projects', async (c) => {
+    if (readOnly) {
+      return c.json({ error: 'Studio is running in read-only mode' }, 403);
+    }
     try {
       const body = await c.req.json<{ path: string }>();
       if (!body.path) return c.json({ error: 'Missing path' }, 400);
@@ -683,6 +694,9 @@ export function createApp(
   });
 
   app.delete('/api/projects/:projectId', (c) => {
+    if (readOnly) {
+      return c.json({ error: 'Studio is running in read-only mode' }, 403);
+    }
     const removed = removeProject(c.req.param('projectId') ?? '');
     if (!removed) return c.json({ error: 'Project not found' }, 404);
     return c.json({ ok: true });
@@ -710,6 +724,9 @@ export function createApp(
   });
 
   app.post('/api/projects/discover', async (c) => {
+    if (readOnly) {
+      return c.json({ error: 'Studio is running in read-only mode' }, 403);
+    }
     try {
       const body = await c.req.json<{ path: string }>();
       if (!body.path) return c.json({ error: 'Missing path' }, 400);
@@ -726,6 +743,7 @@ export function createApp(
     const registry = loadProjectRegistry();
     const allRuns: Array<{
       filename: string;
+      display_name: string;
       path: string;
       timestamp: string;
       test_count: number;
@@ -755,6 +773,7 @@ export function createApp(
           }
           allRuns.push({
             filename: m.filename,
+            display_name: m.displayName,
             path: m.path,
             timestamp: m.timestamp,
             test_count: m.testCount,
@@ -778,7 +797,7 @@ export function createApp(
 
   // ── Data routes (unscoped) ────────────────────────────────────────────
 
-  app.get('/api/config', (c) => handleConfig(c, defaultCtx));
+  app.get('/api/config', (c) => handleConfig(c, defaultCtx, { readOnly }));
   app.get('/api/runs', (c) => handleRuns(c, defaultCtx));
   app.get('/api/runs/:filename', (c) => handleRunDetail(c, defaultCtx));
   app.get('/api/runs/:filename/suites', (c) => handleRunSuites(c, defaultCtx));
@@ -799,6 +818,9 @@ export function createApp(
   });
 
   app.post('/api/feedback', async (c) => {
+    if (readOnly) {
+      return c.json({ error: 'Studio is running in read-only mode' }, 403);
+    }
     let body: unknown;
     try {
       body = await c.req.json();
@@ -857,6 +879,7 @@ export function createApp(
       }
       return {
         run_filename: m.filename,
+        display_name: m.displayName,
         test_count: m.testCount,
         pass_rate: m.passRate,
         avg_score: m.avgScore,
@@ -870,7 +893,9 @@ export function createApp(
   // ── Data routes (project-scoped) ──────────────────────────────────────
   // Same handlers as above, with project-resolved DataContext via withProject.
 
-  app.get('/api/projects/:projectId/config', (c) => withProject(c, handleConfig));
+  app.get('/api/projects/:projectId/config', (c) =>
+    withProject(c, (ctx, dataCtx) => handleConfig(ctx, dataCtx, { readOnly })),
+  );
   app.get('/api/projects/:projectId/runs', (c) => withProject(c, handleRuns));
   app.get('/api/projects/:projectId/runs/:filename', (c) => withProject(c, handleRunDetail));
   app.get('/api/projects/:projectId/runs/:filename/suites', (c) => withProject(c, handleRunSuites));
@@ -895,15 +920,19 @@ export function createApp(
 
   // ── Eval runner routes (discovery, launch, status) ────────────────────
 
-  registerEvalRoutes(app, (c) => {
+  registerEvalRoutes(
+    app,
+    (c) => {
     // For project-scoped routes, resolve to project path; otherwise use searchDir
-    const projectId = c.req.param('projectId');
-    if (projectId) {
-      const project = getProject(projectId);
-      if (project) return project.path;
-    }
-    return searchDir;
-  });
+      const projectId = c.req.param('projectId');
+      if (projectId) {
+        const project = getProject(projectId);
+        if (project) return project.path;
+      }
+      return searchDir;
+    },
+    { readOnly },
+  );
 
   // ── Static file serving for Studio SPA ────────────────────────────────
 
@@ -1026,8 +1055,12 @@ export const resultsServeCommand = command({
       long: 'discover',
       description: 'Scan a directory tree for repos with .agentv/',
     }),
+    readOnly: flag({
+      long: 'read-only',
+      description: 'Disable write operations and launch Studio in read-only leaderboard mode',
+    }),
   },
-  handler: async ({ source, port, dir, multi, add, remove, discover }) => {
+  handler: async ({ source, port, dir, multi, add, remove, discover, readOnly }) => {
     const cwd = dir ?? process.cwd();
     const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
 
@@ -1100,7 +1133,7 @@ export const resultsServeCommand = command({
 
       // Use the run directory for feedback storage (matches #764 behavior)
       const resultDir = sourceFile ? path.dirname(path.resolve(sourceFile)) : cwd;
-      const app = createApp(results, resultDir, cwd, sourceFile);
+      const app = createApp(results, resultDir, cwd, sourceFile, { readOnly });
 
       if (isMultiProject) {
         console.log(`Multi-project mode: ${registry.projects.length} project(s) registered`);
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
index 343625fea..efb5ee370 100644
--- a/apps/cli/test/commands/results/serve.test.ts
+++ b/apps/cli/test/commands/results/serve.test.ts
@@ -291,6 +291,41 @@ describe('serve app', () => {
       });
       expect(res3.status).toBe(400);
     });
+
+    it('returns 403 in read-only mode', async () => {
+      const content = toJsonl(RESULT_A, RESULT_B);
+      const results = loadResults(content);
+      const app = createApp(results, tempDir, undefined, undefined, {
+        studioDir,
+        readOnly: true,
+      });
+
+      const res = await app.request('/api/feedback', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          reviews: [{ test_id: 'test-greeting', comment: 'blocked' }],
+        }),
+      });
+
+      expect(res.status).toBe(403);
+    });
+  });
+
+  describe('GET /api/config', () => {
+    it('includes read_only mode in the config payload', async () => {
+      const content = toJsonl(RESULT_A, RESULT_B);
+      const results = loadResults(content);
+      const app = createApp(results, tempDir, undefined, undefined, {
+        studioDir,
+        readOnly: true,
+      });
+
+      const res = await app.request('/api/config');
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as { read_only?: boolean };
+      expect(data.read_only).toBe(true);
+    });
   });
 
   // ── Empty state (no results) ────────────────────────────────────────
diff --git a/apps/cli/test/commands/trace/trace.test.ts b/apps/cli/test/commands/trace/trace.test.ts
index f49941f6b..808586cbc 100644
--- a/apps/cli/test/commands/trace/trace.test.ts
+++ b/apps/cli/test/commands/trace/trace.test.ts
@@ -345,6 +345,20 @@ describe('trace utils', () => {
       expect(metas[0].filename).toBe('2026-02-20T21-38-05-833Z');
     });
 
+    it('should discover nested experiment run directories and emit safe run ids', () => {
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs');
+      const runDir = path.join(runsDir, 'with-skills', '2026-02-20T21-38-05-833Z');
+      mkdirSync(runDir, { recursive: true });
+
+      writeFileSync(path.join(runDir, 'index.jsonl'), `${RESULT_WITH_TRACE}\n`);
+
+      const metas = listResultFiles(tempDir);
+
+      expect(metas).toHaveLength(1);
+      expect(metas[0].filename).toBe('with-skills::2026-02-20T21-38-05-833Z');
+      expect(metas[0].displayName).toBe('2026-02-20T21-38-05-833Z');
+    });
+
     it('should skip directories without index.jsonl', () => {
       const runsDir = path.join(tempDir, '.agentv', 'results', 'runs');
       const emptyDir = path.join(runsDir, '2026-02-20T21-38-05-833Z');
diff --git a/apps/studio/src/components/Breadcrumbs.tsx b/apps/studio/src/components/Breadcrumbs.tsx
index 9dedf70a5..680f7cb5c 100644
--- a/apps/studio/src/components/Breadcrumbs.tsx
+++ b/apps/studio/src/components/Breadcrumbs.tsx
@@ -12,6 +12,14 @@ interface BreadcrumbSegment {
   to?: string;
 }
 
+/** Breadcrumb label for a run id: the segment after the first "::" if non-empty, else the id. */
+function formatRunLabel(runId: string | undefined): string {
+  // A missing or empty id falls back to the generic label.
+  if (!runId) return 'Run';
+  const timestamp = runId.split('::')[1];
+  return timestamp || runId;
+}
+
 function deriveSegments(matches: ReturnType<typeof useMatches>): BreadcrumbSegment[] {
   const segments: BreadcrumbSegment[] = [];
 
@@ -26,7 +34,7 @@ function deriveSegments(matches: ReturnType<typeof useMatches>): BreadcrumbSegme
     if (routeId.includes('/runs/$runId/category/$category')) {
       if (!segments.some((s) => s.label === params.runId)) {
         segments.push({
-          label: params.runId ?? 'Run',
+          label: formatRunLabel(params.runId),
           to: `/runs/${encodeURIComponent(params.runId)}`,
         });
       }
@@ -41,14 +49,14 @@ function deriveSegments(matches: ReturnType<typeof useMatches>): BreadcrumbSegme
       });
     } else if (routeId.includes('/runs/$runId')) {
       segments.push({
-        label: params.runId ?? 'Run',
+        label: formatRunLabel(params.runId),
         to: match.pathname,
       });
     } else if (routeId.includes('/evals/$runId/$evalId')) {
       // For eval pages, show the run as a parent segment too
       if (!segments.some((s) => s.label === params.runId)) {
         segments.push({
-          label: params.runId ?? 'Run',
+          label: formatRunLabel(params.runId),
           to: `/runs/${encodeURIComponent(params.runId)}`,
         });
       }
diff --git a/apps/studio/src/components/EvalDetail.tsx b/apps/studio/src/components/EvalDetail.tsx
index e28279cec..ee4f9f485 100644
--- a/apps/studio/src/components/EvalDetail.tsx
+++ b/apps/studio/src/components/EvalDetail.tsx
@@ -41,11 +41,13 @@ function findFirstFile(nodes: FileNode[]): string | null {
 
 export function EvalDetail({ eval: result, runId, projectId }: EvalDetailProps) {
   const [activeTab, setActiveTab] = useState<Tab>('checks');
+  const { data: config } = useStudioConfig();
+  const isReadOnly = config?.read_only === true;
 
   const tabs: { id: Tab; label: string }[] = [
     { id: 'checks', label: 'Checks' },
     { id: 'files', label: 'Files' },
-    { id: 'feedback', label: 'Feedback' },
+    ...(isReadOnly ? [] : [{ id: 'feedback' as const, label: 'Feedback' }]),
   ];
 
   return (
@@ -112,7 +114,7 @@ export function EvalDetail({ eval: result, runId, projectId }: EvalDetailProps)
       <div className="min-h-0 flex-1">
         {activeTab === 'checks' && <StepsTab result={result} />}
         {activeTab === 'files' && <FilesTab result={result} runId={runId} projectId={projectId} />}
-        {activeTab === 'feedback' && <FeedbackPanel testId={result.testId} />}
+        {!isReadOnly && activeTab === 'feedback' && <FeedbackPanel testId={result.testId} />}
       </div>
     </div>
   );
diff --git a/apps/studio/src/components/RunList.tsx b/apps/studio/src/components/RunList.tsx
index 75a11a63f..d102c8225 100644
--- a/apps/studio/src/components/RunList.tsx
+++ b/apps/studio/src/components/RunList.tsx
@@ -77,7 +77,7 @@ export function RunList({ runs, projectId }: RunListProps) {
                     params={{ projectId, runId: run.filename }}
                     className="font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
                   >
-                    {run.filename}
+                    {run.display_name ?? run.filename}
                   </Link>
                 ) : (
                   <Link
@@ -85,7 +85,7 @@ export function RunList({ runs, projectId }: RunListProps) {
                     params={{ runId: run.filename }}
                     className="font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
                   >
-                    {run.filename}
+                    {run.display_name ?? run.filename}
                   </Link>
                 )}
               </td>
diff --git a/apps/studio/src/components/Sidebar.tsx b/apps/studio/src/components/Sidebar.tsx
index 461eb74ed..474fcb426 100644
--- a/apps/studio/src/components/Sidebar.tsx
+++ b/apps/studio/src/components/Sidebar.tsx
@@ -150,7 +150,7 @@ function RunSidebar() {
                 className="mb-0.5 block truncate rounded-md px-2 py-1.5 text-sm text-gray-400 transition-colors hover:bg-gray-800/50 hover:text-gray-200"
                 title={run.project_name}
               >
-                {run.filename}
+                {run.display_name ?? run.filename}
               </Link>
             );
           }
@@ -166,7 +166,7 @@ function RunSidebar() {
                   : 'text-gray-400 hover:bg-gray-800/50 hover:text-gray-200'
               }`}
             >
-              {run.filename}
+              {run.display_name ?? run.filename}
             </Link>
           );
         })}
@@ -388,7 +388,7 @@ function ProjectRunDetailSidebar({
                   : 'text-gray-400 hover:bg-gray-800/50 hover:text-gray-200'
               }`}
             >
-              {run.filename}
+              {run.display_name ?? run.filename}
             </Link>
           );
         })}
diff --git a/apps/studio/src/lib/types.ts b/apps/studio/src/lib/types.ts
index 1b735a00e..bc6be5908 100644
--- a/apps/studio/src/lib/types.ts
+++ b/apps/studio/src/lib/types.ts
@@ -7,6 +7,7 @@
 
 export interface RunMeta {
   filename: string;
+  display_name?: string;
   path: string;
   timestamp: string;
   test_count: number;
@@ -91,6 +92,7 @@ export interface EvalDetailResponse {
 
 export interface IndexEntry {
   run_filename: string;
+  display_name?: string;
   target?: string;
   test_count: number;
   pass_rate: number;
@@ -171,6 +173,7 @@ export interface StudioConfigResponse {
   threshold: number;
   /** @deprecated Use threshold */
   pass_threshold?: number;
+  read_only?: boolean;
 }
 
 // ── Project types ────────────────────────────────────────────────────────
diff --git a/apps/studio/src/routes/evals/$runId.$evalId.tsx b/apps/studio/src/routes/evals/$runId.$evalId.tsx
index 27fb29e60..79ba93fff 100644
--- a/apps/studio/src/routes/evals/$runId.$evalId.tsx
+++ b/apps/studio/src/routes/evals/$runId.$evalId.tsx
@@ -11,7 +11,7 @@ import { useState } from 'react';
 
 import { EvalDetail } from '~/components/EvalDetail';
 import { RunEvalModal } from '~/components/RunEvalModal';
-import { useRunDetail } from '~/lib/api';
+import { useRunDetail, useStudioConfig } from '~/lib/api';
 
 export const Route = createFileRoute('/evals/$runId/$evalId')({
   component: EvalDetailPage,
@@ -20,7 +20,9 @@ export const Route = createFileRoute('/evals/$runId/$evalId')({
 function EvalDetailPage() {
   const { runId, evalId } = Route.useParams();
   const { data, isLoading, error } = useRunDetail(runId);
+  const { data: config } = useStudioConfig();
   const [showRunEval, setShowRunEval] = useState(false);
+  const isReadOnly = config?.read_only === true;
 
   if (isLoading) {
     return (
@@ -61,23 +63,27 @@ function EvalDetailPage() {
           </p>
           <h1 className="text-2xl font-semibold text-white">{evalId}</h1>
         </div>
-        <button
-          type="button"
-          onClick={() => setShowRunEval(true)}
-          className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
-        >
-          ▶ Run this Test
-        </button>
+        {!isReadOnly && (
+          <button
+            type="button"
+            onClick={() => setShowRunEval(true)}
+            className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
+          >
+            ▶ Run this Test
+          </button>
+        )}
       </div>
       <EvalDetail eval={result} runId={runId} />
-      <RunEvalModal
-        open={showRunEval}
-        onClose={() => setShowRunEval(false)}
-        prefill={{
-          testIds: [evalId],
-          target: result.target,
-        }}
-      />
+      {!isReadOnly && (
+        <RunEvalModal
+          open={showRunEval}
+          onClose={() => setShowRunEval(false)}
+          prefill={{
+            testIds: [evalId],
+            target: result.target,
+          }}
+        />
+      )}
     </div>
   );
 }
diff --git a/apps/studio/src/routes/index.tsx b/apps/studio/src/routes/index.tsx
index a700e4825..cf95c22bf 100644
--- a/apps/studio/src/routes/index.tsx
+++ b/apps/studio/src/routes/index.tsx
@@ -15,7 +15,13 @@ import { ProjectCard } from '~/components/ProjectCard';
 import { RunEvalModal } from '~/components/RunEvalModal';
 import { RunList } from '~/components/RunList';
 import { TargetsTab } from '~/components/TargetsTab';
-import { addProjectApi, discoverProjectsApi, useProjectList, useRunList } from '~/lib/api';
+import {
+  addProjectApi,
+  discoverProjectsApi,
+  useProjectList,
+  useRunList,
+  useStudioConfig,
+} from '~/lib/api';
 
 type TabId = 'runs' | 'experiments' | 'targets';
 
@@ -48,6 +54,7 @@ function HomePage() {
 
 function ProjectsDashboard() {
   const { data } = useProjectList();
+  const { data: config } = useStudioConfig();
   const queryClient = useQueryClient();
   const [addPath, setAddPath] = useState('');
   const [discoverPath, setDiscoverPath] = useState('');
@@ -56,6 +63,7 @@ function ProjectsDashboard() {
   const [showRunEval, setShowRunEval] = useState(false);
 
   const projects = data?.projects ?? [];
+  const isReadOnly = config?.read_only === true;
 
   async function handleAddProject(e: React.FormEvent) {
     e.preventDefault();
@@ -92,20 +100,24 @@ function ProjectsDashboard() {
       <div className="flex items-center justify-between">
         <h1 className="text-2xl font-semibold text-white">Projects</h1>
         <div className="flex gap-2">
-          <button
-            type="button"
-            onClick={() => setShowRunEval(true)}
-            className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
-          >
-            ▶ Run Eval
-          </button>
-          <button
-            type="button"
-            onClick={() => setShowAddForm(!showAddForm)}
-            className="rounded-md bg-cyan-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-cyan-500"
-          >
-            {showAddForm ? 'Cancel' : 'Add Project'}
-          </button>
+          {!isReadOnly && (
+            <>
+              <button
+                type="button"
+                onClick={() => setShowRunEval(true)}
+                className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
+              >
+                ▶ Run Eval
+              </button>
+              <button
+                type="button"
+                onClick={() => setShowAddForm(!showAddForm)}
+                className="rounded-md bg-cyan-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-cyan-500"
+              >
+                {showAddForm ? 'Cancel' : 'Add Project'}
+              </button>
+            </>
+          )}
         </div>
       </div>
 
@@ -115,7 +127,7 @@ function ProjectsDashboard() {
         </div>
       )}
 
-      {showAddForm && (
+      {!isReadOnly && showAddForm && (
         <div className="space-y-3 rounded-lg border border-gray-800 bg-gray-900/50 p-4">
           <form onSubmit={handleAddProject} className="flex gap-2">
             <input
@@ -156,7 +168,7 @@ function ProjectsDashboard() {
         ))}
       </div>
 
-      <RunEvalModal open={showRunEval} onClose={() => setShowRunEval(false)} />
+      {!isReadOnly && <RunEvalModal open={showRunEval} onClose={() => setShowRunEval(false)} />}
     </div>
   );
 }
@@ -169,21 +181,25 @@ function SingleProjectHome() {
   const tab = searchParams.tab as TabId | undefined;
   const navigate = useNavigate();
   const { data, isLoading, error } = useRunList();
+  const { data: config } = useStudioConfig();
   const [showRunEval, setShowRunEval] = useState(false);
+  const isReadOnly = config?.read_only === true;
 
-  const activeTab: TabId = tabs.some((t) => t.id === tab) ? (tab as TabId) : 'runs';
+  const activeTab: TabId = tabs.some((t) => t.id === tab) ? (tab as TabId) : 'experiments';
 
   return (
     <div className="space-y-6">
       <div className="flex items-center justify-between">
         <h1 className="text-2xl font-semibold text-white">Evaluation Runs</h1>
-        <button
-          type="button"
-          onClick={() => setShowRunEval(true)}
-          className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
-        >
-          ▶ Run Eval
-        </button>
+        {!isReadOnly && (
+          <button
+            type="button"
+            onClick={() => setShowRunEval(true)}
+            className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
+          >
+            ▶ Run Eval
+          </button>
+        )}
       </div>
 
       {/* Tab navigation */}
@@ -211,7 +227,7 @@ function SingleProjectHome() {
       {activeTab === 'experiments' && <ExperimentsTab />}
       {activeTab === 'targets' && <TargetsTab />}
 
-      <RunEvalModal open={showRunEval} onClose={() => setShowRunEval(false)} />
+      {!isReadOnly && <RunEvalModal open={showRunEval} onClose={() => setShowRunEval(false)} />}
     </div>
   );
 }
diff --git a/apps/studio/src/routes/projects/$projectId.tsx b/apps/studio/src/routes/projects/$projectId.tsx
index 493f38064..b38d112e4 100644
--- a/apps/studio/src/routes/projects/$projectId.tsx
+++ b/apps/studio/src/routes/projects/$projectId.tsx
@@ -10,7 +10,7 @@ import { useState } from 'react';
 import { useQuery } from '@tanstack/react-query';
 import { RunEvalModal } from '~/components/RunEvalModal';
 import { RunList } from '~/components/RunList';
-import { useProjectRunList } from '~/lib/api';
+import { useProjectRunList, useStudioConfig } from '~/lib/api';
 import { projectExperimentsOptions, projectTargetsOptions } from '~/lib/api';
 import type { ExperimentsResponse, TargetsResponse } from '~/lib/types';
 
@@ -33,20 +33,24 @@ function ProjectHomePage() {
   const tab = searchParams.tab as TabId | undefined;
   const navigate = useNavigate();
   const [showRunEval, setShowRunEval] = useState(false);
+  const { data: config } = useStudioConfig();
+  const isReadOnly = config?.read_only === true;
 
-  const activeTab: TabId = tabs.some((t) => t.id === tab) ? (tab as TabId) : 'runs';
+  const activeTab: TabId = tabs.some((t) => t.id === tab) ? (tab as TabId) : 'experiments';
 
   return (
     <div className="space-y-6">
       <div className="flex items-center justify-between">
         <h1 className="text-2xl font-semibold text-white">{projectId}</h1>
-        <button
-          type="button"
-          onClick={() => setShowRunEval(true)}
-          className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
-        >
-          ▶ Run Eval
-        </button>
+        {!isReadOnly && (
+          <button
+            type="button"
+            onClick={() => setShowRunEval(true)}
+            className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
+          >
+            ▶ Run Eval
+          </button>
+        )}
       </div>
 
       {/* Tab navigation */}
@@ -79,11 +83,13 @@ function ProjectHomePage() {
       {activeTab === 'experiments' && <ProjectExperimentsTab projectId={projectId} />}
       {activeTab === 'targets' && <ProjectTargetsTab projectId={projectId} />}
 
-      <RunEvalModal
-        open={showRunEval}
-        onClose={() => setShowRunEval(false)}
-        projectId={projectId}
-      />
+      {!isReadOnly && (
+        <RunEvalModal
+          open={showRunEval}
+          onClose={() => setShowRunEval(false)}
+          projectId={projectId}
+        />
+      )}
     </div>
   );
 }
diff --git a/apps/studio/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx b/apps/studio/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx
index 94499866c..62242c174 100644
--- a/apps/studio/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx
+++ b/apps/studio/src/routes/projects/$projectId_/evals/$runId.$evalId.tsx
@@ -7,7 +7,7 @@ import { useState } from 'react';
 
 import { EvalDetail } from '~/components/EvalDetail';
 import { RunEvalModal } from '~/components/RunEvalModal';
-import { useProjectRunDetail } from '~/lib/api';
+import { useProjectRunDetail, useStudioConfig } from '~/lib/api';
 
 export const Route = createFileRoute('/projects/$projectId_/evals/$runId/$evalId')({
   component: ProjectEvalDetailPage,
@@ -16,7 +16,9 @@ export const Route = createFileRoute('/projects/$projectId_/evals/$runId/$evalId
 function ProjectEvalDetailPage() {
   const { projectId, runId, evalId } = Route.useParams();
   const { data, isLoading, error } = useProjectRunDetail(projectId, runId);
+  const { data: config } = useStudioConfig();
   const [showRunEval, setShowRunEval] = useState(false);
+  const isReadOnly = config?.read_only === true;
 
   if (isLoading) {
     return (
@@ -57,24 +59,28 @@ function ProjectEvalDetailPage() {
           </p>
           <h1 className="text-2xl font-semibold text-white">{evalId}</h1>
         </div>
-        <button
-          type="button"
-          onClick={() => setShowRunEval(true)}
-          className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
-        >
-          ▶ Run this Test
-        </button>
+        {!isReadOnly && (
+          <button
+            type="button"
+            onClick={() => setShowRunEval(true)}
+            className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
+          >
+            ▶ Run this Test
+          </button>
+        )}
       </div>
       <EvalDetail eval={result} runId={runId} projectId={projectId} />
-      <RunEvalModal
-        open={showRunEval}
-        onClose={() => setShowRunEval(false)}
-        projectId={projectId}
-        prefill={{
-          testIds: [evalId],
-          target: result.target,
-        }}
-      />
+      {!isReadOnly && (
+        <RunEvalModal
+          open={showRunEval}
+          onClose={() => setShowRunEval(false)}
+          projectId={projectId}
+          prefill={{
+            testIds: [evalId],
+            target: result.target,
+          }}
+        />
+      )}
     </div>
   );
 }
diff --git a/apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx b/apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx
index f23ba6095..f21fed6ae 100644
--- a/apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx
+++ b/apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx
@@ -7,7 +7,7 @@ import { useState } from 'react';
 
 import { RunDetail } from '~/components/RunDetail';
 import { RunEvalModal } from '~/components/RunEvalModal';
-import { useProjectRunDetail } from '~/lib/api';
+import { useProjectRunDetail, useStudioConfig } from '~/lib/api';
 
 export const Route = createFileRoute('/projects/$projectId_/runs/$runId')({
   component: ProjectRunDetailPage,
@@ -16,7 +16,9 @@ export const Route = createFileRoute('/projects/$projectId_/runs/$runId')({
 function ProjectRunDetailPage() {
   const { projectId, runId } = Route.useParams();
   const { data, isLoading, error } = useProjectRunDetail(projectId, runId);
+  const { data: config } = useStudioConfig();
   const [showRunEval, setShowRunEval] = useState(false);
+  const isReadOnly = config?.read_only === true;
 
   if (isLoading) {
     return (
@@ -49,21 +51,25 @@ function ProjectRunDetailPage() {
           <h1 className="text-2xl font-semibold text-white">Run: {runId}</h1>
           <p className="mt-1 text-sm text-gray-400">Source: {data?.source}</p>
         </div>
-        <button
-          type="button"
-          onClick={() => setShowRunEval(true)}
-          className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
-        >
-          ▶ Re-run with Filters
-        </button>
+        {!isReadOnly && (
+          <button
+            type="button"
+            onClick={() => setShowRunEval(true)}
+            className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
+          >
+            ▶ Re-run with Filters
+          </button>
+        )}
       </div>
       <RunDetail results={data?.results ?? []} runId={runId} projectId={projectId} />
-      <RunEvalModal
-        open={showRunEval}
-        onClose={() => setShowRunEval(false)}
-        projectId={projectId}
-        prefill={prefill}
-      />
+      {!isReadOnly && (
+        <RunEvalModal
+          open={showRunEval}
+          onClose={() => setShowRunEval(false)}
+          projectId={projectId}
+          prefill={prefill}
+        />
+      )}
     </div>
   );
 }
diff --git a/apps/studio/src/routes/runs/$runId.tsx b/apps/studio/src/routes/runs/$runId.tsx
index 1ae307cf8..87f5231ee 100644
--- a/apps/studio/src/routes/runs/$runId.tsx
+++ b/apps/studio/src/routes/runs/$runId.tsx
@@ -7,7 +7,7 @@ import { useState } from 'react';
 
 import { RunDetail } from '~/components/RunDetail';
 import { RunEvalModal } from '~/components/RunEvalModal';
-import { useRunDetail } from '~/lib/api';
+import { useRunDetail, useStudioConfig } from '~/lib/api';
 
 export const Route = createFileRoute('/runs/$runId')({
   component: RunDetailPage,
@@ -16,7 +16,9 @@ export const Route = createFileRoute('/runs/$runId')({
 function RunDetailPage() {
   const { runId } = Route.useParams();
   const { data, isLoading, error } = useRunDetail(runId);
+  const { data: config } = useStudioConfig();
   const [showRunEval, setShowRunEval] = useState(false);
+  const isReadOnly = config?.read_only === true;
 
   if (isLoading) {
     return (
@@ -50,16 +52,20 @@ function RunDetailPage() {
           <h1 className="text-2xl font-semibold text-white">Run: {runId}</h1>
           <p className="mt-1 text-sm text-gray-400">Source: {data?.source}</p>
         </div>
-        <button
-          type="button"
-          onClick={() => setShowRunEval(true)}
-          className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
-        >
-          ▶ Re-run with Filters
-        </button>
+        {!isReadOnly && (
+          <button
+            type="button"
+            onClick={() => setShowRunEval(true)}
+            className="rounded-md bg-emerald-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-emerald-500"
+          >
+            ▶ Re-run with Filters
+          </button>
+        )}
       </div>
       <RunDetail results={data?.results ?? []} runId={runId} />
-      <RunEvalModal open={showRunEval} onClose={() => setShowRunEval(false)} prefill={prefill} />
+      {!isReadOnly && (
+        <RunEvalModal open={showRunEval} onClose={() => setShowRunEval(false)} prefill={prefill} />
+      )}
     </div>
   );
 }
diff --git a/apps/studio/src/routes/settings.tsx b/apps/studio/src/routes/settings.tsx
index 417657cdc..d00c330ac 100644
--- a/apps/studio/src/routes/settings.tsx
+++ b/apps/studio/src/routes/settings.tsx
@@ -24,6 +24,7 @@ function SettingsPage() {
 
   const currentThreshold = config?.threshold ?? DEFAULT_PASS_THRESHOLD;
   const displayThreshold = threshold || String(currentThreshold);
+  const isReadOnly = config?.read_only === true;
 
   const handleSave = async () => {
     const value = Number.parseFloat(threshold || String(currentThreshold));
@@ -87,6 +88,7 @@ function SettingsPage() {
                 step="0.05"
                 value={displayThreshold}
                 onChange={(e) => setThreshold(e.target.value)}
+                disabled={isReadOnly}
                 className="w-32 rounded-md border border-gray-700 bg-gray-800 px-3 py-2 text-sm text-white placeholder-gray-500 focus:border-cyan-500 focus:outline-none focus:ring-1 focus:ring-cyan-500"
               />
               <span className="text-sm text-gray-400">
@@ -97,14 +99,17 @@ function SettingsPage() {
         </div>
 
         <div className="mt-6 flex items-center gap-3">
-          <button
-            type="button"
-            onClick={handleSave}
-            disabled={saving}
-            className="rounded-md bg-cyan-600 px-4 py-2 text-sm font-medium text-white transition-colors hover:bg-cyan-500 disabled:opacity-50"
-          >
-            {saving ? 'Saving...' : 'Save Settings'}
-          </button>
+          {!isReadOnly && (
+            <button
+              type="button"
+              onClick={handleSave}
+              disabled={saving}
+              className="rounded-md bg-cyan-600 px-4 py-2 text-sm font-medium text-white transition-colors hover:bg-cyan-500 disabled:opacity-50"
+            >
+              {saving ? 'Saving...' : 'Save Settings'}
+            </button>
+          )}
+          {isReadOnly && <span className="text-sm text-gray-400">Read-only mode is enabled.</span>}
           {message && (
             <span
               className={`text-sm ${message.type === 'success' ? 'text-emerald-400' : 'text-red-400'}`}

From 34e2fd9a864a56d6ba09f318e0f4db45b76a618a Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Wed, 8 Apr 2026 13:24:56 +0000
Subject: [PATCH 2/3] fix(cli): resolve pre-push validation issues

---
 apps/cli/src/commands/eval/result-layout.ts | 6 +++++-
 apps/cli/src/commands/eval/run-eval.ts      | 6 +++---
 apps/cli/src/commands/results/serve.ts      | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/apps/cli/src/commands/eval/result-layout.ts b/apps/cli/src/commands/eval/result-layout.ts
index f9acd3e69..1373e7089 100644
--- a/apps/cli/src/commands/eval/result-layout.ts
+++ b/apps/cli/src/commands/eval/result-layout.ts
@@ -22,7 +22,11 @@ export function createRunDirName(timestamp = new Date()): string {
   return timestamp.toISOString().replace(/[:.]/g, '-');
 }
 
-export function buildDefaultRunDir(cwd: string, experiment?: string, timestamp = new Date()): string {
+export function buildDefaultRunDir(
+  cwd: string,
+  experiment?: string,
+  timestamp = new Date(),
+): string {
   return path.join(
     cwd,
     '.agentv',
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index c97d3d816..7dc02d77d 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -870,9 +870,6 @@ export async function runEvalCommand(
       .replace(/:/g, '-')
       .replace(/\./g, '-');
   }
-  if (!process.env.AGENTV_EXPERIMENT) {
-    process.env.AGENTV_EXPERIMENT = normalizeExperimentName(options.experiment);
-  }
 
   // Load agentv.config.ts (if present) for default values
   let config: Awaited<ReturnType<typeof loadTsConfig>> = null;
@@ -899,6 +896,9 @@ export async function runEvalCommand(
   }
 
   let options = normalizeOptions(input.rawOptions, config, yamlConfig?.execution);
+  if (!process.env.AGENTV_EXPERIMENT) {
+    process.env.AGENTV_EXPERIMENT = normalizeExperimentName(options.experiment);
+  }
 
   // Validate --grader-target / --model combinations
   if (options.graderTarget === 'agentv' && !options.model) {
diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
index 925a22e6c..06a71fa31 100644
--- a/apps/cli/src/commands/results/serve.ts
+++ b/apps/cli/src/commands/results/serve.ts
@@ -923,7 +923,7 @@ export function createApp(
   registerEvalRoutes(
     app,
     (c) => {
-    // For project-scoped routes, resolve to project path; otherwise use searchDir
+      // For project-scoped routes, resolve to project path; otherwise use searchDir
       const projectId = c.req.param('projectId');
       if (projectId) {
         const project = getProject(projectId);

From 7b84ac2a6c17589b23a95be260c3307d22dbcca2 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Wed, 8 Apr 2026 14:41:50 +0000
Subject: [PATCH 3/3] fix(results): support experiment-scoped run workspaces

---
 apps/cli/src/commands/results/export.ts       |  9 +++-
 apps/cli/src/commands/results/serve.ts        |  5 +-
 apps/cli/src/commands/results/validate.ts     | 40 ++++++++++------
 apps/cli/test/commands/results/export.test.ts | 18 ++++++++
 apps/cli/test/commands/results/serve.test.ts  | 35 ++++++++++++++
 .../test/commands/results/validate.test.ts    | 46 +++++++++++++++++++
 6 files changed, 134 insertions(+), 19 deletions(-)
 create mode 100644 apps/cli/test/commands/results/validate.test.ts

diff --git a/apps/cli/src/commands/results/export.ts b/apps/cli/src/commands/results/export.ts
index c31622cc0..bfe82a89d 100644
--- a/apps/cli/src/commands/results/export.ts
+++ b/apps/cli/src/commands/results/export.ts
@@ -59,7 +59,14 @@ export function deriveOutputDir(cwd: string, sourceFile: string): string {
     throw new Error(`Expected a run manifest named ${RESULT_INDEX_FILENAME}: ${sourceFile}`);
   }
 
-  const parentDir = path.basename(path.dirname(sourceFile));
+  const runDir = path.dirname(sourceFile);
+  const segments = path.normalize(runDir).split(path.sep).filter(Boolean);
+  const runsIndex = segments.lastIndexOf('runs');
+  if (runsIndex >= 0 && runsIndex < segments.length - 1) {
+    return path.join(cwd, '.agentv', 'results', 'export', ...segments.slice(runsIndex + 1));
+  }
+
+  const parentDir = path.basename(runDir);
   if (parentDir.startsWith('eval_')) {
     return path.join(cwd, '.agentv', 'results', 'export', parentDir.slice(5));
   }
diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
index 06a71fa31..4e54155b8 100644
--- a/apps/cli/src/commands/results/serve.ts
+++ b/apps/cli/src/commands/results/serve.ts
@@ -431,12 +431,11 @@ function handleEvalFiles(c: C, { searchDir }: DataContext) {
 
 function handleEvalFileContent(c: C, { searchDir }: DataContext) {
   const filename = c.req.param('filename');
-  const evalId = c.req.param('evalId');
   const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
   if (!meta) return c.json({ error: 'Run not found' }, 404);
 
-  // Extract file path from wildcard using a mount-agnostic marker
-  const marker = `/runs/${filename}/evals/${evalId}/files/`;
+  // Extract the wildcard suffix without depending on decoded route params.
+  const marker = '/files/';
   const markerIdx = c.req.path.indexOf(marker);
   const filePath = markerIdx >= 0 ? c.req.path.slice(markerIdx + marker.length) : '';
 
diff --git a/apps/cli/src/commands/results/validate.ts b/apps/cli/src/commands/results/validate.ts
index 0a53fbf94..991ffe7df 100644
--- a/apps/cli/src/commands/results/validate.ts
+++ b/apps/cli/src/commands/results/validate.ts
@@ -3,7 +3,7 @@
  * artifacts compatible with the AgentV dashboard and results commands.
  *
  * Checks:
- *   1. Directory follows the `runs/<timestamp>` naming convention
+ *   1. Directory follows the `runs/<experiment>/<timestamp>` naming convention
  *   2. index.jsonl exists and each line has required fields
  *   3. Per-test grading.json exists for every entry in the index
  *   4. Per-test timing.json exists (warning if missing)
@@ -43,13 +43,15 @@ interface IndexEntry {
 
 function checkDirectoryNaming(runDir: string): Diagnostic[] {
   const dirName = path.basename(runDir);
-  const parentName = path.basename(path.dirname(runDir));
+  const pathSegments = path.normalize(runDir).split(path.sep).filter(Boolean);
+  const runsIndex = pathSegments.lastIndexOf('runs');
   const diagnostics: Diagnostic[] = [];
 
-  if (parentName !== 'runs') {
+  if (runsIndex < 0 || runsIndex >= pathSegments.length - 1) {
     diagnostics.push({
       severity: 'warning',
-      message: `Directory is not under a 'runs/' parent (found '${parentName}/'). Expected: .agentv/results/runs/<run-dir>`,
+      message:
+        "Directory is not under a 'runs/' tree. Expected: .agentv/results/runs/<experiment>/<run-dir>",
     });
   }
 
@@ -65,6 +67,24 @@ function checkDirectoryNaming(runDir: string): Diagnostic[] {
   return diagnostics;
 }
 
+/** Runs the directory-naming, index.jsonl and artifact-file checks for one run directory. */
+export function validateRunDirectory(runDir: string): {
+  diagnostics: Diagnostic[];
+  entries: IndexEntry[];
+} {
+  const namingIssues = checkDirectoryNaming(runDir);
+  const index = checkIndexJsonl(runDir);
+  const { entries } = index;
+
+  // Artifact files are only inspected when the index produced at least one entry.
+  const artifactIssues = entries.length > 0 ? checkArtifactFiles(runDir, entries) : [];
+
+  return {
+    diagnostics: [...namingIssues, ...index.diagnostics, ...artifactIssues],
+    entries,
+  };
+}
+
 function checkIndexJsonl(runDir: string): { diagnostics: Diagnostic[]; entries: IndexEntry[] } {
   const indexPath = path.join(runDir, 'index.jsonl');
   const diagnostics: Diagnostic[] = [];
@@ -251,17 +271,7 @@ export const resultsValidateCommand = command({
       process.exit(1);
     }
 
-    const allDiagnostics: Diagnostic[] = [];
-
-    // Run all checks
-    allDiagnostics.push(...checkDirectoryNaming(resolvedDir));
-
-    const { diagnostics: indexDiags, entries } = checkIndexJsonl(resolvedDir);
-    allDiagnostics.push(...indexDiags);
-
-    if (entries.length > 0) {
-      allDiagnostics.push(...checkArtifactFiles(resolvedDir, entries));
-    }
+    const { diagnostics: allDiagnostics, entries } = validateRunDirectory(resolvedDir);
 
     // Report
     const errors = allDiagnostics.filter((d) => d.severity === 'error');
diff --git a/apps/cli/test/commands/results/export.test.ts b/apps/cli/test/commands/results/export.test.ts
index 60d54661a..e37d5fbd4 100644
--- a/apps/cli/test/commands/results/export.test.ts
+++ b/apps/cli/test/commands/results/export.test.ts
@@ -138,6 +138,24 @@ describe('results export', () => {
     );
   });
 
+  it('deriveOutputDir preserves experiment directories for canonical nested runs', () => {
+    // Manifest lives at runs/<experiment>/<timestamp>/index.jsonl.
+    const experiment = 'with-skills';
+    const runStamp = '2026-03-18T10-00-00-000Z';
+    const manifestPath = path.join(
+      tempDir,
+      '.agentv',
+      'results',
+      'runs',
+      experiment,
+      runStamp,
+      'index.jsonl',
+    );
+    const expectedDir = path.join(tempDir, '.agentv', 'results', 'export', experiment, runStamp);
+
+    expect(deriveOutputDir(tempDir, manifestPath)).toBe(expectedDir);
+  });
+
   it('deriveOutputDir rejects non-manifest paths', () => {
     expect(() => deriveOutputDir(tempDir, path.join(tempDir, 'results.jsonl'))).toThrow(
       'Expected a run manifest named index.jsonl',
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
index efb5ee370..94841e32f 100644
--- a/apps/cli/test/commands/results/serve.test.ts
+++ b/apps/cli/test/commands/results/serve.test.ts
@@ -389,6 +389,41 @@ describe('serve app', () => {
     });
   });
 
+  describe('GET /api/runs/:filename/evals/:evalId/files/*', () => {
+    it('loads file content for experiment-scoped run ids', async () => {
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'with-skills');
+      const runId = 'with-skills::2026-03-25T10-00-00-000Z';
+      const timestampDir = path.join(runsDir, '2026-03-25T10-00-00-000Z');
+      const responsePath = path.join(
+        timestampDir,
+        'demo',
+        'test-greeting',
+        'outputs',
+        'response.md',
+      );
+      // Create the response file plus an index.jsonl entry whose output_path points at it.
+      mkdirSync(path.dirname(responsePath), { recursive: true });
+      writeFileSync(responsePath, '@[assistant]:\nHello, Alice!');
+      writeFileSync(
+        path.join(timestampDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'with-skills',
+          output_path: 'demo/test-greeting/outputs/response.md',
+        }),
+      );
+      // Request the file with the run id URL-encoded in the path segment.
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(runId)}/evals/test-greeting/files/demo/test-greeting/outputs/response.md`,
+      );
+      // Expect HTTP 200 and a JSON body whose `content` includes the written text.
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as { content: string };
+      expect(data.content).toContain('Hello, Alice!');
+    });
+  });
+
   // ── SPA fallback ──────────────────────────────────────────────────────
 
   describe('SPA fallback', () => {
diff --git a/apps/cli/test/commands/results/validate.test.ts b/apps/cli/test/commands/results/validate.test.ts
new file mode 100644
index 000000000..c418ab8e6
--- /dev/null
+++ b/apps/cli/test/commands/results/validate.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, it } from 'bun:test';
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+
+import { validateRunDirectory } from '../../../src/commands/results/validate.js';
+
+describe('results validate', () => {
+  it('accepts experiment-scoped canonical run directories without layout warnings', () => {
+    const tempDir = mkdtempSync(path.join(tmpdir(), 'agentv-validate-test-'));
+    // Everything below runs inside try/finally so the temp tree is always removed.
+    try {
+      const runDir = path.join(
+        tempDir,
+        '.agentv',
+        'results',
+        'runs',
+        'with-skills',
+        '2026-03-27T12-42-24-429Z',
+      );
+      mkdirSync(runDir, { recursive: true });
+      writeFileSync(
+        path.join(runDir, 'index.jsonl'),
+        `${JSON.stringify({
+          timestamp: '2026-03-27T12:42:24.429Z',
+          test_id: 'test-greeting',
+          score: 1,
+          target: 'gpt-4o',
+          execution_status: 'ok',
+        })}\n`,
+      );
+      // Validate the runs/<experiment>/<timestamp> directory created above.
+      const { diagnostics } = validateRunDirectory(runDir);
+      // Expect no errors, no "not under runs/" warning, and no pattern-mismatch message.
+      expect(diagnostics.filter((d) => d.severity === 'error')).toEqual([]);
+      expect(diagnostics.map((d) => d.message)).not.toContain(
+        "Directory is not under a 'runs/' tree. Expected: .agentv/results/runs/<experiment>/<run-dir>",
+      );
+      expect(
+        diagnostics.some((d) => d.message.includes('does not match the expected pattern')),
+      ).toBe(false);
+    } finally {
+      rmSync(tempDir, { recursive: true, force: true });
+    }
+  });
+});