diff --git a/API.md b/API.md
deleted file mode 100644
index 8e1f5cf5..00000000
--- a/API.md
+++ /dev/null
@@ -1,724 +0,0 @@
-# Datalayer Core - DatalayerClient Documentation
-
-This document provides comprehensive examples for using the DatalayerClient.
-
-## Table of Contents
-
-- [Getting Started](#getting-started)
-  - [Installation](#installation)
-  - [Initialization](#initialization)
-  - [Handlers Pattern](#handlers-pattern)
-- [Authentication](#authentication)
-- [Runtime Management](#runtime-management)
-- [Notebook & Document Management](#notebook--document-management)
-- [Model Classes](#model-classes)
-  - [Runtime Model](#runtime-model)
-  - [Snapshot Model](#snapshot-model)
-  - [Notebook Model](#notebook-model)
-  - [Lexical Model](#lexical-model)
-  - [Space Model](#space-model)
-- [Error Handling](#error-handling)
-- [Best Practices](#best-practices)
-- [Testing](#testing)
-
-## Getting Started
-
-### Installation
-
-```bash
-npm install @datalayer/core
-```
-
-### Initialization
-
-The DatalayerClient provides a high-level, object-oriented interface for interacting with Datalayer services.
-
-#### Key Features
-
-- **Flat API**: All methods are available directly on `client` (e.g., `client.createNotebook()`)
-- **Handlers Pattern**: Inject platform-specific behavior without wrapping Client methods
-- **Rich Models**: Returns model instances with methods, not just plain objects
-- **Type Safety**: Full TypeScript support with proper interfaces
-
-#### Basic Initialization
-
-```typescript
-import { DatalayerClient } from '@datalayer/core/client';
-import { DEFAULT_SERVICE_URLS } from '@datalayer/core/api/constants';
-
-// Basic initialization with token
-const client = new DatalayerClient({
-  token: 'bearer-token-123',
-  iamRunUrl: DEFAULT_SERVICE_URLS.IAM,
-  runtimesRunUrl: DEFAULT_SERVICE_URLS.RUNTIMES,
-  spacerRunUrl: DEFAULT_SERVICE_URLS.SPACER
-});
-
-// Quick initialization with defaults
-const client = new DatalayerClient({
-  token: 'bearer-token-123'
-});
-```
-
-### Handlers Pattern
-
-Initialize with lifecycle handlers for cross-cutting concerns:
-
-```typescript
-// Initialize with handlers for logging and error handling
-const client = new DatalayerClient({
-  token: 'bearer-token-123',
-  iamRunUrl: 'https://prod1.datalayer.run',
-  handlers: {
-    // Called before every Client method
-    beforeCall: async (methodName, args) => {
-      console.log(`[Client] Calling ${methodName}`, args);
-    },
-    // Called after successful method execution
-    afterCall: async (methodName, result) => {
-      console.log(`[Client] ${methodName} completed`);
-      // Track analytics, update UI, etc.
-    },
-    // Called when a method throws an error
-    onError: async (methodName, error) => {
-      console.error(`[Client] ${methodName} failed:`, error);
-
-      // Platform-specific error handling
-      if (error.message.includes('Not authenticated')) {
-        // Show login prompt in your platform's UI
-        // e.g., vscode.window.showErrorMessage(...)
-        // or showAuthModal() in React
-      }
-    }
-  }
-});
-
-// Initialize for VS Code extension with platform-specific handlers
-const vscodeClient = new DatalayerClient({
-  token: 'bearer-token-123',
-  handlers: {
-    onError: async (methodName, error) => {
-      // VS Code specific error handling
-      const vscode = require('vscode');
-      vscode.window.showErrorMessage(`Datalayer: ${error.message}`);
-    }
-  }
-});
-
-// Initialize for React app with UI handlers
-const reactClient = new DatalayerClient({
-  token: 'bearer-token-123',
-  handlers: {
-    onError: async (methodName, error) => {
-      // React specific error handling
-      toast.error(`Error: ${error.message}`);
-    },
-    beforeCall: async (methodName, args) => {
-      setLoading(true);
-    },
-    afterCall: async (methodName, result) => {
-      setLoading(false);
-    }
-  }
-});
-```
-
-## Authentication
-
-```typescript
-// Get current user profile (whoami)
-const user = await client.whoami();
-console.log('User ID:', user.uid);
-console.log('Email:', user.email);
-console.log('Roles:', user.roles);
-
-// Login with token
-await client.login('new-bearer-token');
-
-// Get credits information
-const credits = await client.getCredits();
-console.log('Available credits:', credits.balance);
-
-// Check IAM service health
-const health = await client.checkIAMHealth();
-console.log('IAM service status:', health.status);
-
-// Logout
-await client.logout();
-```
-
-## Runtime Management
-
-```typescript
-// List available environments
-const environments = await client.listEnvironments();
-environments.forEach(env => {
-  console.log(`${env.name}: ${env.type}`);
-  console.log('Resources:', env.resources);
-});
-
-// Ensure runtime (creates or reuses existing)
-const runtime = await client.ensureRuntime(
-  'ai-agents-env',  // environment name
-  50,                // credits limit
-  true,              // wait for ready
-  60000,             // max wait time (ms)
-  true,              // reuse existing
-  'snapshot-id'      // optional snapshot to restore from
-);
-
-console.log('Runtime ready:', runtime.podName);
-console.log('Jupyter URL:', runtime.jupyterUrl);
-
-// Create a specific runtime
-const newRuntime = await client.createRuntime(
-  'python-gpu-env',     // environment name
-  'notebook',           // type
-  'ml-training-gpu',    // given name
-  100                   // credits limit
-);
-
-// Wait for runtime to be ready
-await newRuntime.waitUntilReady(60000); // 60 seconds timeout
-console.log('Runtime is ready!');
-
-// Check runtime state
-const state = await newRuntime.getState();
-console.log('Current state:', state);
-
-// Create a snapshot
-const snapshot = await client.createSnapshot(
-  newRuntime.podName,
-  'checkpoint-before-training',
-  'Saving model state before training',
-  false  // don't stop runtime after snapshot
-);
-console.log('Snapshot created:', snapshot.uid);
-
-// List all runtimes
-const runtimes = await client.listRuntimes();
-runtimes.forEach(r => {
-  console.log(`${r.podName}: ${r.givenName} (${r.environmentName})`);
-});
-
-// Get specific runtime
-const specificRuntime = await client.getRuntime('pod-name-123');
-console.log('Runtime details:', specificRuntime.givenName);
-
-// List snapshots
-const snapshots = await client.listSnapshots();
-snapshots.forEach(s => {
-  console.log(`${s.name}: ${s.description} (${s.status})`);
-});
-
-// Get specific snapshot
-const specificSnapshot = await client.getSnapshot('snapshot-id-123');
-console.log('Snapshot size:', await specificSnapshot.getSize());
-
-// Delete resources
-await client.deleteRuntime(runtime.podName);
-await client.deleteSnapshot(snapshot.uid);
-console.log('Resources cleaned up');
-
-// Check runtimes service health
-const runtimesHealth = await client.checkRuntimesHealth();
-console.log('Runtimes service status:', runtimesHealth.status);
-```
-
-## Notebook & Document Management
-
-```typescript
-// Get user's spaces
-const spaces = await client.getMySpaces();
-console.log('Available spaces:', spaces.length);
-
-const mySpace = spaces[0];
-console.log('Space:', mySpace.uid);
-
-// Get items in space
-const items = await client.getSpaceItems(mySpace.uid);
-console.log('Items in space:', items.length);
-
-// Create a space
-const newSpace = await client.createSpace(
-  'Analysis Workspace',    // name
-  'Data analysis workspace', // description
-  'workspace',             // variant
-  'analysis-ws',           // space handle
-  'org-id-123',           // organization ID
-  '',                     // seed space ID
-  false                   // is public
-);
-
-// Create a notebook
-const notebook = await client.createNotebook(
-  mySpace.uid,              // space ID
-  'Analysis Notebook',      // name
-  'Data analysis for Q4'    // description
-  // optional: file (File | Blob)
-);
-
-console.log('Notebook created:', notebook.id);
-console.log('Path:', notebook.path);
-
-// Get notebook details
-const notebookDetails = await client.getNotebook(notebook.id);
-console.log('Notebook UID:', notebookDetails.uid);
-
-// Update notebook
-const updatedNotebook = await client.updateNotebook(
-  notebook.id,
-  'Q4 Analysis - Final',           // new name
-  'Final analysis for Q4 2024'     // new description
-);
-
-// Get notebook content
-const content = await client.getNotebookContent(notebook.id, {
-  includeOutputs: true,
-  format: 'json'
-});
-console.log('Notebook cells:', content.cells.length);
-
-// Create a lexical document
-const document = await client.createLexical(
-  mySpace.uid,            // space ID
-  'Project Notes',        // name
-  'Implementation notes'  // description
-  // optional: file (File | Blob)
-);
-
-console.log('Document created:', document.id);
-
-// Get lexical document
-const lexicalDetails = await client.getLexical(document.id);
-
-// Update lexical document
-const updatedDocument = await client.updateLexical(
-  document.id,
-  'Project Notes v2',     // new name
-  'Updated notes'         // new description
-);
-
-// Get lexical content
-const lexicalContent = await client.getLexicalContent(document.id, {
-  format: 'json'
-});
-console.log('Document content:', lexicalContent);
-
-// Prefetch content for multiple items (caching)
-const itemIds = [notebook.id, document.id];
-await client.prefetchContent(itemIds, 'notebook');
-await client.prefetchContent([document.id], 'lexical');
-
-// Clear content cache
-await client.clearContentCache(notebook.id, 'notebook');
-await client.clearContentCache(); // clear all cache
-
-// Delete items
-try {
-  await client.deleteSpaceItem(notebook.id);
-  await client.deleteSpaceItem(document.id);
-  console.log('Items deleted successfully');
-} catch (error) {
-  console.error('Failed to delete items:', error.message);
-}
-
-// Check spacer service health
-const spacerHealth = await client.checkSpacerHealth();
-console.log('Spacer service status:', spacerHealth.status);
-```
-
-## Model Classes
-
-The DatalayerClient provides rich model classes that wrap API responses with convenient methods:
-
-### Runtime Model
-
-```typescript
-const runtime = await client.createRuntime(
-  'python-gpu-env',
-  'notebook',
-  'ml-training',
-  100
-);
-
-// Static properties (no API calls)
-console.log(runtime.podName);         // Unique pod identifier
-console.log(runtime.environmentName); // Environment being used
-console.log(runtime.jupyterUrl);      // Jupyter server URL
-console.log(runtime.jupyterToken);    // Authentication token
-console.log(runtime.burningRate);     // Credits per hour
-console.log(runtime.givenName);       // User-friendly name
-console.log(runtime.createdAt);       // Creation timestamp
-
-// Dynamic methods (fetch fresh data)
-const state = await runtime.getState();       // Current state
-const isRunning = await runtime.isRunning();  // Check if running
-const isStarting = await runtime.isStarting(); // Check if starting
-
-// Actions
-await runtime.waitUntilReady(30000);  // Wait for ready state
-const snapshot = await runtime.createSnapshot('checkpoint', 'Before changes');
-await runtime.delete();                // Delete runtime
-```
-
-### Snapshot Model
-
-```typescript
-const snapshot = await client.createSnapshot(
-  runtime.podName,
-  'training-checkpoint',
-  'After epoch 10'
-);
-
-// Static properties
-console.log(snapshot.uid);          // Unique identifier
-console.log(snapshot.name);         // Snapshot name
-console.log(snapshot.description);  // Description
-console.log(snapshot.environment);  // Environment name
-console.log(snapshot.format);       // Snapshot format
-console.log(snapshot.metadata);     // Custom metadata
-console.log(snapshot.updatedAt);    // Last update time
-
-// Dynamic methods
-const status = await snapshot.getStatus();     // Current status
-const size = await snapshot.getSize();         // Size in bytes
-const metadata = await snapshot.getLatestMetadata(); // Fresh metadata
-
-// Actions
-const newRuntime = await snapshot.restore({    // Create runtime from snapshot
-  given_name: 'restored-runtime',
-  credits_limit: 100
-});
-await snapshot.delete();                       // Delete snapshot
-```
-
-### Notebook Model
-
-```typescript
-const notebook = await client.createNotebook(
-  'space-uid',
-  'ML Experiments',
-  'Machine learning experiments notebook'
-);
-
-// Static properties (instant access, no API calls)
-console.log(notebook.id);         // Notebook ID
-console.log(notebook.uid);        // Unique identifier
-console.log(notebook.path);       // File path
-console.log(notebook.spaceId);    // Parent space
-console.log(notebook.ownerId);    // Owner user ID
-console.log(notebook.createdAt);  // Creation date
-console.log(notebook.version);    // Version number
-console.log(notebook.metadata);   // Metadata object
-
-// Dynamic methods (fetch fresh data from API)
-const name = await notebook.getName();           // Current name
-const content = await notebook.getContent();     // Notebook content
-const kernelSpec = await notebook.getKernelSpec(); // Kernel specification
-const updatedAt = await notebook.getUpdatedAt(); // Last update time
-
-// Actions
-const updated = await notebook.update({
-  name: 'ML Experiments - Final',
-  description: 'Completed experiments'
-});
-await notebook.delete();  // Delete notebook
-
-// After deletion, accessing properties will throw errors
-try {
-  await notebook.getName();
-} catch (error) {
-  console.log('Notebook has been deleted');
-}
-```
-
-### Lexical Model
-
-```typescript
-const document = await client.createLexical(
-  'space-uid',
-  'Architecture Design',
-  'System architecture documentation'
-);
-
-// Static properties
-console.log(document.id);        // Document ID
-console.log(document.uid);       // Unique identifier
-console.log(document.spaceId);   // Parent space
-console.log(document.ownerId);   // Owner ID
-console.log(document.createdAt); // Creation date
-
-// Dynamic methods
-const name = await document.getName();       // Current name
-const content = await document.getContent(); // Document content
-const updatedAt = await document.getUpdatedAt(); // Last update
-
-// Actions
-const updated = await document.update({
-  name: 'Architecture Design v2',
-  content: { /* Lexical content */ }
-});
-await document.delete();  // Delete document
-```
-
-### Space Model
-
-```typescript
-const spaces = await client.getMySpaces();
-const space = spaces[0];
-
-// Static properties
-console.log(space.uid);         // Space UID
-console.log(space.handle);      // Space handle
-console.log(space.variant);     // Space variant
-console.log(space.visibility);  // Visibility setting
-console.log(space.ownerId);     // Owner ID
-console.log(space.createdAt);   // Creation date
-
-// Dynamic methods
-const name = await space.getName();               // Current name
-const description = await space.getDescription(); // Description
-const items = await space.getItems();            // Items in space
-const updatedAt = await space.getUpdatedAt();    // Last update
-
-// Get items with type checking
-items.forEach(item => {
-  if ('notebookType' in item) {
-    console.log('Notebook:', item.name);
-  } else if ('documentType' in item) {
-    console.log('Document:', item.name);
-  }
-});
-```
-
-## Error Handling
-
-The DatalayerClient throws errors with descriptive messages. The examples below show how to handle them:
-
-```typescript
-// Basic error handling
-try {
-  const notebook = await client.createNotebook(
-    'space-id',
-    'My Notebook',
-    'Description'
-  );
-} catch (error) {
-  console.error('Failed to create notebook:', error.message);
-}
-
-// Handle authentication errors
-try {
-  const user = await client.whoami();
-} catch (error) {
-  if (error.message.includes('401') || error.message.includes('Not authenticated')) {
-    console.error('Authentication failed - please check your token');
-    // Trigger re-authentication in your app
-  } else {
-    console.error('API error:', error.message);
-  }
-}
-
-// Handle insufficient credits
-try {
-  const runtime = await client.createRuntime('python-gpu-env', 'notebook', 'test', 1000);
-} catch (error) {
-  if (error.message.includes('insufficient credits')) {
-    console.error('Not enough credits to create runtime');
-  } else if (error.message.includes('quota exceeded')) {
-    console.error('Runtime quota exceeded');
-  } else {
-    console.error('Runtime creation failed:', error.message);
-  }
-}
-
-// Model deletion state
-const runtime = await client.createRuntime('ai-agents-env', 'notebook', 'test', 10);
-await client.deleteRuntime(runtime.podName);
-
-// This will throw an error
-try {
-  await runtime.getState();
-} catch (error) {
-  console.log('Runtime has been deleted');
-}
-
-// Using handlers for global error handling
-const client = new DatalayerClient({
-  token: 'your-token',
-  handlers: {
-    onError: async (methodName, error) => {
-      // Global error handling
-      if (error.message.includes('401')) {
-        console.log('Authentication required');
-        // Handle auth globally
-      } else if (error.message.includes('429')) {
-        console.log('Rate limited - retrying...');
-        // Handle rate limiting
-      } else {
-        console.error(`Global error in ${methodName}:`, error.message);
-      }
-    }
-  }
-});
-```
-
-## Best Practices
-
-1. **Use handlers for cross-cutting concerns**: Implement logging, error handling, and UI updates through the handlers pattern rather than wrapping Client methods.
-
-2. **Handle deletion states**: Models track deletion state to prevent operations on deleted resources.
-
-3. **Cache dynamic data**: The Client models cache dynamic data for 5 seconds to reduce API calls.
-
-4. **Wait for runtime readiness**: Always use `waitUntilReady()` after creating a runtime before performing operations:
-   ```typescript
-   const runtime = await client.createRuntime('ai-agents-env', 'notebook', 'analysis', 50);
-   await runtime.waitUntilReady(60000); // Wait up to 60 seconds
-   // Now safe to use runtime
-   ```
-
-5. **Reuse runtimes when possible**: Use `ensureRuntime()` instead of `createRuntime()` to reuse existing runtimes and save credits:
-   ```typescript
-   const runtime = await client.ensureRuntime(
-     'ai-agents-env',
-     50,    // credits limit
-     true,  // wait for ready
-     60000, // max wait time
-     true   // reuse existing
-   );
-   ```
-
-6. **Clean up resources**: Always delete runtimes and snapshots when done to avoid charges:
-   ```typescript
-   // Create the runtime outside `try` so it is in scope for cleanup
-   const runtime = await client.createRuntime(...);
-   try {
-     // ... do work ...
-   } finally {
-     // Always clean up
-     await client.deleteRuntime(runtime.podName);
-   }
-   ```
-
-7. **Use environment variables for configuration**:
-   ```typescript
-   const client = new DatalayerClient({
-     token: process.env.DATALAYER_API_KEY,
-     iamRunUrl: process.env.DATALAYER_IAM_URL || DEFAULT_SERVICE_URLS.IAM,
-     runtimesRunUrl: process.env.DATALAYER_RUNTIMES_URL || DEFAULT_SERVICE_URLS.RUNTIMES,
-     spacerRunUrl: process.env.DATALAYER_SPACER_URL || DEFAULT_SERVICE_URLS.SPACER
-   });
-   ```
-
-8. **Prefetch content for better performance**:
-   ```typescript
-   // Prefetch multiple items to reduce individual API calls
-   const notebookIds = ['nb1', 'nb2', 'nb3'];
-   await client.prefetchContent(notebookIds, 'notebook');
-
-   // Now accessing content is much faster
-   for (const id of notebookIds) {
-     const content = await client.getNotebookContent(id);
-   }
-   ```
-
-9. **Use appropriate service health checks**:
-   ```typescript
-   // Check service health before critical operations
-   const iamHealth = await client.checkIAMHealth();
-   const runtimesHealth = await client.checkRuntimesHealth();
-   const spacerHealth = await client.checkSpacerHealth();
-
-   if (iamHealth.status === 'healthy' && runtimesHealth.status === 'healthy') {
-     // Safe to proceed with runtime operations
-   }
-   ```
-
-## Testing
-
-For testing, you can use the provided test utilities:
-
-```typescript
-import { testConfig } from '@datalayer/core/__tests__/shared/test-config';
-import { performCleanup } from '@datalayer/core/__tests__/shared/cleanup-shared';
-
-// Check if tests should run
-if (testConfig.hasToken()) {
-  // Cleanup before tests
-  await performCleanup('setup');
-
-  // Run your tests
-  const client = new DatalayerClient({
-    token: testConfig.getToken(),
-    ...DEFAULT_SERVICE_URLS
-  });
-
-  // Your test code here
-  const user = await client.whoami();
-  expect(user.uid).toBeDefined();
-
-  // Cleanup after tests
-  await performCleanup('teardown');
-}
-
-// Skip expensive tests if configured
-if (!testConfig.shouldSkipExpensive()) {
-  // Run expensive tests (runtime creation, etc.)
-  const runtime = await client.createRuntime('ai-agents-env', 'notebook', 'test', 10);
-  await client.deleteRuntime(runtime.podName);
-}
-```
-
-## Configuration Options
-
-The DatalayerClient accepts these configuration options:
-
-```typescript
-export interface DatalayerClientConfig {
-  /** Authentication token for API requests */
-  token?: string;
-  /** URL for the IAM service */
-  iamRunUrl?: string;
-  /** URL for the Runtimes service */
-  runtimesRunUrl?: string;
-  /** URL for the Spacer service */
-  spacerRunUrl?: string;
-  /** Platform-specific storage implementation */
-  storage?: PlatformStorage;
-  /** Enable caching for API responses */
-  cacheEnabled?: boolean;
-  /** Enable offline mode */
-  offlineMode?: boolean;
-  /** Handlers for intercepting Client method calls */
-  handlers?: ClientHandlers;
-}
-```
-
-Example with all options:
-
-```typescript
-import { BrowserStorage } from '@datalayer/core/client/storage';
-
-const client = new DatalayerClient({
-  token: 'your-token',
-  iamRunUrl: 'https://custom-iam.example.com',
-  runtimesRunUrl: 'https://custom-runtimes.example.com',
-  spacerRunUrl: 'https://custom-spacer.example.com',
-  storage: new BrowserStorage(),
-  cacheEnabled: true,
-  offlineMode: false,
-  handlers: {
-    beforeCall: async (methodName, args) => {
-      console.log(`Calling ${methodName}`, args);
-    },
-    afterCall: async (methodName, result) => {
-      console.log(`${methodName} completed`);
-    },
-    onError: async (methodName, error) => {
-      console.error(`${methodName} failed:`, error);
-    }
-  }
-});
-```
diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index e10c0628..00000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,444 +0,0 @@
-# CLAUDE.md
-
-Datalayer Core - Python Client and CLI for the Datalayer AI Platform. Hybrid Python/TypeScript codebase with server-side Python and client-side React components.
-
-## ⚠️ CRITICAL: Import/Export Pattern Issue (January 2025)
-
-**NEVER use destructured imports from `src/api/spacer`!**
-
-### The Problem
-The spacer API exports use a namespace pattern in `src/api/spacer/index.js`:
-```javascript
-export * as items from './items';
-export * as users from './users';
-export * as notebooks from './notebooks';
-export * as lexicals from './lexicals';
-export * as cells from './cells';
-```
-
-This creates a structure like `spacerAPI.items`, NOT direct named exports.
-
-### ❌ WRONG - Destructured Import (causes runtime errors):
-```javascript
-import { items, users, notebooks } from '../../../api/spacer';
-const response = await items.getSpaceItems(...);  // ❌ items is undefined
-```
-
-### ✅ CORRECT - Namespace Import:
-```javascript
-import * as spacerAPI from '../../../api/spacer';
-const response = await spacerAPI.items.getSpaceItems(...);  // ✅ Works correctly
-```
-
-### Why This Happens
-- Webpack bundling works fine (no build errors)
-- Runtime fails because the destructured import `{ items }` expects a named export
-- The namespace export `export * as items` creates a nested structure instead
-- Result: `items` becomes `undefined` at runtime, causing "Cannot read properties of undefined"
-
-### Files Fixed (January 2025)
-- `lib/client/client/models/Space.js`
-- `lib/client/client/models/Notebook.js`
-- `lib/client/client/models/Lexical.js`
-- `lib/client/client/models/Item.js`
-
-**Always use namespace imports for spacer API!**
-
-## Project Structure
-
-- **Source code**: `src/` contains the TypeScript/React library code
-- **API Layer**: `src/api/` contains raw API functions for direct service access
-- **Client**: `src/client/client/` contains the high-level Client with models and mixins
-- **Examples**: `src/examples/` contains interactive React examples
-- **Python**: `datalayer_core/` contains the Python Client
-- **Tests**: `src/__tests__/` for TypeScript, `datalayer_core/tests/` for Python
-- **No default Vite files**: Removed App.tsx, main.tsx, public/ - this is a library, not an app
-
-## Development Commands
-
-- **Python**: `pip install -e .[test]` | `pytest datalayer_core/tests/` | `mypy datalayer_core/`
-- **TypeScript Library**: `npm install` | `npm run build:lib` | `npm run lint` | `npm run test`
-- **Integration Tests**: `npm run test:integration` (runs all API and Client integration tests)
-- **Examples**: `npm run examples` (starts dev server at http://localhost:3000/)
-- **Code Quality**: `npm run check` | `npm run check:fix` | `npm run lint` | `npm run format` | `npm run type-check`
-- **Docs**: `cd docs && make build` | `npm run typedoc` (generates TypeScript API docs) | See `API.md` for comprehensive API/Client examples
-- **Make**: `make build` | `make start` | `make docs`
-
-**CLI Scripts**: `datalayer`/`dla`/`d`, `datalayer-config`, `datalayer-migrate`, `datalayer-server`, `datalayer-troubleshoot`
-
-## Architecture
-
-**Python Core**:
-
-- `DatalayerApp` - Base application class (traitlets)
-- `DatalayerClient` - Main Client class with mixins
-- CLI with subcommands: about, console, envs, runtimes, login, secrets, snapshots
-- Resource management: runtimes, environments, secrets, snapshots
-
-**TypeScript/React**: NPM package `@datalayer/core`
-
-- API layer with `DatalayerApi.ts`
-- Component library (UI, Jupyter, business logic)
-- Zustand state management
-- 70+ TypeScript models
-- Custom hooks for auth, platform integration, UI/UX
-- Universal navigation system that auto-detects React Router, Next.js, or falls back to native browser navigation
-
-## Configuration
-
-- Environment variables: `DATALAYER_API_KEY`, `DATALAYER_RUN_URL`
-- Traitlets configuration with custom Datalayer paths
-- Dev setup in `dev/`, examples in `examples/`
-
-## Quality Standards
-
-- **Type checking**: 100% mypy compliance (Python), strict TypeScript checks
-- **Testing**: pytest + Vitest with React Testing Library + comprehensive test mocks
-- **Linting**: ESLint with React/TypeScript rules, ruff for Python
-- **Formatting**: Prettier for consistent code style (80 char width, single quotes)
-- **Security**: bandit compliance, replaced `eval()` with `ast.literal_eval()`
-- **Documentation**: NumPy-style docstrings, TypeDoc API docs, Docusaurus site
-- **Pre-commit**: Updated to latest versions (ruff v0.12.8, bandit 1.8.6, pip-audit v2.9.0)
-
-## Development Tips
-
-- Use npm, not yarn
-- Run checks after changes: `npm run check:fix`
-- Use the Playwright MCP servers when you need to verify behavior in the browser
-- Ensure things always build after changes
-- Also run `npm run format`, `npm run lint`, and `npm run type-check` to ensure everything works properly
-
-## Running Examples
-
-**Start the examples server:**
-
-```bash
-npm run examples
-```
-
-The examples are served at http://localhost:3000/ and include:
-
-- `DatalayerNotebookExample`: Demonstrates Datalayer services integration with Jupyter notebooks
-- `NotebookExample`: Basic notebook example
-- `CellExample`: Individual cell execution example
-
-**Next.js Notebook Example:**
-
-Located in `examples/nextjj/`, this is a full Next.js application demonstrating platform integration:
-
-```bash
-cd examples/nextjj
-npm install
-npm run dev
-```
-
-Features:
-
-- Token authentication with Datalayer IAM
-- Browse and create notebooks from workspace
-- Select compute environments for execution
-- Interactive notebook viewer with real-time outputs
-- Clean UI with centered empty states and proper spacing
-- Welcome page with token authentication
-- Navigation between notebooks, environments, and viewer pages
-- Error handling for runtime creation failures
-
-**Configuration:**
-
-- The application uses local storage for token management
-- Authentication happens through the welcome page where users enter their Datalayer API token
-- The app communicates directly with `https://prod1.datalayer.run` API endpoints
-- Built with Next.js 14, TypeScript, and GitHub Primer components
-
-**Desktop Example:**
-
-For a native desktop application with Jupyter integration, see the separate Datalayer Desktop repository:
-https://github.com/datalayer/desktop
-
-Features:
-- Native desktop app with Electron
-- Full Jupyter notebook integration
-- Real-time collaboration support
-- WebSocket proxy for kernel communication
-
-## TypeScript/React Services
-
-**DatalayerServiceManager**: Creates and configures ServiceManager for Datalayer infrastructure
-
-- Located in `src/services/DatalayerServiceManager.ts`
-- Uses the runtime API (`/api/runtimes/v1/runtimes`) to create kernels
-- Internally uses `createRuntime` from the API module for proper auth handling
-- Returns configured ServiceManager for use with Jupyter components
-
-**DatalayerCollaborationProvider**: Enables real-time collaboration
-
-- Located in `src/collaboration/DatalayerCollaborationProvider.ts`
-- Requires Datalayer credentials (runUrl and token)
-- Integrates with Jupyter notebooks for collaborative editing
-- **IMPORTANT**: Uses notebook UIDs (not paths) for document IDs in Datalayer SaaS
-- Collaboration is enabled by default in Notebook2 components
-
-## API Notes
-
-- **Runtime API**: `POST /api/runtimes/v1/runtimes` - Creates compute runtimes
-- **Collaboration API**: `/api/spacer/v1/documents/{notebook_uid}` - Works for notebooks (not just documents!)
-- **Required Headers**: Authorization (Bearer token), X-External-Token (for some operations)
-- **Proxy Setup**: Vite dev server proxies `/api` to `https://prod1.datalayer.run` for CORS
-- **API Docs**: Available at https://prod1.datalayer.run/api/runtimes/v1/ui/
-- **Pre-commit hooks**: Husky + lint-staged for automatic code quality checks
-- **Code Quality Scripts**:
-  - `npm run check` - Run all checks (format, lint, type-check)
-  - `npm run check:fix` - Auto-fix all issues
-  - `npm run lint` / `npm run lint:fix` - ESLint checking
-  - `npm run format` / `npm run format:check` - Prettier formatting
-  - `npm run type-check` - TypeScript compilation check
-
-## API and Client Architecture
-
-### Two-Layer Architecture
-
-**1. Raw API Layer** (`src/api/`)
-- Direct access to REST endpoints
-- Organized by service (IAM, Runtimes, Spacer)
-- Returns raw API responses
-- Minimal abstraction, maximum control
-
-**2. Client Layer** (`src/client/client/`)
-- High-level, intuitive interface
-- Domain models with rich methods
-- Automatic state management
-- Mixins for organized functionality
-
-**Client Structure**:
-- `storage/`: Platform-agnostic storage implementations (Browser, Node, Electron)
-- `state/`: Service-specific state managers with TTL caching
-- `models/`: Rich domain models (User, Runtime, Space, Notebook, Lexical, Snapshot)
-- `mixins/`: Service mixins (IAMMixin, RuntimesMixin, SpacerMixin, HealthMixin)
-- `base.ts`: Client base class composition
-
-### Key Changes and Fixes
-
-**Authentication**:
-- Fixed logout endpoint to use GET method (was incorrectly using POST)
-- Proper error handling for invalid tokens
-- OAuth support limited to GitHub and LinkedIn only (removed Google/Microsoft)
-
-**Model Lifecycle Management**:
-- Models track deletion state to prevent operations on deleted resources
-- Runtime and Snapshot deletion now marks instances as deleted
-- All model methods check deletion state before operations
-
-**Platform Abstraction Layer** (January 2025):
-- Implemented PlatformStorage interface with 3 implementations (Browser, Node, Electron)
-- State managers with TTL-based caching (IAMState, RuntimesState, SpacerState)
-- RuntimesState tracks runtime keys for proper getCachedRuntimes() implementation
-- All storage implementations support encryption
-
-**Test Infrastructure**:
-- 100% test pass rate achieved (247 tests passing)
-- Consolidated test configuration (removed redundant `shouldRunExpensive()`)
-- Integration tests are self-contained (no inter-test dependencies)
-- Proper cleanup in test teardown
-- Environment variable `DATALAYER_TEST_SKIP_EXPENSIVE=false` enables all tests
-- Fixed empty string handling in BrowserStorage
-- Fixed OAuth provider recognition in User model tests
-
-**TypeScript Improvements**:
-- Fixed strict null checks in model constructors
-- Proper typing for Client mixins and models
-- Consistent error handling across all models
-- Fixed unused variable warnings in test files
-
-### Client Models
-
-**Runtime Model**:
-- Dynamic state checking (always fetches fresh from API)
-- `waitUntilReady()` method for startup synchronization
-- Direct snapshot creation via `createSnapshot()`
-- Deletion state tracking
-
-**Snapshot Model**:
-- Status and size checking methods
-- Metadata access
-- Relationship with Runtime model
-- Deletion state tracking
-
-**Space Model**:
-- Item listing with proper relationship handling
-- Support for both Notebooks and Lexical documents
-- Lazy loading of properties
-
-**Notebook/Lexical Models**:
-- Content management
-- Update operations
-- Proper serialization to JSON
-- Deletion lifecycle
-
-## AI Notes IMPORTANT
-
-- Use npm, not yarn
-- Run checks after changes:
-  - npm run format
-  - npm run lint
-  - npm run type-check
-  - npm run build:lib (ensure it builds with fresh output)
-- Run integration tests: `npm run test:integration`
-- Avoid old-school require imports
-- Use playwright MCP to inspect things directly
-- Check API.md for comprehensive examples of both raw API and Client usage
-- **Client Usage**: Always use the handlers pattern for cross-cutting concerns instead of wrapping Client methods
-- **VS Code Extension**: Use `(client as any)` casting when TypeScript definitions are incomplete
-
-## ag-ui (CopilotKit) Architecture (November 2024)
-
-### Critical Fix: Separated Hook Files
-
-The ag-ui adapter uses **separated hook files** to prevent Lumino widget initialization crashes:
-
-**Files:**
-
-- `src/tools/adapters/agui/notebookHooks.tsx` - Notebook-only (imports `@datalayer/jupyter-react`)
-- `src/tools/adapters/agui/lexicalHooks.tsx` - Lexical-only (imports `@datalayer/jupyter-lexical`)
-- `src/tools/adapters/agui/AgUIToolAdapter.ts` - Shared components (`ActionRegistrar`, `UseFrontendToolFn`)
-
-**Problem Solved:**
-
-Original combined `hooks.tsx` imported from BOTH packages, causing:
-
-1. When `useNotebookToolActions` was called → entire lexical package loaded
-2. Lexical package initialization → creates Lumino widgets for Jupyter output nodes
-3. Lumino widget initialization → **CRASH**: `Cannot set properties of undefined (setting 'class-name')`
-
-**Solution Benefits:**
-
-- ✅ Notebook example never loads lexical code (no crash)
-- ✅ Lazy loading (lexical only loads when needed)
-- ✅ No code duplication (shared components in `AgUIToolAdapter.ts`)
-- ✅ Smaller bundles (tree-shaking eliminates unused code)
-
-**Critical Rule:**
-
-```typescript
-// ❌ NEVER create combined hooks that import from both packages
-import { ... } from '@datalayer/jupyter-lexical';
-import { ... } from '@datalayer/jupyter-react';
-
-// ✅ ALWAYS keep hooks separated by package
-// notebookHooks.tsx
-import { ... } from '@datalayer/jupyter-react';
-
-// lexicalHooks.tsx
-import { ... } from '@datalayer/jupyter-lexical';
-```
-
-## Critical Lessons Learned (January 2025)
-
-### Module Import/Export Issues
-**Problem**: Webpack couldn't resolve namespace exports when destructured in consuming code.
-**Symptom**: Runtime error "Cannot read properties of undefined (reading 'getSpaceItems')"
-**Root Cause**: Using `export * as items from './items'` in index files, then importing as `import * as spacerAPI` and accessing `spacerAPI.items.getSpaceItems()`
-**Solution**: Use direct module imports instead:
-```typescript
-// BAD - webpack can't resolve this properly
-import * as spacerAPI from '../../../api/spacer';
-await spacerAPI.items.getSpaceItems(...);
-
-// GOOD - direct imports work
-import * as items from '../../../api/spacer/items';
-await items.getSpaceItems(...);
-```
-
-### Code Deduplication with Abstract Base Classes
-**Achievement**: Reduced code duplication by 45-47% across models
-**Pattern**: Created `Item<TData, TUpdateRequest>` abstract base class for Notebook, Lexical, and Cell models
-**Benefits**:
-- Single source of truth for common functionality
-- Consistent deletion state tracking
-- Unified error handling
-
-### Build System Improvements
-**Issue**: Stale build artifacts causing confusion
-**Solution**: Added clean scripts to all build commands
-- `build:lib` now runs `npm run clean:lib` first
-- Removes `lib/`, `dist/`, `build/`, and `tsconfig.tsbuildinfo`
-- Ensures fresh builds every time
-
-### TypeScript Module Resolution
-**Issue**: Node.js ESM requires explicit file extensions in imports
-**Context**: Only matters for direct Node.js execution, not webpack bundles
-**Note**: TypeScript source files don't need .js extensions - only needed if running compiled JS directly with Node
-
-### Debugging Approach
-**Key Learning**: When fixing runtime errors in webpack bundles:
-1. Check the actual TypeScript source files, not compiled JavaScript
-2. Webpack module resolution differs from Node.js ESM
-3. Clean rebuild (`rm -rf dist lib node_modules`) can resolve mysterious issues
-4. Always verify fixes actually work in the runtime environment
-
-## Client Handlers Pattern (January 2025)
-
-### Problem Solved
-Eliminated massive code duplication where consuming applications (VS Code extension, React apps) were wrapping every Client method 1:1 just to add logging, error handling, or platform-specific behavior.
-
-### Solution: Handlers Pattern
-The Client now supports lifecycle handlers that can be injected at initialization:
-
-```typescript
-const client = new DatalayerClient({
-  token: 'your-token',
-  iamRunUrl: 'https://prod1.datalayer.run',
-  handlers: {
-    beforeCall: async (methodName, args) => {
-      console.log(`[Client] Calling ${methodName}`, args);
-    },
-    afterCall: async (methodName, result) => {
-      console.log(`[Client] ${methodName} completed`, result);
-    },
-    onError: async (methodName, error) => {
-      console.error(`[Client] ${methodName} failed`, error);
-      // Platform-specific error handling
-      if (error.message.includes('Not authenticated')) {
-        // Show platform-specific auth prompt
-      }
-    }
-  }
-});
-```
-
-### Key Implementation Details
-
-**Automatic Method Wrapping**: The Client automatically wraps all mixin methods with handlers:
-- Located in `src/client/client/base.ts`
-- Smart detection: Only wraps mixin methods, not base class infrastructure
-- No hardcoded method lists - automatically detects based on prototype chain
-
-**Clean Mixin Composition**: Uses helper function for readable mixin composition:
-```typescript
-const DatalayerClientWithMixins = composeMixins(
-  IAMMixin,
-  RuntimesMixin,
-  SpacerMixin,
-);
-```
-
-**TypeScript Support**: Proper interface declaration for mixin methods:
-```typescript
-export interface DatalayerClient {
-  // All mixin methods declared here for TypeScript
-  whoami(): Promise<any>;
-  createRuntime(config: any): Promise<any>;
-  // ... etc
-}
-```
-
-### Benefits
-- **Zero code duplication**: No more wrapper services
-- **Platform agnostic**: Same Client works everywhere
-- **Clean separation**: Business logic in Client, platform behavior in handlers
-- **Type safe**: Full TypeScript support
-- **Maintainable**: Add new Client methods without updating consumers
-
-### Removed Components
-- Deleted `HealthMixin` (unnecessary complexity)
-- VS Code extension: Removed `spacerService.ts` and `runtimeService.ts`
-- All wrapper services replaced with direct Client usage + handlers
diff --git a/README.md b/README.md
index 98fe0730..5721af09 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [![Become a Sponsor](https://img.shields.io/static/v1?label=Become%20a%20Sponsor&message=%E2%9D%A4&logo=GitHub&style=flat&color=1ABC9C)](https://github.com/sponsors/datalayer)
 
-# ☰ Datalayer Core
+# ☰ ☢️ Datalayer Core
 
 <p align="center">
   <strong>Python and Typescript libraries for Datalayer</strong>
@@ -118,7 +118,7 @@ datalayer runtime list
 datalayer runtime create ai-env --given-name my-runtime-123
 
 # Execute a script in a runtime
-datalayer runtime exec my-script.py --runtime <runtime-id>
+datalayer runtime exec my-script.py --agent <agent-id>
 
 # Create a snapshot from a runtime but do not terminate the runtime
 datalayer snapshots create <pod-name> my-snapshot 'AI work!' False
@@ -151,6 +151,29 @@ datalayer usage team-allocate-member --team-uid <team_uid> --member-uid <member_
 datalayer usage team-revoke-member --team-uid <team_uid> --member-uid <member_uid> --amount 5
 ```
 
+### 5. Evals CLI (Multi-Agentspec)
+
+Use comma-separated agentspec ids to create one experiment per agentspec variant:
+
+```bash
+# Creates one experiment per agentspec in the list
+datalayer evals experiments create my-exp \
+  --evalset-id <evalset_id> \
+  --agent-spec-ids example-evals,example-evals-nocodemode,example-custom
+```
+
+Generate a comparison report:
+
+```bash
+datalayer evals report <evalset_id> --run-limit 50 --export
+```
+
+How to interpret grouped comparisons in the report:
+
+- `Within-Agentspec Pairwise Latest-Pass Deltas`: compares experiments using the same agentspec id.
+- `Cross-Agentspec Pairwise Latest-Pass Deltas`: compares experiments using different agentspec ids.
+- Pairwise sections cover every pairwise combination of the selected experiments, so comparisons are not limited to two agentspecs.
+
 ## Examples
 
 ### Python Examples
diff --git a/datalayer_core/__version__.py b/datalayer_core/__version__.py
index 0bad1d00..6109e043 100644
--- a/datalayer_core/__version__.py
+++ b/datalayer_core/__version__.py
@@ -3,4 +3,4 @@
 
 """Datalayer Core version information."""
 
-__version__ = "1.1.24"
+__version__ = "1.1.38"
diff --git a/datalayer_core/agents/__init__.py b/datalayer_core/agents/__init__.py
new file mode 100644
index 00000000..8000f6f4
--- /dev/null
+++ b/datalayer_core/agents/__init__.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Runtime and agent execution helpers."""
+
+from datalayer_core.agents.agent_cloud import RuntimeService
+from datalayer_core.agents.agent_local import (
+	DEFAULT_LOCAL_AGENT_NAME,
+	DEFAULT_LOCAL_HOST,
+	DEFAULT_LOCAL_LOG_LEVEL,
+	DEFAULT_LOCAL_PROTOCOL,
+	LocalAgentRuntime,
+	ensure_local_agent,
+	start_local_agent_runtime,
+	terminate_local_agent_runtime,
+)
+from datalayer_core.agents.utils import (
+	compute_time_reservation_minutes,
+	create_cloud_agent_runtime,
+	resolve_environment_burning_rate,
+	teardown_agent_execution_resources,
+	terminate_cloud_agent_runtime,
+)
+
+__all__ = [
+	"RuntimeService",
+	"LocalAgentRuntime",
+	"DEFAULT_LOCAL_AGENT_NAME",
+	"DEFAULT_LOCAL_HOST",
+	"DEFAULT_LOCAL_LOG_LEVEL",
+	"DEFAULT_LOCAL_PROTOCOL",
+	"ensure_local_agent",
+	"start_local_agent_runtime",
+	"terminate_local_agent_runtime",
+	"resolve_environment_burning_rate",
+	"compute_time_reservation_minutes",
+	"create_cloud_agent_runtime",
+	"terminate_cloud_agent_runtime",
+	"teardown_agent_execution_resources",
+]
diff --git a/datalayer_core/runtimes/runtime.py b/datalayer_core/agents/agent_cloud.py
similarity index 98%
rename from datalayer_core/runtimes/runtime.py
rename to datalayer_core/agents/agent_cloud.py
index dd292ccc..06d62068 100644
--- a/datalayer_core/runtimes/runtime.py
+++ b/datalayer_core/agents/agent_cloud.py
@@ -19,9 +19,9 @@
 from datalayer_core.mixins.sandbox_snapshots import SandboxSnapshotsMixin
 from datalayer_core.mixins.runtimes import RuntimesMixin
 from datalayer_core.models import ExecutionResponse
+from datalayer_core.models.sandbox_snapshot import SandboxSnapshotModel
 from datalayer_core.models.runtime import RuntimeModel
-from datalayer_core.runtimes.sandbox_snapshot import (
-    SandboxSnapshotModel,
+from datalayer_core.sandboxes.code_sandbox_snapshots import (
     as_code_sandbox_snapshots,
     create_snapshot,
 )
@@ -60,6 +60,7 @@ def __init__(
         run_url: str = DEFAULT_DATALAYER_RUN_URL,
         iam_url: Optional[str] = None,
         token: Optional[str] = None,
+        api_key: Optional[str] = None,
         pod_name: Optional[str] = None,
         ingress: Optional[str] = None,
         reservation_id: Optional[str] = None,
@@ -86,6 +87,8 @@ def __init__(
             Datalayer IAM server URL. If not provided, defaults to run_url.
         token : Optional[str]
             Authentication token (can also be set via DATALAYER_API_KEY env var).
+        api_key : Optional[str]
+            Alias for ``token``; used only when ``token`` is empty or not provided.
         pod_name : Optional[str]
             Name of the pod running the runtime.
         ingress : Optional[str]
@@ -110,7 +113,7 @@ def __init__(
             time_reservation=time_reservation,
             run_url=run_url,
             iam_url=iam_url or run_url,
-            token=token,
+            token=token or api_key,
             external_token=None,
             pod_name=pod_name,
             ingress=ingress,
diff --git a/datalayer_core/runtimes/local.py b/datalayer_core/agents/agent_local.py
similarity index 80%
rename from datalayer_core/runtimes/local.py
rename to datalayer_core/agents/agent_local.py
index 3ab44ca4..648e170f 100644
--- a/datalayer_core/runtimes/local.py
+++ b/datalayer_core/agents/agent_local.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -122,13 +125,14 @@ def start_local_agent_runtime(
     protocol: str = DEFAULT_LOCAL_PROTOCOL,
     log_level: str = DEFAULT_LOCAL_LOG_LEVEL,
     wait: bool = True,
+    disable_tool_approvals: bool = False,
 ) -> LocalAgentRuntime:
     """Launch a local ``agent-runtimes`` server as a subprocess.
 
     Parameters
     ----------
     agent_spec_id : str
-        Agent spec id to boot the runtime with.
+        Agentspec id to boot the runtime with.
     agent_name : str
         Registered agent name/id served by the runtime.
     host : str
@@ -172,6 +176,8 @@ def start_local_agent_runtime(
         "--log-level",
         log_level,
     ]
+    if disable_tool_approvals:
+        command.append("--disable-tool-approvals")
 
     runtime_env, mapped_targets = build_agent_runtime_env()
     if mapped_targets:
@@ -238,6 +244,7 @@ def ensure_local_agent(
     enable_skills: bool = True,
     description: Optional[str] = None,
     timeout: int = 120,
+    disable_tool_approvals: bool = False,
 ) -> None:
     """Ensure a local agent with the expected transport is registered.
 
@@ -298,6 +305,7 @@ def ensure_local_agent(
         "agent_spec_id": agent_spec_id,
         "enable_skills": enable_skills,
         "tools": [],
+        "disableToolApprovals": disable_tool_approvals,
     }
     try:
         response = requests.post(
@@ -456,6 +464,129 @@ def extract_vercel_stream_text(raw: str) -> str:
     return "".join(text_parts).strip()
 
 
+def _coerce_usage_payload(candidate: Any) -> dict[str, Any]:
+    if not isinstance(candidate, dict) or not candidate:  # non-dict or empty input carries no usage
+        return {}
+    nested = candidate.get("usage")
+    if isinstance(nested, dict) and nested:  # flatten a non-empty nested "usage" dict into the result
+        merged = dict(nested)
+        for key, value in candidate.items():
+            if key == "usage":
+                continue
+            merged.setdefault(str(key), value)  # nested values win; outer keys only fill gaps
+        return merged
+    return dict(candidate)  # shallow copy of the input dict
+
+
+def _usage_payload_score(payload: dict[str, Any]) -> int:
+    if not payload:  # an empty payload always scores 0
+        return 0
+    token_keys = {  # token-count key spellings, snake_case and camelCase
+        "prompt_tokens",
+        "promptTokens",
+        "input_tokens",
+        "inputTokens",
+        "completion_tokens",
+        "completionTokens",
+        "output_tokens",
+        "outputTokens",
+        "total_tokens",
+        "totalTokens",
+        "tokens_total",
+        "token_total",
+    }
+    score = len(payload)  # base score: number of top-level keys
+    if any(key in payload for key in token_keys):
+        score += 100  # flat bonus when at least one token-count key is present
+    if any(
+        key in payload
+        for key in (
+            "credits_consumed",
+            "creditsConsumed",
+            "credits",
+            "total_credits",
+            "cost_credits",
+        )
+    ):
+        score += 10  # smaller flat bonus when at least one credits key is present
+    return score
+
+
+def extract_vercel_stream_usage(raw: str) -> dict[str, Any]:
+    """Extract the highest-scoring pydantic usage payload from a Vercel AI SSE stream ({} if none)."""
+    best: dict[str, Any] = {}
+    best_score = 0
+    for line in raw.splitlines():
+        if not line.startswith("data: "):  # only lines with the exact "data: " prefix are parsed
+            continue
+        payload = line[6:].strip()  # drop the 6-character "data: " prefix
+        if not payload or payload == "[DONE]":  # skip empty payloads and the "[DONE]" sentinel
+            continue
+        try:
+            event = json.loads(payload)
+        except json.JSONDecodeError:  # lines that are not valid JSON are ignored
+            continue
+        if not isinstance(event, dict):
+            continue
+
+        candidates: list[dict[str, Any]] = []
+        message_metadata = event.get("messageMetadata")
+        if isinstance(message_metadata, dict):  # usage nested under messageMetadata is checked first
+            candidates.extend(
+                [
+                    _coerce_usage_payload(message_metadata.get("pydantic_ai")),
+                    _coerce_usage_payload(message_metadata.get("pydanticAI")),
+                    _coerce_usage_payload(message_metadata.get("usage")),
+                ]
+            )
+        candidates.extend(
+            [
+                _coerce_usage_payload(event.get("pydantic_ai_usage")),
+                _coerce_usage_payload(event.get("pydantic_ai")),
+                _coerce_usage_payload(event.get("usage")),
+            ]
+        )
+        for candidate in candidates:
+            score = _usage_payload_score(candidate)
+            if score > best_score:  # strict ">" keeps the earliest candidate on ties; {} (score 0) never wins
+                best = candidate
+                best_score = score
+    return best
+
+
+def _vercel_ai_error_message(raw: str) -> Optional[str]:
+    """Detect a non-stream error body returned with an HTTP 200 status.
+
+    The ``agent-runtimes`` server answers an unknown agent route with HTTP 200
+    and a JSON error body (for example
+    ``{"error": "Agent '...' not found", "message": "No agent registered ..."}``)
+    instead of an SSE stream. Such a body must NOT be treated as a successful
+    completion, otherwise route-candidate fallback stops at the first wrong
+    route and an empty answer is recorded.
+
+    Returns
+    -------
+    Optional[str]
+        The error message when the body is empty or a JSON error payload;
+        ``None`` for an SSE stream, non-JSON text, or JSON without an error.
+    """
+    text = (raw or "").strip()
+    if not text:
+        return "Empty response body"
+    # A genuine Vercel AI response is an SSE stream of ``data:`` lines.
+    if "data:" in text:  # substring check anywhere in the body, not per line
+        return None
+    try:
+        payload = json.loads(text)
+    except json.JSONDecodeError:  # non-JSON text is not reported as an error
+        return None
+    if isinstance(payload, dict):
+        error = payload.get("error") or payload.get("message")  # "error" is preferred over "message"
+        if error:
+            return str(error)
+    return None
+
+
 def _post_vercel_ai_chat(
     *,
     endpoint: str,
@@ -528,13 +659,36 @@ def _post_vercel_ai_chat(
         }
 
     output_text = extract_vercel_stream_text(raw)
-    return {
+    usage = extract_vercel_stream_usage(raw)
+    if not output_text:
+        error_message = _vercel_ai_error_message(raw)
+        if error_message is not None:
+            message_text = (
+                f"{source_label} chat returned no output: {error_message}"
+            )
+            return {
+                "status": "failed",
+                "output": {"text": "", "raw_stream_excerpt": raw[:2000]},
+                "failure_cause": {
+                    "stage": "runtime_execution",
+                    "type": "runtime_agent_unavailable",
+                    "message": message_text,
+                    "detail_excerpt": raw[:2000] or message_text,
+                    "execution_url": endpoint,
+                },
+            }
+    output: dict[str, Any] = {
+        "text": output_text,
+        "raw_stream_excerpt": raw[:2000],
+    }
+    result: dict[str, Any] = {
         "status": "completed",
-        "output": {
-            "text": output_text,
-            "raw_stream_excerpt": raw[:2000],
-        },
+        "output": output,
     }
+    if usage:
+        output["pydantic_ai_usage"] = usage
+        result["usage"] = usage
+    return result
 
 
 def run_local_agent_chat(
@@ -606,7 +760,7 @@ def runtime_route_candidates(
 
     The ``agent-runtimes`` server inside a cloud runtime may register its agent
     under different names depending on how it was launched. Trying a few known
-    candidates (explicit agent name, agent spec id, pod name, then the default
+    candidates (explicit agent name, agentspec id, pod name, then the default
     route) makes cloud execution resilient.
     """
     candidates: list[str] = []
diff --git a/datalayer_core/runtimes/agent_runtime.py b/datalayer_core/agents/utils.py
similarity index 95%
rename from datalayer_core/runtimes/agent_runtime.py
rename to datalayer_core/agents/utils.py
index 27856a57..ae9a78dd 100644
--- a/datalayer_core/runtimes/agent_runtime.py
+++ b/datalayer_core/agents/utils.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -144,9 +147,9 @@ def create_cloud_agent_runtime(
     name : Optional[str]
         Optional runtime name.
     agent_spec_id : Optional[str]
-        Registered agent spec id (ignored when ``agent_spec`` is provided).
+        Registered agentspec id (ignored when ``agent_spec`` is provided).
     agent_spec : Optional[dict[str, Any]]
-        Inline agent spec payload (takes precedence over ``agent_spec_id``).
+        Inline agentspec payload (takes precedence over ``agent_spec_id``).
     credits_limit : Optional[float]
         Target credits budget used to derive ``time_reservation`` when the
         latter is not supplied.
@@ -284,7 +287,7 @@ def teardown_agent_execution_resources(
 
     if target == "local":
         if local_base_url and token and local_agent_name:
-            from datalayer_core.runtimes.local import delete_local_agent
+            from datalayer_core.agents.agent_local import delete_local_agent
 
             result["local_agent_deleted"] = delete_local_agent(
                 base_url=local_base_url,
@@ -292,7 +295,7 @@ def teardown_agent_execution_resources(
                 agent_name=local_agent_name,
             )
         if local_runtime is not None:
-            from datalayer_core.runtimes.local import terminate_local_agent_runtime
+            from datalayer_core.agents.agent_local import terminate_local_agent_runtime
 
             terminate_local_agent_runtime(local_runtime)
             result["local_runtime_terminated"] = True
diff --git a/datalayer_core/base/serverapplication.py b/datalayer_core/base/serverapplication.py
index 0a00ee97..4a24daf2 100644
--- a/datalayer_core/base/serverapplication.py
+++ b/datalayer_core/base/serverapplication.py
@@ -3,7 +3,7 @@
 
 """The Datalayer Core Server application."""
 
-import os
+from pathlib import Path
 
 from jupyter_server.extension.application import ExtensionApp, ExtensionAppJinjaMixin
 from jupyter_server.utils import url_path_join
@@ -18,9 +18,9 @@
 from datalayer_core.handlers.service_worker.handler import ServiceWorkerHandler
 from datalayer_core.utils.urls import DEFAULT_DATALAYER_IAM_URL
 
-DEFAULT_STATIC_FILES_PATH = os.path.join(os.path.dirname(__file__), "./static")
-
-DEFAULT_TEMPLATE_FILES_PATH = os.path.join(os.path.dirname(__file__), "./templates")
+_PACKAGE_ROOT = Path(__file__).resolve().parent.parent
+DEFAULT_STATIC_FILES_PATH = str(_PACKAGE_ROOT / "static")
+DEFAULT_TEMPLATE_FILES_PATH = str(_PACKAGE_ROOT / "templates")
 
 
 class DatalayerExtensionApp(ExtensionAppJinjaMixin, ExtensionApp):
diff --git a/datalayer_core/cli/__main__.py b/datalayer_core/cli/__main__.py
index 8413fcd8..474f5585 100644
--- a/datalayer_core/cli/__main__.py
+++ b/datalayer_core/cli/__main__.py
@@ -5,10 +5,12 @@
 
 import os
 import sys
+from typing import Optional
 
 import typer
 
 from datalayer_core.__version__ import __version__
+from datalayer_core.authn import AuthenticationManager
 from datalayer_core.cli.commands.about import app as about_app
 from datalayer_core.cli.commands.agents import agents_ls
 from datalayer_core.cli.commands.agents import app as agents_app
@@ -34,20 +36,19 @@
 from datalayer_core.cli.commands.otel import app as otel_app
 from datalayer_core.cli.commands.pools import app as pools_app
 from datalayer_core.cli.commands.ray import app as ray_app
-from datalayer_core.cli.commands.runtime_checkpoints import app as checkpoints_app
-from datalayer_core.cli.commands.runtime_checkpoints import (
+from datalayer_core.cli.commands.checkpoints import app as checkpoints_app
+from datalayer_core.cli.commands.checkpoints import (
     checkpoints_ls,
 )
 from datalayer_core.cli.commands.sandbox_snapshots import app as snapshots_app
 from datalayer_core.cli.commands.sandbox_snapshots import snapshots_ls
-from datalayer_core.cli.commands.runtimes import app as runtimes_app
-from datalayer_core.cli.commands.runtimes import runtimes_ls
+from datalayer_core.cli.commands.schedules import app as schedules_app
 from datalayer_core.cli.commands.secrets import app as secrets_app
 from datalayer_core.cli.commands.secrets import secrets_ls
 from datalayer_core.cli.commands.subscription import app as subscription_app
 from datalayer_core.cli.commands.subscription import subscription_root
-from datalayer_core.cli.commands.tokens import app as tokens_app
-from datalayer_core.cli.commands.tokens import tokens_ls
+from datalayer_core.cli.commands.api_keys import app as api_keys_app
+from datalayer_core.cli.commands.api_keys import api_keys_ls
 from datalayer_core.cli.commands.usage import app as usage_app
 from datalayer_core.cli.commands.usage import usage_root
 from datalayer_core.cli.commands.plans import app as plans_app
@@ -63,6 +64,48 @@ def version_callback(value: bool) -> None:
         raise typer.Exit()
 
 
+def _lookup_billable_account_uid_by_handle(
+    *, iam_url: str, access_token: str, account_handle: str
+) -> Optional[str]:
+    """Resolve an account handle to a UID via IAM whoami, then memberships; None if unmatched."""
+    import requests
+
+    handle = str(account_handle or "").strip().lower()  # handles are compared trimmed and lowercased
+    if not handle:
+        return None
+
+    headers = {"Authorization": f"Bearer {access_token}"}
+
+    # 1) Directly match the authenticated user's own handle.
+    whoami_response = requests.get(  # NOTE(review): exceptions raised by requests are not caught here
+        f"{iam_url.rstrip('/')}/api/iam/v1/whoami",
+        headers=headers,
+        timeout=10,
+    )
+    if whoami_response.status_code == 200:  # any other status falls through to the memberships lookup
+        payload = whoami_response.json()
+        profile = payload.get("profile") or {}
+        profile_handle = str(profile.get("handle") or "").strip().lower()
+        if profile_handle == handle:
+            return str(profile.get("uid") or "").strip() or None  # a blank uid is returned as None
+
+    # 2) Match organizations and teams from memberships.
+    memberships_response = requests.get(
+        f"{iam_url.rstrip('/')}/api/iam/v1/memberships",
+        headers=headers,
+        timeout=10,
+    )
+    if memberships_response.status_code != 200:  # a failed memberships call means no match
+        return None
+    memberships_payload = memberships_response.json()
+    memberships = memberships_payload.get("memberships") or []
+    for membership in memberships:  # first membership whose handle matches wins
+        membership_handle = str(membership.get("handle") or "").strip().lower()
+        if membership_handle == handle:
+            return str(membership.get("uid") or "").strip() or None
+    return None
+
+
 # Create the main Typer app
 app = typer.Typer(
     name="dla",
@@ -152,6 +195,35 @@ def main_callback(
         "--mcp-server-url",
         help="Override DATALAYER_MCP_SERVER_URL for this CLI invocation.",
     ),
+    scheduler_url: str | None = typer.Option(
+        None,
+        "--scheduler-url",
+        help="Override DATALAYER_SCHEDULER_URL for this CLI invocation.",
+    ),
+    api_key: str | None = typer.Option(
+        None,
+        "--api-key",
+        help=(
+            "Auth token for backend calls. Falls back to DATALAYER_API_KEY when "
+            "omitted; otherwise built-in auth resolution is used."
+        ),
+    ),
+    billable_account_uid: str | None = typer.Option(
+        None,
+        "--billable-account-uid",
+        help=(
+            "Billable account UID context. Falls back to DATALAYER_ACCOUNT_UID "
+            "when omitted."
+        ),
+    ),
+    billable_account_handle: str | None = typer.Option(
+        None,
+        "--billable-account-handle",
+        help=(
+            "Billable account handle context. Falls back to DATALAYER_ACCOUNT_HANDLE "
+            "when omitted and is resolved to UID via IAM lookup."
+        ),
+    ),
 ) -> None:
     """Main callback to handle global options."""
     overrides = {
@@ -169,11 +241,62 @@ def main_callback(
         "DATALAYER_STATUS_URL": status_url,
         "DATALAYER_SUPPORT_URL": support_url,
         "DATALAYER_MCP_SERVER_URL": mcp_server_url,
+        "DATALAYER_SCHEDULER_URL": scheduler_url,
     }
     for env_name, value in overrides.items():
         if value is not None:
             os.environ[env_name] = value.rstrip("/")
 
+    # Global auth option: explicit flag overrides env; when omitted keep normal
+    # command behavior (env var token or stored auth token).
+    if api_key is not None:
+        normalized_api_key = str(api_key).strip()
+        if normalized_api_key:
+            os.environ["DATALAYER_API_KEY"] = normalized_api_key
+
+    # Global billable context defaults.
+    resolved_uid = str(billable_account_uid or "").strip() or str(
+        os.environ.get("DATALAYER_ACCOUNT_UID") or ""
+    ).strip()
+    resolved_handle = str(billable_account_handle or "").strip() or str(
+        os.environ.get("DATALAYER_ACCOUNT_HANDLE") or ""
+    ).strip()
+
+    # Convert handle -> uid only when uid is not already known.
+    if not resolved_uid and resolved_handle:
+        effective_iam_url = str(os.environ.get("DATALAYER_IAM_URL") or "").strip()
+        if not effective_iam_url:
+            effective_iam_url = "http://localhost:9700"
+
+        resolved_token = str(os.environ.get("DATALAYER_API_KEY") or "").strip()
+        if not resolved_token:
+            auth = AuthenticationManager(iam_url=effective_iam_url)
+            resolved_token = str(auth.get_stored_token() or "").strip()
+
+        if not resolved_token:
+            raise typer.BadParameter(
+                "Cannot resolve --billable-account-handle without authentication. "
+                "Pass --api-key, set DATALAYER_API_KEY, or login first."
+            )
+
+        resolved_from_handle = _lookup_billable_account_uid_by_handle(
+            iam_url=effective_iam_url,
+            access_token=resolved_token,
+            account_handle=resolved_handle,
+        )
+        if not resolved_from_handle:
+            raise typer.BadParameter(
+                f"Could not resolve billable account handle '{resolved_handle}' to a UID."
+            )
+        resolved_uid = resolved_from_handle
+
+    if resolved_uid:
+        os.environ["DATALAYER_ACCOUNT_UID"] = resolved_uid
+        # Keep backward compatibility with existing scripts.
+        os.environ["DATALAYER_BILLABLE_ACCOUNT_UID"] = resolved_uid
+    if resolved_handle:
+        os.environ["DATALAYER_ACCOUNT_HANDLE"] = resolved_handle
+
 
 # Register commands (without name to add them at the top level)
 app.add_typer(about_app)
@@ -191,11 +314,11 @@ def main_callback(
 app.add_typer(otel_app)
 app.add_typer(pools_app)
 app.add_typer(ray_app)
-app.add_typer(runtimes_app)
+app.add_typer(schedules_app)
 app.add_typer(secrets_app)
 app.add_typer(snapshots_app)
 app.add_typer(subscription_app)
-app.add_typer(tokens_app)
+app.add_typer(api_keys_app)
 app.add_typer(users_app)
 app.add_typer(usage_app)
 app.add_typer(plans_app)
@@ -214,11 +337,10 @@ def main_callback(
 
 # Add convenient aliases at root level
 app.command(name="envs-ls")(envs_ls)
-app.command(name="runtimes-ls")(runtimes_ls)
 app.command(name="secrets-ls")(secrets_ls)
 app.command(name="snapshots-ls")(snapshots_ls)
 app.command(name="checkpoints-ls")(checkpoints_ls)
-app.command(name="tokens-ls")(tokens_ls)
+app.command(name="api-keys-ls")(api_keys_ls)
 app.command(name="agent-nodes-ls")(agent_nodes_ls)
 app.command(name="agents-ls")(agents_ls)
 
@@ -239,6 +361,10 @@ def main_callback(
     "--status-url",
     "--support-url",
     "--mcp-server-url",
+    "--scheduler-url",
+    "--api-key",
+    "--billable-account-uid",
+    "--billable-account-handle",
 }
 
 _GLOBAL_OPTIONS_NO_VALUES = {
diff --git a/datalayer_core/cli/commands/README.md b/datalayer_core/cli/commands/README.md
index 5c0d401c..f14f3ca6 100644
--- a/datalayer_core/cli/commands/README.md
+++ b/datalayer_core/cli/commands/README.md
@@ -1,37 +1,47 @@
 # Exec Module
 
-The `exec` module provides functionality to execute Python files and Jupyter notebooks on Datalayer runtimes.
+The `exec` module provides functionality to execute Python files and Jupyter notebooks on Datalayer code sandboxes.
 
 ## Commands
 
 ### `dla exec`
 
-Execute a Python file or Jupyter notebook on a Datalayer runtime.
+Execute a Python file or Jupyter notebook on a Datalayer code sandbox.
 
 **Usage:**
 ```bash
-dla exec <filename> --runtime <runtime-name> [options]
+dla exec <filename> [options]
+dla exec --example-py [options]
+dla exec --example-notebook [options]
 ```
 
 **Arguments:**
-- `filename`: Path to the Python file (.py) or Jupyter notebook (.ipynb) to execute
+- `filename`: Path to the Python file (.py) or Jupyter notebook (.ipynb) to execute (optional when using `--example-py` or `--example-notebook`)
 
 **Options:**
-- `--runtime, -r`: Name of the runtime to execute on (required)
+- `--sandbox, -s`: Name of the code sandbox to execute on (optional)
 - `--verbose, -v`: Show all cell outputs (default: false, outputs are suppressed)
 - `--timeout, -t`: Execution timeout for each cell in seconds
 - `--raise`: Stop executing if an exception occurs (default: continue on errors)
+- `--example-py`: Create and execute a temporary example Python file
+- `--example-notebook`: Create and execute a temporary example notebook
 
 **Examples:**
 ```bash
-# Execute a Python script on a runtime
-dla exec script.py --runtime my-runtime
+# Execute a Python script on a code sandbox
+dla exec script.py --sandbox my-sandbox
+
+# Execute an auto-generated Python example
+dla exec --example-py --sandbox my-sandbox
+
+# Execute an auto-generated notebook example
+dla exec --example-notebook
 
 # Execute a Jupyter notebook with verbose output
-dla exec notebook.ipynb --runtime my-runtime --verbose
+dla exec notebook.ipynb --sandbox my-sandbox --verbose
 
 # Execute with timeout and stop on errors
-dla exec script.py --runtime my-runtime --timeout 30 --raise
+dla exec script.py --sandbox my-sandbox --timeout 30 --raise
 ```
 
 ## File Support
@@ -41,11 +51,11 @@ The exec module supports:
 - **Python files (.py)**: The entire file content is executed as a single cell
 - **Jupyter notebooks (.ipynb)**: Each code cell is executed sequentially, markdown cells are skipped
 
-## Runtime Connection
+## Code Sandbox Connection
 
 The exec module uses the modern `DatalayerClient` and `RuntimeManager` to:
 
-1. Connect to the specified runtime
+1. Connect to the specified code sandbox
 2. Start a kernel session
 3. Execute cells sequentially
 4. Handle interrupts (Ctrl+C) gracefully
@@ -54,7 +64,7 @@ The exec module uses the modern `DatalayerClient` and `RuntimeManager` to:
 ## Error Handling
 
 - File validation (existence, readability)
-- Runtime connection errors
+- Code sandbox connection errors
 - Cell execution errors (can continue or stop based on `--raise` flag)
 - Proper cleanup on interruption or failure
 
@@ -63,5 +73,5 @@ The exec module uses the modern `DatalayerClient` and `RuntimeManager` to:
 The exec functionality is implemented in:
 - `datalayer_core/cli/exec/exec.py`: Main Typer-based CLI commands
 - Uses `datalayer_core/utils/notebook.get_cells()` for file parsing
-- Uses `datalayer_core/cli/console/manager.RuntimeManager` for runtime connection
+- Uses `datalayer_core/cli/console/manager.RuntimeManager` for code sandbox connection
 - Integrates with the main CLI via `datalayer_core/cli/__main__.py`
diff --git a/datalayer_core/cli/commands/agents.py b/datalayer_core/cli/commands/agents.py
index 83798aef..e82d6148 100644
--- a/datalayer_core/cli/commands/agents.py
+++ b/datalayer_core/cli/commands/agents.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -13,10 +16,11 @@
 import typer
 import yaml
 from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
 
 from datalayer_core.client.client import DatalayerClient
-from datalayer_core.displays.runtimes import display_runtimes
-from datalayer_core.runtimes.local import (
+from datalayer_core.agents.agent_local import (
     DEFAULT_LOCAL_AGENT_NAME,
     DEFAULT_LOCAL_HOST,
     DEFAULT_LOCAL_LOG_LEVEL,
@@ -25,6 +29,8 @@
     start_local_agent_runtime,
     terminate_local_agent_runtime,
 )
+from datalayer_core.utils.network import fetch
+from datalayer_core.utils.date import timestamp_to_local_date
 from datalayer_core.utils.urls import DatalayerURLs
 
 DEFAULT_AGENT_SPEC_ID = "example-simple"
@@ -98,6 +104,127 @@ def _load_agent_spec(spec_source: str) -> dict[str, Any]:
     return parsed
 
 
+def _resolve_billable_account_details(
+    *,
+    client: DatalayerClient,
+    billable_account_uid: str,
+) -> dict[str, str]:
+    """Resolve account metadata from IAM whoami/memberships payloads.
+
+    The whoami profile is returned when ``billable_account_uid`` is empty or
+    matches it; otherwise the memberships list is searched for that UID.
+    """
+
+    resolved_token = str(client._get_token() or "").strip()
+    if not resolved_token:
+        return {"uid": billable_account_uid} if billable_account_uid else {}
+
+    iam_base = str(client.urls.iam_url).rstrip("/")
+    headers = {"Authorization": f"Bearer {resolved_token}"}
+
+    try:
+        whoami_response = requests.get(
+            f"{iam_base}/api/iam/v1/whoami",
+            headers=headers,
+            timeout=10,
+        )
+    except Exception:
+        whoami_response = None
+
+    if whoami_response is not None and whoami_response.status_code == 200:
+        payload = whoami_response.json()
+        profile = payload.get("profile") or {}
+        profile_uid = str(profile.get("uid") or "").strip()
+        if profile_uid and (not billable_account_uid or profile_uid == billable_account_uid):
+            full_name = str(profile.get("name") or "").strip()
+            if not full_name:
+                first_name = str(profile.get("first_name") or "").strip()
+                last_name = str(profile.get("last_name") or "").strip()
+                full_name = " ".join(p for p in [first_name, last_name] if p)
+            return {
+                "uid": profile_uid,
+                "handle": str(profile.get("handle") or "").strip(),
+                "type": str(profile.get("type") or "user").strip() or "user",
+                "name": full_name,
+                "description": str(profile.get("description") or "").strip(),
+            }
+
+    try:
+        memberships_response = requests.get(
+            f"{iam_base}/api/iam/v1/memberships",
+            headers=headers,
+            timeout=10,
+        )
+    except Exception:
+        memberships_response = None
+
+    if memberships_response is not None and memberships_response.status_code == 200:
+        memberships_payload = memberships_response.json()
+        memberships = memberships_payload.get("memberships") or []
+        for membership in memberships:
+            uid = str((membership or {}).get("uid") or "").strip()
+            if uid == billable_account_uid:
+                return {
+                    "uid": billable_account_uid,
+                    "handle": str((membership or {}).get("handle") or "").strip(),
+                    "type": str((membership or {}).get("type") or "").strip(),
+                    "name": str((membership or {}).get("name") or "").strip(),
+                    "description": str(
+                        (membership or {}).get("description") or ""
+                    ).strip(),
+                }
+
+    return {"uid": billable_account_uid} if billable_account_uid else {}
+
+
+def _resolve_agentspec_label(runtime_payload: dict[str, Any]) -> str:
+    """Best-effort extraction of agentspec identifier from runtime payload."""
+    candidates = [
+        runtime_payload.get("agent_spec_id"),
+        runtime_payload.get("agentspec_id"),
+        runtime_payload.get("agentSpecId"),
+    ]
+    for candidate in candidates:
+        value = str(candidate or "").strip()
+        if value:
+            return value
+    return "n/a"
+
+
+def _billable_uid_label(
+    *,
+    billable_uid: str,
+    authenticated_uid: str,
+    rich: bool = False,
+) -> str:
+    """Return "me" (Rich-styled if ``rich``) when the billable UID is the authenticated one, else the UID or "n/a"."""
+    if billable_uid and authenticated_uid and billable_uid == authenticated_uid:
+        return "[bold green]me[/bold green]" if rich else "me"
+    return billable_uid or "n/a"
+
+
+def _print_runtime_summary_panel(
+    *,
+    title: str,
+    identifier: str,
+    agentspec: str,
+    url: str,
+) -> None:
+    """Render a compact runtime summary panel."""
+    lines = [
+        f"Identifier: {identifier}",
+        f"Agentspec: {agentspec}",
+        f"URL: {url}",
+    ]
+    console.print(
+        Panel(
+            "\n".join(lines),
+            title=title,
+            border_style="green",
+        )
+    )
+
+
 def _create_local_agent_runtime(
     *,
     agent_spec_id: str,
@@ -150,7 +277,7 @@ def _create_local_agent_runtime(
             f"[green]Local agent runtime '{agent_name}' started![/green]"
         )
         console.print(f"Base URL: {runtime.base_url}")
-        console.print(f"Agent spec id: {agent_spec_id}")
+        console.print(f"Agentspec id: {agent_spec_id}")
         console.print(f"Chat endpoint: {runtime.chat_endpoint}")
         console.print("[dim]Press Ctrl+C to stop the local runtime.[/dim]")
 
@@ -186,23 +313,62 @@ def list_agents(
     try:
         client = _make_client(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
         runtimes = client.list_runtimes()
-        runtime_dicts: list[dict[str, Any]] = []
+
+        authenticated_uid = str(
+            _resolve_billable_account_details(
+                client=client,
+                billable_account_uid="",
+            ).get("uid")
+            or ""
+        ).strip()
+
+        table = Table(title="Agents")
+        table.add_column("ID", style="cyan", no_wrap=True)
+        table.add_column("Name", style="cyan", no_wrap=True)
+        table.add_column("Environment", style="cyan", no_wrap=True)
+        table.add_column("Billable Account UID", style="cyan", no_wrap=True)
+        table.add_column("Expired At", style="cyan", no_wrap=True)
+
         for runtime in runtimes:
-            runtime_dicts.append(
-                {
-                    "given_name": runtime.name,
-                    "environment_name": runtime.environment,
-                    "pod_name": runtime.pod_name,
-                    "ingress": runtime.ingress,
-                    "reservation_id": runtime.reservation_id,
-                    "uid": runtime.uid,
-                    "burning_rate": runtime.burning_rate,
-                    "token": runtime.jupyter_token,
-                    "started_at": runtime.started_at,
-                    "expired_at": runtime.expired_at,
-                }
+            runtime_payload: dict[str, Any] = {}
+            ownership_payload: dict[str, Any] = {}
+            pod_name = str(runtime.pod_name or "")
+            if pod_name:
+                try:
+                    runtime_response = client._get_runtime(pod_name)
+                    runtime_payload = runtime_response.get("runtime") or {}
+                    ownership_payload = runtime_payload.get("ownership") or {}
+                except Exception:
+                    runtime_payload = {}
+                    ownership_payload = {}
+
+            billable_uid = str(
+                runtime_payload.get("billable_account_uid")
+                or ownership_payload.get("billable_account_uid")
+                or getattr(runtime, "billable_account_uid", "")
+                or ""
+            ).strip()
+            if not billable_uid and authenticated_uid:
+                billable_uid = authenticated_uid
+
+            display_billable_uid = _billable_uid_label(
+                billable_uid=billable_uid,
+                authenticated_uid=authenticated_uid,
+                rich=True,
+            )
+
+            expired_at = runtime.expired_at
+            table.add_row(
+                pod_name,
+                str(runtime.name or ""),
+                str(runtime.environment or ""),
+                display_billable_uid,
+                "Never"
+                if expired_at is None
+                else timestamp_to_local_date(expired_at),
             )
-        display_runtimes(runtime_dicts)
+
+        console.print(table)
     except Exception as exc:
         console.print(f"[red]Error listing agent runtimes: {exc}[/red]")
         raise typer.Exit(1)
@@ -220,14 +386,14 @@ def create_agent_runtime(
         None,
         "--agentspec-id",
         help=(
-            "Agent spec id for runtime bootstrap. "
+            "Agentspec id for runtime bootstrap. "
             f"Defaults to {DEFAULT_AGENT_SPEC_ID} when --agentspec is omitted."
         ),
     ),
     spec: Optional[str] = typer.Option(
         None,
         "--agentspec",
-        help="Agent spec source as YAML/JSON URL or local file path.",
+        help="Agentspec source as YAML/JSON URL or local file path.",
     ),
     time_reservation: Optional[float] = typer.Option(
         10.0,
@@ -272,7 +438,12 @@ def create_agent_runtime(
     local: bool = typer.Option(
         False,
         "--local",
-        help="Launch the agent as a local agent-runtimes server instead of a cloud runtime.",
+        help="Launch the agent as a local agent-runtimes server.",
+    ),
+    cloud: bool = typer.Option(
+        False,
+        "--cloud",
+        help="Launch the agent as a cloud runtime.",
     ),
     host: str = typer.Option(
         DEFAULT_LOCAL_HOST,
@@ -295,10 +466,10 @@ def create_agent_runtime(
         help="Log level for the local runtime process (only with --local).",
     ),
 ) -> None:
-    """Create a new runtime preloaded with an agent spec.
+    """Create a new runtime preloaded with an agentspec.
 
-    By default creates a cloud runtime. With ``--local`` it launches a local
-    ``agent-runtimes`` server and serves until interrupted (Ctrl+C).
+    By default creates a cloud runtime. Use ``--local`` for a local
+    ``agent-runtimes`` server, or ``--cloud`` to be explicit.
     """
     import questionary
 
@@ -308,6 +479,9 @@ def create_agent_runtime(
                 "Use either --agentspec-id or --agentspec, not both."
             )
 
+        if local and cloud:
+            raise typer.BadParameter("Use only one of --local or --cloud.")
+
         if local:
             if spec:
                 raise typer.BadParameter(
@@ -366,6 +540,35 @@ def create_agent_runtime(
             billable_account_handle=billable_account_handle,
         )
 
+        authenticated_uid = str(
+            _resolve_billable_account_details(
+                client=client,
+                billable_account_uid="",
+            ).get("uid")
+            or ""
+        ).strip()
+
+        created_runtime_payload: dict[str, Any] = {}
+        ownership_payload: dict[str, Any] = {}
+        created_pod_name = str(runtime.pod_name or "")
+        if created_pod_name:
+            try:
+                created_runtime_response = client._get_runtime(created_pod_name)
+                created_runtime_payload = created_runtime_response.get("runtime") or {}
+                ownership_payload = created_runtime_payload.get("ownership") or {}
+            except Exception:
+                created_runtime_payload = {}
+                ownership_payload = {}
+
+        billable_uid = str(
+            created_runtime_payload.get("billable_account_uid")
+            or ownership_payload.get("billable_account_uid")
+            or billable_account_uid
+            or ""
+        ).strip()
+        if not billable_uid and authenticated_uid:
+            billable_uid = authenticated_uid
+
         if raw:
             payload = {
                 "success": True,
@@ -379,6 +582,11 @@ def create_agent_runtime(
                     "burning_rate": runtime.burning_rate,
                     "started_at": runtime.started_at,
                     "expired_at": runtime.expired_at,
+                    "billable_account_uid": billable_uid or None,
+                    "billable_account_uid_label": _billable_uid_label(
+                        billable_uid=billable_uid,
+                        authenticated_uid=authenticated_uid,
+                    ),
                 },
                 "agent_spec_id": resolved_spec_id,
                 "agent_spec_source": spec or "",
@@ -386,15 +594,15 @@ def create_agent_runtime(
             console.print(json.dumps(payload, ensure_ascii=False))
             return
 
-        console.print(f"[green]Agent runtime '{runtime.name}' created successfully![/green]")
-        if runtime.pod_name:
-            console.print(f"Pod: {runtime.pod_name}")
-        if runtime.ingress:
-            console.print(f"Ingress: {runtime.ingress}")
-        if resolved_spec_id:
-            console.print(f"Agent spec id: {resolved_spec_id}")
-        elif spec:
-            console.print(f"Agent spec source: {spec}")
+        spec_label = resolved_spec_id or spec or "n/a"
+        identifier = str(runtime.pod_name or runtime.uid or runtime.name or "")
+        url = str(runtime.ingress or "")
+        _print_runtime_summary_panel(
+            title="Agent Runtime Created",
+            identifier=identifier,
+            agentspec=spec_label,
+            url=url,
+        )
 
     except typer.Exit:
         raise
@@ -459,7 +667,28 @@ def get_agent_runtime(
                 raise typer.Exit(0)
             pod_name = selected
 
+        runtime_response = client._get_runtime(pod_name)
+        runtime_payload = runtime_response.get("runtime") or {}
+        ownership_payload = runtime_payload.get("ownership") or {}
         runtime = client.get_runtime(pod_name)
+
+        authenticated_uid = str(
+            _resolve_billable_account_details(
+                client=client,
+                billable_account_uid="",
+            ).get("uid")
+            or ""
+        ).strip()
+
+        billable_uid = str(
+            runtime_payload.get("billable_account_uid")
+            or ownership_payload.get("billable_account_uid")
+            or getattr(runtime, "billable_account_uid", "")
+            or ""
+        ).strip()
+        if not billable_uid and authenticated_uid:
+            billable_uid = authenticated_uid
+
         runtime_dict = {
             "given_name": runtime.name,
             "environment_name": runtime.environment,
@@ -471,6 +700,11 @@ def get_agent_runtime(
             "token": runtime.jupyter_token,
             "started_at": runtime.started_at,
             "expired_at": runtime.expired_at,
+            "billable_account_uid": billable_uid or None,
+            "billable_account_uid_label": _billable_uid_label(
+                billable_uid=billable_uid,
+                authenticated_uid=authenticated_uid,
+            ),
         }
 
         if raw:
@@ -481,7 +715,12 @@ def get_agent_runtime(
             )
             return
 
-        display_runtimes([runtime_dict])
+        _print_runtime_summary_panel(
+            title="Agent Runtime",
+            identifier=str(runtime.pod_name or runtime.uid or runtime.name or ""),
+            agentspec=_resolve_agentspec_label(runtime_payload),
+            url=str(runtime.ingress or ""),
+        )
 
     except typer.Exit:
         raise
@@ -664,4 +903,307 @@ def agents_ls(
     ),
 ) -> None:
     """List running agent runtimes (root command alias)."""
-    list_agents(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
\ No newline at end of file
+    list_agents(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
+
+
+@app.command(name="inspect")
+def inspect_agent_runtime(
+    agent: Optional[str] = typer.Option(
+        None,
+        "--agent",
+        "-a",
+        help="Agent identifier (pod name, uid, or given name). Defaults to first running runtime.",
+    ),
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+    iam_url: Optional[str] = typer.Option(
+        None,
+        "--iam-url",
+        help="Datalayer IAM server URL",
+    ),
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL",
+    ),
+) -> None:
+    """Inspect an agent runtime and list available code sandboxes."""
+    try:
+        client = _make_client(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
+        runtimes = client.list_runtimes()
+        if not runtimes:
+            console.print("[yellow]No running runtimes found.[/yellow]")
+            raise typer.Exit(1)
+
+        selected = None
+        if agent:
+            for candidate in runtimes:
+                if agent in {candidate.pod_name, candidate.uid, candidate.name}:
+                    selected = candidate
+                    break
+            if selected is None:
+                console.print(f"[red]Agent '{agent}' not found.[/red]")
+                raise typer.Exit(1)
+        else:
+            selected = runtimes[0]
+
+        pod_name = selected.pod_name or ""
+        runtime_response = client._get_runtime(pod_name)
+        runtime_payload = runtime_response.get("runtime") or {}
+        ownership_payload = runtime_payload.get("ownership") or {}
+
+        refreshed = client.get_runtime(pod_name)
+        endpoint = str(refreshed.ingress or "").rstrip("/")
+        runtime_token = str(refreshed.jupyter_token or client._get_token() or "")
+        if not endpoint:
+            console.print("[red]Runtime has no ingress endpoint.[/red]")
+            raise typer.Exit(1)
+
+        billable_account_uid = str(
+            runtime_payload.get("billable_account_uid")
+            or ownership_payload.get("billable_account_uid")
+            or ""
+        ).strip()
+        billable_account_handle = str(
+            runtime_payload.get("billable_account_handle")
+            or ownership_payload.get("billable_account_handle")
+            or ""
+        ).strip()
+        billable_account_kind = str(
+            runtime_payload.get("billable_account_kind")
+            or ownership_payload.get("billable_account_kind")
+            or runtime_payload.get("billable_account_type")
+            or ownership_payload.get("billable_account_type")
+            or ""
+        ).strip()
+
+        account_details = _resolve_billable_account_details(
+            client=client,
+            billable_account_uid=billable_account_uid,
+        )
+        authenticated_uid = str(
+            _resolve_billable_account_details(
+                client=client,
+                billable_account_uid="",
+            ).get("uid")
+            or ""
+        ).strip()
+        billable_account_uid = str(
+            account_details.get("uid") or billable_account_uid or ""
+        ).strip()
+        display_billable_uid = _billable_uid_label(
+            billable_uid=billable_account_uid,
+            authenticated_uid=authenticated_uid,
+            rich=True,
+        )
+        resolved_handle = str(
+            account_details.get("handle") or billable_account_handle or ""
+        ).strip()
+        resolved_kind = str(
+            account_details.get("type") or billable_account_kind or ""
+        ).strip()
+        resolved_name = str(account_details.get("name") or "").strip()
+        resolved_description = str(account_details.get("description") or "").strip()
+
+        kernel_endpoints = [f"{endpoint}/api/kernels"]
+        if "/jupyter/server/" in endpoint:
+            host_prefix, remainder = endpoint.split("/jupyter/server/", 1)
+            path_parts = [part for part in remainder.split("/") if part]
+            if path_parts:
+                pool = path_parts[0]
+                kernel_endpoints.append(
+                    f"{host_prefix}/jupyter/server/{pool}/api/kernels"
+                )
+            kernel_endpoints.append(f"{host_prefix}/jupyter/api/kernels")
+        kernel_endpoints.append(f"{endpoint}/jupyter/api/kernels")
+
+        # Deduplicate while preserving order.
+        deduped_kernel_endpoints: list[str] = []
+        seen_endpoints: set[str] = set()
+        for kernel_url in kernel_endpoints:
+            if kernel_url not in seen_endpoints:
+                seen_endpoints.add(kernel_url)
+                deduped_kernel_endpoints.append(kernel_url)
+        kernel_endpoints = deduped_kernel_endpoints
+
+        kernels: list[Any] = []
+        kernel_endpoint_used = ""
+        kernel_lookup_error = ""
+        for kernel_url in kernel_endpoints:
+            try:
+                response = fetch(kernel_url, token=runtime_token, timeout=15)
+                payload = response.json() if response.content else []
+                if isinstance(payload, list):
+                    kernels = payload
+                else:
+                    kernels = []
+                kernel_endpoint_used = kernel_url
+                kernel_lookup_error = ""
+                break
+            except Exception as exc:
+                kernel_lookup_error = str(exc)
+
+        if not isinstance(kernels, list):
+            kernels = []
+
+        _print_runtime_summary_panel(
+            title="Agent Runtime Inspection",
+            identifier=str(refreshed.pod_name or refreshed.uid or refreshed.name or ""),
+            agentspec=_resolve_agentspec_label(runtime_payload),
+            url=endpoint,
+        )
+
+        summary = Table(title="Agent Runtime Inspection")
+        summary.add_column("Field", style="cyan")
+        summary.add_column("Value")
+        summary.add_row("Runtime", str(refreshed.name or pod_name))
+        summary.add_row("Pod", str(pod_name))
+        summary.add_row("UID", str(refreshed.uid or ""))
+        summary.add_row("Ingress", endpoint)
+        summary.add_row("Billable Account UID", display_billable_uid)
+        if kernel_endpoint_used:
+            summary.add_row("Kernels", str(len(kernels)))
+            summary.add_row("Kernel API", kernel_endpoint_used)
+        else:
+            summary.add_row("Kernels", "unavailable")
+            summary.add_row("Kernel API", "not exposed via ingress")
+        console.print(summary)
+
+        account_table = Table(title="Billable Account")
+        account_table.add_column("Field", style="cyan")
+        account_table.add_column("Value")
+        account_table.add_row("UID", display_billable_uid)
+        account_table.add_row("Handle", resolved_handle or "n/a")
+        account_table.add_row("Type", resolved_kind or "n/a")
+        account_table.add_row("Name", resolved_name or "n/a")
+        account_table.add_row("Description", resolved_description or "n/a")
+        console.print(account_table)
+
+        code_sandboxes_table = Table(title="Available Code Sandboxes")
+        code_sandboxes_table.add_column("ID", style="green")
+        code_sandboxes_table.add_column("Name")
+        code_sandboxes_table.add_column("State")
+        code_sandboxes_table.add_column("Connections")
+        code_sandboxes_table.add_column("Last Activity")
+
+        for kernel in kernels:
+            code_sandboxes_table.add_row(
+                str((kernel or {}).get("id") or ""),
+                str((kernel or {}).get("name") or ""),
+                str((kernel or {}).get("execution_state") or ""),
+                str((kernel or {}).get("connections") or "0"),
+                str((kernel or {}).get("last_activity") or ""),
+            )
+
+        if kernels:
+            console.print(code_sandboxes_table)
+        else:
+            if kernel_lookup_error:
+                console.print(
+                    "[yellow]Kernel list unavailable (all probed endpoints failed).[/yellow]"
+                )
+                console.print(
+                    "[dim]Probed endpoints:[/dim]"
+                )
+                for kernel_url in kernel_endpoints:
+                    console.print(f"[dim]- {kernel_url}[/dim]")
+                console.print(f"[dim]Last error: {kernel_lookup_error}[/dim]")
+            else:
+                console.print("[yellow]No kernels returned by runtime API.[/yellow]")
+    except typer.Exit:
+        raise
+    except Exception as exc:
+        console.print(f"[red]Error inspecting agent runtime: {exc}[/red]")
+        raise typer.Exit(1)
+
+
+@app.command(name="health")
+def health_agent_runtime(
+    agent: Optional[str] = typer.Option(
+        None,
+        "--agent",
+        "-a",
+        help="Agent identifier (pod name, uid, or given name). Defaults to first running runtime.",
+    ),
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+    api_key: Optional[str] = typer.Option(
+        None,
+        "--api-key",
+        help="Authentication API key (alias for --token).",
+    ),
+    iam_url: Optional[str] = typer.Option(
+        None,
+        "--iam-url",
+        help="Datalayer IAM server URL",
+    ),
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL",
+    ),
+) -> None:
+    """Check agent runtime health by executing a probe on the sandbox."""
+    try:
+        client = _make_client(
+            token=token or api_key,
+            iam_url=iam_url,
+            runtimes_url=runtimes_url,
+        )
+        runtimes = client.list_runtimes()
+        if not runtimes:
+            console.print("[yellow]No running runtimes found.[/yellow]")
+            raise typer.Exit(1)
+
+        selected = None
+        if agent:
+            for candidate in runtimes:
+                if agent in {candidate.pod_name, candidate.uid, candidate.name}:
+                    selected = candidate
+                    break
+            if selected is None:
+                console.print(f"[red]Agent '{agent}' not found.[/red]")
+                raise typer.Exit(1)
+        else:
+            selected = runtimes[0]
+
+        pod_name = selected.pod_name or selected.uid or selected.name or ""
+        refreshed = client.get_runtime(pod_name)
+        health = client.check_runtime_health(
+            pod_name,
+            api_key=api_key,
+        )
+
+        health_status = "alive" if bool(health.get("success")) else "unreachable"
+        detail = str(health.get("message") or "health probe failed")
+        probe_mode = str(health.get("probe_mode") or "n/a")
+
+        table = Table(title="Agent Runtime Health")
+        table.add_column("Field", style="cyan")
+        table.add_column("Value")
+        table.add_row("Runtime", str(refreshed.name or pod_name))
+        table.add_row("Pod", str(pod_name))
+        table.add_row("UID", str(refreshed.uid or ""))
+        table.add_row("Ingress", str(refreshed.ingress or "n/a"))
+        table.add_row("Probe", probe_mode)
+        table.add_row("Status", health_status)
+        table.add_row("Detail", detail)
+        console.print(table)
+
+        stdout_tail = str(health.get("stdout_tail") or "").strip()
+        if stdout_tail:
+            console.print(f"[dim]Probe stdout: {stdout_tail}[/dim]")
+
+        if health_status != "alive":
+            raise typer.Exit(1)
+    except typer.Exit:
+        raise
+    except Exception as exc:
+        console.print(f"[red]Error checking agent runtime health: {exc}[/red]")
+        raise typer.Exit(1)
\ No newline at end of file
diff --git a/datalayer_core/cli/commands/api_keys.py b/datalayer_core/cli/commands/api_keys.py
new file mode 100644
index 00000000..e465a546
--- /dev/null
+++ b/datalayer_core/cli/commands/api_keys.py
@@ -0,0 +1,192 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""API key commands for Datalayer CLI."""
+
+from typing import Optional
+
+import typer
+from rich.console import Console
+
+from datalayer_core.client.client import DatalayerClient
+from datalayer_core.displays.api_keys import display_api_keys
+from datalayer_core.models.api_key import ApiKeyType
+
+# Create a Typer app for API key commands
+app = typer.Typer(
+    name="api-keys",
+    help="API key management commands",
+    invoke_without_command=True,
+)
+
+console = Console()
+
+
+@app.callback()
+def api_keys_callback(ctx: typer.Context) -> None:
+    """API key management commands."""
+    if ctx.invoked_subcommand is None:  # group invoked without any subcommand
+        typer.echo(ctx.get_help())  # print the group's help text
+
+
+@app.command(name="ls")
+def list_api_keys(
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+) -> None:
+    """List all API keys."""
+    # Any failure (client construction, request, rendering) is reported on
+    # the console and turned into exit code 1.
+    try:
+        client = DatalayerClient(token=token)
+
+        # Flatten each API key model into the record layout that
+        # display_api_keys consumes.
+        rows = [
+            {
+                "uid": key.uid,
+                "name_s": key.name,
+                "description_t": key.description,
+                "variant_s": key.api_key_type,
+            }
+            for key in client.list_api_keys()
+        ]
+        display_api_keys(rows)
+
+    except Exception as err:
+        console.print(f"[red]Error listing API keys: {err}[/red]")
+        raise typer.Exit(1)
+
+
+@app.command(name="list")
+def list_api_keys_verbose(
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+) -> None:
+    """List all API keys."""
+    list_api_keys(token=token)  # "list" only adds a second name; the listing logic lives in list_api_keys
+
+
+@app.command(name="create")
+def create_api_key(
+    name: str = typer.Argument(..., help="Name of the API key"),
+    description: str = typer.Argument(..., help="Description of the API key"),
+    expiration_date: Optional[int] = typer.Option(
+        0,
+        "--expiration-date",
+        help="Expiration date in seconds since epoch (0 for no expiration)",
+    ),
+    api_key_type: str = typer.Option(
+        ApiKeyType.SECRET.value,
+        "--api-key-type",
+        help="Type of the API key (secret, publishable, restricted, temporary)",
+    ),
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+) -> None:
+    """Create a new API key."""
+    try:
+        client = DatalayerClient(token=token)
+
+        result = client.create_api_key(
+            name=name,
+            description=description,
+            expiration_date=expiration_date or 0,
+            api_key_type=api_key_type,
+        )
+
+        if result.get("success", False):
+            api_key_data = result.get("api_key", result.get("token", {}))
+            console.print(f"[green]API key '{name}' created successfully![/green]")
+            console.print(
+                f"[yellow]API key value: {result.get('access_token', 'N/A')}[/yellow]"
+            )
+            console.print(
+                "[dim]Please save this API key value securely - it won't be shown again![/dim]"
+            )
+
+            # Display the created API key info.
+            if api_key_data:
+                display_api_keys(
+                    [
+                        {
+                            "uid": api_key_data.get("uid"),
+                            "name_s": api_key_data.get("name_s", name),
+                            "description_t": api_key_data.get(
+                                "description_t", description
+                            ),
+                            "variant_s": api_key_data.get("variant_s", api_key_type),
+                        }
+                    ]
+                )
+        else:
+            console.print(
+                f"[red]Failed to create API key: {result.get('message', 'Unknown error')}[/red]"
+            )
+            raise typer.Exit(1)
+
+    except typer.Exit:
+        # Deliberate exit from the failure branch above: the message is already
+        # printed, so re-raise rather than reporting it again as an error.
+        raise
+    except Exception as e:
+        console.print(f"[red]Error creating API key: {e}[/red]")
+        raise typer.Exit(1)
+
+
+@app.command(name="delete")
+def delete_api_key(
+    uid: str = typer.Argument(..., help="UID of the API key to delete"),
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+) -> None:
+    """Delete an API key."""
+    try:
+        client = DatalayerClient(token=token)
+        success = client.delete_api_key(uid)
+        if success:
+            console.print(f"[green]API key '{uid}' deleted successfully![/green]")
+        else:
+            console.print(f"[red]Failed to delete API key '{uid}'[/red]")
+            raise typer.Exit(1)
+    except typer.Exit:
+        # Failure already reported above; re-raise so it is not printed twice.
+        raise
+    except Exception as e:
+        console.print(f"[red]Error deleting API key: {e}[/red]")
+        raise typer.Exit(1)
+
+
+# Root level commands for convenience
+def api_keys_list(
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+) -> None:
+    """List all API keys (root command)."""
+    list_api_keys(token=token)  # delegate to the "ls" subcommand implementation
+
+
+def api_keys_ls(
+    token: Optional[str] = typer.Option(
+        None,
+        "--token",
+        help="Authentication token (Bearer token for API requests).",
+    ),
+) -> None:
+    """List all API keys (root command alias)."""
+    list_api_keys(token=token)  # delegate to the "ls" subcommand implementation
diff --git a/datalayer_core/cli/commands/authn.py b/datalayer_core/cli/commands/authn.py
index 7bfc2b09..8d0335ad 100644
--- a/datalayer_core/cli/commands/authn.py
+++ b/datalayer_core/cli/commands/authn.py
@@ -449,10 +449,39 @@ def whoami(
         "--details",
         help="Show detailed user information",
     ),
+    urls_only: bool = typer.Option(
+        False,
+        "--urls",
+        help="Show only resolved Datalayer service URLs",
+    ),
 ) -> None:
     """Show current authenticated user."""
     try:
         urls = DatalayerURLs.from_environment(run_url=run_url, iam_url=iam_url)
+
+        if urls_only:
+            url_items = [
+                ("DATALAYER_RUN_URL", urls.run_url),
+                ("DATALAYER_IAM_URL", urls.iam_url),
+                ("DATALAYER_RUNTIMES_URL", urls.runtimes_url),
+                ("DATALAYER_SPACER_URL", urls.spacer_url),
+                ("DATALAYER_LIBRARY_URL", urls.library_url),
+                ("DATALAYER_MANAGER_URL", urls.manager_url),
+                ("DATALAYER_AI_AGENTS_URL", urls.ai_agents_url),
+                ("DATALAYER_AI_INFERENCE_URL", urls.ai_inference_url),
+                ("DATALAYER_OTEL_URL", urls.otel_url),
+                ("DATALAYER_GROWTH_URL", urls.growth_url),
+                ("DATALAYER_SUCCESS_URL", urls.success_url),
+                ("DATALAYER_STATUS_URL", urls.status_url),
+                ("DATALAYER_SUPPORT_URL", urls.support_url),
+                ("DATALAYER_MCP_SERVER_URL", urls.mcp_server_url),
+                ("DATALAYER_SCHEDULER_URL", urls.scheduler_url),
+            ]
+            console.print("[bold]Defined URLs:[/bold]")
+            for env_name, value in url_items:
+                console.print(f"  🌐 {env_name}: [green]{value}[/green]")
+            return
+
         auth = AuthenticationManager(urls.iam_url)
 
         # If token provided, store it temporarily for whoami
@@ -469,11 +498,29 @@ def whoami(
             console.print(f"👤 User: [cyan]{handle}[/cyan]")
             if email:
                 console.print(f"📧 Email: {email}")
-            console.print(f"🌐 Server: [green]{urls.run_url}[/green]")
+            console.print(f"🌐 Datalayer RUN URL: [green]{urls.run_url}[/green]")
 
             if details:
                 console.print("\n[bold]Detailed Information:[/bold]")
 
+                url_items = [
+                    ("DATALAYER_RUN_URL", urls.run_url),
+                    ("DATALAYER_IAM_URL", urls.iam_url),
+                    ("DATALAYER_RUNTIMES_URL", urls.runtimes_url),
+                    ("DATALAYER_SPACER_URL", urls.spacer_url),
+                    ("DATALAYER_LIBRARY_URL", urls.library_url),
+                    ("DATALAYER_MANAGER_URL", urls.manager_url),
+                    ("DATALAYER_AI_AGENTS_URL", urls.ai_agents_url),
+                    ("DATALAYER_AI_INFERENCE_URL", urls.ai_inference_url),
+                    ("DATALAYER_OTEL_URL", urls.otel_url),
+                    ("DATALAYER_GROWTH_URL", urls.growth_url),
+                    ("DATALAYER_SUCCESS_URL", urls.success_url),
+                    ("DATALAYER_STATUS_URL", urls.status_url),
+                    ("DATALAYER_SUPPORT_URL", urls.support_url),
+                    ("DATALAYER_MCP_SERVER_URL", urls.mcp_server_url),
+                    ("DATALAYER_SCHEDULER_URL", urls.scheduler_url),
+                ]
+
                 # Full name
                 first_name = user.get("first_name_t", "")
                 last_name = user.get("last_name_t", "")
@@ -558,20 +605,22 @@ def whoami(
                     teams = [m for m in memberships if (m.get("type") or "").lower() == "team"]
                     org_by_uid = {m.get("uid"): m for m in orgs}
 
+                    console.print("\n[bold]👥 Memberships:[/bold]")
+
                     if orgs:
-                        console.print("\n[bold]🏢 Organizations:[/bold]")
+                        console.print("  [bold]🏢 Organizations:[/bold]")
                         for org in orgs:
                             handle = org.get("handle") or org.get("uid") or "unknown"
                             name = org.get("name") or ""
                             roles = ", ".join(org.get("roles_ss") or []) or "-"
-                            label = f"  • [cyan]{handle}[/cyan]"
+                            label = f"    • [cyan]{handle}[/cyan]"
                             if name and name != handle:
                                 label += f" ({name})"
                             label += f"  uid={org.get('uid')}  roles={roles}"
                             console.print(label)
 
                     if teams:
-                        console.print("\n[bold]👥 Teams:[/bold]")
+                        console.print("  [bold]👥 Teams:[/bold]")
                         for team in teams:
                             handle = team.get("handle") or team.get("uid") or "unknown"
                             name = team.get("name") or ""
@@ -581,7 +630,7 @@ def whoami(
                             parent_label = (
                                 parent.get("handle") if parent else (org_uid or "unknown")
                             )
-                            label = f"  • [cyan]{handle}[/cyan]"
+                            label = f"    • [cyan]{handle}[/cyan]"
                             if name and name != handle:
                                 label += f" ({name})"
                             label += f"  in [magenta]{parent_label}[/magenta]"
@@ -589,7 +638,11 @@ def whoami(
                             console.print(label)
 
                     if not orgs and not teams:
-                        console.print("\n[dim]No organization or team memberships.[/dim]")
+                        console.print("  [dim]No organization or team memberships.[/dim]")
+
+                console.print("\n[bold]Defined URLs:[/bold]")
+                for env_name, value in url_items:
+                    console.print(f"  🌐 {env_name}: [green]{value}[/green]")
         else:
             console.print("[yellow]Not authenticated[/yellow]")
             console.print("Run 'datalayer login' to authenticate")
@@ -684,8 +737,19 @@ def whoami_root(
         "--details",
         help="Show detailed user information",
     ),
+    urls_only: bool = typer.Option(
+        False,
+        "--urls",
+        help="Show only resolved Datalayer service URLs",
+    ),
 ) -> None:
     """
     Show current authenticated user.
     """
-    whoami(run_url=run_url, iam_url=iam_url, token=token, details=details)
+    whoami(
+        run_url=run_url,
+        iam_url=iam_url,
+        token=token,
+        details=details,
+        urls_only=urls_only,
+    )
diff --git a/datalayer_core/cli/commands/runtime_checkpoints.py b/datalayer_core/cli/commands/checkpoints.py
similarity index 83%
rename from datalayer_core/cli/commands/runtime_checkpoints.py
rename to datalayer_core/cli/commands/checkpoints.py
index 16face1c..41b67a45 100644
--- a/datalayer_core/cli/commands/runtime_checkpoints.py
+++ b/datalayer_core/cli/commands/checkpoints.py
@@ -72,11 +72,11 @@ def checkpoints_callback(ctx: typer.Context) -> None:
 
 @app.command(name="ls")
 def checkpoints_list(
-    runtime_uid: Optional[str] = typer.Option(
+    agent_uid: Optional[str] = typer.Option(
         None,
-        "--runtime",
-        "-r",
-        help="Filter checkpoints by runtime UID (pod name). If omitted, lists all checkpoints.",
+        "--agent",
+        "-a",
+        help="Filter checkpoints by agent UID (pod name). If omitted, lists all checkpoints.",
     ),
     token: Optional[str] = typer.Option(
         None,
@@ -91,8 +91,8 @@ def checkpoints_list(
 ) -> None:
     """List runtime checkpoints."""
     try:
-        if runtime_uid:
-            path = f"/runtime-checkpoints/{runtime_uid}"
+        if agent_uid:
+            path = f"/runtime-checkpoints/{agent_uid}"
         else:
             path = "/runtime-checkpoints"
         data = _fetch_api(path, token=token, runtimes_url=runtimes_url)
@@ -109,11 +109,11 @@ def checkpoints_list(
 
 
 def checkpoints_ls(
-    runtime_uid: Optional[str] = typer.Option(
+    agent_uid: Optional[str] = typer.Option(
         None,
-        "--runtime",
-        "-r",
-        help="Filter checkpoints by runtime UID (pod name). If omitted, lists all checkpoints.",
+        "--agent",
+        "-a",
+        help="Filter checkpoints by agent UID (pod name). If omitted, lists all checkpoints.",
     ),
     token: Optional[str] = typer.Option(
         None,
@@ -127,17 +127,17 @@ def checkpoints_ls(
     ),
 ) -> None:
     """List runtime checkpoints (root command alias)."""
-    checkpoints_list(runtime_uid=runtime_uid, token=token, runtimes_url=runtimes_url)
+    checkpoints_list(agent_uid=agent_uid, token=token, runtimes_url=runtimes_url)
 
 
 @app.command(name="delete")
 def checkpoints_delete(
     checkpoint_uid: str = typer.Argument(..., help="Checkpoint UID to delete"),
-    runtime_uid: Optional[str] = typer.Option(
+    agent_uid: Optional[str] = typer.Option(
         None,
-        "--runtime",
-        "-r",
-        help="Runtime UID that owns the checkpoint. If omitted, will be looked up automatically.",
+        "--agent",
+        "-a",
+        help="Agent UID that owns the checkpoint. If omitted, will be looked up automatically.",
     ),
     token: Optional[str] = typer.Option(
         None,
@@ -158,8 +158,8 @@ def checkpoints_delete(
 ) -> None:
     """Delete a runtime checkpoint."""
     try:
-        # If runtime_uid not provided, look up the checkpoint first.
-        if not runtime_uid:
+        # If agent_uid not provided, look up the checkpoint first.
+        if not agent_uid:
             # List all checkpoints and find the one matching the uid.
             data = _fetch_api(
                 "/runtime-checkpoints", token=token, runtimes_url=runtimes_url
@@ -169,16 +169,16 @@ def checkpoints_delete(
             if not match:
                 console.print(f"[red]Checkpoint {checkpoint_uid} not found.[/red]")
                 raise typer.Exit(1)
-            runtime_uid = match["runtime_uid"]
+            agent_uid = match["runtime_uid"]
 
         if not yes:
             typer.confirm(
-                f"Delete checkpoint {checkpoint_uid} from runtime {runtime_uid}?",
+                f"Delete checkpoint {checkpoint_uid} from agent {agent_uid}?",
                 abort=True,
             )
 
         _fetch_api(
-            f"/runtime-checkpoints/{runtime_uid}/{checkpoint_uid}",
+            f"/runtime-checkpoints/{agent_uid}/{checkpoint_uid}",
             method="DELETE",
             token=token,
             runtimes_url=runtimes_url,
diff --git a/datalayer_core/cli/commands/cluster.py b/datalayer_core/cli/commands/cluster.py
index 61e6973b..bf65da12 100644
--- a/datalayer_core/cli/commands/cluster.py
+++ b/datalayer_core/cli/commands/cluster.py
@@ -112,7 +112,29 @@ def _build_anomalies_panel(nodes_with_pods: list[Any], unassigned: list[Any]) ->
         if bool((pod or {}).get("unschedulable")):
             unschedulable_pods += 1
 
+    yellow_total = pending_pods + len(unassigned) + pending_scale_up_nodes + pending_scale_down_nodes
+    red_total = unschedulable_pods + failed_pods + not_ready_nodes
+
+    if red_total > 0:
+        summary_label = "FAILURES"
+        summary_style = "red"
+        border_style = "red"
+    elif yellow_total > 0:
+        summary_label = "WARNING"
+        summary_style = "yellow"
+        border_style = "yellow"
+    else:
+        summary_label = "OK"
+        summary_style = "green"
+        border_style = "green"
+
     lines = Text()
+    lines.append("summary: ", style="bold")
+    lines.append(summary_label, style=f"bold {summary_style}")
+    lines.append("\n", style=summary_style)
+    lines.append(f"yellow flags: {yellow_total}\n", style="yellow")
+    lines.append(f"red flags: {red_total}\n", style="red")
+    lines.append("----------------------------------------\n", style="dim")
     lines.append("Pods\n", style="bold")
     lines.append(f"pending pods: {pending_pods}\n", style="yellow")
     lines.append(f"unschedulable pods: {unschedulable_pods}\n", style="red")
@@ -124,7 +146,7 @@ def _build_anomalies_panel(nodes_with_pods: list[Any], unassigned: list[Any]) ->
     lines.append(f"pending scale-up nodes: {pending_scale_up_nodes}\n", style="cyan")
     lines.append(f"pending scale-down nodes: {pending_scale_down_nodes}", style="cyan")
 
-    return Panel(lines, title="Anomalies", border_style="yellow")
+    return Panel(lines, title="Anomalies", border_style=border_style)
 
 
 @app.callback()
diff --git a/datalayer_core/cli/commands/console.py b/datalayer_core/cli/commands/console.py
index ec13c02e..caad41b3 100644
--- a/datalayer_core/cli/commands/console.py
+++ b/datalayer_core/cli/commands/console.py
@@ -14,7 +14,7 @@
 
 # Create a Typer app for console commands
 app = typer.Typer(
-    name="console", help="Runtime console commands", invoke_without_command=True
+    name="console", help="Agent console commands", invoke_without_command=True
 )
 
 console = Console()
@@ -22,7 +22,7 @@
 
 @app.callback()
 def console_callback(ctx: typer.Context) -> None:
-    """Runtime console commands."""
+    """Agent console commands."""
     if ctx.invoked_subcommand is None:
         typer.echo(ctx.get_help())
 
@@ -31,8 +31,8 @@ def console_callback(ctx: typer.Context) -> None:
 def console_connect(
     runtime_name: Optional[str] = typer.Option(
         None,
-        "--runtime",
-        help="The name of the Runtime to connect to",
+        "--agent",
+        help="The name of the Agent to connect to",
     ),
     run_url: Optional[str] = typer.Option(
         None,
@@ -73,22 +73,22 @@ def console_connect(
         None, help="Additional arguments to pass to the console application"
     ),
 ) -> None:
-    """Connect to a Datalayer runtime console."""
+    """Connect to a Datalayer agent console."""
     try:
         # Get URLs configuration
         urls = DatalayerURLs.from_environment(run_url=run_url)
 
-        console.print("[green]Starting Datalayer runtime console...[/green]")
+        console.print("[green]Starting Datalayer agent console...[/green]")
         console.print(f"Run URL: {urls.run_url}")
         if runtime_name:
-            console.print(f"Runtime: {runtime_name}")
+            console.print(f"Agent: {runtime_name}")
         console.print("[yellow]Press Ctrl+D or Ctrl+C to exit the console[/yellow]")
 
         # Prepare sys.argv for the RuntimesConsoleApp
         args = []
 
         if runtime_name:
-            args.extend(["--runtime", runtime_name])
+            args.extend(["--agent", runtime_name])
         if urls.run_url:
             args.extend(["--run-url", urls.run_url])
         if token:
@@ -124,7 +124,7 @@ def console_connect(
     except KeyboardInterrupt:
         console.print("\n[yellow]Console session ended.[/yellow]")
     except Exception as e:
-        console.print(f"[red]Error connecting to runtime console: {e}[/red]")
+        console.print(f"[red]Error connecting to agent console: {e}[/red]")
         raise typer.Exit(1)
 
 
@@ -134,8 +134,8 @@ def console_callback_default(
     ctx: typer.Context,
     runtime_name: Optional[str] = typer.Option(
         None,
-        "--runtime",
-        help="The name of the Runtime to connect to",
+        "--agent",
+        help="The name of the Agent to connect to",
     ),
     run_url: Optional[str] = typer.Option(
         None,
@@ -173,7 +173,7 @@ def console_callback_default(
         help="Connect to an existing kernel instead of starting a new one",
     ),
 ) -> None:
-    """Connect to a Datalayer runtime console (default behavior)."""
+    """Connect to a Datalayer agent console (default behavior)."""
     if ctx.invoked_subcommand is None:
         # Get any remaining arguments that weren't parsed
         extra_args: list[str] = []
diff --git a/datalayer_core/cli/commands/envs.py b/datalayer_core/cli/commands/envs.py
index fbc8d71e..b949f00f 100644
--- a/datalayer_core/cli/commands/envs.py
+++ b/datalayer_core/cli/commands/envs.py
@@ -2,7 +2,6 @@
 # Distributed under the terms of the Modified BSD License.
 
 """Environment commands for Datalayer CLI."""
-
 from typing import Any, Dict, Optional
 
 import typer
@@ -67,25 +66,26 @@ def list_environments(
         # Convert to dict format for display_environments
         env_dicts: list[Dict[str, Any]] = []
         for env in environments:
-            env_dicts.append(
-                {
-                    "name": env.name,
-                    "title": env.title,
-                    "burning_rate": env.burning_rate,
-                    "language": env.language,
-                    "owner": env.owner,
-                    "visibility": env.visibility,
-                    **(env.metadata or {}),
-                }
-            )
+            env_dict: Dict[str, Any] = {
+                "name": env.name,
+                "title": env.title,
+                "burning_rate": env.burning_rate,
+                "language": env.language,
+                "owner": env.owner,
+                "visibility": env.visibility,
+            }
+            for key, value in (env.metadata or {}).items():
+                if key not in env_dict:
+                    env_dict[key] = value
+            env_dicts.append(env_dict)
 
         display_environments(env_dicts)
 
         if len(env_dicts) > 0:
-            console.print("\n[dim]Create a Runtime with e.g.[/dim]")
+            console.print("\n[dim]Create an Agent with e.g.[/dim]")
             for env_dict in env_dicts:
                 console.print(
-                    f"[dim]datalayer runtimes create --given-name my-runtime --credits-limit 3 {env_dict['name']}[/dim]"
+                    f"[dim]datalayer agents create --given-name my-agent {env_dict['name']}[/dim]"
                 )
             console.print()
 
diff --git a/datalayer_core/cli/commands/evals.py b/datalayer_core/cli/commands/evals.py
index 72f27732..ea4d3734 100644
--- a/datalayer_core/cli/commands/evals.py
+++ b/datalayer_core/cli/commands/evals.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -5,11 +8,7 @@
 
 from __future__ import annotations
 
-from datetime import datetime, timezone
-import csv
 import json
-import math
-import re
 import time
 from pathlib import Path
 from typing import Any, Optional
@@ -19,8 +18,36 @@
 from rich.table import Table
 from rich.tree import Tree
 
-from datalayer_core.client.client import DatalayerClient
-from datalayer_core.utils.urls import DatalayerURLs
+from datalayer_core.evals.evals import (
+    load_evalset_spec,
+)
+from datalayer_core.evals.evals import (
+    make_client as _make_client,
+)
+from datalayer_core.evals.evals import (
+    merge_dicts as _merge_dicts,
+)
+from datalayer_core.evals.evals import (
+    parse_json_file as _parse_json_file,
+)
+from datalayer_core.evals.evals import (
+    parse_json_value as _parse_json_value,
+)
+from datalayer_core.evals.evals import (
+    resolve_billable_account_uid as _resolve_billable_account_uid,
+)
+from datalayer_core.evals.evaluators import evaluate_evalset
+from datalayer_core.evals.report import (
+    _now_iso,
+    _parse_csv_values,
+    _parse_evaluator_specs,
+    _print_report_console,
+    _report_data,
+    _report_markdown,
+    _status_style,
+    _timestamp_slug,
+    _write_report_csv,
+)
 
 app = typer.Typer(
     name="evals",
@@ -36,1197 +63,6 @@
 console = Console()
 
 
-def _now_iso() -> str:
-    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
-
-
-def _timestamp_slug(raw_iso: str) -> str:
-    cleaned = raw_iso.replace("-", "").replace(":", "").replace(".", "")
-    cleaned = cleaned.replace("+0000", "Z").replace("+00:00", "Z")
-    cleaned = cleaned.replace("T", "T")
-    if cleaned.endswith("Z"):
-        return cleaned
-    return f"{cleaned}Z"
-
-
-def _parse_json_value(raw: Optional[str], flag_name: str) -> dict[str, Any]:
-    if not raw:
-        return {}
-    try:
-        parsed = json.loads(raw)
-    except Exception as exc:
-        raise typer.BadParameter(f"Invalid JSON for {flag_name}: {exc}") from exc
-    if not isinstance(parsed, dict):
-        raise typer.BadParameter(f"{flag_name} must decode to an object")
-    return parsed
-
-
-def _parse_json_file(path_value: Optional[str], flag_name: str) -> dict[str, Any]:
-    if not path_value:
-        return {}
-    path = Path(path_value)
-    if not path.exists():
-        raise typer.BadParameter(f"File not found for {flag_name}: {path}")
-    text = path.read_text(encoding="utf-8")
-    return _parse_json_value(text, flag_name)
-
-
-def _merge_dicts(*parts: dict[str, Any]) -> dict[str, Any]:
-    merged: dict[str, Any] = {}
-    for part in parts:
-        merged.update(part)
-    return merged
-
-
-def _make_client(
-    token: Optional[str] = None,
-    ai_agents_url: Optional[str] = None,
-) -> DatalayerClient:
-    urls = DatalayerURLs.from_environment(ai_agents_url=ai_agents_url)
-    return DatalayerClient(urls=urls, token=token)
-
-
-def _status_style(status: str) -> str:
-    normalized = status.lower()
-    if normalized in {"completed", "success", "passed"}:
-        return "green"
-    if normalized in {"running", "queued", "pending"}:
-        return "yellow"
-    if normalized in {"failed", "error"}:
-        return "red"
-    return "white"
-
-
-def _run_pass_rate(run: dict[str, Any]) -> float | None:
-    metrics = run.get("metrics") or {}
-    raw = metrics.get("pass_rate")
-    if isinstance(raw, (int, float)):
-        value = float(raw)
-        if value < 0:
-            return 0.0
-        if value > 1:
-            return 1.0
-        return value
-    return None
-
-
-def _fmt_pct(raw: float | None) -> str:
-    if raw is None:
-        return "n/a"
-    return f"{raw * 100:.1f}%"
-
-
-def _style_text(value: str, style: str | None, colorize: bool) -> str:
-    if not colorize or not style:
-        return value
-    return f"[{style}]{value}[/{style}]"
-
-
-def _compute_baseline_and_drift(runs: list[dict[str, Any]]) -> tuple[float | None, float | None, float | None]:
-    pass_rates = [rate for rate in (_run_pass_rate(run) for run in runs) if rate is not None]
-    if not pass_rates:
-        return None, None, None
-    baseline_size = min(3, max(1, len(pass_rates) // 2))
-    baseline_slice = pass_rates[:baseline_size]
-    baseline = sum(baseline_slice) / baseline_size
-    latest = pass_rates[-1]
-    drift = latest - baseline
-    return baseline, latest, drift
-
-
-def _classify_legacy_failure(message: str) -> dict[str, Any]:
-    """Infer a structured stage/type/url from a free-form legacy error message.
-
-    Older runs (and any path that only persisted a plain error string) lack a
-    structured ``failure_cause``. Rather than rendering ``unknown`` /
-    ``legacy_error`` with an empty detail excerpt, classify the most common
-    error shapes so the report stays actionable.
-    """
-    text = message.strip()
-    lowered = text.lower()
-
-    url_match = re.search(r"https?://[^\s]+", text)
-    execution_url = url_match.group(0).rstrip(".,)") if url_match else ""
-
-    stage = "unknown"
-    failure_type = "legacy_error"
-    if "all connection attempts failed" in lowered or "connection refused" in lowered or "request failed" in lowered:
-        stage = "runtime_execution"
-        failure_type = "runtime_unreachable"
-    elif "returned http" in lowered or re.search(r"\bhttp\s*[45]\d\d\b", lowered):
-        stage = "runtime_execution"
-        failure_type = "runtime_http_error"
-    elif "traceback" in lowered:
-        stage = "runtime_execution"
-        failure_type = "runtime_traceback"
-    elif "no submitted code" in lowered or "missing" in lowered and "code" in lowered:
-        stage = "run_preparation"
-        failure_type = "missing_submitted_code"
-    elif "no interactive runtime url" in lowered or "not configured" in lowered:
-        stage = "runtime_resolution"
-        failure_type = "no_runtime_url"
-
-    cause: dict[str, Any] = {
-        "stage": stage,
-        "type": failure_type,
-        "message": text,
-        "detail_excerpt": text,
-    }
-    if execution_url:
-        cause["execution_url"] = execution_url
-    return cause
-
-
-def _extract_failure_cause(run: dict[str, Any]) -> dict[str, Any] | None:
-    """Extract a structured failure cause from a run's report/summary payload."""
-    for container_key in ("report", "summary"):
-        container = run.get(container_key)
-        if isinstance(container, dict):
-            cause = container.get("failure_cause")
-            if isinstance(cause, dict) and cause:
-                return cause
-    # Fallback: synthesize a structured cause from legacy error fields.
-    summary = run.get("summary") if isinstance(run.get("summary"), dict) else {}
-    report = run.get("report") if isinstance(run.get("report"), dict) else {}
-    message = (
-        summary.get("failure_reason")
-        or summary.get("execution_error")
-        or report.get("error")
-    )
-    if isinstance(message, str) and message.strip():
-        return _classify_legacy_failure(message)
-    return None
-
-
-def _format_failure_cause(cause: dict[str, Any] | None) -> str:
-    """Render a failure cause as a concise single-line string."""
-    if not isinstance(cause, dict) or not cause:
-        return ""
-    failure_type = str(cause.get("type") or "").strip()
-    message = str(cause.get("message") or "").strip()
-    parts: list[str] = []
-    if failure_type:
-        parts.append(f"[{failure_type}]")
-    if message:
-        parts.append(message)
-    return " ".join(parts).strip()
-
-
-def _failure_cause_detail_lines(cause: dict[str, Any]) -> list[str]:
-    """Render the full failure cause (message, context, diagnostics, attempts) as markdown lines."""
-    lines: list[str] = []
-    message = str(cause.get("message") or "").strip()
-    if message:
-        lines.append(f"- Message: {message}")
-    for key, label in (
-        ("stage", "Stage"),
-        ("type", "Type"),
-        ("runtime_pod_name", "Runtime pod"),
-        ("environment_name", "Environment"),
-        ("execution_url", "Execution URL"),
-    ):
-        value = str(cause.get(key) or "").strip()
-        if value:
-            lines.append(f"- {label}: `{value}`")
-
-    detail = str(cause.get("detail_excerpt") or "").strip()
-    if detail:
-        lines.append("- Detail excerpt:")
-        lines.append("")
-        lines.append("```text")
-        lines.extend(detail.splitlines() or [detail])
-        lines.append("```")
-
-    diagnostics = cause.get("diagnostics")
-    if isinstance(diagnostics, dict) and diagnostics:
-        for key, label in (
-            ("agent_runtimes_url", "Agent runtimes URL"),
-            ("run_url", "Run URL"),
-        ):
-            value = diagnostics.get(key)
-            if value:
-                lines.append(f"- {label}: `{value}`")
-        for key, label in (
-            ("route_ids", "Route IDs tried"),
-            ("discovered_agent_ids", "Discovered agent IDs"),
-            ("candidate_urls", "Candidate URLs"),
-        ):
-            value = diagnostics.get(key)
-            if isinstance(value, list) and value:
-                rendered = ", ".join(f"`{item}`" for item in value)
-                lines.append(f"- {label}: {rendered}")
-
-        attempts = diagnostics.get("attempts")
-        if isinstance(attempts, list) and attempts:
-            lines.append("- Connection attempts:")
-            attempt_rows: list[list[str]] = []
-            for attempt in attempts:
-                if not isinstance(attempt, dict):
-                    continue
-                status_code = attempt.get("status_code")
-                attempt_rows.append(
-                    [
-                        str(attempt.get("url") or "-"),
-                        "ok" if attempt.get("ok") else "failed",
-                        "-" if status_code is None else str(status_code),
-                        str(attempt.get("error") or "-"),
-                    ]
-                )
-            if attempt_rows:
-                lines.append("")
-                lines.extend(
-                    _markdown_table(
-                        ["URL", "Result", "HTTP", "Error"],
-                        attempt_rows,
-                        ["left", "left", "right", "left"],
-                    )
-                )
-    return lines
-
-
-def _run_detail_record(run: dict[str, Any]) -> dict[str, Any]:
-    metrics = run.get("metrics") if isinstance(run.get("metrics"), dict) else {}
-    summary = run.get("summary") if isinstance(run.get("summary"), dict) else {}
-    report = run.get("report") if isinstance(run.get("report"), dict) else {}
-    return {
-        "id": str(run.get("id", "")),
-        "status": str(run.get("status", "")),
-        "created_at": str(run.get("created_at", "")),
-        "updated_at": str(run.get("updated_at", "")),
-        "pass_rate": _run_pass_rate(run),
-        "metrics": metrics,
-        "summary": summary,
-        "report": report,
-        "failure_cause": _extract_failure_cause(run),
-    }
-
-
-def _report_data(
-    client: DatalayerClient,
-    evalset_id: str,
-    run_limit: int,
-    account_uid: Optional[str],
-) -> dict[str, Any]:
-    experiments_payload = client.evals_list_experiments(
-        evalset_id=evalset_id,
-        limit=200,
-        offset=0,
-        account_uid=account_uid,
-    )
-    experiments = experiments_payload.get("experiments") or []
-
-    report: dict[str, Any] = {
-        "evalset_id": evalset_id,
-        "generated_at": _now_iso(),
-        "experiments": [],
-    }
-
-    for experiment in experiments:
-        experiment_id = str(experiment.get("id", ""))
-        experiment_name = str(experiment.get("name", experiment_id))
-
-        runs_payload = client.evals_list_runs(
-            experiment_id,
-            limit=run_limit,
-            offset=0,
-            account_uid=account_uid,
-        )
-        runs = runs_payload.get("runs") or []
-        total_runs = int(runs_payload.get("total") or len(runs))
-        baseline, latest, drift = _compute_baseline_and_drift(runs)
-
-        latest_two_delta: float | None = None
-        latest_two_run_ids: list[str] = []
-        latest_two_compare: dict[str, Any] | None = None
-        if len(runs) >= 2:
-            latest_two_run_ids = [str(runs[0].get("id", "")), str(runs[1].get("id", ""))]
-            compare_payload = client.evals_compare_runs(
-                latest_two_run_ids,
-                account_uid=account_uid,
-            )
-            compared_runs = compare_payload.get("runs") or []
-            compared_by_id = {
-                str(run.get("id", "")): run
-                for run in compared_runs
-                if isinstance(run, dict)
-            }
-            run_a = compared_by_id.get(latest_two_run_ids[0], runs[0])
-            run_b = compared_by_id.get(latest_two_run_ids[1], runs[1])
-            pass_a = _run_pass_rate(run_a)
-            pass_b = _run_pass_rate(run_b)
-            if pass_a is not None and pass_b is not None:
-                latest_two_delta = pass_a - pass_b
-            latest_two_compare = {
-                "run_ids": latest_two_run_ids,
-                "run_a": _run_detail_record(run_a),
-                "run_b": _run_detail_record(run_b),
-                "delta_pass_rate": latest_two_delta,
-            }
-
-        consecutive_comparisons: list[dict[str, Any]] = []
-        for idx in range(max(0, len(runs) - 1)):
-            run_a = runs[idx]
-            run_b = runs[idx + 1]
-            pass_a = _run_pass_rate(run_a)
-            pass_b = _run_pass_rate(run_b)
-            delta = None
-            if pass_a is not None and pass_b is not None:
-                delta = pass_a - pass_b
-            consecutive_comparisons.append(
-                {
-                    "run_a_id": str(run_a.get("id", "")),
-                    "run_b_id": str(run_b.get("id", "")),
-                    "run_a_status": str(run_a.get("status", "")),
-                    "run_b_status": str(run_b.get("status", "")),
-                    "run_a_pass_rate": pass_a,
-                    "run_b_pass_rate": pass_b,
-                    "delta_pass_rate": delta,
-                }
-            )
-
-        pass_rates = [
-            _run_pass_rate(run)
-            for run in runs
-            if isinstance(_run_pass_rate(run), (int, float))
-        ]
-        numeric_pass_rates = [float(value) for value in pass_rates if isinstance(value, (int, float))]
-        mean_pass = sum(numeric_pass_rates) / len(numeric_pass_rates) if numeric_pass_rates else None
-        stddev_pass = None
-        if numeric_pass_rates:
-            variance = sum((value - mean_pass) ** 2 for value in numeric_pass_rates) / len(numeric_pass_rates)
-            stddev_pass = math.sqrt(variance)
-
-        report["experiments"].append(
-            {
-                "id": experiment_id,
-                "name": experiment_name,
-                "runs_total": total_runs,
-                "runs_fetched": len(runs),
-                "latest_pass_rate": latest,
-                "baseline_pass_rate": baseline,
-                "drift_delta": drift,
-                "latest_two_run_ids": latest_two_run_ids,
-                "latest_two_delta": latest_two_delta,
-                "latest_two_comparison": latest_two_compare,
-                "mean_pass_rate": mean_pass,
-                "stddev_pass_rate": stddev_pass,
-                "runs": [_run_detail_record(run) for run in runs],
-                "consecutive_comparisons": consecutive_comparisons,
-            }
-        )
-    return report
-
-
-def _ascii_bar(
-    value: float | None,
-    width: int = 28,
-    *,
-    full_blocks: bool = True,
-    colorize: bool = False,
-) -> str:
-    if value is None:
-        return "-"
-    bounded = max(0.0, min(1.0, float(value)))
-    filled = int(round(bounded * width))
-    fill_char = "█" if full_blocks else "#"
-    empty_char = "░" if full_blocks else "."
-    filled_part = fill_char * filled
-    empty_part = empty_char * (width - filled)
-    if not colorize:
-        return filled_part + empty_part
-    if bounded >= 0.85:
-        style = "green"
-    elif bounded >= 0.75:
-        style = "yellow"
-    else:
-        style = "red"
-    return _style_text(filled_part, style, True) + _style_text(empty_part, "grey39", True)
-
-
-def _fmt_pts(value: float) -> str:
-    return f"{value * 100:.1f}"
-
-
-def _ascii_histogram(
-    values: list[float],
-    *,
-    bins: int = 8,
-    width: int = 22,
-    min_value: float | None = None,
-    max_value: float | None = None,
-    full_blocks: bool = True,
-    colorize: bool = False,
-    drift_palette: bool = False,
-) -> list[str]:
-    if not values:
-        return ["n/a"]
-
-    lo = min_value if isinstance(min_value, (int, float)) else min(values)
-    hi = max_value if isinstance(max_value, (int, float)) else max(values)
-    if hi <= lo:
-        hi = lo + 1e-9
-
-    bins = max(2, bins)
-    counts = [0 for _ in range(bins)]
-    span = hi - lo
-    for value in values:
-        ratio = (value - lo) / span
-        idx = int(ratio * bins)
-        idx = max(0, min(bins - 1, idx))
-        counts[idx] += 1
-
-    peak = max(counts) if counts else 1
-    fill_char = "█" if full_blocks else "#"
-    empty_char = "░" if full_blocks else "."
-    lines: list[str] = []
-    for idx, count in enumerate(counts):
-        left = lo + (span * idx / bins)
-        right = lo + (span * (idx + 1) / bins)
-        filled = int(round((count / peak) * width)) if peak > 0 else 0
-        filled_part = fill_char * filled
-        empty_part = empty_char * (width - filled)
-        if colorize:
-            if drift_palette:
-                if right <= 0:
-                    bar_style = "red"
-                elif left >= 0:
-                    bar_style = "green"
-                else:
-                    bar_style = "yellow"
-            elif peak > 0 and count / peak >= 0.67:
-                bar_style = "cyan"
-            elif peak > 0 and count / peak >= 0.34:
-                bar_style = "blue"
-            else:
-                bar_style = "magenta"
-            bar = _style_text(filled_part, bar_style, True) + _style_text(empty_part, "grey39", True)
-        else:
-            bar = filled_part + empty_part
-        lines.append(
-            f"{_fmt_pts(left):>6} to {_fmt_pts(right):>6} pts |{bar}| {count}"
-        )
-    return lines
-
-
-def _fmt_delta(value: float | None, *, colorize: bool = False) -> str:
-    if value is None:
-        return "n/a"
-    rendered = f"{value * 100:+.1f} pts"
-    if value > 0:
-        return _style_text(rendered, "green", colorize)
-    if value < 0:
-        return _style_text(rendered, "red", colorize)
-    return _style_text(rendered, "yellow", colorize)
-
-
-def _sparkline(values: list[float], *, colorize: bool = False) -> str:
-    if not values:
-        return "n/a"
-    ticks = "▁▂▃▄▅▆▇█"
-    lo = min(values)
-    hi = max(values)
-    if hi <= lo:
-        base = ticks[-2] * len(values)
-    else:
-        span = hi - lo
-        chars = []
-        for value in values:
-            idx = int(round(((value - lo) / span) * (len(ticks) - 1)))
-            idx = max(0, min(len(ticks) - 1, idx))
-            chars.append(ticks[idx])
-        base = "".join(chars)
-    if not colorize:
-        return base
-    if values[-1] >= 0.85:
-        style = "green"
-    elif values[-1] >= 0.75:
-        style = "yellow"
-    else:
-        style = "red"
-    return _style_text(base, style, True)
-
-
-def _pairwise_latest_deltas(experiments: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    pairs: list[dict[str, Any]] = []
-    for idx, left in enumerate(experiments):
-        left_latest = left.get("latest_pass_rate")
-        if not isinstance(left_latest, (int, float)):
-            continue
-        for right in experiments[idx + 1 :]:
-            right_latest = right.get("latest_pass_rate")
-            if not isinstance(right_latest, (int, float)):
-                continue
-            pairs.append(
-                {
-                    "left": str(left.get("name", "")),
-                    "right": str(right.get("name", "")),
-                    "left_latest": float(left_latest),
-                    "right_latest": float(right_latest),
-                    "delta": float(left_latest) - float(right_latest),
-                }
-            )
-    pairs.sort(key=lambda item: abs(item["delta"]), reverse=True)
-    return pairs
-
-
-def _markdown_table(headers: list[str], rows: list[list[str]], aligns: list[str]) -> list[str]:
-    widths = [len(header) for header in headers]
-    for row in rows:
-        for idx, cell in enumerate(row):
-            widths[idx] = max(widths[idx], len(cell))
-
-    def _pad(cell: str, width: int, align: str) -> str:
-        if align == "right":
-            return cell.rjust(width)
-        return cell.ljust(width)
-
-    header_line = "| " + " | ".join(headers[idx].ljust(widths[idx]) for idx in range(len(headers))) + " |"
-
-    sep_parts: list[str] = []
-    for idx, align in enumerate(aligns):
-        width = max(3, widths[idx])
-        if align == "right":
-            sep_parts.append("-" * (width - 1) + ":")
-        else:
-            sep_parts.append(":" + "-" * (width - 1))
-    sep_line = "| " + " | ".join(sep_parts) + " |"
-
-    body_lines = [
-        "| " + " | ".join(_pad(row[idx], widths[idx], aligns[idx]) for idx in range(len(headers))) + " |"
-        for row in rows
-    ]
-    return [header_line, sep_line, *body_lines]
-
-
-def _report_markdown(report: dict[str, Any], run_limit: int, *, colorize: bool = False) -> str:
-    evalset_id = str(report.get("evalset_id", ""))
-    generated_at = str(report.get("generated_at", ""))
-    experiments = [item for item in (report.get("experiments") or []) if isinstance(item, dict)]
-
-    lines: list[str] = []
-    lines.append(f"# Evals Report: {evalset_id}")
-    lines.append("")
-    lines.append(f"- Generated at: {generated_at}")
-    lines.append(f"- Experiments: {len(experiments)}")
-    lines.append(f"- Run window per experiment: {run_limit}")
-    lines.append("")
-
-    lines.append("## Experiment Overview")
-    lines.append("")
-    overview_rows: list[list[str]] = []
-    for experiment in experiments:
-        runs_fetched = int(experiment.get("runs_fetched") or 0)
-        runs_total = int(experiment.get("runs_total") or 0)
-        overview_rows.append(
-            [
-                f"{experiment.get('name', '')}",
-                f"{runs_fetched}/{runs_total}",
-                _fmt_pct(experiment.get('latest_pass_rate') if isinstance(experiment.get('latest_pass_rate'), (int, float)) else None),
-                _fmt_pct(experiment.get('baseline_pass_rate') if isinstance(experiment.get('baseline_pass_rate'), (int, float)) else None),
-                _fmt_delta(experiment.get('drift_delta') if isinstance(experiment.get('drift_delta'), (int, float)) else None, colorize=colorize),
-                _fmt_delta(experiment.get('latest_two_delta') if isinstance(experiment.get('latest_two_delta'), (int, float)) else None, colorize=colorize),
-            ]
-        )
-    lines.extend(
-        _markdown_table(
-            ["Experiment", "Runs (fetched/total)", "Latest", "Baseline", "Drift", "Latest-2 Delta"],
-            overview_rows,
-            ["left", "right", "right", "right", "right", "right"],
-        )
-    )
-    lines.append("")
-
-    lines.append("## Comparison Combinations")
-    lines.append("")
-
-    ranked_latest = sorted(
-        [item for item in experiments if isinstance(item.get("latest_pass_rate"), (int, float))],
-        key=lambda item: float(item.get("latest_pass_rate") or 0.0),
-        reverse=True,
-    )
-    lines.append("### By Latest Pass Rate")
-    lines.append("")
-    latest_rows: list[list[str]] = []
-    for idx, item in enumerate(ranked_latest, start=1):
-        latest_rows.append([str(idx), f"{item.get('name', '')}", _fmt_pct(float(item.get('latest_pass_rate') or 0.0))])
-    lines.extend(_markdown_table(["Rank", "Experiment", "Latest"], latest_rows, ["right", "left", "right"]))
-    latest_values = [
-        float(item.get("latest_pass_rate"))
-        for item in ranked_latest
-        if isinstance(item.get("latest_pass_rate"), (int, float))
-    ]
-    lines.append("")
-    lines.append("Latest pass-rate histogram (pts):")
-    for hist_line in _ascii_histogram(
-        latest_values,
-        bins=8,
-        width=20,
-        min_value=0.0,
-        max_value=1.0,
-        full_blocks=True,
-        colorize=colorize,
-    ):
-        lines.append(f"`{hist_line}`")
-    lines.append("")
-
-    ranked_drift = sorted(
-        [item for item in experiments if isinstance(item.get("drift_delta"), (int, float))],
-        key=lambda item: float(item.get("drift_delta") or 0.0),
-    )
-    lines.append("### By Drift (Most Negative To Most Positive)")
-    lines.append("")
-    drift_rows: list[list[str]] = []
-    for idx, item in enumerate(ranked_drift, start=1):
-        drift_rows.append([str(idx), f"{item.get('name', '')}", _fmt_delta(float(item.get('drift_delta') or 0.0), colorize=colorize)])
-    lines.extend(_markdown_table(["Rank", "Experiment", "Drift"], drift_rows, ["right", "left", "right"]))
-    drift_values = [
-        float(item.get("drift_delta"))
-        for item in ranked_drift
-        if isinstance(item.get("drift_delta"), (int, float))
-    ]
-    lines.append("")
-    lines.append("Drift histogram (delta pts):")
-    for hist_line in _ascii_histogram(
-        drift_values,
-        bins=8,
-        width=20,
-        full_blocks=True,
-        colorize=colorize,
-        drift_palette=True,
-    ):
-        lines.append(f"`{hist_line}`")
-    lines.append("")
-
-    ranked_stability = sorted(
-        [item for item in experiments if isinstance(item.get("stddev_pass_rate"), (int, float))],
-        key=lambda item: float(item.get("stddev_pass_rate") or 0.0),
-    )
-    lines.append("### By Stability (Lowest Pass-Rate StdDev)")
-    lines.append("")
-    stability_rows: list[list[str]] = []
-    for idx, item in enumerate(ranked_stability, start=1):
-        stddev = item.get("stddev_pass_rate")
-        mean = item.get("mean_pass_rate")
-        stability_rows.append(
-            [
-                str(idx),
-                f"{item.get('name', '')}",
-                (f"{float(stddev) * 100:.2f} pts" if isinstance(stddev, (int, float)) else "n/a"),
-                (_fmt_pct(float(mean)) if isinstance(mean, (int, float)) else "n/a"),
-            ]
-        )
-    lines.extend(_markdown_table(["Rank", "Experiment", "StdDev", "Mean"], stability_rows, ["right", "left", "right", "right"]))
-    lines.append("")
-
-    pairwise = _pairwise_latest_deltas(experiments)
-    lines.append("### Pairwise Latest-Pass Deltas")
-    lines.append("")
-    pair_rows: list[list[str]] = []
-    for pair in pairwise:
-        pair_rows.append(
-            [
-                f"{pair['left']} vs {pair['right']}",
-                _fmt_pct(pair['left_latest']),
-                _fmt_pct(pair['right_latest']),
-                _fmt_delta(pair['delta'], colorize=colorize),
-            ]
-        )
-    if not pairwise:
-        pair_rows.append(["n/a", "n/a", "n/a", "n/a"])
-    lines.extend(
-        _markdown_table(
-            ["Pair", "Left Latest", "Right Latest", "Delta (Left-Right)"],
-            pair_rows,
-            ["left", "right", "right", "right"],
-        )
-    )
-    pair_deltas = [float(pair["delta"]) for pair in pairwise if isinstance(pair.get("delta"), (int, float))]
-    lines.append("")
-    lines.append("Pairwise latest-delta histogram (pts):")
-    for hist_line in _ascii_histogram(
-        pair_deltas,
-        bins=8,
-        width=20,
-        full_blocks=True,
-        colorize=colorize,
-        drift_palette=True,
-    ):
-        lines.append(f"`{hist_line}`")
-    lines.append("")
-
-    lines.append("### Insight Highlights")
-    lines.append("")
-    best_latest = ranked_latest[0] if ranked_latest else None
-    worst_latest = ranked_latest[-1] if ranked_latest else None
-    most_negative = ranked_drift[0] if ranked_drift else None
-    most_positive = ranked_drift[-1] if ranked_drift else None
-    most_stable = ranked_stability[0] if ranked_stability else None
-    if best_latest:
-        lines.append(
-            "- Top latest pass-rate: "
-            + f"{best_latest.get('name', '')} ({_fmt_pct(float(best_latest.get('latest_pass_rate') or 0.0))})."
-        )
-    if worst_latest:
-        lines.append(
-            "- Lowest latest pass-rate: "
-            + f"{worst_latest.get('name', '')} ({_fmt_pct(float(worst_latest.get('latest_pass_rate') or 0.0))})."
-        )
-    if most_positive:
-        drift_pos = float(most_positive.get("drift_delta") or 0.0)
-        lines.append(
-            "- Strongest positive drift: "
-            + f"{most_positive.get('name', '')} ({_fmt_delta(drift_pos, colorize=colorize)})."
-        )
-    if most_negative:
-        drift_neg = float(most_negative.get("drift_delta") or 0.0)
-        lines.append(
-            "- Strongest negative drift: "
-            + f"{most_negative.get('name', '')} ({_fmt_delta(drift_neg, colorize=colorize)})."
-        )
-    if most_stable:
-        std = most_stable.get("stddev_pass_rate")
-        mean = most_stable.get("mean_pass_rate")
-        lines.append(
-            "- Stability leader: "
-            + f"{most_stable.get('name', '')} "
-            + f"(stddev={(float(std) * 100):.2f} pts, mean={_fmt_pct(float(mean)) if isinstance(mean, (int, float)) else 'n/a'})."
-        )
-
-    drift_neg_count = len([value for value in drift_values if value < 0])
-    drift_flat_count = len([value for value in drift_values if value == 0])
-    drift_pos_count = len([value for value in drift_values if value > 0])
-    total = max(1, drift_neg_count + drift_flat_count + drift_pos_count)
-    neg_meter = "█" * int(round((drift_neg_count / total) * 14))
-    flat_meter = "█" * int(round((drift_flat_count / total) * 14))
-    pos_meter = "█" * int(round((drift_pos_count / total) * 14))
-    neg_meter = neg_meter or "·"
-    flat_meter = flat_meter or "·"
-    pos_meter = pos_meter or "·"
-    lines.append("")
-    lines.append("Drift balance meter:")
-    lines.append(
-        "`NEG "
-        + _style_text(neg_meter, "red", colorize)
-        + f" ({drift_neg_count}) | FLAT "
-        + _style_text(flat_meter, "yellow", colorize)
-        + f" ({drift_flat_count}) | POS "
-        + _style_text(pos_meter, "green", colorize)
-        + f" ({drift_pos_count})`"
-    )
-    lines.append("")
-
-    lines.append("## Per-Experiment Details")
-    lines.append("")
-    for experiment in experiments:
-        lines.append(f"### {experiment.get('name', '')}")
-        lines.append("")
-        lines.append("#### Run Timeline")
-        lines.append("")
-        run_rows: list[list[str]] = []
-        runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
-        for idx, run in enumerate(runs, start=1):
-            pass_rate = run.get("pass_rate") if isinstance(run.get("pass_rate"), (int, float)) else None
-            cause_text = _format_failure_cause(run.get("failure_cause"))
-            run_rows.append(
-                [
-                    str(idx),
-                    str(run.get('id', '')),
-                    str(run.get('status', '')),
-                    _fmt_pct(float(pass_rate)) if isinstance(pass_rate, (int, float)) else 'n/a',
-                    f"`{_ascii_bar(float(pass_rate), full_blocks=True, colorize=colorize) if isinstance(pass_rate, (int, float)) else '-'}`",
-                    cause_text or "-",
-                ]
-            )
-        if not runs:
-            run_rows.append(["1", "n/a", "n/a", "n/a", "`-`", "-"])
-        lines.extend(_markdown_table(["#", "Run ID", "Status", "Pass Rate", "ASCII Trend", "Failure Cause"], run_rows, ["right", "left", "left", "right", "left", "left"]))
-        lines.append("")
-        failure_rows: list[list[str]] = []
-        for idx, run in enumerate(runs, start=1):
-            cause = run.get("failure_cause")
-            if not isinstance(cause, dict) or not cause:
-                continue
-            detail = str(cause.get("detail_excerpt") or "").strip()
-            detail_single = " ".join(detail.split())
-            if len(detail_single) > 240:
-                detail_single = detail_single[:237] + "..."
-            failure_rows.append(
-                [
-                    str(idx),
-                    str(run.get("id", "")),
-                    str(cause.get("stage") or "-"),
-                    str(cause.get("type") or "-"),
-                    str(cause.get("message") or "-"),
-                    detail_single or "-",
-                ]
-            )
-        if failure_rows:
-            lines.append("#### Failure Causes")
-            lines.append("")
-            lines.extend(
-                _markdown_table(
-                    ["#", "Run ID", "Stage", "Type", "Message", "Detail Excerpt"],
-                    failure_rows,
-                    ["right", "left", "left", "left", "left", "left"],
-                )
-            )
-            lines.append("")
-            for idx, run in enumerate(runs, start=1):
-                cause = run.get("failure_cause")
-                if not isinstance(cause, dict) or not cause:
-                    continue
-                detail_lines = _failure_cause_detail_lines(cause)
-                if not detail_lines:
-                    continue
-                lines.append(f"<details><summary>Run {idx} failure detail ({run.get('id', '')})</summary>")
-                lines.append("")
-                lines.extend(detail_lines)
-                lines.append("")
-                lines.append("</details>")
-                lines.append("")
-        timeline_values = [
-            float(run.get("pass_rate"))
-            for run in runs
-            if isinstance(run.get("pass_rate"), (int, float))
-        ]
-        lines.append(
-            "Pass-rate sparkline: "
-            + f"`{_sparkline(timeline_values, colorize=colorize) if timeline_values else 'n/a'}`"
-        )
-        lines.append("")
-
-        comparisons = [
-            item for item in (experiment.get("consecutive_comparisons") or [])
-            if isinstance(item, dict)
-        ]
-        lines.append("#### Consecutive Run Deltas (A-B)")
-        lines.append("")
-        comparison_rows: list[list[str]] = []
-        for item in comparisons:
-            run_a = item.get("run_a_pass_rate") if isinstance(item.get("run_a_pass_rate"), (int, float)) else None
-            run_b = item.get("run_b_pass_rate") if isinstance(item.get("run_b_pass_rate"), (int, float)) else None
-            delta = item.get("delta_pass_rate") if isinstance(item.get("delta_pass_rate"), (int, float)) else None
-            comparison_rows.append(
-                [
-                    str(item.get('run_a_id', '')),
-                    str(item.get('run_b_id', '')),
-                    _fmt_pct(float(run_a)) if isinstance(run_a, (int, float)) else 'n/a',
-                    _fmt_pct(float(run_b)) if isinstance(run_b, (int, float)) else 'n/a',
-                    _fmt_delta(float(delta), colorize=colorize) if isinstance(delta, (int, float)) else 'n/a',
-                ]
-            )
-        if not comparisons:
-            comparison_rows.append(["n/a", "n/a", "n/a", "n/a", "n/a"])
-        lines.extend(_markdown_table(["Run A", "Run B", "A Pass", "B Pass", "Delta"], comparison_rows, ["left", "left", "right", "right", "right"]))
-        lines.append("")
-
-    lines.append("## Notes")
-    lines.append("")
-    lines.append("- Drift is computed as latest - baseline.")
-    lines.append("- Baseline uses the first half of fetched runs (minimum 1, maximum 3).")
-    lines.append("- Latest-2 delta uses the latest two runs returned in the fetched window.")
-    lines.append("")
-
-    return "\n".join(lines)
-
-
-def _write_report_csv(report: dict[str, Any], output_path: Path) -> None:
-    experiments = [item for item in (report.get("experiments") or []) if isinstance(item, dict)]
-    fieldnames = [
-        "row_type",
-        "evalset_id",
-        "experiment_id",
-        "experiment_name",
-        "run_index",
-        "run_id",
-        "run_status",
-        "run_pass_rate",
-        "runs_fetched",
-        "runs_total",
-        "baseline_pass_rate",
-        "latest_pass_rate",
-        "drift_delta",
-        "latest_two_delta",
-        "mean_pass_rate",
-        "stddev_pass_rate",
-        "failure_stage",
-        "failure_type",
-        "failure_message",
-        "generated_at",
-    ]
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    with output_path.open("w", encoding="utf-8", newline="") as stream:
-        writer = csv.DictWriter(stream, fieldnames=fieldnames)
-        writer.writeheader()
-        for experiment in experiments:
-            writer.writerow(
-                {
-                    "row_type": "experiment",
-                    "evalset_id": str(report.get("evalset_id", "")),
-                    "experiment_id": str(experiment.get("id", "")),
-                    "experiment_name": str(experiment.get("name", "")),
-                    "run_index": "",
-                    "run_id": "",
-                    "run_status": "",
-                    "run_pass_rate": "",
-                    "runs_fetched": int(experiment.get("runs_fetched") or 0),
-                    "runs_total": int(experiment.get("runs_total") or 0),
-                    "baseline_pass_rate": experiment.get("baseline_pass_rate"),
-                    "latest_pass_rate": experiment.get("latest_pass_rate"),
-                    "drift_delta": experiment.get("drift_delta"),
-                    "latest_two_delta": experiment.get("latest_two_delta"),
-                    "mean_pass_rate": experiment.get("mean_pass_rate"),
-                    "stddev_pass_rate": experiment.get("stddev_pass_rate"),
-                    "failure_stage": "",
-                    "failure_type": "",
-                    "failure_message": "",
-                    "generated_at": str(report.get("generated_at", "")),
-                }
-            )
-            runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
-            for idx, run in enumerate(runs, start=1):
-                cause = run.get("failure_cause") if isinstance(run.get("failure_cause"), dict) else {}
-                writer.writerow(
-                    {
-                        "row_type": "run",
-                        "evalset_id": str(report.get("evalset_id", "")),
-                        "experiment_id": str(experiment.get("id", "")),
-                        "experiment_name": str(experiment.get("name", "")),
-                        "run_index": idx,
-                        "run_id": str(run.get("id", "")),
-                        "run_status": str(run.get("status", "")),
-                        "run_pass_rate": run.get("pass_rate"),
-                        "runs_fetched": int(experiment.get("runs_fetched") or 0),
-                        "runs_total": int(experiment.get("runs_total") or 0),
-                        "baseline_pass_rate": experiment.get("baseline_pass_rate"),
-                        "latest_pass_rate": experiment.get("latest_pass_rate"),
-                        "drift_delta": experiment.get("drift_delta"),
-                        "latest_two_delta": experiment.get("latest_two_delta"),
-                        "mean_pass_rate": experiment.get("mean_pass_rate"),
-                        "stddev_pass_rate": experiment.get("stddev_pass_rate"),
-                        "failure_stage": str(cause.get("stage", "")),
-                        "failure_type": str(cause.get("type", "")),
-                        "failure_message": str(cause.get("message", "")),
-                        "generated_at": str(report.get("generated_at", "")),
-                    }
-                )
-
-
-def _print_report_console(report: dict[str, Any], run_limit: int) -> None:
-    evalset_id = str(report.get("evalset_id", ""))
-    generated_at = str(report.get("generated_at", ""))
-    experiments = [item for item in (report.get("experiments") or []) if isinstance(item, dict)]
-
-    console.rule(f"[bold cyan]Evals Report[/bold cyan] {evalset_id}")
-    console.print(f"Generated at: {generated_at}")
-    console.print(f"Experiments: {len(experiments)} | Run window per experiment: {run_limit}")
-    console.print("")
-
-    overview = Table(title="Experiment Overview")
-    overview.add_column("Experiment", style="white")
-    overview.add_column("Runs", justify="right")
-    overview.add_column("Latest", justify="right")
-    overview.add_column("Baseline", justify="right")
-    overview.add_column("Drift", justify="right")
-    overview.add_column("Latest-2", justify="right")
-    for experiment in experiments:
-        overview.add_row(
-            str(experiment.get("name", "")),
-            f"{int(experiment.get('runs_fetched') or 0)}/{int(experiment.get('runs_total') or 0)}",
-            _fmt_pct(experiment.get("latest_pass_rate") if isinstance(experiment.get("latest_pass_rate"), (int, float)) else None),
-            _fmt_pct(experiment.get("baseline_pass_rate") if isinstance(experiment.get("baseline_pass_rate"), (int, float)) else None),
-            _fmt_delta(experiment.get("drift_delta") if isinstance(experiment.get("drift_delta"), (int, float)) else None, colorize=True),
-            _fmt_delta(experiment.get("latest_two_delta") if isinstance(experiment.get("latest_two_delta"), (int, float)) else None, colorize=True),
-        )
-    console.print(overview)
-
-    ranked_latest = sorted(
-        [item for item in experiments if isinstance(item.get("latest_pass_rate"), (int, float))],
-        key=lambda item: float(item.get("latest_pass_rate") or 0.0),
-        reverse=True,
-    )
-    latest_table = Table(title="By Latest Pass Rate")
-    latest_table.add_column("Rank", justify="right", no_wrap=True)
-    latest_table.add_column("Experiment", style="white")
-    latest_table.add_column("Latest", justify="right", no_wrap=True)
-    for idx, item in enumerate(ranked_latest, start=1):
-        latest_table.add_row(str(idx), str(item.get("name", "")), _fmt_pct(float(item.get("latest_pass_rate") or 0.0)))
-    console.print(latest_table)
-    latest_values = [
-        float(item.get("latest_pass_rate"))
-        for item in ranked_latest
-        if isinstance(item.get("latest_pass_rate"), (int, float))
-    ]
-    console.print("Latest histogram:")
-    for hist_line in _ascii_histogram(
-        latest_values,
-        bins=8,
-        width=20,
-        min_value=0.0,
-        max_value=1.0,
-        full_blocks=True,
-        colorize=True,
-    ):
-        console.print(hist_line)
-
-    ranked_drift = sorted(
-        [item for item in experiments if isinstance(item.get("drift_delta"), (int, float))],
-        key=lambda item: float(item.get("drift_delta") or 0.0),
-    )
-    drift_table = Table(title="By Drift (Negative To Positive)")
-    drift_table.add_column("Rank", justify="right", no_wrap=True)
-    drift_table.add_column("Experiment", style="white")
-    drift_table.add_column("Drift", justify="right", no_wrap=True)
-    for idx, item in enumerate(ranked_drift, start=1):
-        drift_table.add_row(
-            str(idx),
-            str(item.get("name", "")),
-            _fmt_delta(float(item.get("drift_delta") or 0.0), colorize=True),
-        )
-    console.print(drift_table)
-    drift_values = [
-        float(item.get("drift_delta"))
-        for item in ranked_drift
-        if isinstance(item.get("drift_delta"), (int, float))
-    ]
-    console.print("Drift histogram:")
-    for hist_line in _ascii_histogram(
-        drift_values,
-        bins=8,
-        width=20,
-        full_blocks=True,
-        colorize=True,
-        drift_palette=True,
-    ):
-        console.print(hist_line)
-
-    pairwise = _pairwise_latest_deltas(experiments)
-    pairwise_table = Table(title="Pairwise Latest-Pass Deltas")
-    pairwise_table.add_column("Pair", style="white")
-    pairwise_table.add_column("Left", justify="right", no_wrap=True)
-    pairwise_table.add_column("Right", justify="right", no_wrap=True)
-    pairwise_table.add_column("Delta", justify="right", no_wrap=True)
-    for pair in pairwise:
-        pairwise_table.add_row(
-            f"{pair['left']} vs {pair['right']}",
-            _fmt_pct(pair["left_latest"]),
-            _fmt_pct(pair["right_latest"]),
-            _fmt_delta(pair["delta"], colorize=True),
-        )
-    if not pairwise:
-        pairwise_table.add_row("n/a", "n/a", "n/a", "n/a")
-    console.print(pairwise_table)
-
-    if ranked_latest:
-        console.print(
-            "[bold]Insight:[/bold] top latest "
-            f"[green]{ranked_latest[0].get('name', '')}[/green] "
-            f"({_fmt_pct(float(ranked_latest[0].get('latest_pass_rate') or 0.0))})"
-        )
-    if ranked_drift:
-        console.print(
-            "[bold]Insight:[/bold] strongest drift "
-            f"{ranked_drift[-1].get('name', '')} "
-            f"({_fmt_delta(float(ranked_drift[-1].get('drift_delta') or 0.0), colorize=True)})"
-        )
-    console.print("")
-
-    for experiment in experiments:
-        console.print("")
-        console.print(f"[bold]Run Timeline:[/bold] {experiment.get('name', '')}")
-        run_table = Table()
-        run_table.add_column("#", justify="right", style="cyan", no_wrap=True)
-        run_table.add_column("Run ID", style="white", no_wrap=True)
-        run_table.add_column("Status", no_wrap=True)
-        run_table.add_column("Pass Rate", justify="right", no_wrap=True)
-        run_table.add_column("Trend", style="white", no_wrap=True)
-        run_table.add_column("Failure Cause", style="red", overflow="fold")
-
-        runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
-        for idx, run in enumerate(runs, start=1):
-            status_value = str(run.get("status", ""))
-            pass_rate = float(run.get("pass_rate")) if isinstance(run.get("pass_rate"), (int, float)) else None
-            cause_text = _format_failure_cause(run.get("failure_cause"))
-            run_table.add_row(
-                str(idx),
-                str(run.get("id", "")),
-                f"[{_status_style(status_value)}]{status_value}[/{_status_style(status_value)}]",
-                _fmt_pct(pass_rate),
-                _ascii_bar(pass_rate, width=28, full_blocks=True, colorize=True) if pass_rate is not None else "-",
-                cause_text or "-",
-            )
-        if not runs:
-            run_table.add_row("1", "n/a", "n/a", "n/a", "-", "-")
-        console.print(run_table)
-
-        for idx, run in enumerate(runs, start=1):
-            cause = run.get("failure_cause")
-            if not isinstance(cause, dict) or not cause:
-                continue
-            console.print(
-                f"[red bold]Run {idx} failure:[/red bold] "
-                f"[red]{str(cause.get('message') or 'Unknown failure.')}[/red]"
-            )
-            for key, label in (
-                ("stage", "stage"),
-                ("type", "type"),
-                ("execution_url", "execution url"),
-            ):
-                value = str(cause.get(key) or "").strip()
-                if value:
-                    console.print(f"    {label}: {value}")
-            diagnostics = cause.get("diagnostics")
-            if isinstance(diagnostics, dict):
-                for key, label in (
-                    ("agent_runtimes_url", "agent runtimes url"),
-                    ("run_url", "run url"),
-                ):
-                    value = diagnostics.get(key)
-                    if value:
-                        console.print(f"    {label}: {value}")
-                candidate_urls = diagnostics.get("candidate_urls")
-                if isinstance(candidate_urls, list) and candidate_urls:
-                    console.print(f"    candidate urls: {', '.join(str(u) for u in candidate_urls)}")
-                attempts = diagnostics.get("attempts")
-                if isinstance(attempts, list) and attempts:
-                    for attempt in attempts:
-                        if not isinstance(attempt, dict):
-                            continue
-                        outcome = "ok" if attempt.get("ok") else "failed"
-                        console.print(
-                            f"    attempt: {attempt.get('url', '')} -> {outcome} "
-                            f"{attempt.get('error') or ''}".rstrip()
-                        )
-            detail = str(cause.get("detail_excerpt") or "").strip()
-            if detail:
-                console.print(f"    detail: {detail}")
-
-        deltas_table = Table(title="Consecutive Run Deltas")
-        deltas_table.add_column("Run A", style="white", no_wrap=True)
-        deltas_table.add_column("Run B", style="white", no_wrap=True)
-        deltas_table.add_column("A Pass", justify="right", no_wrap=True)
-        deltas_table.add_column("B Pass", justify="right", no_wrap=True)
-        deltas_table.add_column("Delta", justify="right", no_wrap=True)
-        comparisons = [
-            item for item in (experiment.get("consecutive_comparisons") or [])
-            if isinstance(item, dict)
-        ]
-        for item in comparisons:
-            run_a = item.get("run_a_pass_rate") if isinstance(item.get("run_a_pass_rate"), (int, float)) else None
-            run_b = item.get("run_b_pass_rate") if isinstance(item.get("run_b_pass_rate"), (int, float)) else None
-            delta = item.get("delta_pass_rate") if isinstance(item.get("delta_pass_rate"), (int, float)) else None
-            deltas_table.add_row(
-                str(item.get("run_a_id", "")),
-                str(item.get("run_b_id", "")),
-                _fmt_pct(float(run_a)) if isinstance(run_a, (int, float)) else "n/a",
-                _fmt_pct(float(run_b)) if isinstance(run_b, (int, float)) else "n/a",
-                _fmt_delta(float(delta), colorize=True) if isinstance(delta, (int, float)) else "n/a",
-            )
-        if not comparisons:
-            deltas_table.add_row("n/a", "n/a", "n/a", "n/a", "n/a")
-        console.print(deltas_table)
-
-
 @app.callback()
 def evals_callback(ctx: typer.Context) -> None:
     """Evals command group."""
@@ -1237,8 +73,9 @@ def evals_callback(ctx: typer.Context) -> None:
 @app.command(name="ls")
 def evals_ls(
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     run_environment: Optional[str] = typer.Option(None, "--run-environment", help="Filter by run environment (ui/sdk)."),
     kind: Optional[str] = typer.Option(None, "--kind", help="Filter by kind (batch/interactive)."),
     q: Optional[str] = typer.Option(None, "--q", help="Search query."),
@@ -1247,14 +84,15 @@ def evals_ls(
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
 ) -> None:
     """List all evalsets and their experiments."""
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     evalsets_payload = client.evals_list_evals(
         run_environment=run_environment,
         kind=kind,
         q=q,
         limit=limit,
         offset=offset,
-        account_uid=account_uid,
+        account_uid=resolved_account_uid,
     )
     evalsets = [item for item in (evalsets_payload.get("evalsets") or []) if isinstance(item, dict)]
 
@@ -1267,7 +105,7 @@ def evals_ls(
             evalset_id=evalset_id,
             limit=200,
             offset=0,
-            account_uid=account_uid,
+            account_uid=resolved_account_uid,
         )
         experiments_by_evalset[evalset_id] = [
             item
@@ -1316,8 +154,9 @@ def evals_delete_top(
     evalset_id: str = typer.Argument(..., help="Evalset UID to delete."),
     yes: bool = typer.Option(False, "--yes", "-y", help="Skip the confirmation prompt."),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
 ) -> None:
     """Delete an evalset and its associated experiments, runs, and cases."""
     if not yes:
@@ -1325,8 +164,9 @@ def evals_delete_top(
             f"Delete evalset {evalset_id} and all associated experiments, runs, and cases?",
             abort=True,
         )
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
-    payload = client.evals_delete_eval(evalset_id, account_uid=account_uid)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
+    payload = client.evals_delete_eval(evalset_id, account_uid=resolved_account_uid)
     cascade = payload.get("cascade") or {}
     console.print(
         f"[green]Eval deleted:[/green] {evalset_id} "
@@ -1339,8 +179,9 @@ def evals_delete_top(
 @evals_app.command(name="ls")
 def evals_list(
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     run_environment: Optional[str] = typer.Option(None, "--run-environment", help="Filter by run environment (ui/sdk)."),
     kind: Optional[str] = typer.Option(None, "--kind", help="Filter by kind (batch/interactive)."),
     q: Optional[str] = typer.Option(None, "--q", help="Search query."),
@@ -1349,14 +190,15 @@ def evals_list(
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
 ) -> None:
     """List evalsets."""
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     payload = client.evals_list_evals(
         run_environment=run_environment,
         kind=kind,
         q=q,
         limit=limit,
         offset=offset,
-        account_uid=account_uid,
+        account_uid=resolved_account_uid,
     )
     if raw:
         console.print(payload)
@@ -1392,10 +234,26 @@ def evals_create(
     schema_json: Optional[str] = typer.Option(None, "--schema-json", help="Schema JSON object."),
     metadata_json: Optional[str] = typer.Option(None, "--metadata-json", help="Metadata JSON object."),
     cases_file: Optional[str] = typer.Option(None, "--cases-file", help="Path to JSON array of cases."),
+    evalset_evaluator_json: list[str] = typer.Option(
+        [],
+        "--evalset-evaluator-json",
+        help="Repeatable JSON object applied as an evalset-level evaluator for the evalset.",
+    ),
+    report_evaluator_json: list[str] = typer.Option(
+        [],
+        "--report-evaluator-json",
+        help="Repeatable JSON object applied as a report-level evaluator for the evalset.",
+    ),
+    case_evaluator_json: list[str] = typer.Option(
+        [],
+        "--case-evaluator-json",
+        help="Repeatable JSON object applied as a case evaluator to every case in the payload.",
+    ),
     tags: list[str] = typer.Option([], "--tag", help="Repeatable tag."),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
 ) -> None:
     """Create an evalset."""
@@ -1419,6 +277,33 @@ def evals_create(
             raise typer.BadParameter("--cases-file must contain a JSON array")
         cases = [case for case in decoded if isinstance(case, dict)]
 
+    evalset_evaluators = [
+        item for item in (spec.get("evalset_evaluators") or []) if isinstance(item, dict)
+    ]
+    report_evaluators = [
+        item for item in (spec.get("report_evaluators") or []) if isinstance(item, dict)
+    ]
+    evalset_evaluators.extend(
+        _parse_evaluator_specs(evalset_evaluator_json, "--evalset-evaluator-json")
+    )
+    report_evaluators.extend(
+        _parse_evaluator_specs(report_evaluator_json, "--report-evaluator-json")
+    )
+
+    default_case_evaluators = _parse_evaluator_specs(
+        case_evaluator_json,
+        "--case-evaluator-json",
+    )
+    if default_case_evaluators:
+        for case in cases:
+            existing = case.get("evaluators")
+            if isinstance(existing, list):
+                case["evaluators"] = [
+                    item for item in existing if isinstance(item, dict)
+                ] + default_case_evaluators
+            else:
+                case["evaluators"] = list(default_case_evaluators)
+
     resolved_name = str(name or spec.get("name") or "").strip()
     if not resolved_name:
         raise typer.BadParameter("name argument is required unless provided in --spec-file")
@@ -1429,17 +314,20 @@ def evals_create(
     spec_tags = spec.get("tags") if isinstance(spec.get("tags"), list) else []
     resolved_tags = tags if tags else [str(tag) for tag in spec_tags if str(tag).strip()]
 
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     payload = client.evals_create_eval(
         name=resolved_name,
         description=resolved_description,
         run_environment=resolved_run_environment,
         kind=resolved_kind,
         schema=schema,
+        evalset_evaluators=evalset_evaluators,
+        report_evaluators=report_evaluators,
         metadata=metadata,
         tags=resolved_tags,
         cases=cases,
-        account_uid=account_uid,
+        account_uid=resolved_account_uid,
     )
     if raw:
         typer.echo(json.dumps(payload))
@@ -1452,12 +340,14 @@ def evals_create(
 def evals_delete(
     evalset_id: str = typer.Argument(..., help="Evalset ID."),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
 ) -> None:
     """Delete an evalset (cascade delete runs/experiments)."""
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
-    payload = client.evals_delete_eval(evalset_id, account_uid=account_uid)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
+    payload = client.evals_delete_eval(evalset_id, account_uid=resolved_account_uid)
     cascade = payload.get("cascade") or {}
     console.print(
         "[green]Eval deleted.[/green] "
@@ -1471,20 +361,22 @@ def _render_report(
     evalset_id: Optional[str],
     run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     output_file: Optional[str] = typer.Option(None, "--output", help="Write markdown report to file."),
     export: bool = typer.Option(False, "--export", help="Export timestamped report files report-<timestamp>.md and report-<timestamp>.csv."),
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON report output."),
 ) -> None:
     """Generate a full evalset report with cross-experiment comparisons."""
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     resolved_evalset_id = (evalset_id or "").strip()
     if not resolved_evalset_id:
         payload = client.evals_list_evals(
             limit=200,
             offset=0,
-            account_uid=account_uid,
+            account_uid=resolved_account_uid,
         )
         evalsets = [item for item in (payload.get("evalsets") or []) if isinstance(item, dict)]
         if not evalsets:
@@ -1506,7 +398,7 @@ def _updated_key(item: dict[str, Any]) -> str:
         client=client,
         evalset_id=resolved_evalset_id,
         run_limit=run_limit,
-        account_uid=account_uid,
+        account_uid=resolved_account_uid,
     )
     experiments = report.get("experiments") or []
     if not experiments:
@@ -1538,8 +430,9 @@ def evals_report(
     evalset_id: Optional[str] = typer.Argument(None, help="Evalset ID to report. Defaults to latest updated evalset."),
     run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     output_file: Optional[str] = typer.Option(None, "--output", help="Write markdown report to file."),
     export: bool = typer.Option(False, "--export", help="Export timestamped report files report-<timestamp>.md and report-<timestamp>.csv."),
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON report output."),
@@ -1549,7 +442,8 @@ def evals_report(
         evalset_id=evalset_id,
         run_limit=run_limit,
         token=token,
-        ai_agents_url=ai_agents_url,
+        api_key=api_key,
+        billable_account_uid=billable_account_uid,
         account_uid=account_uid,
         output_file=output_file,
         export=export,
@@ -1557,29 +451,79 @@ def evals_report(
     )
 
 
-@evals_app.command(name="compare-report")
-def evals_compare_report_compat(
-    evalset_id: Optional[str] = typer.Argument(None, help="Evalset ID to report. Defaults to latest updated evalset."),
-    run_limit: int = typer.Option(50, "--run-limit", min=2, max=200, help="Runs fetched per experiment."),
-    token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
-    output_file: Optional[str] = typer.Option(None, "--output", help="Write markdown report to file."),
-    export: bool = typer.Option(False, "--export", help="Export timestamped report files report-<timestamp>.md and report-<timestamp>.csv."),
-    raw: bool = typer.Option(False, "--raw", help="Print raw JSON report output."),
+@app.command(name="evaluate")
+def evals_evaluate(
+    evalset_spec: str = typer.Argument(..., help="Path to an evalset spec JSON file (with cases and evaluators)."),
+    outputs_file: str = typer.Option(..., "--outputs", help="JSON file of agent outputs aligned with the evalset cases (list of strings or {text} objects, or {\"outputs\": [...]})."),
+    statuses_file: Optional[str] = typer.Option(None, "--statuses", help="Optional JSON file of per-case run statuses aligned with cases."),
+    output_file: Optional[str] = typer.Option(None, "--output", help="Write the computed metrics JSON to this file."),
+    raw: bool = typer.Option(False, "--raw", help="Print the full metrics JSON."),
 ) -> None:
-    """Compatibility alias for report. Prefer: datalayer evals report <evalset-id>."""
-    console.print("[yellow]Deprecated:[/yellow] use [bold]datalayer evals report <evalset-id>[/bold].")
-    _render_report(
-        evalset_id=evalset_id,
-        run_limit=run_limit,
-        token=token,
-        ai_agents_url=ai_agents_url,
-        account_uid=account_uid,
-        output_file=output_file,
-        export=export,
-        raw=raw,
-    )
+    """Run per-case and global evaluators over real agent outputs.
+
+    Grades the provided outputs against an evalset spec using the shared evals
+    API (``datalayer_core.evals.evaluate_evalset``) and emits run metrics
+    (``case_results`` + ``evaluator_results``). Callers produce outputs and
+    delegate all evaluator execution here instead of re-implementing it.
+    """
+    spec = load_evalset_spec(evalset_spec, require_cases=True)
+    outputs_payload = json.loads(Path(outputs_file).read_text(encoding="utf-8"))
+    if isinstance(outputs_payload, dict) and "outputs" in outputs_payload:
+        outputs = outputs_payload["outputs"]
+    else:
+        outputs = outputs_payload
+    if not isinstance(outputs, list):
+        raise typer.BadParameter('--outputs must be a JSON list (or {"outputs": [...]}).')
+    statuses: Optional[list] = None
+    if statuses_file:
+        statuses_payload = json.loads(Path(statuses_file).read_text(encoding="utf-8"))
+        if isinstance(statuses_payload, dict) and "statuses" in statuses_payload:
+            statuses_payload = statuses_payload["statuses"]
+        if statuses_payload is not None and not isinstance(statuses_payload, list):
+            raise typer.BadParameter("--statuses must be a JSON list.")
+        statuses = statuses_payload
+
+    metrics = evaluate_evalset(spec, outputs, statuses=statuses)
+
+    if output_file:
+        Path(output_file).write_text(json.dumps(metrics, indent=2) + "\n", encoding="utf-8")
+        console.print(f"[green]Metrics written:[/green] {output_file}")
+
+    if raw:
+        console.print_json(json.dumps(metrics))
+        return
+
+    summary = Table(title="Eval Metrics")
+    summary.add_column("Metric", style="cyan")
+    summary.add_column("Value", style="white")
+    summary.add_row("Pass rate", f"{float(metrics.get('pass_rate', 0.0)):.2%}")
+    summary.add_row("Cases", str(metrics.get("total_cases", 0)))
+    summary.add_row("Passed", str(metrics.get("passed", 0)))
+    summary.add_row("Failed", str(metrics.get("failed", 0)))
+    summary.add_row("Avg score", f"{float(metrics.get('avg_score', 0.0)):.4f}")
+    console.print(summary)
+
+    evaluator_results = metrics.get("evaluator_results") or []
+    if evaluator_results:
+        evaluators_table = Table(title="Evaluator Results")
+        evaluators_table.add_column("Evaluator", style="cyan")
+        evaluators_table.add_column("Scope", style="white")
+        evaluators_table.add_column("Score", style="white")
+        evaluators_table.add_column("Passed", style="white")
+        evaluators_table.add_column("Summary", style="white")
+        for item in evaluator_results:
+            if not isinstance(item, dict):
+                continue
+            score = item.get("score")
+            passed = bool(item.get("passed"))
+            evaluators_table.add_row(
+                str(item.get("name", "")),
+                str(item.get("scope", "")),
+                "n/a" if score is None else f"{float(score):.4f}",
+                f"[{'green' if passed else 'red'}]{'pass' if passed else 'fail'}[/{'green' if passed else 'red'}]",
+                str(item.get("summary", "")),
+            )
+        console.print(evaluators_table)
 
 
 @experiments_app.command(name="ls")
@@ -1589,18 +533,20 @@ def experiments_list(
     limit: int = typer.Option(50, "--limit", min=1, max=200),
     offset: int = typer.Option(0, "--offset", min=0),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
 ) -> None:
     """List evalset experiments."""
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     payload = client.evals_list_experiments(
         evalset_id=evalset_id,
         status=status,
         limit=limit,
         offset=offset,
-        account_uid=account_uid,
+        account_uid=resolved_account_uid,
     )
     if raw:
         console.print(payload)
@@ -1630,13 +576,16 @@ def experiments_create(
     evalset_id: Optional[str] = typer.Option(None, "--evalset-id", help="Evalset ID."),
     description: Optional[str] = typer.Option(None, "--description", help="Description."),
     status: Optional[str] = typer.Option(None, "--status", help="Initial status."),
-    spec_file: Optional[str] = typer.Option(None, "--spec-file", help="Path to experiment spec JSON file."),
+    spec_file: Optional[str] = typer.Option(None, "--spec-file", help="Path to experimentspec JSON file."),
+    agent_spec_id: Optional[str] = typer.Option(None, "--agent-spec-id", help="Single agentspec id."),
+    agent_spec_ids: Optional[str] = typer.Option(None, "--agent-spec-ids", help="Comma-separated agentspec ids for multi-experiment creation."),
     config_json: Optional[str] = typer.Option(None, "--config-json", help="Config JSON object."),
     summary_json: Optional[str] = typer.Option(None, "--summary-json", help="Summary JSON object."),
     tags: list[str] = typer.Option([], "--tag", help="Repeatable tag."),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
 ) -> None:
     """Create an evalset experiment."""
@@ -1659,22 +608,64 @@ def experiments_create(
     spec_tags = spec.get("tags") if isinstance(spec.get("tags"), list) else []
     resolved_tags = tags if tags else [str(tag) for tag in spec_tags if str(tag).strip()]
 
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
-    payload = client.evals_create_experiment(
-        name=resolved_name,
-        evalset_id=resolved_evalset_id,
-        description=resolved_description,
-        status=resolved_status,
-        config=resolved_config,
-        summary=resolved_summary,
-        tags=resolved_tags,
-        account_uid=account_uid,
-    )
+    selected_agent_specs = _parse_csv_values(agent_spec_ids)
+    if agent_spec_id:
+        selected_agent_specs = [str(agent_spec_id).strip(), *selected_agent_specs]
+    selected_agent_specs = [value for value in _parse_csv_values(",".join(selected_agent_specs)) if value]
+
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
+    payloads: list[dict[str, Any]] = []
+    targets = selected_agent_specs or [""]
+    for spec_index, target_agent_spec_id in enumerate(targets, start=1):
+        config_payload = dict(resolved_config)
+        summary_payload = dict(resolved_summary)
+        experiment_name = resolved_name
+        if target_agent_spec_id:
+            config_payload["agent_spec_id"] = target_agent_spec_id
+            if not str(config_payload.get("agent_spec_name") or "").strip():
+                config_payload["agent_spec_name"] = target_agent_spec_id
+            summary_payload["agent_spec_id"] = target_agent_spec_id
+            if not str(summary_payload.get("agent_spec_name") or "").strip():
+                summary_payload["agent_spec_name"] = str(config_payload.get("agent_spec_name") or target_agent_spec_id)
+            if len(targets) > 1:
+                experiment_name = f"{resolved_name}-{target_agent_spec_id}"
+                summary_payload["agentspec_variant_index"] = spec_index
+
+        payload = client.evals_create_experiment(
+            name=experiment_name,
+            evalset_id=resolved_evalset_id,
+            description=resolved_description,
+            status=resolved_status,
+            config=config_payload,
+            summary=summary_payload,
+            tags=resolved_tags,
+            account_uid=resolved_account_uid,
+        )
+        payloads.append(payload)
+
     if raw:
-        typer.echo(json.dumps(payload))
+        typer.echo(json.dumps({"experiments": [item.get("experiment") for item in payloads]}))
+        return
+
+    if len(payloads) == 1:
+        experiment = payloads[0].get("experiment") or {}
+        console.print(f"[green]Experiment created:[/green] {experiment.get('id', '')} ({experiment.get('name', '')})")
         return
-    experiment = payload.get("experiment") or {}
-    console.print(f"[green]Experiment created:[/green] {experiment.get('id', '')} ({experiment.get('name', '')})")
+
+    table = Table(title=f"Experiments Created ({len(payloads)})")
+    table.add_column("ID", style="cyan")
+    table.add_column("Name", style="white")
+    table.add_column("Agentspec", style="white")
+    for payload in payloads:
+        experiment = payload.get("experiment") or {}
+        config = experiment.get("config") if isinstance(experiment.get("config"), dict) else {}
+        table.add_row(
+            str(experiment.get("id", "")),
+            str(experiment.get("name", "")),
+            str(config.get("agent_spec_id") or "-"),
+        )
+    console.print(table)
 
 
 @runs_app.command(name="ls")
@@ -1683,17 +674,19 @@ def runs_list(
     limit: int = typer.Option(50, "--limit", min=1, max=200),
     offset: int = typer.Option(0, "--offset", min=0),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
 ) -> None:
     """List runs for an experiment."""
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     payload = client.evals_list_runs(
         experiment_id,
         limit=limit,
         offset=offset,
-        account_uid=account_uid,
+        account_uid=resolved_account_uid,
     )
     if raw:
         console.print(payload)
@@ -1730,8 +723,18 @@ def runs_launch(
     experiment_id: str = typer.Option(..., "--experiment-id", help="Experiment ID."),
     status: str = typer.Option("queued", "--status", help="Initial run status."),
     run_mode: Optional[str] = typer.Option(None, "--run-mode", help="Run mode hint (batch/interactive)."),
-    runtime_pod_name: Optional[str] = typer.Option(None, "--runtime-pod-name", help="Runtime pod for interactive execution."),
+    agent_pod_name: Optional[str] = typer.Option(None, "--agent-pod-name", help="Agent pod for interactive execution."),
     submitted_code_file: Optional[str] = typer.Option(None, "--submitted-code-file", help="Python file to execute in interactive mode."),
+    evalset_evaluator_json: list[str] = typer.Option(
+        [],
+        "--evalset-evaluator-json",
+        help="Repeatable JSON object for evalset-level evaluators attached to this run context.",
+    ),
+    report_evaluator_json: list[str] = typer.Option(
+        [],
+        "--report-evaluator-json",
+        help="Repeatable JSON object for evalset-level report evaluators attached to this run context.",
+    ),
     metrics_json: Optional[str] = typer.Option(None, "--metrics-json", help="Inline metrics JSON object."),
     summary_json: Optional[str] = typer.Option(None, "--summary-json", help="Inline summary JSON object."),
     report_json: Optional[str] = typer.Option(None, "--report-json", help="Inline report JSON object."),
@@ -1741,8 +744,9 @@ def runs_launch(
     started_at: Optional[str] = typer.Option(None, "--started-at", help="ISO timestamp override."),
     ended_at: Optional[str] = typer.Option(None, "--ended-at", help="ISO timestamp override."),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
 ) -> None:
     """Launch an evalset run on SaaS and tag it as CLI-launched."""
     cli_summary: dict[str, Any] = {
@@ -1751,14 +755,27 @@ def runs_launch(
     }
     if run_mode:
         cli_summary["run_mode"] = run_mode
-    if runtime_pod_name:
-        cli_summary["runtime_pod_name"] = runtime_pod_name
+    if agent_pod_name:
+        cli_summary["runtime_pod_name"] = agent_pod_name
     if submitted_code_file:
         path = Path(submitted_code_file)
         if not path.exists():
             raise typer.BadParameter(f"submitted code file not found: {submitted_code_file}")
         cli_summary["submitted_code"] = path.read_text(encoding="utf-8")
 
+    evalset_evaluators = _parse_evaluator_specs(
+        evalset_evaluator_json,
+        "--evalset-evaluator-json",
+    )
+    report_evaluators = _parse_evaluator_specs(
+        report_evaluator_json,
+        "--report-evaluator-json",
+    )
+    if evalset_evaluators:
+        cli_summary["evalset_evaluators"] = evalset_evaluators
+    if report_evaluators:
+        cli_summary["report_evaluators"] = report_evaluators
+
     metrics = _merge_dicts(
         _parse_json_file(metrics_file, "--metrics-file"),
         _parse_json_value(metrics_json, "--metrics-json"),
@@ -1772,8 +789,19 @@ def runs_launch(
         _parse_json_file(report_file, "--report-file"),
         _parse_json_value(report_json, "--report-json"),
     )
+    if evalset_evaluators or report_evaluators:
+        report = _merge_dicts(
+            report,
+            {
+                "evalset_evaluators": {
+                    "evalset_evaluators": evalset_evaluators,
+                    "report_evaluators": report_evaluators,
+                }
+            },
+        )
 
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     payload = client.evals_create_run(
         experiment_id,
         status=status,
@@ -1782,7 +810,7 @@ def runs_launch(
         metrics=metrics,
         summary=summary,
         report=report,
-        account_uid=account_uid,
+        account_uid=resolved_account_uid,
     )
     run = payload.get("run") or {}
     run_id = str(run.get("id", ""))
@@ -1797,16 +825,18 @@ def runs_watch(
     interval_seconds: float = typer.Option(3.0, "--interval", min=0.5, help="Polling interval."),
     timeout_seconds: int = typer.Option(600, "--timeout", min=5, help="Timeout in seconds."),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
 ) -> None:
     """Watch a run until completion/failure."""
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     started = time.time()
     last_status = ""
 
     while True:
-        payload = client.evals_get_run(run_id, account_uid=account_uid)
+        payload = client.evals_get_run(run_id, account_uid=resolved_account_uid)
         run = payload.get("run") or {}
         status = str(run.get("status", "unknown"))
         if status != last_status:
@@ -1837,16 +867,18 @@ def live_targets(
     window: str = typer.Option("24h", "--window", help="Window: 1h, 6h, 24h, 7d, 30d."),
     limit: int = typer.Option(50, "--limit", min=1, max=200),
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
-    ai_agents_url: Optional[str] = typer.Option(None, "--ai-agents-url", help="AI Agents base URL."),
-    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Organization/account UID context."),
+    api_key: Optional[str] = typer.Option(None, "--api-key", help="Authentication API key (alias for --token)."),
+    billable_account_uid: Optional[str] = typer.Option(None, "--billable-account-uid", help="Billable account UID context (organization/team/user)."),
+    account_uid: Optional[str] = typer.Option(None, "--account-uid", help="Deprecated alias for --billable-account-uid."),
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON output."),
 ) -> None:
     """List live monitoring targets."""
-    client = _make_client(token=token, ai_agents_url=ai_agents_url)
+    resolved_account_uid = _resolve_billable_account_uid(billable_account_uid, account_uid)
+    client = _make_client(token=token, api_key=api_key)
     payload = client.evals_list_live_targets(
         window=window,
         limit=limit,
-        account_uid=account_uid,
+        account_uid=resolved_account_uid,
     )
     if raw:
         console.print(payload)
diff --git a/datalayer_core/cli/commands/exec.py b/datalayer_core/cli/commands/exec.py
index 999123c3..6527f3a1 100644
--- a/datalayer_core/cli/commands/exec.py
+++ b/datalayer_core/cli/commands/exec.py
@@ -1,42 +1,53 @@
 # Copyright (c) 2023-2025 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
-"""Execution application for running code in Datalayer runtimes."""
+"""Execution application for running code in Datalayer code sandboxes."""
 
 from __future__ import annotations
 
 import json
 import signal
 import sys
+import tempfile
+import time
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Optional
+from uuid import uuid4
 
 import typer
 from rich.console import Console
+from rich.table import Table
 
 from datalayer_core.client.client import DatalayerClient
 from datalayer_core.console.manager import RuntimeManager
+from datalayer_core.utils.defaults import DEFAULT_ENVIRONMENT
+from datalayer_core.utils.network import fetch
 from datalayer_core.utils.notebook import get_cells
 
 # Create the main Typer app for exec functionality
 app = typer.Typer(
     name="exec",
-    help="Execute files or notebooks on runtimes",
+    help="Execute files or notebooks on code sandboxes",
     invoke_without_command=True,
 )
 
 console = Console()
 
+KERNEL_READY_TIMEOUT_SECONDS = 20.0
+KERNEL_PROBE_TIMEOUT_SECONDS = 20.0
+DEFAULT_EXEC_TIMEOUT_SECONDS = 10.0
+
 
 @app.callback()
 def exec_callback(ctx: typer.Context) -> None:
-    """Execute files or notebooks on runtimes."""
+    """Execute files or notebooks on code sandboxes."""
     if ctx.invoked_subcommand is None:
         typer.echo(ctx.get_help())
 
 
-class RuntimesExecService:
-    """Service for executing files on Datalayer runtimes."""
+class CodeSandboxExecService:
+    """Service for executing files on Datalayer code sandboxes."""
 
     def __init__(self, token: Optional[str] = None) -> None:
         """Initialize the exec service."""
@@ -60,51 +71,89 @@ def handle_sigint(self, *args: Any) -> None:
             # so that the interact loop advances, and prompt is redrawn, etc.
             raise KeyboardInterrupt
 
-    def init_kernel_manager(self, runtime_name: str) -> None:
-        """Initialize the kernel manager and connect to runtime."""
-        try:
-            # Validate runtime only when explicitly provided.
-            # Empty runtime name delegates selection/creation to RuntimeManager.start_kernel.
-            if runtime_name:
-                runtimes = self._client.list_runtimes()
-                target_runtime = None
-
-                for runtime in runtimes:
-                    if runtime.name == runtime_name or runtime.uid == runtime_name:
-                        target_runtime = runtime
-                        break
-
-                if target_runtime is None:
-                    raise RuntimeError(f"Runtime '{runtime_name}' not found")
-
-            # Get token using the same method as DatalayerClient
-            token = self._client._get_token()
-
-            # Create a RuntimeManager with proper credentials
-            self.kernel_manager = RuntimeManager(
-                run_url=self._client.urls.run_url,
-                token=token or "",
-                username="",  # Username is not required for token-based auth
-            )
+    def init_kernel_manager(self, sandbox_name: str) -> None:
+        """Initialize the kernel manager and connect to a code sandbox."""
+        max_attempts = 2
+        last_error: Exception | None = None
+        # Set up signal handler once.
+        signal.signal(signal.SIGINT, self.handle_sigint)
 
-            # Set up signal handler
-            signal.signal(signal.SIGINT, self.handle_sigint)
+        for attempt in range(1, max_attempts + 1):
+            try:
+                # Validate sandbox only when explicitly provided.
+                # Empty sandbox name delegates selection/creation to RuntimeManager.start_kernel.
+                if sandbox_name:
+                    runtimes = self._client.list_runtimes()
+                    target_sandbox = None
 
-            # Start kernel and get client
-            self.kernel_manager.start_kernel(name=runtime_name or "")
-            self.kernel_client = self.kernel_manager.client
+                    for runtime in runtimes:
+                        if runtime.name == sandbox_name or runtime.uid == sandbox_name:
+                            target_sandbox = runtime
+                            break
 
-            if self.kernel_client:
-                self.kernel_client.start_channels()
-                console.print(
-                    f"[green]Connected to runtime: {runtime_name or 'auto-selected'}[/green]"
+                    if target_sandbox is None:
+                        raise RuntimeError(f"Code sandbox '{sandbox_name}' not found")
+
+                # Get token using the same method as DatalayerClient
+                token = self._client._get_token()
+
+                # Create a RuntimeManager with proper credentials
+                self.kernel_manager = RuntimeManager(
+                    run_url=self._client.urls.run_url,
+                    token=token or "",
+                    username="",  # Username is not required for token-based auth
                 )
-            else:
-                raise RuntimeError("Failed to create kernel client")
 
-        except Exception as e:
+                # Start kernel and get client
+                self.kernel_manager.start_kernel(name=sandbox_name or "")
+
+                if bool(getattr(self.kernel_manager, "runtime_created_in_start", False)):
+                    self._inspect_created_code_sandbox_kernels()
+
+                self.kernel_client = self.kernel_manager.client
+
+                if not self.kernel_client:
+                    raise RuntimeError("Failed to create kernel client")
+
+                self.kernel_client.start_channels()
+                # Fresh runtimes can report healthy before the kernel channels are
+                # fully ready for requests. Wait explicitly to avoid hanging on
+                # the first execute call.
+                self.kernel_client.wait_for_ready(timeout=KERNEL_READY_TIMEOUT_SECONDS)
+                self._probe_kernel_execution()
+                manager_runtime_name = str(getattr(self.kernel_manager, "runtime_name", "") or sandbox_name or "auto-selected")
+                manager_runtime_uid = str(getattr(self.kernel_manager, "runtime_uid", "") or "")
+                manager_kernel_id = str(getattr(self.kernel_manager, "_kernel_id", "") or "")
+                if manager_runtime_uid or manager_kernel_id:
+                    runtime_ref = f"{manager_runtime_uid}#{manager_kernel_id}".strip("#")
+                    console.print(
+                        f"[green]Connected to code sandbox: {manager_runtime_name} ({runtime_ref})[/green]"
+                    )
+                else:
+                    console.print(
+                        f"[green]Connected to code sandbox: {sandbox_name or 'auto-selected'}[/green]"
+                    )
+                return
+            except Exception as e:
+                last_error = e
+                self.cleanup()
+                self.kernel_manager = None
+                self.kernel_client = None
+                if attempt < max_attempts:
+                    console.print(
+                        "[yellow]Kernel not ready yet, retrying connection...[/yellow]"
+                    )
+                    time.sleep(1.5 * attempt)
+                    continue
+                break
+
+        if last_error is None:
+            last_error = RuntimeError("Unknown code sandbox initialization failure")
+
+        e = last_error
+        try:
             console.print(
-                f"[red]Failed to connect to runtime '{runtime_name}': {e}[/red]"
+                f"[red]Failed to connect to code sandbox '{sandbox_name}': {e}[/red]"
             )
 
             # Provide helpful authentication guidance
@@ -117,18 +166,82 @@ def init_kernel_manager(self, runtime_name: str) -> None:
                     "[yellow]  2. Set DATALAYER_API_KEY environment variable[/yellow]"
                 )
                 console.print("[yellow]  3. Use --token option if available[/yellow]")
-
+        finally:
             raise typer.Exit(1)
 
+    def _inspect_created_code_sandbox_kernels(self) -> None:
+        """Inspect kernels after sandbox auto-creation and fail fast when count != 1."""
+        if not self.kernel_manager:
+            raise RuntimeError("Code sandbox manager is not initialized")
+
+        server_url = str(getattr(self.kernel_manager, "server_url", "") or "").rstrip("/")
+        sandbox_token = str(getattr(self.kernel_manager, "token", "") or "")
+        sandbox_name = str(getattr(self.kernel_manager, "runtime_name", "") or "")
+        sandbox_uid = str(getattr(self.kernel_manager, "runtime_uid", "") or "")
+        sandbox_pod = str(getattr(self.kernel_manager, "runtime_pod_name", "") or "")
+
+        response = fetch(f"{server_url}/api/kernels", token=sandbox_token, timeout=15)
+        kernels = response.json() if response.content else []
+        if not isinstance(kernels, list):
+            kernels = []
+
+        summary = Table(title="Code Sandbox Inspection (auto-created by exec)")
+        summary.add_column("Field", style="cyan")
+        summary.add_column("Value")
+        summary.add_row("Code Sandbox", sandbox_name or sandbox_pod)
+        summary.add_row("Pod", sandbox_pod)
+        summary.add_row("UID", sandbox_uid)
+        summary.add_row("Ingress", server_url)
+        summary.add_row("Kernels", str(len(kernels)))
+        console.print(summary)
+
+        code_sandboxes_table = Table(title="Available Code Sandboxes")
+        code_sandboxes_table.add_column("ID", style="green")
+        code_sandboxes_table.add_column("Name")
+        code_sandboxes_table.add_column("State")
+        code_sandboxes_table.add_column("Connections")
+        code_sandboxes_table.add_column("Last Activity")
+        for kernel in kernels:
+            code_sandboxes_table.add_row(
+                str((kernel or {}).get("id") or ""),
+                str((kernel or {}).get("name") or ""),
+                str((kernel or {}).get("execution_state") or ""),
+                str((kernel or {}).get("connections") or "0"),
+                str((kernel or {}).get("last_activity") or ""),
+            )
+        if kernels:
+            console.print(code_sandboxes_table)
+
+        if len(kernels) != 1:
+            raise RuntimeError(
+                f"Auto-created code sandbox expected exactly one kernel, found {len(kernels)}"
+            )
+
+    def _probe_kernel_execution(self) -> None:
+        """Validate the kernel can execute a trivial statement before running user code."""
+        if not self.kernel_client:
+            raise RuntimeError("Kernel client not initialized")
+
+        def _noop_output_hook(msg: dict[str, Any]) -> None:
+            # A stream-based probe validates the same IOPub path used by cells.
+            _ = msg
+
+        self.kernel_client.execute_interactive(
+            "print('__datalayer_probe__')",
+            silent=False,
+            timeout=KERNEL_PROBE_TIMEOUT_SECONDS,
+            output_hook=_noop_output_hook,
+        )
+
     def execute_file(
         self,
         filepath: Path,
         silent: bool = True,
         timeout: Optional[float] = None,
         raise_exceptions: bool = False,
-    ) -> None:
+    ) -> dict[str, Any]:
         """
-        Execute a file or notebook on the connected runtime.
+        Execute a file or notebook on the connected code sandbox.
 
         Parameters
         ----------
@@ -144,19 +257,35 @@ def execute_file(
         if not self.kernel_client:
             raise RuntimeError("Kernel client not initialized")
 
+        report: dict[str, Any] = {
+            "input_file": str(filepath),
+            "cells": [],
+        }
+
         try:
             self._executing = True
             console.print(f"[blue]Executing file: {filepath}[/blue]")
 
+            # Guardrail: ensure the selected code sandbox endpoint is reachable
+            # before submitting any execute requests.
+            self._assert_code_sandbox_alive()
+            self._prepare_kernel_before_execution()
+
             # Get cells from the file
             cells = list(get_cells(filepath))
 
             if not cells:
                 console.print("[yellow]No executable cells found in file[/yellow]")
-                return
+                return report
 
             total_cells = len(cells)
             console.print(f"[blue]Found {total_cells} cell(s) to execute[/blue]")
+            failed_cells = 0
+            effective_timeout = (
+                float(timeout)
+                if timeout is not None
+                else DEFAULT_EXEC_TIMEOUT_SECONDS
+            )
 
             # Execute each cell
             for i, (cell_id, cell_source) in enumerate(cells, 1):
@@ -164,11 +293,69 @@ def execute_file(
                     continue
 
                 console.print(f"[blue]Executing cell {i}/{total_cells}...[/blue]")
+                self._print_cell_source(i, cell_source)
+                captured_outputs: list[dict[str, Any]] = []
+
+                def output_hook(msg: dict[str, Any]) -> None:
+                    msg_type = str(msg.get("msg_type") or "")
+                    content = msg.get("content") or {}
+
+                    if msg_type == "stream":
+                        captured_outputs.append(
+                            {
+                                "output_type": "stream",
+                                "name": content.get("name", "stdout"),
+                                "text": content.get("text", ""),
+                            }
+                        )
+                        return
+
+                    if msg_type in {"display_data", "execute_result"}:
+                        data = content.get("data") or {}
+                        captured_outputs.append(
+                            {
+                                "output_type": msg_type,
+                                "data": data,
+                                "metadata": content.get("metadata") or {},
+                                "execution_count": content.get("execution_count"),
+                            }
+                        )
+                        return
+
+                    if msg_type == "error":
+                        captured_outputs.append(
+                            {
+                                "output_type": "error",
+                                "ename": content.get("ename"),
+                                "evalue": content.get("evalue"),
+                                "traceback": content.get("traceback") or [],
+                            }
+                        )
+
+                cell_report: dict[str, Any] = {
+                    "cell_index": i,
+                    "cell_id": cell_id,
+                    "status": "ok",
+                    "outputs": captured_outputs,
+                }
 
                 try:
-                    reply = self.kernel_client.execute_interactive(
-                        cell_source, silent=silent, timeout=timeout
-                    )
+                    try:
+                        reply = self.kernel_client.execute_interactive(
+                            cell_source,
+                            silent=silent,
+                            timeout=effective_timeout,
+                            output_hook=output_hook,
+                        )
+                    except TypeError:
+                        # Backward compatibility when output_hook is not available.
+                        reply = self.kernel_client.execute_interactive(
+                            cell_source,
+                            silent=silent,
+                            timeout=effective_timeout,
+                        )
+
+                    cell_report["reply"] = reply.get("content") if isinstance(reply, dict) else {}
 
                     if raise_exceptions and reply["content"]["status"] != "ok":
                         content = reply["content"]
@@ -186,6 +373,8 @@ def execute_file(
                                 f"Unknown failure: {json.dumps(content)}"
                             )
 
+                    self._print_cell_outputs(i, captured_outputs)
+
                     # Show success for each cell if not silent
                     if not silent:
                         status = reply["content"]["status"]
@@ -198,13 +387,34 @@ def execute_file(
                                 f"[yellow]⚠ Cell {i} completed with status: {status}[/yellow]"
                             )
 
+                    if reply["content"].get("status") != "ok":
+                        cell_report["status"] = str(reply["content"].get("status") or "error")
+                        failed_cells += 1
+
                 except Exception as e:
                     if raise_exceptions:
                         raise
+                    failed_cells += 1
+                    cell_report["status"] = "error"
+                    cell_report["error"] = str(e)
                     console.print(f"[yellow]Warning: Cell {i} failed: {e}[/yellow]")
+                finally:
+                    report["cells"].append(cell_report)
 
-            console.print("[green]✓ Execution completed successfully[/green]")
+            if failed_cells > 0:
+                console.print(
+                    f"[red]Execution completed with {failed_cells} failed cell(s).[/red]"
+                )
+                report["failed_cells"] = failed_cells
+                report["success"] = False
+            else:
+                console.print("[green]✓ Execution completed successfully[/green]")
+                report["failed_cells"] = 0
+                report["success"] = True
+            return report
 
+        except typer.Exit:
+            raise
         except Exception as e:
             if raise_exceptions:
                 raise
@@ -213,6 +423,129 @@ def execute_file(
         finally:
             self._executing = False
 
+    def _print_cell_outputs(self, cell_index: int, outputs: list[dict[str, Any]]) -> None:
+        """Print a cell's collected outputs, emitting kernel-produced text literally."""
+        if not outputs:
+            console.print(f"[dim]Cell {cell_index} output: (no output)[/dim]")
+            return
+        # Kernel text goes out with markup=False/emoji=False so "a[i]" or ":smile:" print as-is.
+        console.print(f"[cyan]Cell {cell_index} output:[/cyan]")
+        for output in outputs:
+            output_type = str(output.get("output_type") or "")
+            if output_type == "stream":
+                text = str(output.get("text") or "").rstrip("\n")
+                if text:
+                    console.print(text, markup=False, emoji=False)
+                continue
+
+            if output_type in {"display_data", "execute_result"}:
+                data = output.get("data") or {}
+                text_plain = ""
+                if isinstance(data, dict):
+                    text_plain = str(data.get("text/plain") or "").rstrip("\n")
+                if text_plain:
+                    console.print(text_plain, markup=False, emoji=False)
+                else:
+                    console.print(json.dumps(output, ensure_ascii=False), markup=False, emoji=False)
+                continue
+
+            if output_type == "error":
+                traceback = output.get("traceback") or []
+                if traceback:
+                    console.print("\n".join(str(line) for line in traceback), style="red", markup=False, emoji=False)
+                else:
+                    ename = str(output.get("ename") or "Error")
+                    evalue = str(output.get("evalue") or "")
+                    console.print(f"{ename}: {evalue}", style="red", markup=False, emoji=False)
+                continue
+
+            console.print(json.dumps(output, ensure_ascii=False), markup=False, emoji=False)
+
+    def _print_cell_source(self, cell_index: int, source: str) -> None:
+        """Print the cell source verbatim (Rich markup/emoji parsing off, so ``x[i]`` stays intact)."""
+        console.print(f"[cyan]Cell {cell_index} source:[/cyan]")
+        console.print("[dim]<code>[/dim]")
+        console.print(source.rstrip("\n"), markup=False, emoji=False)
+        console.print("[dim]</code>[/dim]")
+
+    def _assert_code_sandbox_alive(self) -> None:
+        """Probe the sandbox's ``/api/kernels`` endpoint, retrying before raising RuntimeError."""
+        manager = self.kernel_manager
+        if not manager:
+            raise RuntimeError("Code sandbox manager is not initialized")
+        base_url = str(getattr(manager, "server_url", "") or "").rstrip("/")
+        auth = str(getattr(manager, "token", "") or "")
+        if not base_url:
+            raise RuntimeError("Code sandbox endpoint is not available")
+
+        # Up to five probes with a linearly growing pause (0.4s, 0.8s, ...) between them.
+        max_tries = 5
+        failure: Exception | None = None
+        for nth in range(1, max_tries + 1):
+            try:
+                fetch(f"{base_url}/api/kernels", token=auth, timeout=15)
+            except Exception as exc:
+                failure = exc
+                if nth == max_tries:
+                    break
+                time.sleep(0.4 * nth)
+            else:
+                return
+
+        message = f"Code sandbox health check failed for '{base_url}': {failure}"
+        raise RuntimeError(message) from failure
+
+    def _prepare_kernel_before_execution(self) -> None:
+        """Fetch the sandbox's kernel list and print it ahead of execution."""
+        # Positional arguments map onto (title, kernels) of the printer.
+        self._print_available_kernels(
+            "Kernels available before execution:",
+            self._fetch_code_sandbox_kernels(),
+        )
+
+    def _fetch_code_sandbox_kernels(self) -> list[dict[str, Any]]:
+        """Return the dict entries of the sandbox's ``/api/kernels`` listing ([] without manager/URL)."""
+        manager = self.kernel_manager
+        if not manager:
+            return []
+        base_url = str(getattr(manager, "server_url", "") or "").rstrip("/")
+        auth = str(getattr(manager, "token", "") or "")
+        if not base_url:
+            return []
+
+        reply = fetch(f"{base_url}/api/kernels", token=auth, timeout=15)
+        # An empty body is treated as "no kernels" rather than parsed as JSON.
+        payload = reply.json() if reply.content else []
+        entries = payload if isinstance(payload, list) else []
+        return list(filter(lambda entry: isinstance(entry, dict), entries))
+
+    def _print_available_kernels(
+        self,
+        title: str,
+        kernels: list[dict[str, Any]],
+    ) -> None:
+        """List kernels sorted by id, flagging the manager's own kernel with ``*``."""
+        current_id = str(getattr(self.kernel_manager, "_kernel_id", "") or "")
+        if not kernels:
+            console.print(f"[yellow]{title} none[/yellow]")
+            return
+
+        def _text(entry: Any, key: str) -> str:
+            # Missing/falsy fields (and falsy entries) are rendered as an empty string.
+            return str((entry or {}).get(key) or "")
+
+        console.print(f"[blue]{title}[/blue]")
+        for entry in sorted(kernels, key=lambda item: _text(item, "id")):
+            kid = _text(entry, "id")
+            name = _text(entry, "name")
+            state = _text(entry, "execution_state")
+            conns = (entry or {}).get("connections")
+            seen = _text(entry, "last_activity")
+            flag = "*" if current_id and kid == current_id else " "
+            console.print(
+                f"  [{flag}] id={kid} name={name} state={state} connections={conns} last_activity={seen}"
+            )
+
     def cleanup(self) -> None:
         """Clean up resources."""
         if self.kernel_client:
@@ -225,18 +558,24 @@ def cleanup(self) -> None:
 # Main execution function decorated as the default command
 @app.command()
 def main(
-    filename: str = typer.Argument(..., help="Path to the file or notebook to execute"),
-    runtime: Optional[str] = typer.Option(
+    filename: Optional[str] = typer.Argument(
         None,
-        "--runtime",
-        "-r",
-        help="Name of the runtime to execute on (uses first available if not specified)",
+        help="Path to the file or notebook to execute",
+    ),
+    sandbox: Optional[str] = typer.Option(
+        None,
+        "--sandbox",
+        "-s",
+        help="Name of the code sandbox to execute on (uses first available if not specified)",
     ),
     verbose: bool = typer.Option(
         False, "--verbose", "-v", help="Show all cell outputs"
     ),
     timeout: Optional[float] = typer.Option(
-        None, "--timeout", "-t", help="Execution timeout for each cell in seconds"
+        None,
+        "--timeout",
+        "-t",
+        help="Execution timeout for each cell in seconds",
     ),
     raise_exceptions: bool = typer.Option(
         False, "--raise", help="Stop executing if an exception occurs"
@@ -246,66 +585,254 @@ def main(
         "--token",
         help="Authentication token (Bearer token for API requests).",
     ),
+    api_key: Optional[str] = typer.Option(
+        None,
+        "--api-key",
+        help="Authentication API key (alias for --token).",
+    ),
+    example_notebook: bool = typer.Option(
+        False,
+        "--example-notebook",
+        help="Create a temporary example notebook, execute it, then remove it.",
+    ),
+    example_py: bool = typer.Option(
+        False,
+        "--example-py",
+        help="Create a temporary example Python file, execute it, then remove it.",
+    ),
+    output_name: Optional[str] = typer.Option(
+        None,
+        "--output-name",
+        help="Output report filename/path. Defaults to <input-name>.out.json next to the input file.",
+    ),
 ) -> None:
-    """Execute a Python file or Jupyter notebook on a Datalayer runtime."""
-
-    # Resolve file path
-    filepath = Path(filename).expanduser().resolve()
+    """Execute a Python file or Jupyter notebook on a Datalayer code sandbox."""
 
-    # Check if file exists and is readable
-    if not filepath.exists():
-        console.print(f"[red]Error: File '{filepath}' does not exist[/red]")
-        raise typer.Exit(1)
+    auth_token = token or api_key
 
-    if not filepath.is_file():
-        console.print(f"[red]Error: '{filepath}' is not a file[/red]")
+    if example_notebook and example_py:
+        console.print(
+            "[red]Error: --example-notebook and --example-py are mutually exclusive[/red]"
+        )
         raise typer.Exit(1)
 
-    try:
-        with filepath.open("rb"):
-            pass
-    except Exception as e:
+    if filename and (example_notebook or example_py):
         console.print(
-            f"[red]Error: Could not open file '{filepath}' for reading: {e}[/red]"
+            "[red]Error: provide either a filename or one --example-* flag, not both[/red]"
         )
         raise typer.Exit(1)
 
-    # Check file extension
-    if filepath.suffix not in [".py", ".ipynb"]:
+    if not filename and not example_notebook and not example_py:
         console.print(
-            f"[yellow]Warning: File extension '{filepath.suffix}' is not .py or .ipynb[/yellow]"
+            "[red]Error: missing FILE_PATH or an --example-* option[/red]"
         )
+        raise typer.Exit(1)
 
-    # Determine which runtime to use
-    selected_runtime = runtime
-    if selected_runtime is None:
-        selected_runtime = _select_runtime(token=token)
-
-    # Create exec service and execute
-    exec_service = RuntimesExecService(token=token)
+    generated_example = False
+    filepath: Path
+    if example_notebook:
+        filepath = _create_example_notebook_file()
+        generated_example = True
+        console.print(f"[blue]Generated example notebook: {filepath}[/blue]")
+    elif example_py:
+        filepath = _create_example_python_file()
+        generated_example = True
+        console.print(f"[blue]Generated example Python file: {filepath}[/blue]")
+    else:
+        # Resolve file path
+        filepath = Path(str(filename)).expanduser().resolve()
 
     try:
-        # Initialize connection to runtime
-        exec_service.init_kernel_manager(selected_runtime)
-
-        # Execute the file
-        exec_service.execute_file(
-            filepath=filepath,
-            silent=not verbose,
-            timeout=timeout,
-            raise_exceptions=raise_exceptions,
-        )
+        # Check if file exists and is readable
+        if not filepath.exists():
+            console.print(f"[red]Error: File '{filepath}' does not exist[/red]")
+            raise typer.Exit(1)
 
+        if not filepath.is_file():
+            console.print(f"[red]Error: '{filepath}' is not a file[/red]")
+            raise typer.Exit(1)
+
+        try:
+            with filepath.open("rb"):
+                pass
+        except Exception as e:
+            console.print(
+                f"[red]Error: Could not open file '{filepath}' for reading: {e}[/red]"
+            )
+            raise typer.Exit(1)
+
+        # Check file extension
+        if filepath.suffix not in [".py", ".ipynb"]:
+            console.print(
+                f"[yellow]Warning: File extension '{filepath.suffix}' is not .py or .ipynb[/yellow]"
+            )
+
+        # Determine which code sandbox to use
+        selected_sandbox = sandbox
+        if selected_sandbox is None:
+            selected_sandbox = _select_code_sandbox(token=auth_token)
+
+        # Create exec service and execute
+        exec_service = CodeSandboxExecService(token=auth_token)
+
+        try:
+            # Initialize connection to code sandbox
+            exec_service.init_kernel_manager(selected_sandbox)
+
+            # Execute the file
+            execution_report = exec_service.execute_file(
+                filepath=filepath,
+                silent=not verbose,
+                timeout=timeout,
+                raise_exceptions=raise_exceptions,
+            )
+
+            report_path = _resolve_output_report_path(filepath, output_name)
+            execution_report["output_file"] = str(report_path)
+            report_path.write_text(
+                json.dumps(execution_report, indent=2, ensure_ascii=False),
+                encoding="utf-8",
+            )
+            console.print(f"[green]Saved execution outputs: {report_path}[/green]")
+            console.print(f"[green]Full output report path: {report_path.resolve()}[/green]")
+            if int(execution_report.get("failed_cells") or 0) > 0:
+                raise typer.Exit(1)
+
+        finally:
+            # Always cleanup
+            exec_service.cleanup()
     finally:
-        # Always cleanup
-        exec_service.cleanup()
+        if generated_example:
+            try:
+                filepath.unlink(missing_ok=True)
+            except Exception as e:
+                console.print(
+                    f"[yellow]Warning: could not remove temporary example file '{filepath}': {e}[/yellow]"
+                )
+
+
+def _example_file_path(suffix: str) -> Path:
+    """Build a unique temp-dir path: ``datalayer-exec-example-<UTC stamp>-<8 hex chars><suffix>``."""
+    stamp, token = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ"), uuid4().hex[:8]
+    return Path(tempfile.gettempdir(), f"datalayer-exec-example-{stamp}-{token}{suffix}")
 
 
-def _select_runtime(token: Optional[str] = None) -> str:
+def _create_example_python_file() -> Path:  # writes a sample pandas script to a new temp .py file; returns its path
+    path = _example_file_path(".py")  # unique file name under the system temp directory
+    path.write_text(  # the adjacent string literals below concatenate into the whole script
+        "import json\n"
+        "import pandas as pd\n\n"  # the generated script needs pandas wherever it is executed
+        "pd.set_option('display.max_rows', None)\n"
+        "pd.set_option('display.max_columns', None)\n"
+        "pd.set_option('display.width', None)\n\n"
+        "print('Python example: building sample sales dataframe')\n"
+        "df = pd.DataFrame({\n"
+        "    'day': ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'],\n"
+        "    'region': ['north', 'north', 'south', 'south', 'west', 'west'],\n"
+        "    'orders': [12, 14, 8, 11, 9, 15],\n"
+        "    'revenue': [240, 310, 175, 220, 190, 360],\n"
+        "})\n\n"
+        "print('DataFrame:')\n"
+        "print(df.to_string(index=False))\n\n"
+        "print('Grouped summary by region:')\n"
+        "summary = (\n"
+        "    df.groupby('region', as_index=False)\n"
+        "      .agg(total_orders=('orders', 'sum'), total_revenue=('revenue', 'sum'))\n"
+        "      .sort_values('total_revenue', ascending=False)\n"
+        ")\n"
+        "print(summary.to_string(index=False))\n\n"
+        "payload = {\n"
+        "    'rows': int(len(df)),\n"
+        "    'best_region': str(summary.iloc[0]['region']),\n"
+        "    'total_revenue': int(df['revenue'].sum()),\n"
+        "}\n"
+        "print('JSON summary:')\n"
+        "print(json.dumps(payload, indent=2))\n",
+        encoding="utf-8",
+    )
+    return path  # the caller is responsible for deleting the file
+
+
+def _create_example_notebook_file() -> Path:  # writes a three-cell sample notebook to a new temp .ipynb file; returns its path
+    path = _example_file_path(".ipynb")  # unique file name under the system temp directory
+    notebook_payload = {  # nbformat 4.5 document: three code cells that share kernel state
+        "cells": [
+            {
+                "id": f"cell-{uuid4().hex[:8]}",
+                "cell_type": "code",
+                "execution_count": None,
+                "metadata": {"id": f"cell-{uuid4().hex[:8]}", "language": "python"},  # NOTE(review): separate uuid4() call, so this differs from the cell "id" above - confirm intended
+                "outputs": [],
+                "source": [
+                    "import pandas as pd\n",
+                    "pd.set_option('display.max_rows', None)\n",
+                    "pd.set_option('display.max_columns', None)\n",
+                    "pd.set_option('display.width', None)\n",
+                    "print('Notebook example: pandas setup complete')\n",
+                ],
+            },
+            {
+                "id": f"cell-{uuid4().hex[:8]}",
+                "cell_type": "code",
+                "execution_count": None,
+                "metadata": {"id": f"cell-{uuid4().hex[:8]}", "language": "python"},
+                "outputs": [],
+                "source": [  # uses ``pd`` imported by the first cell
+                    "df = pd.DataFrame({\n",
+                    "    'day': ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'],\n",
+                    "    'region': ['north', 'north', 'south', 'south', 'west', 'west'],\n",
+                    "    'orders': [12, 14, 8, 11, 9, 15],\n",
+                    "    'revenue': [240, 310, 175, 220, 190, 360],\n",
+                    "})\n",
+                    "print('Raw dataframe:')\n",
+                    "print(df.to_string(index=False))\n",
+                ],
+            },
+            {
+                "id": f"cell-{uuid4().hex[:8]}",
+                "cell_type": "code",
+                "execution_count": None,
+                "metadata": {"id": f"cell-{uuid4().hex[:8]}", "language": "python"},
+                "outputs": [],
+                "source": [  # uses ``df`` built by the second cell
+                    "summary = (\n",
+                    "    df.groupby('region', as_index=False)\n",
+                    "      .agg(total_orders=('orders', 'sum'), total_revenue=('revenue', 'sum'))\n",
+                    "      .sort_values('total_revenue', ascending=False)\n",
+                    ")\n",
+                    "print('Revenue summary by region:')\n",
+                    "print(summary.to_string(index=False))\n",
+                    "print('Top region:', summary.iloc[0]['region'])\n",
+                ],
+            },
+        ],
+        "metadata": {},
+        "nbformat": 4,
+        "nbformat_minor": 5,
+    }
+    path.write_text(json.dumps(notebook_payload), encoding="utf-8")  # compact JSON, no indentation
+    return path  # the caller is responsible for deleting the file
+
+
+def _resolve_output_report_path(filepath: Path, output_name: Optional[str]) -> Path:
+    """Pick where the JSON execution report is written.
+
+    An explicit ``output_name`` wins (``~`` expanded; relative names are placed
+    next to ``filepath``); otherwise the input's last suffix is swapped for
+    ``.out.json``, e.g. ``script.py`` -> ``script.out.json``.
+    """
+    if not output_name:
+        return filepath.with_suffix(".out.json")
+    # Joining onto the parent leaves an absolute override untouched (pathlib semantics).
+    return filepath.parent / Path(output_name).expanduser()
+
+
+def _select_code_sandbox(token: Optional[str] = None) -> str:
     """
-    Select a runtime to use for execution.
+    Select a code sandbox to use for execution.
 
-    Returns the first available runtime, or prompts to create one if none exist.
+    Returns the first available code sandbox, or interactively provisions one when
+    no code sandbox is available.
 
     Parameters
     ----------
@@ -315,21 +842,147 @@ def _select_runtime(token: Optional[str] = None) -> str:
     Returns
     -------
     str
-        The name/ID of the runtime to use.
+        The name/ID of the code sandbox to use.
     """
     try:
         client = DatalayerClient(token=token)
         runtimes = client.list_runtimes()
 
         if not runtimes:
-            # Return an empty runtime name to trigger RuntimeManager's built-in
-            # interactive flow that can launch a runtime from an environment.
-            return ""
+            console.print("[yellow]No code sandbox is running.[/yellow]")
+
+            should_create = typer.confirm(
+                "No code sandbox is available. Create one now?",
+                default=True,
+            )
+            if not should_create:
+                console.print("[red]Execution aborted: no code sandbox selected.[/red]")
+                raise typer.Exit(1)
+
+            environment = DEFAULT_ENVIRONMENT
+            burn_rate = _get_environment_burning_rate(client, environment)
+            remaining_credits = _get_remaining_credits_after_reservations(client)
+            default_seconds = _default_code_sandbox_seconds(
+                remaining_credits=remaining_credits,
+                burn_rate=burn_rate,
+            )
+
+            console.print(
+                f"[blue]Environment: {environment} (burning_rate={burn_rate:.6f} credits/s)[/blue]"
+            )
+            console.print(
+                f"[blue]Remaining credits (after reservations): {remaining_credits:.6f}[/blue]"
+            )
+            console.print(
+                f"[blue]Suggested code sandbox duration: {default_seconds:.2f} seconds (33% of remaining credits)[/blue]"
+            )
+
+            requested_seconds = typer.prompt(
+                "Code sandbox duration in seconds",
+                type=float,
+                default=default_seconds,
+                show_default=True,
+            )
+            if requested_seconds <= 0:
+                console.print("[red]Code sandbox duration must be greater than 0 seconds.[/red]")
+                raise typer.Exit(1)
+
+            requested_credits = burn_rate * requested_seconds
+            time_reservation_minutes = requested_seconds / 60.0
+            console.print(
+                f"[blue]Requested reservation: {requested_seconds:.2f}s -> {requested_credits:.6f} credits[/blue]"
+            )
+
+            created_runtime = client.create_runtime(
+                environment=environment,
+                time_reservation=time_reservation_minutes,
+            )
 
-        # Use the first available runtime
+            sandbox_name = str(created_runtime.name or "")
+            sandbox_uid = str(created_runtime.uid or "")
+            sandbox_pod = str(created_runtime.pod_name or "")
+            sandbox_ingress = str(created_runtime.ingress or "").rstrip("/")
+            sandbox_token = str(
+                created_runtime.jupyter_token or client._get_token() or ""
+            )
+
+            if not sandbox_ingress or not sandbox_token:
+                console.print(
+                    "[red]Code sandbox created but ingress/token is not available for inspection.[/red]"
+                )
+                raise typer.Exit(1)
+
+            pre_confirm_kernel_id = _inspect_code_sandbox_kernels_unique(
+                sandbox_name=sandbox_name or sandbox_pod or sandbox_uid,
+                sandbox_uid=sandbox_uid,
+                sandbox_pod=sandbox_pod,
+                sandbox_ingress=sandbox_ingress,
+                sandbox_token=sandbox_token,
+                inspection_label="post-create",
+            )
+
+            proceed = typer.confirm(
+                "Proceed with execution on this code sandbox?",
+                default=True,
+            )
+            if not proceed:
+                console.print("[red]Execution aborted by user.[/red]")
+                raise typer.Exit(1)
+
+            post_confirm_kernel_id = _inspect_code_sandbox_kernels_unique(
+                sandbox_name=sandbox_name or sandbox_pod or sandbox_uid,
+                sandbox_uid=sandbox_uid,
+                sandbox_pod=sandbox_pod,
+                sandbox_ingress=sandbox_ingress,
+                sandbox_token=sandbox_token,
+                inspection_label="pre-exec confirmation",
+            )
+
+            if post_confirm_kernel_id != pre_confirm_kernel_id:
+                console.print(
+                    "[red]Kernel changed between inspections. Failing fast before execution.[/red]"
+                )
+                raise typer.Exit(1)
+
+            selected_name = sandbox_uid or sandbox_name or sandbox_pod
+            if not selected_name:
+                console.print(
+                    "[red]Code sandbox created but no code sandbox identifier is available.[/red]"
+                )
+                raise typer.Exit(1)
+
+            console.print(
+                f"[green]Using newly created code sandbox: {selected_name}#{post_confirm_kernel_id}[/green]"
+            )
+            return selected_name
+
+        # Use the first available code sandbox
         selected = runtimes[0]
+        sandbox_uid = str(selected.uid or "")
+        kernel_id = ""
+        try:
+            runtime_token = str(getattr(selected, "jupyter_token", "") or client._get_token() or "")
+            ingress = str(getattr(selected, "ingress", "") or "").rstrip("/")
+            if ingress and runtime_token:
+                response = fetch(f"{ingress}/api/kernels", token=runtime_token, timeout=10)
+                kernels = response.json() if response.content else []
+                if isinstance(kernels, list) and kernels:
+                    ordered = sorted(
+                        (
+                            str((kernel or {}).get("id") or "")
+                            for kernel in kernels
+                        )
+                    )
+                    kernel_id = ordered[0] if ordered else ""
+        except Exception:
+            kernel_id = ""
+
+        sandbox_ref = sandbox_uid
+        if sandbox_uid and kernel_id:
+            sandbox_ref = f"{sandbox_uid}#{kernel_id}"
+
         console.print(
-            f"[blue]No runtime specified, using: {selected.name} ({selected.uid})[/blue]"
+            f"[blue]No code sandbox specified, using: {selected.name} ({sandbox_ref})[/blue]"
         )
         return selected.name or selected.uid or ""
 
@@ -337,12 +990,132 @@ def _select_runtime(token: Optional[str] = None) -> str:
         # Re-raise typer.Exit without modification
         raise
     except Exception as e:
-        console.print(f"[red]Error checking available runtimes: {e}[/red]")
+        console.print(f"[red]Error checking available code sandboxes: {e}[/red]")
         console.print(
             "[yellow]Hint: Make sure you're authenticated with 'dla login'[/yellow]"
         )
         raise typer.Exit(1)
 
 
+def _get_environment_burning_rate(client: DatalayerClient, environment: str) -> float:
+    """Return the positive burning rate of the named environment; RuntimeError if absent or <= 0."""
+    available = client.list_environments()
+    match = next((item for item in available if str(item.name or "") == environment), None)
+    if match is None:
+        names = [str(item.name or "") for item in available]
+        raise RuntimeError(
+            f"Environment '{environment}' not found. Available environments: {names}"
+        )
+    # A missing/zero rate is coerced to 0.0 and then rejected by the check below.
+    rate = float(match.burning_rate or 0.0)
+    if rate > 0:
+        return rate
+    raise RuntimeError(f"Environment '{environment}' has invalid burning rate: {rate}")
+
+
+def _to_float(value: Any, default: float = 0.0) -> float:
+    """Convert ``value`` with ``float()``, yielding ``default`` for ``None`` or any failure."""
+    if value is None:
+        return default
+    try:
+        return float(value)
+    except Exception:
+        return default
+
+
+def _get_remaining_credits_after_reservations(client: DatalayerClient) -> float:
+    """Derive the spendable credit balance from ``client.get_usage_credits()``.
+
+    Without a quota the ``credits`` figure is taken as the balance; with one,
+    the balance is ``quota - credits``. Reservation credits are subtracted and
+    the result is floored at 0.0. Raises RuntimeError on an unsuccessful payload.
+    """
+    usage = client.get_usage_credits()
+    if not usage.get("success", True):
+        detail = usage.get('message', 'Unknown error')
+        raise RuntimeError(f"Failed to load usage credits: {detail}")
+
+    account = usage.get("credits", {}) or {}
+    held = usage.get("reservations", []) or []
+
+    amount = _to_float(account.get("credits"), 0.0)
+    quota = account.get("quota")
+    # NOTE(review): with a quota, ``credits`` is presumably the amount already used - confirm.
+    balance = amount if quota is None else (_to_float(quota, 0.0) - amount)
+
+    # Plain running total over dict entries only; non-dict reservations are ignored.
+    reserved = 0.0
+    for item in held:
+        if isinstance(item, dict):
+            reserved += _to_float(item.get("credits"), 0.0)
+
+    return max(0.0, balance - reserved)
+
+
+def _default_code_sandbox_seconds(remaining_credits: float, burn_rate: float) -> float:
+    """Propose a duration that spends 33% of the remaining credits, never below 10 seconds."""
+    budget = max(0.0, remaining_credits * 0.33)
+    if burn_rate <= 0:
+        raise RuntimeError("Burning rate must be positive to compute duration")
+    # The 10 s floor keeps the suggestion usable when the budget is (almost) empty,
+    # even though such a suggestion may then cost more than the 33% budget.
+    return max(10.0, budget / burn_rate)
+
+
+def _inspect_code_sandbox_kernels_unique(
+    sandbox_name: str,
+    sandbox_uid: str,
+    sandbox_pod: str,
+    sandbox_ingress: str,
+    sandbox_token: str,
+    inspection_label: str,
+) -> str:
+    """Print an inspection report for the sandbox and return its single kernel's id.
+
+    Raises RuntimeError unless ``/api/kernels`` lists exactly one kernel with a non-blank id.
+    """
+    reply = fetch(f"{sandbox_ingress}/api/kernels", token=sandbox_token, timeout=15)
+    listing = reply.json() if reply.content else []
+    kernels = listing if isinstance(listing, list) else []
+
+    def _field(entry: Any, key: str, fallback: str = "") -> str:
+        # Falsy entries and missing/falsy fields collapse to the fallback text.
+        return str((entry or {}).get(key) or fallback)
+
+    overview = Table(title=f"Code Sandbox Inspection ({inspection_label})")
+    overview.add_column("Field", style="cyan")
+    overview.add_column("Value")
+    for label, value in (
+        ("Code Sandbox", sandbox_name),
+        ("Pod", sandbox_pod),
+        ("UID", sandbox_uid),
+        ("Ingress", sandbox_ingress),
+        ("Code Sandboxes", str(len(kernels))),  # NOTE(review): this row holds the kernel count
+    ):
+        overview.add_row(label, value)
+    console.print(overview)
+
+    details = Table(title="Available Code Sandboxes")
+    details.add_column("ID", style="green")
+    for heading in ("Name", "State", "Connections", "Last Activity"):
+        details.add_column(heading)
+    for entry in kernels:
+        details.add_row(
+            _field(entry, "id"),
+            _field(entry, "name"),
+            _field(entry, "execution_state"),
+            _field(entry, "connections", "0"),
+            _field(entry, "last_activity"),
+        )
+    if kernels:
+        console.print(details)
+    if len(kernels) != 1:
+        raise RuntimeError(f"Code sandbox inspection requires exactly one kernel; found {len(kernels)}")
+    found = _field(kernels[0], "id").strip()
+    if not found:
+        raise RuntimeError("Code sandbox inspection returned a kernel without an id")
+    return found
+
+
 if __name__ == "__main__":
     app()
diff --git a/datalayer_core/cli/commands/plans.py b/datalayer_core/cli/commands/plans.py
index db55ed4b..a9b68052 100644
--- a/datalayer_core/cli/commands/plans.py
+++ b/datalayer_core/cli/commands/plans.py
@@ -3,6 +3,7 @@
 
 """Plans commands for Datalayer CLI."""
 
+import os
 from typing import Any, Optional
 
 import typer
@@ -51,7 +52,7 @@ def _make_client(
 def plans_callback(ctx: typer.Context) -> None:
     """Plans and subscription commands."""
     if ctx.invoked_subcommand is None:
-        ctx.invoke(plans_show)
+        plans_show(token=None, iam_url=None, raw=False)
 
 
 def _format_number(value: Any, fallback: str = "-") -> str:
@@ -323,7 +324,8 @@ def plans_catalog(
         help="Datalayer IAM server URL",
     ),
     billable_account_uid: Optional[str] = typer.Option(
-        None,
+        os.environ.get("DATALAYER_ACCOUNT_UID")
+        or os.environ.get("DATALAYER_BILLABLE_ACCOUNT_UID"),
         "--billable-account-uid",
         help="Optional billable account UID scope.",
     ),
diff --git a/datalayer_core/cli/commands/pools.py b/datalayer_core/cli/commands/pools.py
index 6d19244e..8c61e4ae 100644
--- a/datalayer_core/cli/commands/pools.py
+++ b/datalayer_core/cli/commands/pools.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
diff --git a/datalayer_core/cli/commands/ray.py b/datalayer_core/cli/commands/ray.py
index b9060c9e..3773fe6e 100644
--- a/datalayer_core/cli/commands/ray.py
+++ b/datalayer_core/cli/commands/ray.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -39,13 +42,25 @@
 )
 
 console = Console()
+_RAY_RUNTIMES_URL_OVERRIDE: Optional[str] = None
 
 _ANSI_ESCAPE_RE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
 
 
 @app.callback()
-def ray_callback(ctx: typer.Context) -> None:
+def ray_callback(
+    ctx: typer.Context,
+    runtimes_url: Optional[str] = typer.Option(
+        None,
+        "--runtimes-url",
+        help="Datalayer Runtimes server URL.",
+    ),
+) -> None:
     """Ray management commands."""
+    global _RAY_RUNTIMES_URL_OVERRIDE  # module-level override later read by _make_client
+    _RAY_RUNTIMES_URL_OVERRIDE = (
+        str(runtimes_url).strip().rstrip("/") if runtimes_url else None  # surrounding whitespace and trailing "/" removed
+    )
     if ctx.invoked_subcommand is None:
         typer.echo(ctx.get_help())
 
@@ -67,9 +82,7 @@ def jobs_callback(ctx: typer.Context) -> None:
 def _make_client(
     token: Optional[str] = None,
 ) -> DatalayerClient:
-    urls = DatalayerURLs.from_environment()
-    # Ray CLI is intentionally routed via runtimes, never directly to ray_url.
-    urls.ray_url = urls.runtimes_url
+    urls = DatalayerURLs.from_environment(runtimes_url=_RAY_RUNTIMES_URL_OVERRIDE)  # NOTE(review): ray_url is no longer forced to runtimes_url here; confirm Ray calls are still routed via runtimes
     return DatalayerClient(urls=urls, token=token)
 
 
@@ -348,31 +361,50 @@ def jobs_list(
     token: Optional[str] = typer.Option(None, "--token", help="API token."),
     raw: bool = typer.Option(False, "--raw", help="Print raw JSON."),
 ) -> None:
-    client = _make_client(token=token)
-    payload = client.ray_list_jobs(namespace=namespace, cluster_name=cluster_name)
-    if raw:
-        _print_json(payload)
-        return
+    try:
+        client = _make_client(token=token)
+        payload = client.ray_list_jobs(namespace=namespace, cluster_name=cluster_name)
+        if raw:
+            _print_json(payload)
+            return
 
-    items = payload.get("jobs") or []
-    table = Table(title=f"Ray Jobs ({len(items)})")
-    table.add_column("Name", style="cyan")
-    table.add_column("Namespace")
-    table.add_column("Cluster")
-    table.add_column("Status")
+        items = payload.get("jobs") or []
+        table = Table(title=f"Ray Jobs ({len(items)})")
+        table.add_column("Name", style="cyan")
+        table.add_column("Namespace")
+        table.add_column("Cluster")
+        table.add_column("Status")
+
+        for item in items:
+            metadata = item.get("metadata") or {}
+            labels = metadata.get("labels") or {}
+            status = item.get("status") or {}
+            table.add_row(
+                str(metadata.get("name", "")),
+                str(metadata.get("namespace", namespace)),
+                str(labels.get("ray.io/cluster", "")),
+                str(status.get("jobStatus", "")),
+            )
 
-    for item in items:
-        metadata = item.get("metadata") or {}
-        labels = metadata.get("labels") or {}
-        status = item.get("status") or {}
-        table.add_row(
-            str(metadata.get("name", "")),
-            str(metadata.get("namespace", namespace)),
-            str(labels.get("ray.io/cluster", "")),
-            str(status.get("jobStatus", "")),
-        )
+        console.print(table)
+    except Exception as exc:
+        message = str(exc).strip() or "Unknown Ray jobs error"
+        lowered = message.lower()
 
-    console.print(table)
+        if "no ray provider registered" in lowered:
+            console.print("[red]Unable to list Ray jobs:[/red] No Ray provider registered.")
+            console.print(
+                "[yellow]Hint:[/yellow] Start or register a Ray provider in the runtimes service, then retry [bold]d ray jobs ls[/bold]."
+            )
+        elif "status=503" in lowered:
+            console.print("[red]Unable to list Ray jobs:[/red] Ray service unavailable (503).")
+            console.print(
+                "[yellow]Hint:[/yellow] Check runtimes/operator health and Ray provider registration."
+            )
+        else:
+            console.print(f"[red]Unable to list Ray jobs:[/red] {message}")
+
+        raise typer.Exit(code=1)
 
 
 @jobs_app.command(name="status")
diff --git a/datalayer_core/cli/commands/runtimes.py b/datalayer_core/cli/commands/runtimes.py
deleted file mode 100644
index 7a0de637..00000000
--- a/datalayer_core/cli/commands/runtimes.py
+++ /dev/null
@@ -1,312 +0,0 @@
-# Copyright (c) 2023-2025 Datalayer, Inc.
-# Distributed under the terms of the Modified BSD License.
-
-"""Runtime commands for Datalayer CLI."""
-
-from typing import Optional
-
-import typer
-from rich.console import Console
-
-from datalayer_core.client.client import DatalayerClient
-from datalayer_core.displays.runtimes import display_runtimes
-from datalayer_core.utils.urls import DatalayerURLs
-
-# Create a Typer app for runtime commands
-app = typer.Typer(
-    name="runtimes", help="Runtime management commands", invoke_without_command=True
-)
-
-console = Console()
-
-
-@app.callback()
-def runtimes_callback(ctx: typer.Context) -> None:
-    """Runtime management commands."""
-    if ctx.invoked_subcommand is None:
-        typer.echo(ctx.get_help())
-
-
-def _make_client(
-    token: Optional[str] = None,
-    iam_url: Optional[str] = None,
-    runtimes_url: Optional[str] = None,
-) -> DatalayerClient:
-    """Create a DatalayerClient with optional runtimes URL override."""
-    urls = DatalayerURLs.from_environment(iam_url=iam_url, runtimes_url=runtimes_url)
-    return DatalayerClient(urls=urls, token=token)
-
-
-@app.command(name="ls")
-def list_runtimes(
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-    iam_url: Optional[str] = typer.Option(
-        None,
-        "--iam-url",
-        help="Datalayer IAM server URL",
-    ),
-    runtimes_url: Optional[str] = typer.Option(
-        None,
-        "--runtimes-url",
-        help="Datalayer Runtimes server URL",
-    ),
-) -> None:
-    """List running runtimes."""
-    try:
-        client = _make_client(
-            token=token,
-            iam_url=iam_url,
-            runtimes_url=runtimes_url,
-        )
-        runtimes = client.list_runtimes()
-
-        # Convert to dict format for display_runtimes
-        runtime_dicts = []
-        for runtime in runtimes:
-            runtime_dicts.append(
-                {
-                    "given_name": runtime.name,
-                    "environment_name": runtime.environment,
-                    "pod_name": runtime.pod_name,
-                    "ingress": runtime.ingress,
-                    "reservation_id": runtime.reservation_id,
-                    "uid": runtime.uid,
-                    "burning_rate": runtime.burning_rate,
-                    "token": runtime.jupyter_token,
-                    "started_at": runtime.started_at,
-                    "expired_at": runtime.expired_at,
-                }
-            )
-
-        display_runtimes(runtime_dicts)
-
-    except Exception as e:
-        console.print(f"[red]Error listing runtimes: {e}[/red]")
-        raise typer.Exit(1)
-
-
-@app.command(name="create")
-def create_runtime(
-    environment: Optional[str] = typer.Argument(None, help="Environment name"),
-    given_name: Optional[str] = typer.Option(
-        None,
-        "--given-name",
-        help="Custom name for the runtime",
-    ),
-    credits_limit: Optional[float] = typer.Option(
-        None,
-        "--credits-limit",
-        help="Maximum amount of credits that can be consumed by the runtime",
-    ),
-    time_reservation: Optional[float] = typer.Option(
-        10.0,
-        "--time-reservation",
-        help="Time reservation in minutes for the runtime",
-    ),
-    billable_account_uid: Optional[str] = typer.Option(
-        None,
-        "--billable-account-uid",
-        help="Account UID to bill the runtime to (org/team). Defaults to the authenticated user.",
-    ),
-    billable_account_type: Optional[str] = typer.Option(
-        None,
-        "--billable-account-type",
-        help="Billable account type: user, organization, or team.",
-    ),
-    billable_account_handle: Optional[str] = typer.Option(
-        None,
-        "--billable-account-handle",
-        help="Billable account handle (informational).",
-    ),
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-    iam_url: Optional[str] = typer.Option(
-        None,
-        "--iam-url",
-        help="Datalayer IAM server URL",
-    ),
-    runtimes_url: Optional[str] = typer.Option(
-        None,
-        "--runtimes-url",
-        help="Datalayer Runtimes server URL",
-    ),
-) -> None:
-    """Create a new runtime."""
-    import questionary
-
-    try:
-        client = _make_client(
-            token=token,
-            iam_url=iam_url,
-            runtimes_url=runtimes_url,
-        )
-
-        if environment is None:
-            # List environments and let the user pick one
-            environments = client.list_environments()
-            if not environments:
-                console.print("[yellow]No environments available.[/yellow]")
-                raise typer.Exit(0)
-
-            choices = []
-            for env in environments:
-                label = env.name
-                if env.title:
-                    label += f"  ({env.title})"
-                choices.append(questionary.Choice(title=label, value=env.name))
-
-            selected = questionary.select(
-                "Select the environment for the new runtime:",
-                choices=choices,
-            ).ask()
-
-            if selected is None:
-                raise typer.Exit(0)
-            environment = selected
-
-        # Create runtime
-        final_time_reservation = time_reservation or 10.0
-        runtime = client.create_runtime(
-            name=given_name,
-            environment=environment,
-            time_reservation=final_time_reservation,
-            billable_account_uid=billable_account_uid,
-            billable_account_type=billable_account_type,
-            billable_account_handle=billable_account_handle,
-        )
-
-        console.print(
-            f"Runtime will use credits limit: {(runtime.burning_rate or 0.0) * 60.0 * final_time_reservation:.2f}"
-        )
-        console.print(f"Runtime created successfully: {runtime.name}")
-        console.print(f"[green]Runtime '{runtime.name}' created successfully![/green]")
-
-    except typer.Exit:
-        raise
-    except Exception as e:
-        console.print(f"[red]Error creating runtime: {e}[/red]")
-        raise typer.Exit(1)
-
-
-@app.command(name="terminate")
-def terminate_runtime(
-    pod_name: Optional[str] = typer.Argument(
-        None, help="Pod name of the runtime to terminate"
-    ),
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-    iam_url: Optional[str] = typer.Option(
-        None,
-        "--iam-url",
-        help="Datalayer IAM server URL",
-    ),
-    runtimes_url: Optional[str] = typer.Option(
-        None,
-        "--runtimes-url",
-        help="Datalayer Runtimes server URL",
-    ),
-) -> None:
-    """Terminate a running runtime."""
-    import questionary
-
-    try:
-        client = _make_client(
-            token=token,
-            iam_url=iam_url,
-            runtimes_url=runtimes_url,
-        )
-
-        if pod_name is None:
-            # List runtimes and let the user pick one
-            runtimes = client.list_runtimes()
-            if not runtimes:
-                console.print("[yellow]No running runtimes found.[/yellow]")
-                raise typer.Exit(0)
-
-            choices = []
-            for rt in runtimes:
-                label = rt.pod_name or ""
-                if rt.name:
-                    label = f"{rt.pod_name}  ({rt.name})"
-                if rt.environment:
-                    label += f"  [{rt.environment}]"
-                choices.append(questionary.Choice(title=label, value=rt.pod_name))
-
-            selected = questionary.select(
-                "Select the runtime to terminate:",
-                choices=choices,
-            ).ask()
-
-            if selected is None:
-                # User cancelled (Ctrl-C / Esc)
-                raise typer.Exit(0)
-            pod_name = selected
-
-        success = client.terminate_runtime(pod_name)
-
-        if success:
-            console.print(
-                f"[green]Runtime '{pod_name}' terminated successfully![/green]"
-            )
-        else:
-            console.print(f"[red]Failed to terminate runtime '{pod_name}'[/red]")
-            raise typer.Exit(1)
-
-    except typer.Exit:
-        raise
-    except Exception as e:
-        console.print(f"[red]Error terminating runtime: {e}[/red]")
-        raise typer.Exit(1)
-
-
-# Root level commands for convenience
-def runtimes_list(
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-    iam_url: Optional[str] = typer.Option(
-        None,
-        "--iam-url",
-        help="Datalayer IAM server URL",
-    ),
-    runtimes_url: Optional[str] = typer.Option(
-        None,
-        "--runtimes-url",
-        help="Datalayer Runtimes server URL",
-    ),
-) -> None:
-    """List running runtimes (root command)."""
-    list_runtimes(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
-
-
-def runtimes_ls(
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-    iam_url: Optional[str] = typer.Option(
-        None,
-        "--iam-url",
-        help="Datalayer IAM server URL",
-    ),
-    runtimes_url: Optional[str] = typer.Option(
-        None,
-        "--runtimes-url",
-        help="Datalayer Runtimes server URL",
-    ),
-) -> None:
-    """List running runtimes (root command alias)."""
-    list_runtimes(token=token, iam_url=iam_url, runtimes_url=runtimes_url)
diff --git a/datalayer_core/cli/commands/schedules.py b/datalayer_core/cli/commands/schedules.py
new file mode 100644
index 00000000..5e3d9cc1
--- /dev/null
+++ b/datalayer_core/cli/commands/schedules.py
@@ -0,0 +1,135 @@
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Schedule commands for Datalayer CLI."""
+
+from __future__ import annotations
+
+import os
+from typing import Any, Optional
+
+import requests
+import typer
+from rich.console import Console
+from rich.table import Table
+
+from datalayer_core.utils.urls import DatalayerURLs
+
+
+app = typer.Typer(
+    name="schedules",
+    help="Scheduler management commands.",
+    invoke_without_command=True,  # run the callback even when no subcommand is given, so it can print help
+)
+
+console = Console()  # shared Rich console used by the renderers and the error output in this module
+
+
+@app.callback()
+def schedules_callback(ctx: typer.Context) -> None:
+    """Scheduler management commands."""
+    if ctx.invoked_subcommand is None:  # invoked without a subcommand: print the group help
+        typer.echo(ctx.get_help())
+
+
+def _resolve_token(token: Optional[str] = None) -> str:
+    """Return ``token``, else ``DATALAYER_API_KEY``, else the client's token (empty string if none)."""
+    direct = token or os.environ.get("DATALAYER_API_KEY")
+    if direct:
+        return direct
+    try:
+        from datalayer_core.client.client import DatalayerClient
+
+        client_token = DatalayerClient()._get_token()
+    except Exception:
+        # Best-effort fallback: any failure here is reported as "no token".
+        return ""
+    return client_token or ""
+
+
+def _fetch_scheduler(
+    *,
+    path: str,
+    token: Optional[str] = None,
+    scheduler_url: Optional[str] = None,
+) -> dict[str, Any]:
+    """GET ``/api/scheduler/v1{path}`` with a bearer token; return the JSON object ({} for an empty body)."""
+    resolved_token = _resolve_token(token)
+    if not resolved_token:
+        raise RuntimeError(
+            "No authentication token found. Pass --token, set DATALAYER_API_KEY, or run 'datalayer login'."
+        )
+    urls = DatalayerURLs.from_environment(scheduler_url=scheduler_url)
+    # Trim trailing slashes so a base such as "https://host/" cannot produce "//api/...".
+    url = f"{str(urls.scheduler_url).rstrip('/')}/api/scheduler/v1{path}"
+    headers = {"Authorization": f"Bearer {resolved_token}"}
+    response = requests.get(url, headers=headers, timeout=30)
+    response.raise_for_status()  # 4xx/5xx responses raise requests.HTTPError
+    data = response.json() if response.content else {}
+    if not isinstance(data, dict):
+        raise RuntimeError("Unexpected scheduler response payload.")
+    return data
+
+
+def _render_schedules(schedules: list[dict[str, Any]]) -> None:
+    """Print the schedules as a Rich table.
+
+    Text cells are ``str()`` of the schedule field (empty when the key is absent);
+    the "Enabled" cell is yes/no and a missing ``enabled_b`` counts as enabled.
+    """
+    grid = Table(title="Schedules")
+    grid.add_column("UID", style="cyan")
+    for heading in ("Notebook UID", "Cron", "Preset", "Enabled", "Next Planned"):
+        grid.add_column(heading)
+
+    leading_keys = ("uid", "notebook_uid_s", "cron_expression_s", "preset_s")
+    for entry in schedules:
+        cells = [str(entry.get(key, "")) for key in leading_keys]
+        # A missing flag defaults to True, i.e. the schedule is shown as enabled.
+        cells.append("yes" if bool(entry.get("enabled_b", True)) else "no")
+        cells.append(str(entry.get("next_planned_ts_dt", "")))
+        grid.add_row(*cells)
+    console.print(grid)
+
+
+def _render_runs(runs: list[dict[str, Any]]) -> None:
+    """Print the schedule runs as a Rich table.
+
+    Each cell is ``str()`` of the run field; absent fields render as empty strings.
+    """
+    columns = (
+        ("UID", "uid"),
+        ("Schedule UID", "schedule_uid_s"),
+        ("Notebook UID", "notebook_uid_s"),
+        ("State", "state_s"),
+        ("Success", "success_b"),
+        ("Planned", "planned_ts_dt"),
+        ("Executed", "executed_ts_dt"),
+    )
+    grid = Table(title="Schedule Runs")
+    grid.add_column(columns[0][0], style="cyan")
+    for heading, _field in columns[1:]:
+        grid.add_column(heading)
+    for run in runs:
+        grid.add_row(*(str(run.get(field, "")) for _heading, field in columns))
+    console.print(grid)
+
+
+@app.command(name="ls")
+def list_schedules(
+    runs: bool = typer.Option(False, "--runs", help="List schedule runs instead of schedule definitions."),
+    token: Optional[str] = typer.Option(None, "--token", help="Authentication token."),
+    scheduler_url: Optional[str] = typer.Option(None, "--scheduler-url", help="Datalayer Scheduler service URL."),
+) -> None:
+    """List scheduler definitions or scheduler runs."""
+    from rich.markup import escape
+
+    path, key = ("/schedules/runs", "runs") if runs else ("/schedules", "schedules")
+    try:
+        payload = _fetch_scheduler(path=path, token=token, scheduler_url=scheduler_url)
+        rows = payload.get(key) or []
+        (_render_runs if runs else _render_schedules)(rows)
+    except Exception as exc:
+        # Escape the message: bracketed text in it must not be parsed as Rich markup.
+        console.print(f"[red]Error listing schedules: {escape(str(exc))}[/red]")
+        raise typer.Exit(1)
diff --git a/datalayer_core/cli/commands/tokens.py b/datalayer_core/cli/commands/tokens.py
deleted file mode 100644
index 3d7d50f4..00000000
--- a/datalayer_core/cli/commands/tokens.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# Copyright (c) 2023-2025 Datalayer, Inc.
-# Distributed under the terms of the Modified BSD License.
-
-"""Token commands for Datalayer CLI."""
-
-from typing import Optional
-
-import typer
-from rich.console import Console
-
-from datalayer_core.client.client import DatalayerClient
-from datalayer_core.displays.tokens import display_tokens
-from datalayer_core.models.token import TokenType
-
-# Create a Typer app for token commands
-app = typer.Typer(
-    name="tokens", help="Token management commands", invoke_without_command=True
-)
-
-console = Console()
-
-
-@app.callback()
-def tokens_callback(ctx: typer.Context) -> None:
-    """Token management commands."""
-    if ctx.invoked_subcommand is None:
-        typer.echo(ctx.get_help())
-
-
-@app.command(name="ls")
-def list_tokens(
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-) -> None:
-    """List all tokens."""
-    try:
-        client = DatalayerClient(token=token)
-        tokens = client.list_tokens()
-
-        # Convert to dict format for display_tokens
-        token_dicts = []
-        for token in tokens:
-            token_dicts.append(
-                {
-                    "uid": token.uid,
-                    "name_s": token.name,
-                    "description_t": token.description,
-                    "variant_s": token.token_type,
-                }
-            )
-
-        display_tokens(token_dicts)
-
-    except Exception as e:
-        console.print(f"[red]Error listing tokens: {e}[/red]")
-        raise typer.Exit(1)
-
-
-@app.command(name="create")
-def create_token(
-    name: str = typer.Argument(..., help="Name of the token"),
-    description: str = typer.Argument(..., help="Description of the token"),
-    expiration_date: Optional[int] = typer.Option(
-        0,
-        "--expiration-date",
-        help="Expiration date in seconds since epoch (0 for no expiration)",
-    ),
-    token_type: str = typer.Option(
-        TokenType.USER,
-        "--token-type",
-        help="Type of the token (user, admin)",
-    ),
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-) -> None:
-    """Create a new token."""
-    try:
-        client = DatalayerClient(token=token)
-
-        result = client.create_token(
-            name=name,
-            description=description,
-            expiration_date=expiration_date or 0,
-            token_type=token_type,
-        )
-
-        if result.get("success", False):
-            token_data = result.get("token", {})
-            console.print(f"[green]Token '{name}' created successfully![/green]")
-            console.print(
-                f"[yellow]Token value: {result.get('access_token', 'N/A')}[/yellow]"
-            )
-            console.print(
-                "[dim]Please save this token value securely - it won't be shown again![/dim]"
-            )
-
-            # Display the created token info
-            if token_data:
-                display_tokens(
-                    [
-                        {
-                            "uid": token_data.get("uid"),
-                            "name_s": token_data.get("name_s", name),
-                            "description_t": token_data.get(
-                                "description_t", description
-                            ),
-                            "variant_s": token_data.get("variant_s", token_type),
-                        }
-                    ]
-                )
-        else:
-            console.print(
-                f"[red]Failed to create token: {result.get('message', 'Unknown error')}[/red]"
-            )
-            raise typer.Exit(1)
-
-    except Exception as e:
-        console.print(f"[red]Error creating token: {e}[/red]")
-        raise typer.Exit(1)
-
-
-@app.command(name="delete")
-def delete_token(
-    uid: str = typer.Argument(..., help="UID of the token to delete"),
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-) -> None:
-    """Delete a token."""
-    try:
-        client = DatalayerClient(token=token)
-
-        success = client.delete_token(uid)
-
-        if success:
-            console.print(f"[green]Token '{uid}' deleted successfully![/green]")
-        else:
-            console.print(f"[red]Failed to delete token '{uid}'[/red]")
-            raise typer.Exit(1)
-
-    except Exception as e:
-        console.print(f"[red]Error deleting token: {e}[/red]")
-        raise typer.Exit(1)
-
-
-# Root level commands for convenience
-def tokens_list(
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-) -> None:
-    """List all tokens (root command)."""
-    list_tokens(token=token)
-
-
-def tokens_ls(
-    token: Optional[str] = typer.Option(
-        None,
-        "--token",
-        help="Authentication token (Bearer token for API requests).",
-    ),
-) -> None:
-    """List all tokens (root command alias)."""
-    list_tokens(token=token)
diff --git a/datalayer_core/cli/commands/usage.py b/datalayer_core/cli/commands/usage.py
index accd4316..34a85548 100644
--- a/datalayer_core/cli/commands/usage.py
+++ b/datalayer_core/cli/commands/usage.py
@@ -4,6 +4,7 @@
 """Usage/credits commands for Datalayer CLI."""
 
 from datetime import datetime, timezone
+import os
 from typing import Any, Optional
 
 import typer
@@ -130,7 +131,8 @@ def usage_records(
         help="Datalayer IAM server URL",
     ),
     billable_account_uid: Optional[str] = typer.Option(
-        None,
+        os.environ.get("DATALAYER_ACCOUNT_UID")
+        or os.environ.get("DATALAYER_BILLABLE_ACCOUNT_UID"),
         "--billable-account-uid",
         help="Optional account UID scope. Defaults to the authenticated account.",
     ),
@@ -268,7 +270,8 @@ def usage_reservations(
         help="Optional reservation type filter.",
     ),
     billable_account_uid: Optional[str] = typer.Option(
-        None,
+        os.environ.get("DATALAYER_ACCOUNT_UID")
+        or os.environ.get("DATALAYER_BILLABLE_ACCOUNT_UID"),
         "--billable-account-uid",
         help="Optional account UID scope for fallback credits view.",
     ),
diff --git a/datalayer_core/client/client.py b/datalayer_core/client/client.py
index 8bd226fa..949433ea 100644
--- a/datalayer_core/client/client.py
+++ b/datalayer_core/client/client.py
@@ -14,6 +14,8 @@
 from functools import lru_cache
 from typing import Any, Optional, Union
 
+from jupyter_kernel_client import KernelClient
+
 from datalayer_core.mixins.authn import AuthnMixin
 from datalayer_core.mixins.environments import EnvironmentsMixin
 from datalayer_core.mixins.evals import EvalsMixin
@@ -22,16 +24,16 @@
 from datalayer_core.mixins.sandbox_snapshots import SandboxSnapshotsMixin
 from datalayer_core.mixins.runtimes import RuntimesMixin
 from datalayer_core.mixins.secrets import SecretsMixin
-from datalayer_core.mixins.tokens import TokensMixin
+from datalayer_core.mixins.api_keys import ApiKeysMixin
 from datalayer_core.mixins.usage import UsageMixin
 from datalayer_core.mixins.whoami import WhoamiAppMixin
 from datalayer_core.models import UserModel
+from datalayer_core.models.api_key import ApiKeyModel, ApiKeyType
 from datalayer_core.models.environment import EnvironmentModel
 from datalayer_core.models.sandbox_snapshot import SandboxSnapshotModel
 from datalayer_core.models.secret import SecretModel, SecretVariant
-from datalayer_core.models.token import TokenModel, TokenType
-from datalayer_core.runtimes.runtime import RuntimeService
-from datalayer_core.runtimes.sandbox_snapshot import (
+from datalayer_core.agents.agent_cloud import RuntimeService
+from datalayer_core.sandboxes.code_sandbox_snapshots import (
     as_code_sandbox_snapshots,
     create_snapshot,
 )
@@ -54,7 +56,7 @@ class DatalayerClient(
     RayMixin,
     SecretsMixin,
     SandboxSnapshotsMixin,
-    TokensMixin,
+    ApiKeysMixin,
     UsageMixin,
     WhoamiAppMixin,
 ):
@@ -269,6 +271,7 @@ def create_runtime(
         billable_account_uid: Optional[str] = None,
         billable_account_type: Optional[str] = None,
         billable_account_handle: Optional[str] = None,
+        api_key: Optional[str] = None,
     ) -> RuntimeService:
         """
         Create a new runtime (kernel) for code execution.
@@ -312,6 +315,10 @@ def create_runtime(
 
         # print(f"Runtime {name}")
 
+        client_for_request = self
+        if api_key:
+            client_for_request = DatalayerClient(urls=self._urls, token=api_key)
+
         if snapshot_name is not None:
             snapshots = self.list_snapshots()
             snapshot_uid = None
@@ -325,7 +332,7 @@ def create_runtime(
                     f"Snapshot '{snapshot_name}' not found. Available snapshots: {[s.name for s in snapshots]}"
                 )
 
-            response = self._create_runtime(
+            response = client_for_request._create_runtime(
                 given_name=name,
                 environment_name=environment,
                 from_snapshot_uid=snapshot_uid,
@@ -338,7 +345,7 @@ def create_runtime(
             )
         else:
             # Create runtime without snapshot
-            response = self._create_runtime(
+            response = client_for_request._create_runtime(
                 given_name=name,
                 environment_name=environment,
                 agent_spec_id=agent_spec_id,
@@ -374,7 +381,7 @@ def create_runtime(
             environment=runtime_data["environment_name"],
             run_url=self._urls.run_url,
             iam_url=self._urls.iam_url,
-            token=self._token,
+            token=api_key or self._token,
             ingress=runtime_data["ingress"],
             jupyter_token=runtime_data["token"],
             pod_name=runtime_data["pod_name"],
@@ -434,7 +441,11 @@ def list_runtimes(self) -> list[RuntimeService]:
             )
         return runtime_services
 
-    def terminate_runtime(self, runtime: Union[RuntimeService, str]) -> bool:
+    def terminate_runtime(
+        self,
+        runtime: Union[RuntimeService, str],
+        api_key: Optional[str] = None,
+    ) -> bool:
         """
         Terminate a running Runtime.
 
@@ -450,6 +461,9 @@ def terminate_runtime(self, runtime: Union[RuntimeService, str]) -> bool:
         """
         pod_name = runtime.pod_name if isinstance(runtime, RuntimeService) else runtime
         if pod_name is not None:
+            if api_key:
+                client_for_request = DatalayerClient(urls=self._urls, token=api_key)
+                return client_for_request._terminate_runtime(pod_name).get("success", False)
             return self._terminate_runtime(pod_name)["success"]
         else:
             return False
@@ -539,6 +553,111 @@ def update_runtime(
             raise RuntimeError(f"Failed to update runtime '{pod_name}': {message}")
         return True
 
+    def check_runtime_health(
+        self,
+        runtime: Union[RuntimeService, str],
+        probe_code: str = "print('datalayer runtime health probe')",
+        timeout: float = 20.0,
+        api_key: Optional[str] = None,
+    ) -> dict[str, Any]:
+        """Probe a runtime by executing a snippet through a kernel client.
+
+        The runtime is looked up when given as an identifier, a kernel client
+        is opened on its ingress, ``probe_code`` is executed and the outcome is
+        summarised in a dictionary. Exceptions raised while the kernel client
+        is in use are reported in that dictionary instead of being propagated.
+
+        Parameters
+        ----------
+        runtime : Union[RuntimeService, str]
+            Runtime object, or an identifier resolved with ``get_runtime``.
+        probe_code : str
+            Code executed on the runtime as the health probe.
+        timeout : float
+            Probe execution timeout in seconds.
+        api_key : Optional[str]
+            Optional API key; when set, lookup and token fallback use a client built with it.
+
+        Returns
+        -------
+        dict[str, Any]
+            ``success`` flag, ``message`` and runtime/probe diagnostics.
+        """
+        lookup_client = (
+            DatalayerClient(urls=self._urls, token=api_key) if api_key else self
+        )
+        if isinstance(runtime, RuntimeService):
+            service = runtime
+        else:
+            service = lookup_client.get_runtime(runtime)
+
+        ingress_url = str(service.ingress or "").rstrip("/")
+        # The runtime's own Jupyter token wins; the client token is the fallback.
+        probe_token = str(
+            service.jupyter_token or lookup_client._get_token() or ""
+        ).strip()
+
+        report: dict[str, Any] = {
+            "success": False,
+            "runtime_uid": service.uid,
+            "runtime_pod_name": service.pod_name,
+            "runtime_name": service.name,
+            "ingress": ingress_url,
+            "probe_mode": "sandbox_execute_code",
+        }
+
+        preconditions = (
+            (ingress_url, "runtime ingress is missing"),
+            (probe_token, "runtime token is missing"),
+        )
+        for required_value, problem in preconditions:
+            if not required_value:
+                report["message"] = problem
+                return report
+
+        session: Optional[KernelClient] = None
+        try:
+            session = KernelClient(server_url=ingress_url, token=probe_token)
+            session.start()
+            reply = session.execute(probe_code, timeout=timeout)
+            raw_outputs = reply.get("outputs", [])
+            entries = [
+                item
+                for item in (raw_outputs if isinstance(raw_outputs, list) else [])
+                if isinstance(item, dict)
+            ]
+            kinds = [str(item.get("output_type") or "") for item in entries]
+
+            if "error" in kinds:
+                failure = entries[kinds.index("error")]
+                report["message"] = "sandbox probe execution failed"
+                report["error_name"] = failure.get("ename")
+                report["error_value"] = failure.get("evalue")
+                frames = failure.get("traceback")
+                if isinstance(frames, list):
+                    joined = "\n".join(str(frame) for frame in frames if frame is not None)
+                    report["traceback_tail"] = joined[-4000:]
+                return report
+
+            stdout = "".join(
+                str(item.get("text") or "")
+                for item, kind in zip(entries, kinds)
+                if kind == "stream"
+            )
+            report["success"] = True
+            report["message"] = "runtime reachable and sandbox probe executed"
+            report["stdout_tail"] = stdout[-1000:]
+            return report
+        except Exception as exc:
+            report["message"] = f"runtime health probe exception: {exc}"
+            return report
+        finally:
+            if session is not None:
+                try:
+                    session.stop()
+                except Exception:
+                    pass
+
     def list_secrets(self) -> list[SecretModel]:
         """
         List all secrets available in the Datalayer environment.
@@ -736,76 +855,77 @@ def delete_snapshot(
         )
         return self._delete_snapshot(snapshot_uid)
 
-    def create_token(
+    def create_api_key(
         self,
         name: str,
         description: str,
         expiration_date: int = 0,
-        token_type: Union[str, TokenType] = TokenType.USER,
+        api_key_type: Union[str, ApiKeyType] = ApiKeyType.SECRET,
     ) -> dict[str, Any]:
         """
-        Create a new token.
+        Create a new API key.
 
         Parameters
         ----------
         name : str
-            Name of the token.
+            Name of the API key.
         description : str
-            Description of the token.
+            Description of the API key.
         expiration_date : int, default 0
-            Expiration date of the token in seconds since epoch.
-        token_type : Union[str, TokenType], default TokenType.USER
-            Type of the token (e.g., "user", "admin").
+            Expiration date of the API key in seconds since epoch.
+        api_key_type : Union[str, ApiKeyType], default ApiKeyType.SECRET
+            Type of the API key (secret, publishable, restricted, temporary).
 
         Returns
         -------
         dict[str, Any]
-            A dictionary containing the created token and its details.
+            A dictionary containing the created API key and its details.
         """
-        return self._create_token(
+        return self._create_api_key(
             name=name,
             description=description,
             expiration_date=expiration_date,
-            token_type=token_type,
+            api_key_type=api_key_type,
         )
 
-    def list_tokens(self) -> list[TokenModel]:
+    def list_api_keys(self) -> list[ApiKeyModel]:
         """
-        List all tokens.
+        List all API keys.
 
         Returns
         -------
-        list[Token]
-            A list of tokens associated with the user.
-        """
-        response = self._list_tokens()
-        if response.get("success") and "tokens" in response:
-            token_objects = []
-            for token_data in response["tokens"]:
-                token = TokenModel(
-                    uid=token_data["uid"],
-                    name=token_data.get("name_s", ""),
-                    description=token_data.get("description_t", ""),
-                    token_type=token_data.get("variant_s", "user"),
+        list[ApiKeyModel]
+            A list of API keys associated with the user.
+        """
+        response = self._list_api_keys()
+        if response.get("success"):
+            payload = response.get("api_keys", response.get("tokens", []))
+            api_key_objects = []
+            for api_key_data in payload:
+                api_key = ApiKeyModel(
+                    uid=api_key_data["uid"],
+                    name=api_key_data.get("name_s", ""),
+                    description=api_key_data.get("description_t", ""),
+                    api_key_type=api_key_data.get("variant_s", "secret"),
                 )
-                token_objects.append(token)
-            return token_objects
+                api_key_objects.append(api_key)
+            return api_key_objects
         return []
 
-    def delete_token(self, token: Union[str, TokenModel]) -> bool:
+    def delete_api_key(self, api_key: Union[str, ApiKeyModel]) -> bool:
         """
-        Delete a specific token.
+        Delete a specific API key.
 
         Parameters
         ----------
-        token : Union[str, Token]
-            Token object or UID string to delete.
+        api_key : Union[str, ApiKeyModel]
+            API key object or UID string to delete.
 
         Returns
         -------
         bool
             The result of the deletion operation.
         """
-        token_uid = token.uid if isinstance(token, TokenModel) else token
-        response = self._delete_token(token_uid)
+        api_key_uid = api_key.uid if isinstance(api_key, ApiKeyModel) else api_key
+        response = self._delete_api_key(api_key_uid)
         return response.get("success", False)
diff --git a/datalayer_core/console/consoleapp.py b/datalayer_core/console/consoleapp.py
index e665c4a1..a666813b 100644
--- a/datalayer_core/console/consoleapp.py
+++ b/datalayer_core/console/consoleapp.py
@@ -41,7 +41,7 @@
 aliases = dict(datalayer_aliases)
 aliases.update(
     {
-        "runtime": "RuntimesConsoleApp.runtime_name",
+        "agent": "RuntimesConsoleApp.runtime_name",
     }
 )
 
diff --git a/datalayer_core/console/manager.py b/datalayer_core/console/manager.py
index 8b3c6edc..136fc9d0 100644
--- a/datalayer_core/console/manager.py
+++ b/datalayer_core/console/manager.py
@@ -57,6 +57,10 @@ def __init__(
         _ = kwargs.pop("kernel_id", None)  # kernel_id not supported
         super().__init__(server_url="", token="", username=username, **kwargs)
         self._kernel_id = ""
+        self.runtime_uid = ""
+        self.runtime_name = ""
+        self.runtime_pod_name = ""
+        self.runtime_created_in_start = False
         self.run_url = run_url
         self.run_token = token
         self.username = username
@@ -114,145 +118,256 @@ def start_kernel(
                 "A kernel is already started. Shutdown it before starting a new one."
             )
 
+        # Reset per-start state markers.
+        self.runtime_created_in_start = False
+
         runtime_name = name
         runtime = None
 
-        # Use DatalayerClient to get runtime information
-        if runtime_name:
-            # Get specific runtime by name
-            runtimes = self._client.list_runtimes()
-            for r in runtimes:
-                if r.name == runtime_name:
-                    runtime = {
-                        "pod_name": r.pod_name,
-                        "ingress": r.ingress,
-                        "token": r.jupyter_token,
-                        "expired_at": r.expired_at,
-                    }
-                    break
-        else:
+        # Use DatalayerClient to get runtime information.
+        runtimes = self._client.list_runtimes()
+
+        if not runtime_name:
             self.log.debug(
-                "No Runtime name provided. Picking the first available Runtime…"
+                "No Agent name provided. Picking the first available Agent…"
             )
-            # Get list of available runtimes
-            runtimes = self._client.list_runtimes()
-
-            # If no runtime is running, let the user decide to start one from the first environment
             if not runtimes:
-                environments = self._client.list_environments()
-                if not environments:
+                # Historical behaviour: when no Agent is running, offer to
+                # launch one from the first available environment instead of
+                # failing outright.
+                launched = self._prompt_and_launch_agent()
+                if launched is None:
                     raise RuntimeError(
-                        "No environments available to create a runtime from."
+                        "No Agent running. Start one first with: "
+                        "`d agents create <ENVIRONMENT_NAME> --time-reservation 10`"
                     )
+                runtimes = [launched]
+
+            selected = self._pick_accessible_runtime(runtimes)
+
+            if selected is None:
+                # The accessibility probe is best-effort (a freshly launched
+                # Agent may still be warming up its ingress). Fall back to the
+                # first listed Agent and let `_ensure_kernel_id` retry until the
+                # kernel endpoint is reachable.
+                selected = runtimes[0]
+
+            runtime_name = selected.name or selected.uid or selected.pod_name or ""
+            self.runtime_uid = str(selected.uid or "")
+            self.runtime_name = str(selected.name or runtime_name or "")
+            self.runtime_pod_name = str(selected.pod_name or "")
+            runtime = {
+                "pod_name": selected.pod_name,
+                "ingress": selected.ingress,
+                "token": selected.jupyter_token or self.run_token,
+                "expired_at": selected.expired_at,
+            }
+        else:
+            selected = None
+            for r in runtimes:
+                if r.name == runtime_name or r.uid == runtime_name:
+                    selected = r
+                    break
+            if selected is None:
+                raise RuntimeError(f"Agent '{runtime_name}' not found")
+            self.runtime_uid = str(selected.uid or "")
+            self.runtime_name = str(selected.name or runtime_name or "")
+            self.runtime_pod_name = str(selected.pod_name or "")
+            runtime = {
+                "pod_name": selected.pod_name,
+                "ingress": selected.ingress,
+                "token": selected.jupyter_token or self.run_token,
+                "expired_at": selected.expired_at,
+            }
 
-                first_environment = environments[0]
-                first_environment_name = first_environment.name
+        if runtime is None:
+            raise RuntimeError("Unable to find an Agent.")
 
-                # Calculate credits limit based on environment
-                credits_limit = (
-                    first_environment.burning_rate * 60.0 * 10.0
-                )  # 10 minutes default
+        self.server_url = runtime["ingress"]
+        self.token = runtime.get("token", "")
 
-                user_input = (
-                    input(
-                        f"No Runtime running.\nDo you want to launch a runtime from the environment {first_environment_name} with {credits_limit:.2f} reserved credits? (yes/no) [default: yes]: "
-                    )
-                    or "yes"
-                )
-                if user_input.lower() != "yes":
-                    raise RuntimeError(
-                        "No Runtime running. Please start one Runtime using `datalayer runtimes create <ENV_ID>`."
-                    )
+        # Ensure runtime endpoint is ready and a usable kernel exists.
+        self._kernel_id = self._ensure_kernel_id()
 
-                # Create new runtime using the client
-                new_runtime = self._client.create_runtime(
-                    name=f"console-runtime-{first_environment_name}",
-                    environment=first_environment_name,
-                    time_reservation=10.0,  # 10 minutes default
-                )
+        kernel_model = self.refresh_model()
+        msg = f"RuntimeManager using existing Agent {runtime_name}"
+        expired_at = runtime.get("expired_at")
+        if expired_at is not None:
+            msg += f" expiring at {timestamp_to_local_date(expired_at)}"
+        self.log.info(msg)
 
-                # Start the runtime to get connection details
-                new_runtime._start()
-
-                runtime = {
-                    "pod_name": new_runtime.pod_name,
-                    "ingress": new_runtime.ingress,
-                    "token": new_runtime.jupyter_token,
-                    "expired_at": new_runtime.expired_at,
-                }
-
-                # Display the created runtime
-                runtime_dict = {
-                    "given_name": new_runtime.name,
-                    "environment_name": new_runtime.environment,
-                    "pod_name": new_runtime.pod_name,
-                    "ingress": new_runtime.ingress,
-                    "reservation_id": getattr(new_runtime, "reservation_id", ""),
-                    "uid": new_runtime.uid,
-                    "burning_rate": getattr(new_runtime, "burning_rate", 0.0),
-                    "token": new_runtime.jupyter_token,
-                    "started_at": getattr(new_runtime, "started_at", ""),
-                    "expired_at": new_runtime.expired_at,
-                }
-                display_runtimes([runtime_dict])
-
-                # Refresh runtime list
-                runtimes = self._client.list_runtimes()
-
-            # Use the first available runtime
-            if runtimes:
-                r = runtimes[0]
-                runtime = {
-                    "pod_name": r.pod_name,
-                    "ingress": r.ingress,
-                    "token": r.jupyter_token,
-                    "expired_at": r.expired_at,
-                }
-                runtime_name = r.pod_name or ""
+        return kernel_model
 
-        if runtime is None:
-            raise RuntimeError("Unable to find a Runtime.")
+    def _pick_accessible_runtime(self, runtimes: list[Any]) -> Optional[Any]:
+        """Return the first runtime passing ``_runtime_is_accessible``, else ``None``."""
+        # Generator + next(): probing stops at the first accessible runtime,
+        # exactly like an early-returning loop.
+        reachable = (rt for rt in runtimes if self._runtime_is_accessible(rt))
+        return next(reachable, None)
 
-        self.server_url = runtime["ingress"]
-        self.token = runtime.get("token", "")
+    def _prompt_and_launch_agent(self) -> Optional[Any]:
+        """Offer to launch an Agent from the first environment when none is running.
+
+        Asks the user whether to create an Agent from the first available
+        environment with a default 10-minute reservation. Returns the launched
+        runtime (preferably once it is listed and reachable), or ``None`` when
+        the user declines. Raises ``RuntimeError`` when no environment exists.
+        """
+        environments = self._client.list_environments()
+        if not environments:
+            raise RuntimeError("No environments available to create an Agent from.")
+
+        first_environment = environments[0]
+        first_environment_name = first_environment.name
+
+        # Estimate the credits reserved by the default 10-minute reservation.
+        burning_rate = float(getattr(first_environment, "burning_rate", 0.0) or 0.0)
+        credits_limit = burning_rate * 60.0 * 10.0
+
+        prompt = (
+            "No Agent running.\n"
+            f"Do you want to launch an Agent from the environment "
+            f"{first_environment_name} with {credits_limit:.2f} reserved credits? "
+            "(yes/no) [default: yes]: "
+        )
+        try:
+            answer = (input(prompt) or "yes").strip().lower()
+        except EOFError:
+            answer = "yes"  # EOF (closed stdin) counts as accepting the default
+        if answer not in ("y", "yes"):
+            return None
+
+        new_runtime = self._client.create_runtime(
+            name=f"console-agent-{first_environment_name}",
+            environment=first_environment_name,
+            time_reservation=10.0,
+        )
+        # Mark the Agent as created by this start; start_kernel only ever resets it.
+        self.runtime_created_in_start = True
+
+        # Surface the freshly created Agent; a display failure must not abort start.
+        try:
+            display_runtimes(
+                [
+                    {
+                        "given_name": new_runtime.name,
+                        "environment_name": new_runtime.environment,
+                        "pod_name": new_runtime.pod_name,
+                        "ingress": new_runtime.ingress,
+                        "reservation_id": getattr(new_runtime, "reservation_id", ""),
+                        "uid": new_runtime.uid,
+                        "burning_rate": getattr(new_runtime, "burning_rate", 0.0),
+                        "token": new_runtime.jupyter_token,
+                        "started_at": getattr(new_runtime, "started_at", ""),
+                        "expired_at": new_runtime.expired_at,
+                    }
+                ]
+            )
+        except Exception:
+            self.log.debug("Unable to display the launched Agent", exc_info=True)
+
+        # Prefer the listed, reachable handle; fall back to the one just created.
+        launched = self._wait_for_listed_accessible_runtime(str(new_runtime.uid or ""))
+        return launched or new_runtime
+
+    def _wait_for_listed_accessible_runtime(self, preferred_uid: str) -> Optional[Any]:
+        """Poll the runtime list until a reachable runtime shows up.
+
+        Each round favours a reachable runtime whose uid is ``preferred_uid``,
+        then any reachable listed runtime. Gives up (``None``) after 30 rounds,
+        sleeping one second after every unsuccessful round.
+        """
+        for _ in range(30):
+            listed = self._client.list_runtimes()
+            # An empty preferred_uid disables the uid-based preference entirely.
+            matches = [rt for rt in listed if preferred_uid and str(rt.uid or "") == preferred_uid]
+            candidate = next((rt for rt in matches if self._runtime_is_accessible(rt)), None)
+            if candidate is None:
+                candidate = self._pick_accessible_runtime(listed)
+            if candidate is not None:
+                return candidate
+            time.sleep(1.0)
+        return None
+
+    def _runtime_is_accessible(self, runtime: Any) -> bool:
+        """Best-effort HTTP accessibility check for runtime ingress and token."""
+        ingress = str(getattr(runtime, "ingress", "") or "").rstrip("/")
+        token = str(getattr(runtime, "jupyter_token", "") or self.run_token or "")
+        if not ingress or not token:
+            return False
 
-        # Get runtime information.
         from datalayer_core.utils.network import fetch
 
-        response = None
-        max_attempts = 4
+        try:
+            fetch(f"{ingress}/api/kernels", token=token, timeout=10)
+            return True
+        except Exception:
+            return False
+
+    def _ensure_kernel_id(self) -> str:
+        """Return the runtime's existing kernel id.
+
+        Datalayer runtimes are provisioned with a kernel already running and
+        wired to the runtime ingress. We must connect to that existing kernel
+        instead of creating a new one: a freshly created kernel id is not the
+        one the ingress routes to, which leads to no execution output and 404
+        responses on kernel endpoints (e.g. /interrupt).
+        """
+        from datalayer_core.utils.network import fetch
+
+        kernels_url = f"{self.server_url.rstrip('/')}/api/kernels"
+        max_attempts = 30
+        last_error: Exception | None = None
         for attempt in range(1, max_attempts + 1):
             try:
-                response = fetch(f"{self.server_url}/api/kernels", token=self.token)
-                break
+                response = fetch(kernels_url, token=self.token, timeout=20)
+                kernels = response.json() if response.content else []
+                if isinstance(kernels, list) and kernels:
+                    # Freshly launched runtimes can briefly expose stale kernel IDs
+                    # in the list endpoint; verify a kernel can be read directly
+                    # before selecting it.
+                    ordered_kernels = sorted(
+                        kernels,
+                        key=lambda kernel: str((kernel or {}).get("id") or ""),
+                    )
+                    for kernel in ordered_kernels:
+                        kernel_id = str((kernel or {}).get("id") or "")
+                        if not kernel_id:
+                            continue
+                        try:
+                            fetch(
+                                f"{kernels_url}/{kernel_id}",
+                                token=self.token,
+                                timeout=20,
+                            )
+                            return kernel_id
+                        except requests.exceptions.HTTPError as e:
+                            status = (
+                                e.response.status_code
+                                if getattr(e, "response", None) is not None
+                                else None
+                            )
+                            if status in (404, 410):
+                                # Kernel disappeared while ingress was warming.
+                                continue
+                            last_error = e
+                        except requests.exceptions.ConnectionError as e:
+                            last_error = e
             except requests.exceptions.HTTPError as e:
                 status = (
                     e.response.status_code
                     if getattr(e, "response", None) is not None
                     else None
                 )
-                if status in (502, 503, 504) and attempt < max_attempts:
-                    time.sleep(2 ** (attempt - 1))
-                    continue
-                raise
-            except requests.exceptions.ConnectionError:
-                if attempt < max_attempts:
-                    time.sleep(2 ** (attempt - 1))
-                    continue
-                raise
-
-        if response is None:
-            raise RuntimeError("Failed to query kernel endpoint for runtime")
-
-        kernels = response.json()
-        if kernels:
-            self._kernel_id = kernels[0]["id"]
-
-        kernel_model = self.refresh_model()
-        msg = f"RuntimeManager using existing runtime {runtime_name}"
-        expired_at = runtime.get("expired_at")
-        if expired_at is not None:
-            msg += f" expiring at {timestamp_to_local_date(expired_at)}"
-        self.log.info(msg)
-
-        return kernel_model
+                if status not in (404, 502, 503, 504):
+                    raise
+                last_error = e
+            except requests.exceptions.ConnectionError as e:
+                last_error = e
+
+            # The kernel may still be registering on a freshly launched runtime.
+            time.sleep(1.0)
+
+        raise RuntimeError(
+            f"Runtime has no available kernel at '{kernels_url}': {last_error}"
+        )
diff --git a/datalayer_core/displays/api_keys.py b/datalayer_core/displays/api_keys.py
new file mode 100644
index 00000000..588cf9e0
--- /dev/null
+++ b/datalayer_core/displays/api_keys.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Display functions for Datalayer API keys."""
+
+from __future__ import annotations
+
+from rich.console import Console
+from rich.table import Table
+
+
+def _new_api_keys_table(title: str = "API Keys") -> Table:
+    """
+    Build an empty table with the ID, Name and Variant columns.
+
+    Parameters
+    ----------
+    title : str, default "API Keys"
+        Title shown for the table.
+
+    Returns
+    -------
+    Table
+        A rich Table with three cyan, non-wrapping columns and no rows.
+    """
+    api_keys_table = Table(title=title)
+    # Every column shares the same styling, so add them in one pass.
+    for heading in ("ID", "Name", "Variant"):
+        api_keys_table.add_column(heading, style="cyan", no_wrap=True)
+    return api_keys_table
+
+
+def _add_api_key_to_table(table: Table, api_key: dict[str, str]) -> None:
+    """
+    Append one API key as a row of the table.
+
+    Parameters
+    ----------
+    table : Table
+        The rich Table receiving the row.
+    api_key : dict[str, str]
+        API key record; its uid, name_s and variant_s entries fill the row.
+    """
+    # Order follows the ID, Name, Variant columns built by _new_api_keys_table.
+    fields = ("uid", "name_s", "variant_s")
+    # Indexing (not .get) keeps the KeyError for records missing a field.
+    cells = [api_key[field] for field in fields]
+    table.add_row(*cells)
+
+
+def display_api_keys(api_keys: list[dict[str, str]]) -> None:
+    """
+    Print the given API keys to the console as a table.
+
+    Parameters
+    ----------
+    api_keys : list[dict[str, str]]
+        API key records, rendered one row each in the given order.
+    """
+    listing = _new_api_keys_table(title="API Keys")
+    for record in api_keys:
+        _add_api_key_to_table(listing, record)
+    # The table is printed even when it has no rows.
+    Console().print(listing)
diff --git a/datalayer_core/displays/environments.py b/datalayer_core/displays/environments.py
index 312a8a37..f324abf4 100644
--- a/datalayer_core/displays/environments.py
+++ b/datalayer_core/displays/environments.py
@@ -6,64 +6,214 @@
 from __future__ import annotations
 
 import json
+import re
 from typing import Any
 
 from rich.console import Console
-from rich.table import Table
+
+
+def _description_to_text(description: str) -> str:
+    """Convert HTML/Markdown-like descriptions into readable plain text."""
+    from html import unescape
+
+    text = (description or "").strip()
+    if not text:
+        return "(no description)"
+    # Keep paragraph and line breaks as newlines, then drop every other tag.
+    text = re.sub(r"<\s*/\s*p\s*>", "\n\n", text, flags=re.IGNORECASE)
+    text = re.sub(r"<\s*br\s*/?\s*>", "\n", text, flags=re.IGNORECASE)
+    text = re.sub(r"<[^>]+>", "", text)
+    # Decode entities only after tag removal so "&lt;b&gt;" stays literal text.
+    text = unescape(text)
+    # Strip lightweight markdown markers that look noisy in CLI tables.
+    text = re.sub(r"\*\*(.*?)\*\*", r"\1", text)
+    text = re.sub(r"__(.*?)__", r"\1", text)
+    text = re.sub(r"`([^`]*)`", r"\1", text)
+    text = re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", text)
+    text = re.sub(r"^[ \t]*#{1,6}[ \t]*", "", text, flags=re.MULTILINE)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip() or "(no description)"
+
+
+def _truncate(value: str, width: int) -> str:
+    """Clip ``value`` to ``width`` characters, marking any cut with "…"."""
+    if width <= 0:
+        return ""
+    if len(value) > width:
+        # Reserve the final column for the ellipsis marker.
+        return value[: width - 1] + "…"
+    return value
+
+def _wrap_lines(text: str, width: int) -> list[str]:
+    """Wrap plain text into lines bounded by width, preserving explicit breaks.
+
+    Lines break at the last space that fits, or mid-word when a single word is
+    longer than ``width``. Blank input lines are kept as empty strings. For
+    ``width <= 1`` the text is clipped to ``width`` characters (none if
+    negative) instead of wrapped. The result always has at least one entry.
+    """
+    if width <= 1:
+        # max() guards negative widths, where text[:width] would slice from the end.
+        return [text[: max(width, 0)]] if text else [""]
+
+    wrapped: list[str] = []
+    for raw_line in text.splitlines() or [""]:
+        remaining = raw_line.strip()
+        while len(remaining) > width:
+            cut = remaining.rfind(" ", 0, width + 1)
+            if cut <= 0:
+                cut = width
+            wrapped.append(remaining[:cut].rstrip())
+            remaining = remaining[cut:].lstrip()
+        # Also emits "" for blank lines, keeping paragraph gaps.
+        wrapped.append(remaining)
+
+    return wrapped
+
+
+def _pad_cell(value: str, width: int, align_right: bool = False) -> str:
+    """Clip ``value`` to ``width``, then pad it with spaces up to that width."""
+    return (str.rjust if align_right else str.ljust)(_truncate(value, width), width)
 
 
 def display_environments(environments: list[dict[str, Any]]) -> None:
-    """
-    Display a list of environments in the console.
-
-    Parameters
-    ----------
-    environments : list[dict[str, Any]]
-        List of environment dictionaries to display.
-    """
-    table = _new_env_table()
-    for environment in environments:
-        _add_env_to_table(table, environment)
+    """Display environments as a box table with a description block per environment.
+
+    Each environment prints its wrapped description, then a summary row; the
+    layout targets the console width, with a floor of 80 columns.
+    """
     console = Console()
-    console.print(table)
-
-
-def _new_env_table() -> Table:
-    """
-    Create a new table for displaying environments.
-
-    Returns
-    -------
-    Table
-        A configured Rich Table object for environments.
-    """
-    table = Table(title="Environments")
-    table.add_column("ID", style="magenta", no_wrap=True)
-    table.add_column("Cost per seconds", justify="right", style="red", no_wrap=True)
-    table.add_column("Name", style="green", no_wrap=True)
-    table.add_column("Description", style="green", no_wrap=True)
-    table.add_column("Language", style="green", no_wrap=True)
-    table.add_column("Resources", justify="right", style="green", no_wrap=True)
-    return table
-
-
-def _add_env_to_table(table: Table, environment: dict[str, Any]) -> None:
-    """
-    Add an environment row to the display table.
-
-    Parameters
-    ----------
-    table : Table
-        Rich Table object to add the row to.
-    environment : dict[str, Any]
-        Environment data dictionary to add as a row.
-    """
-    desc = environment["description"]
-    table.add_row(
-        environment["name"],
-        "{:.3g}".format(environment["burning_rate"]),
-        environment["title"],
-        desc if len(desc) <= 50 else desc[:50] + "…",
-        environment["language"],
-        json.dumps(environment["resources"]),
+
+    headers = ("ID", "Credits/Second", "Name", "Language", "Resources")
+    rows: list[tuple[str, str, str, str, str, str]] = []
+    for env in environments:
+        env_id = str(env.get("name") or "")
+        cost = "{:.4g}".format(float(env.get("burning_rate") or 0.0))
+        name = str(env.get("title") or "")
+        language = str(env.get("language") or "")
+        resources = json.dumps(env.get("resources") or {}, ensure_ascii=False)
+        desc_text = _description_to_text(str(env.get("description") or ""))
+        rows.append((env_id, cost, name, language, resources, desc_text))
+
+    terminal_width = max(80, console.width)
+    inner_target = terminal_width - 2
+
+    # Preferred widths; later adjusted to fit exactly within terminal width.
+    id_width = max(len(headers[0]), *(len(r[0]) for r in rows)) if rows else len(headers[0])
+    cost_width = max(len(headers[1]), *(len(r[1]) for r in rows)) if rows else len(headers[1])
+    name_width = max(len(headers[2]), *(len(r[2]) for r in rows)) if rows else len(headers[2])
+    lang_width = max(len(headers[3]), *(len(r[3]) for r in rows)) if rows else len(headers[3])
+
+    id_width = max(12, min(id_width, 28))
+    cost_width = max(6, min(cost_width, 16))
+    name_width = max(18, min(name_width, 32))
+    lang_width = max(8, min(lang_width, 16))
+
+    # Resources column gets remaining space.
+    used_without_resources = (
+        (id_width + 2)
+        + (cost_width + 2)
+        + (name_width + 2)
+        + (lang_width + 2)
+        + 4  # column separators between 5 columns
+        + 2  # left/right padding of border interior
+    )
+    resources_width = max(20, inner_target - used_without_resources)
+
+    # If terminal is very narrow, squeeze fixed columns further.
+    if resources_width == 20 and used_without_resources + resources_width > inner_target:
+        overflow = (used_without_resources + resources_width) - inner_target
+        # Reduce name first, then id, then lang within minimums.
+        shrink_name = min(max(0, name_width - 12), overflow)
+        name_width -= shrink_name
+        overflow -= shrink_name
+        if overflow > 0:
+            shrink_id = min(max(0, id_width - 10), overflow)
+            id_width -= shrink_id
+            overflow -= shrink_id
+        if overflow > 0:
+            shrink_lang = min(max(0, lang_width - 6), overflow)
+            lang_width -= shrink_lang
+
+    # Recompute resources width with final fixed widths.
+    fixed_cells = (id_width, cost_width, name_width, lang_width)
+    used_without_resources = sum(width + 2 for width in fixed_cells) + 4 + 2
+    resources_width = max(12, inner_target - used_without_resources)
+
+    c1 = id_width + 2
+    c2 = cost_width + 2
+    c3 = name_width + 2
+    c4 = lang_width + 2
+    c5 = resources_width + 2
+    inner_total = c1 + c2 + c3 + c4 + c5 + 4
+
+    console.print("Environments".center(inner_total + 2), style="bold")
+
+    console.print(
+        "┏"
+        + "━" * c1
+        + "┳"
+        + "━" * c2
+        + "┳"
+        + "━" * c3
+        + "┳"
+        + "━" * c4
+        + "┳"
+        + "━" * c5
+        + "┓"
     )
+    console.print(
+        "┃ "
+        + _pad_cell(headers[0], id_width)
+        + " ┃ "
+        + _pad_cell(headers[1], cost_width, align_right=True)
+        + " ┃ "
+        + _pad_cell(headers[2], name_width)
+        + " ┃ "
+        + _pad_cell(headers[3], lang_width)
+        + " ┃ "
+        + _pad_cell(headers[4], resources_width)
+        + " ┃"
+    )
+    console.print(
+        "┡"
+        + "━" * c1
+        + "╇"
+        + "━" * c2
+        + "╇"
+        + "━" * c3
+        + "╇"
+        + "━" * c4
+        + "╇"
+        + "━" * c5
+        + "┩"
+    )
+
+    for index, (env_id, cost, name, language, resources, desc_text) in enumerate(rows):
+        span_width = inner_total - 2
+        # markup=False: descriptions may contain "[...]" that Rich would parse as tags.
+        for line in _wrap_lines(desc_text, span_width):
+            console.print("│ " + _pad_cell(line, span_width) + " │", markup=False)
+
+        # Thin line between full-width detail line and the summary line.
+        console.print("├" + "─" * inner_total + "┤")
+
+        console.print(
+            "│ "
+            + _pad_cell(env_id, id_width)
+            + " │ "
+            + _pad_cell(cost, cost_width, align_right=True)
+            + " │ "
+            + _pad_cell(name, name_width)
+            + " │ "
+            + _pad_cell(language, lang_width)
+            + " │ "
+            + _pad_cell(resources, resources_width)
+            + " │",
+            markup=False,
+        )
+
+        if index < len(rows) - 1:
+            console.print("├" + "─" * inner_total + "┤")
+
+    console.print("└" + "─" * inner_total + "┘")
diff --git a/datalayer_core/displays/runtime_checkpoints.py b/datalayer_core/displays/runtime_checkpoints.py
index bb8d8a39..fabd7c95 100644
--- a/datalayer_core/displays/runtime_checkpoints.py
+++ b/datalayer_core/displays/runtime_checkpoints.py
@@ -30,7 +30,7 @@ def _new_runtime_checkpoints_table(title: str = "Runtime Checkpoints") -> Table:
     table = Table(title=title)
     table.add_column("ID", style="cyan", no_wrap=True)
     table.add_column("Runtime", style="green", no_wrap=True)
-    table.add_column("Agent Spec", style="magenta", no_wrap=True)
+    table.add_column("Agentspec", style="magenta", no_wrap=True)
     table.add_column("Name", style="cyan", no_wrap=True)
     table.add_column("Status", style="yellow", no_wrap=True)
     table.add_column("Updated", style="dim", no_wrap=True)
diff --git a/datalayer_core/displays/runtimes.py b/datalayer_core/displays/runtimes.py
index 95691444..b0309e65 100644
--- a/datalayer_core/displays/runtimes.py
+++ b/datalayer_core/displays/runtimes.py
@@ -13,7 +13,7 @@
 from datalayer_core.utils.date import timestamp_to_local_date
 
 
-def _new_runtime_table(title: str = "Runtimes") -> Table:
+def _new_runtime_table(title: str = "Agents") -> Table:
     """
     Create a new table for displaying runtimes.
 
@@ -64,7 +64,7 @@ def display_runtimes(runtimes: list[dict[str, Any]]) -> None:
     runtimes : list[dict[str, Any]]
         List of runtime dictionaries to display.
     """
-    table = _new_runtime_table(title="Runtimes")
+    table = _new_runtime_table(title="Agents")
     for runtime in runtimes:
         _add_runtime_to_table(table, runtime)
     console = Console()
diff --git a/datalayer_core/displays/tokens.py b/datalayer_core/displays/tokens.py
deleted file mode 100644
index 9a72eb41..00000000
--- a/datalayer_core/displays/tokens.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2023-2025 Datalayer, Inc.
-# Distributed under the terms of the Modified BSD License.
-
-"""Display functions for Datalayer core."""
-
-from __future__ import annotations
-
-from rich.console import Console
-from rich.table import Table
-
-
-def _new_tokens_table(title: str = "Tokens") -> Table:
-    """
-    Create a new tokens table.
-
-    Parameters
-    ----------
-    title : str, default "tokens"
-        The title for the table.
-
-    Returns
-    -------
-    Table
-        A rich Table configured for displaying tokens.
-    """
-    table = Table(title=title)
-    table.add_column("ID", style="cyan", no_wrap=True)
-    table.add_column("Name", style="cyan", no_wrap=True)
-    table.add_column("Variant", style="cyan", no_wrap=True)
-    return table
-
-
-def _add_token_to_table(table: Table, token: dict[str, str]) -> None:
-    """
-    Add a token row to the table.
-
-    Parameters
-    ----------
-    table : Table
-        The rich Table to add the row to.
-    token : dict[str, str]
-        Dictionary containing token information with keys: uid, name_s, description_t, variant_s.
-    """
-    table.add_row(
-        token["uid"],
-        token["name_s"],
-        token["variant_s"],
-    )
-
-
-def display_tokens(tokens: list[dict[str, str]]) -> None:
-    """
-    Display a list of tokens in the console.
-
-    Parameters
-    ----------
-    tokens : list[dict[str, str]]
-        List of token dictionaries to display.
-    """
-    table = _new_tokens_table(title="Tokens")
-    for token in tokens:
-        _add_token_to_table(table, token)
-    console = Console()
-    console.print(table)
diff --git a/datalayer_core/evals/__init__.py b/datalayer_core/evals/__init__.py
new file mode 100644
index 00000000..799488e6
--- /dev/null
+++ b/datalayer_core/evals/__init__.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Evals shared package."""
+
+from datalayer_core.evals.evals import (
+    build_eval_report,
+    load_evalset_spec,
+    make_client,
+    merge_dicts,
+    now_iso,
+    parse_json_file,
+    parse_json_value,
+    render_eval_report_markdown,
+    resolve_billable_account_uid,
+    timestamp_slug,
+    watch_runs,
+    write_eval_report_csv,
+    write_eval_reports,
+)
+from datalayer_core.evals.evaluators import (
+    evaluate_evalset,
+    evaluate_run,
+    run_and_evaluate_evalset,
+    run_case_evaluators,
+)
+from datalayer_core.evals.report import (
+    average_latest_pass_rate,
+    collect_report_failures,
+    iter_report_runs,
+)
+from datalayer_core.evals.runner import execute_evalset_spec
+
+__all__ = [
+    "average_latest_pass_rate",
+    "build_eval_report",
+    "collect_report_failures",
+    "evaluate_evalset",
+    "evaluate_run",
+    "execute_evalset_spec",
+    "iter_report_runs",
+    "load_evalset_spec",
+    "make_client",
+    "merge_dicts",
+    "now_iso",
+    "parse_json_file",
+    "parse_json_value",
+    "render_eval_report_markdown",
+    "resolve_billable_account_uid",
+    "run_and_evaluate_evalset",
+    "run_case_evaluators",
+    "timestamp_slug",
+    "watch_runs",
+    "write_eval_report_csv",
+    "write_eval_reports",
+]
diff --git a/datalayer_core/evals/evals.py b/datalayer_core/evals/evals.py
new file mode 100644
index 00000000..ca46ae3c
--- /dev/null
+++ b/datalayer_core/evals/evals.py
@@ -0,0 +1,274 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Shared helpers for evals CLI and integrations."""
+
+from __future__ import annotations
+
+import json
+import time
+from pathlib import Path
+from typing import Any, Optional
+
+import typer
+
+from datalayer_core.client.client import DatalayerClient
+from datalayer_core.utils.urls import DatalayerURLs
+
+_TERMINAL_RUN_STATES = {
+    "completed",
+    "failed",
+    "error",
+    "cancelled",
+    "success",
+    "succeeded",
+    "passed",
+    "done",
+}
+
+
+def parse_json_value(raw: Optional[str], flag_name: str) -> dict[str, Any]:
+    """Decode a JSON-object CLI value; an empty or missing value yields ``{}``."""
+    try:
+        decoded = json.loads(raw) if raw else {}
+    except Exception as exc:
+        # Any decoding failure is reported as a CLI usage error for the flag.
+        raise typer.BadParameter(f"Invalid JSON for {flag_name}: {exc}") from exc
+    if not isinstance(decoded, dict):
+        raise typer.BadParameter(f"{flag_name} must decode to an object")
+    return decoded
+
+
+def parse_json_file(path_value: Optional[str], flag_name: str) -> dict[str, Any]:
+    """Read a JSON object from ``path_value``; an empty or unset path yields ``{}``."""
+    if not path_value:
+        return {}
+    path = Path(path_value)
+    if not path.is_file():  # also rejects directories, which exists() let through
+        raise typer.BadParameter(f"File not found for {flag_name}: {path}")
+    return parse_json_value(path.read_text(encoding="utf-8"), flag_name)
+
+
+def merge_dicts(*parts: dict[str, Any]) -> dict[str, Any]:
+    """Shallow-merge ``parts`` left to right; later values win on key clashes."""
+    return {
+        key: value for part in parts for key, value in part.items()
+    }
+
+
+def make_client(
+    token: Optional[str] = None,
+    api_key: Optional[str] = None,
+    *,
+    iam_url: Optional[str] = None,
+    runtimes_url: Optional[str] = None,
+    ai_agents_url: Optional[str] = None,
+) -> DatalayerClient:
+    """Construct a :class:`DatalayerClient` configured from the environment.
+
+    Non-empty URL keywords are handed to :meth:`DatalayerURLs.from_environment`
+    as overrides; ``token`` is the credential, with ``api_key`` as fallback.
+    """
+    urls = DatalayerURLs.from_environment(
+        iam_url=iam_url or None,
+        runtimes_url=runtimes_url or None,
+        ai_agents_url=ai_agents_url or None,
+    )
+    credential = token or api_key
+    return DatalayerClient(urls=urls, token=credential)
+
+
+def resolve_billable_account_uid(
+    billable_account_uid: Optional[str],
+    account_uid: Optional[str],
+) -> Optional[str]:
+    """Prefer ``billable_account_uid``; use ``account_uid`` when it is empty/None."""
+    return billable_account_uid if billable_account_uid else account_uid
+
+
+def load_evalset_spec(
+    spec_file: str | Path,
+    *,
+    expected_kind: Optional[str] = None,
+    require_cases: bool = False,
+) -> dict[str, Any]:
+    """Load and validate a JSON evalset spec file.
+
+    The returned dict can be passed straight to
+    :meth:`DatalayerClient.evals_create_eval_from_spec`. Raises
+    ``FileNotFoundError`` for a missing file and ``ValueError`` for an invalid
+    spec; ``expected_kind`` is matched case-insensitively.
+    """
+    path = Path(spec_file)
+    if not path.exists():
+        raise FileNotFoundError(f"Evalset spec file not found: {path}")
+    payload = json.loads(path.read_text(encoding="utf-8"))
+    if not isinstance(payload, dict):
+        raise ValueError(f"Evalset spec must be a JSON object: {path}")
+    if not str(payload.get("name") or "").strip():
+        raise ValueError(f"Evalset spec is missing 'name': {path}")
+    if expected_kind is not None:
+        kind = str(payload.get("kind") or "").strip().lower()
+        # Normalize both sides so "Agent" and "agent" are treated as equal.
+        if kind and kind != expected_kind.strip().lower():
+            raise ValueError(
+                f"Evalset spec kind '{kind}' does not match expected "
+                f"'{expected_kind}': {path}"
+            )
+    cases = payload.get("cases")
+    if require_cases and not (isinstance(cases, list) and cases):
+        raise ValueError(
+            f"Evalset spec must include a non-empty 'cases' array: {path}"
+        )
+    return payload
+
+
+def watch_runs(
+    client: DatalayerClient,
+    run_ids: list[str],
+    *,
+    account_uid: Optional[str] = None,
+    timeout_seconds: int = 120,
+    interval_seconds: int = 3,
+    verbose: bool = True,
+) -> dict[str, str]:
+    """Poll eval runs until they reach a terminal state or the timeout elapses.
+
+    Returns a mapping of ``run_id`` to its last observed status (lowercased,
+    ``"unknown"`` when absent). Runs already seen in a terminal state are not
+    fetched again. Progress lines are printed when ``verbose`` is true.
+    """
+    started = time.monotonic()  # immune to wall-clock adjustments
+    statuses: dict[str, str] = {}
+    pending = list(dict.fromkeys(run_ids))  # de-duplicated, order kept
+    while True:
+        for run_id in pending:
+            snapshot = client.evals_get_run(run_id, account_uid=account_uid)
+            status = str((snapshot.get("run") or {}).get("status") or "").lower()
+            statuses[run_id] = status or "unknown"
+        pending = [
+            run_id
+            for run_id in pending
+            if statuses[run_id] not in _TERMINAL_RUN_STATES
+        ]
+        if verbose:
+            elapsed = int(time.monotonic() - started)
+            counts: dict[str, int] = {}
+            for status in statuses.values():
+                counts[status] = counts.get(status, 0) + 1
+            summary = (
+                ", ".join(f"{status}={count}" for status, count in sorted(counts.items()))
+                or "unknown=0"
+            )
+            print(f"Run status at t+{elapsed}s: {summary}")
+        if not pending:
+            return statuses
+        if time.monotonic() - started > timeout_seconds:
+            if verbose:
+                preview = ", ".join(pending[:5])
+                suffix = " ..." if len(pending) > 5 else ""
+                print(
+                    "Run watch timed out before terminal state. "
+                    f"Pending ({len(pending)}): {preview}{suffix}"
+                )
+            return statuses
+        time.sleep(max(1, interval_seconds))
+
+
+def now_iso() -> str:
+    """Return the current timestamp string from the report engine's ``_now_iso``."""
+    from datalayer_core.evals.report import _now_iso as current_timestamp
+
+    return current_timestamp()
+
+
+def timestamp_slug(raw_iso: str) -> str:
+    """Slugify an ISO-8601 timestamp via the report engine's ``_timestamp_slug``."""
+    from datalayer_core.evals.report import _timestamp_slug as slugify
+
+    return slugify(raw_iso)
+
+
+def build_eval_report(
+    client: DatalayerClient,
+    evalset_id: str,
+    *,
+    account_uid: Optional[str] = None,
+    run_limit: int = 50,
+) -> dict[str, Any]:
+    """Build the structured eval report for ``evalset_id``.
+
+    Forwards to the report engine's private ``_report_data`` helper.
+    """
+    from datalayer_core.evals.report import _report_data
+
+    report = _report_data(
+        client=client,
+        evalset_id=evalset_id,
+        account_uid=account_uid,
+        run_limit=run_limit,
+    )
+    return report
+
+
+def render_eval_report_markdown(
+    report: dict[str, Any],
+    *,
+    run_limit: int = 50,
+    colorize: bool = False,
+) -> str:
+    """Return ``report`` rendered as markdown by the report engine."""
+    from datalayer_core.evals.report import _report_markdown as to_markdown
+
+    return to_markdown(report, colorize=colorize, run_limit=run_limit)
+
+
+def write_eval_report_csv(report: dict[str, Any], output_path: str | Path) -> Path:
+    """Write ``report`` as CSV to ``output_path``, creating parent directories."""
+    from datalayer_core.evals.report import _write_report_csv as dump_csv
+
+    destination = Path(output_path)
+    destination.parent.mkdir(parents=True, exist_ok=True)
+    dump_csv(report, destination)
+    return destination
+
+
+def write_eval_reports(
+    client: DatalayerClient,
+    evalset_id: str,
+    *,
+    account_uid: Optional[str] = None,
+    run_limit: int = 50,
+    output_dir: str | Path = ".",
+    basename: str = "report",
+    timestamped: bool = True,
+    export_csv: bool = True,
+) -> dict[str, Any]:
+    """Build an eval report and save it as markdown, plus CSV when requested.
+
+    Files are written to ``output_dir`` (created if needed) as ``<stem>.md``
+    and ``<stem>.csv``; the stem is ``basename``, suffixed with a slug of the
+    report's ``generated_at`` value when ``timestamped`` is true. Returns a
+    dict with ``report``, ``markdown_path`` and, if exported, ``csv_path``.
+    """
+    from datalayer_core.evals.report import _timestamp_slug
+
+    report = build_eval_report(
+        client, evalset_id, account_uid=account_uid, run_limit=run_limit
+    )
+    markdown = render_eval_report_markdown(report, run_limit=run_limit)
+
+    out_dir = Path(output_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    stem = basename
+    if timestamped:
+        stem = f"{basename}-{_timestamp_slug(str(report.get('generated_at') or ''))}"
+    markdown_path = out_dir / f"{stem}.md"
+    markdown_path.write_text(markdown + "\n", encoding="utf-8")
+
+    written: dict[str, Any] = {"report": report, "markdown_path": markdown_path}
+    if export_csv:
+        written["csv_path"] = write_eval_report_csv(report, out_dir / f"{stem}.csv")
+    return written
diff --git a/datalayer_core/evals/evaluators.py b/datalayer_core/evals/evaluators.py
new file mode 100644
index 00000000..a83c331d
--- /dev/null
+++ b/datalayer_core/evals/evaluators.py
@@ -0,0 +1,415 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Reusable evaluator execution for real (non-synthetic) eval runs.
+
+Implements the common Datalayer evaluators (``equals_expected``, ``equals``,
+``contains``, ``pass_rate_threshold``) so examples, the CLI, and integrations
+can grade *real* agent outputs instead of fabricating scores. Evaluator names
+mirror the evaluator catalog (see ``agent_runtimes/specs/evals``); unknown names
+degrade gracefully to a skipped record so callers never crash on an unsupported
+evaluator.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Callable
+
+CaseEvaluator = Callable[[Any, Any, dict[str, Any]], dict[str, Any]]
+ReportEvaluator = Callable[[list[dict[str, Any]], dict[str, Any]], dict[str, Any]]
+
+
+def _coerce_text(value: Any) -> str:
+    """Return the textual payload of an output/expected value.
+
+    Dicts contribute their ``"text"`` entry; any other value is stringified.
+    ``None`` (or a dict whose ``"text"`` is missing/None) becomes ``""``.
+    """
+    payload = value.get("text") if isinstance(value, dict) else value
+    if payload is None:
+        return ""
+    return str(payload)
+
+
+def _normalize_name(name: Any) -> str:
+    return "_".join(str(name or "").strip().lower().split("-"))  # "A-b" -> "a_b"
+
+
+def _evaluate_equals_expected(
+    output: Any, expected: Any, arguments: dict[str, Any]
+) -> dict[str, Any]:
+    """Pass when the stripped output text equals the stripped expected text.
+
+    An empty expected text always passes; ``arguments`` is unused.
+    """
+    wanted = _coerce_text(expected).strip()
+    actual = _coerce_text(output).strip()
+    if not wanted or actual == wanted:
+        reason = "exact match" if wanted else "no expected output"
+        return {"passed": True, "score": 1.0, "reason": reason}
+    return {"passed": False, "score": 0.0, "reason": "output does not equal expected"}
+
+
+def _evaluate_contains(
+    output: Any, expected: Any, arguments: dict[str, Any]
+) -> dict[str, Any]:
+    """Pass when every configured needle occurs in the output text.
+
+    Needles come from ``arguments["tokens"]`` (a non-empty list/tuple), else
+    from ``arguments["substring"]`` or ``arguments["value"]``, else from the
+    expected text. Empty needles are dropped; with none left the check passes.
+    Matching ignores case unless ``arguments["case_sensitive"]`` is truthy.
+    """
+    output_text = _coerce_text(output)
+    case_sensitive = bool(arguments.get("case_sensitive"))
+
+    # Needle precedence: explicit tokens, then a single substring/value, then
+    # the case's expected text.
+    tokens = arguments.get("tokens")
+    single = arguments.get("substring", arguments.get("value"))
+    if isinstance(tokens, (list, tuple)) and tokens:
+        candidates = [str(token) for token in tokens]
+    elif single is not None:
+        candidates = [str(single)]
+    else:
+        candidates = [_coerce_text(expected)]
+    needles = list(filter(None, candidates))
+    if not needles:
+        return {"passed": True, "score": 1.0, "reason": "no expected substring"}
+
+    def _fold(text: str) -> str:
+        return text if case_sensitive else text.lower()
+
+    haystack = _fold(output_text)
+    # ``missing`` keeps the original (un-folded) needles for the failure reason.
+    missing = [needle for needle in needles if _fold(needle) not in haystack]
+    if missing:
+        reason = f"missing tokens: {', '.join(missing)}"
+    else:
+        reason = "all tokens found"
+    passed = not missing
+    return {"passed": passed, "score": 1.0 if passed else 0.0, "reason": reason}
+
+
+def _evaluate_pass_rate_threshold(
+    case_results: list[dict[str, Any]], arguments: dict[str, Any]
+) -> dict[str, Any]:
+    """Pass when the share of passed cases reaches ``min_pass_rate`` (default 0.8)."""
+    total = len(case_results)
+    passed_count = len([case for case in case_results if case.get("passed")])
+    rate = passed_count / total if total else 0.0
+    configured = arguments.get("min_pass_rate", 0.8)
+    threshold = float(configured) if isinstance(configured, (int, float)) else 0.8
+    ok = total > 0 and rate >= threshold
+    comparator = "≥" if ok else "<"
+    rounded_rate = round(rate, 4)
+    return {
+        "passed": ok,
+        "score": rounded_rate,
+        "threshold": round(threshold, 4),
+        "observed": rounded_rate,
+        "summary": f"pass rate {rate:.2f} {comparator} threshold {threshold:.2f}",
+    }
+
+
+CASE_EVALUATORS: dict[str, CaseEvaluator] = {
+    "equals_expected": _evaluate_equals_expected,
+    "equals": _evaluate_equals_expected,
+    "contains": _evaluate_contains,
+}
+
+REPORT_EVALUATORS: dict[str, ReportEvaluator] = {
+    "pass_rate_threshold": _evaluate_pass_rate_threshold,
+}
+
+
+def run_case_evaluators(
+    *,
+    output: Any,
+    expected: Any,
+    evaluators: list[dict[str, Any]] | None,
+) -> dict[str, Any]:
+    """Grade a single real output against its per-case evaluators.
+
+    Returns ``{"passed": bool, "score": float, "evaluators": [...]}``. A case
+    passes when every applicable evaluator passes; the score is the mean of the
+    evaluator scores (and defaults to ``1.0`` when no evaluator applies).
+    Unsupported evaluators are recorded as ``skipped`` and ignored. Evaluator
+    ``arguments`` that are not a dict are treated as empty.
+    """
+    records: list[dict[str, Any]] = []
+    scores: list[float] = []
+    passed_all = True
+    for evaluator in evaluators or []:
+        if not isinstance(evaluator, dict):
+            continue
+        name = _normalize_name(evaluator.get("name"))
+        func = CASE_EVALUATORS.get(name)
+        if func is None:
+            records.append({"name": name or "evaluator", "skipped": True})
+            continue
+        arguments = evaluator.get("arguments")
+        # Evaluators call ``arguments.get``; a list/str here must not crash them.
+        if not isinstance(arguments, dict):
+            arguments = {}
+        outcome = func(output, expected, arguments)
+        outcome_passed = bool(outcome.get("passed"))
+        outcome_score = float(outcome.get("score", 1.0 if outcome_passed else 0.0))
+        records.append(
+            {
+                "name": name,
+                "passed": outcome_passed,
+                "score": round(outcome_score, 4),
+                "reason": str(outcome.get("reason") or ""),
+            }
+        )
+        scores.append(outcome_score)
+        passed_all = passed_all and outcome_passed
+    score = round(sum(scores) / len(scores), 4) if scores else 1.0
+    return {
+        "passed": passed_all,
+        "score": score,
+        "evaluators": records,
+    }
+
+
+def evaluate_run(
+    cases: list[dict[str, Any]],
+    outputs: list[Any],
+    *,
+    evalset_evaluators: list[dict[str, Any]] | None = None,
+    report_evaluators: list[dict[str, Any]] | None = None,
+    statuses: list[str] | None = None,
+) -> dict[str, Any]:
+    """Grade real per-case outputs and return run metrics.
+
+    ``outputs`` is aligned with ``cases`` (each entry a ``str`` or a mapping
+    with a ``text`` key). Evalset-level evaluators run for every case;
+    report-level evaluators run once over the resulting case outcomes. The
+    returned metrics mirror the synthetic shape (``case_results`` and
+    ``evaluator_results``) so the UI and report render real and synthetic runs
+    identically.
+    """
+    evalset_evaluators = [
+        item for item in (evalset_evaluators or []) if isinstance(item, dict)
+    ]
+    report_evaluators = [
+        item for item in (report_evaluators or []) if isinstance(item, dict)
+    ]
+
+    def _expected_for(case: dict[str, Any]) -> Any:
+        expected = case.get("expected_output")
+        if expected is None:
+            expected = case.get("expected")
+        return expected
+
+    def _status_for(idx: int) -> str:
+        if statuses and idx < len(statuses):
+            return str(statuses[idx] or "").strip().lower()
+        return "completed"
+
+    case_results: list[dict[str, Any]] = []
+    for idx, case in enumerate(cases):
+        metadata = case.get("metadata") or {}
+        expected = _expected_for(case)
+        output = outputs[idx] if idx < len(outputs) else None
+        case_evaluators = [
+            item for item in (case.get("evaluators") or []) if isinstance(item, dict)
+        ]
+        # Per-case evaluators override the evalset-level defaults; the evalset
+        # evaluators only apply to cases that do not declare their own.
+        applicable = case_evaluators or evalset_evaluators
+        outcome = run_case_evaluators(
+            output=output, expected=expected, evaluators=applicable
+        )
+        passed = bool(outcome.get("passed"))
+        score = float(outcome.get("score", 0.0))
+        if _status_for(idx) in {"failed", "error"}:
+            passed = False
+            score = 0.0
+        case_results.append(
+            {
+                "name": case.get("name"),
+                "passed": passed,
+                "status": "passed" if passed else "failed",
+                "score": round(score, 4),
+                "category": metadata.get("category"),
+                "difficulty": metadata.get("difficulty") or metadata.get("priority"),
+            }
+        )
+
+    evaluator_results: list[dict[str, Any]] = []
+    total = len(cases)
+    for evaluator in evalset_evaluators:
+        name = str(evaluator.get("name") or "evaluator")
+        passed_cases = 0
+        scores: list[float] = []
+        applicable_cases = 0
+        for idx, case in enumerate(cases):
+            # Skip cases that override the evalset default with their own
+            # per-case evaluators so the summary reflects only where this
+            # evalset evaluator actually applies.
+            if [
+                item
+                for item in (case.get("evaluators") or [])
+                if isinstance(item, dict)
+            ]:
+                continue
+            applicable_cases += 1
+            expected = _expected_for(case)
+            output = outputs[idx] if idx < len(outputs) else None
+            single = run_case_evaluators(
+                output=output, expected=expected, evaluators=[evaluator]
+            )
+            ok = bool(single.get("passed")) and _status_for(idx) not in {
+                "failed",
+                "error",
+            }
+            if ok:
+                passed_cases += 1
+            scores.append(float(single.get("score", 0.0)) if ok else 0.0)
+        mean_score = round(sum(scores) / len(scores), 4) if scores else None
+        evaluator_results.append(
+            {
+                "name": name,
+                "scope": "evalset",
+                "score": mean_score,
+                "passed": applicable_cases > 0 and passed_cases == applicable_cases,
+                "passed_cases": passed_cases,
+                "total_cases": applicable_cases,
+                "summary": f"{passed_cases}/{applicable_cases} cases passed {name}",
+            }
+        )
+
+    for evaluator in report_evaluators:
+        name = str(evaluator.get("name") or "evaluator")
+        func = REPORT_EVALUATORS.get(_normalize_name(evaluator.get("name")))
+        if func is None:
+            evaluator_results.append(
+                {
+                    "name": name,
+                    "scope": "report",
+                    "score": None,
+                    "passed": False,
+                    "summary": f"{name} not executed (unsupported)",
+                }
+            )
+            continue
+        outcome = func(case_results, evaluator.get("arguments") or {})
+        entry: dict[str, Any] = {
+            "name": name,
+            "scope": "report",
+            "score": outcome.get("score"),
+            "passed": bool(outcome.get("passed")),
+            "summary": str(outcome.get("summary") or ""),
+        }
+        for optional in ("threshold", "observed"):
+            if outcome.get(optional) is not None:
+                entry[optional] = outcome.get(optional)
+        evaluator_results.append(entry)
+
+    passed = sum(1 for case in case_results if case.get("passed"))
+    pass_rate = round(passed / total, 4) if total else 0.0
+    avg_score = (
+        round(sum(float(case["score"]) for case in case_results) / total, 4)
+        if total
+        else 0.0
+    )
+    return {
+        "pass_rate": pass_rate,
+        "total_cases": total,
+        "passed": passed,
+        "failed": total - passed,
+        "avg_score": avg_score,
+        "case_results": case_results,
+        "evaluator_results": evaluator_results,
+    }
+
+
+def evaluate_evalset(
+    evalset_spec: dict[str, Any],
+    outputs: list[Any],
+    *,
+    statuses: list[str] | None = None,
+) -> dict[str, Any]:
+    """Score collected outputs using the sections of an evalset spec.
+
+    Reads the ``cases``, ``evalset_evaluators`` and ``report_evaluators``
+    lists from ``evalset_spec``, drops every entry that is not a ``dict``
+    (a missing or falsy section counts as empty), and passes the three lists
+    to :func:`evaluate_run` together with ``outputs`` and ``statuses``.
+
+    Returns whatever :func:`evaluate_run` returns.
+    """
+
+    def _dict_entries(section: str) -> list[dict[str, Any]]:
+        # ``or []`` turns a missing/None section into an empty list.
+        entries = evalset_spec.get(section) or []
+        return [entry for entry in entries if isinstance(entry, dict)]
+
+    selected_cases = _dict_entries("cases")
+    per_case_defaults = _dict_entries("evalset_evaluators")
+    report_level = _dict_entries("report_evaluators")
+    # Grading itself lives in evaluate_run; this wrapper only unpacks the spec.
+    return evaluate_run(
+        selected_cases,
+        outputs,
+        evalset_evaluators=per_case_defaults,
+        report_evaluators=report_level,
+        statuses=statuses,
+    )
+
+
+CaseRunner = Callable[[dict[str, Any], int], Any]  # runner callback, invoked as run_case(case, index)
+
+
+def run_and_evaluate_evalset(
+    evalset_spec: dict[str, Any],
+    run_case: CaseRunner,
+    *,
+    statuses: list[str] | None = None,
+) -> dict[str, Any]:
+    """Run each case through ``run_case`` and grade what it produced.
+
+    Only ``dict`` entries of ``evalset_spec["cases"]`` are executed.
+    ``run_case`` is invoked once per case as ``run_case(case, index)``,
+    in order. Its return value is interpreted as follows:
+
+    * a ``dict`` containing an ``"output"`` or ``"status"`` key is an
+      envelope: ``"output"`` becomes the case output and ``"status"`` the
+      case status (either may be absent, giving ``None``);
+    * anything else is used as the case output as-is.
+
+    When the runner reports no status, the entry of ``statuses`` at the
+    same index is used if one exists. Statuses are converted with
+    ``str``; if at least one case has a status, cases without one get
+    ``""``, otherwise ``statuses=None`` is passed on.
+
+    Returns the result of :func:`evaluate_evalset` for the collected
+    outputs and statuses.
+    """
+    spec_cases = evalset_spec.get("cases") or []
+    runnable = [entry for entry in spec_cases if isinstance(entry, dict)]
+    gathered_outputs: list[Any] = []
+    gathered_statuses: list[str | None] = []
+    for position, case in enumerate(runnable):
+        produced = run_case(case, position)
+        # Only dicts carrying "output" or "status" are treated as envelopes.
+        is_envelope = isinstance(produced, dict) and (
+            "output" in produced or "status" in produced
+        )
+        gathered_outputs.append(produced.get("output") if is_envelope else produced)
+        case_status = produced.get("status") if is_envelope else None
+        # Fall back to the caller-supplied status when the runner gave none.
+        if case_status is None and statuses is not None and position < len(statuses):
+            case_status = statuses[position]
+        gathered_statuses.append(None if case_status is None else str(case_status))
+    if all(entry is None for entry in gathered_statuses):
+        # No status was reported at all, so none is forwarded.
+        final_statuses = None
+    else:
+        final_statuses = [entry or "" for entry in gathered_statuses]
+    return evaluate_evalset(evalset_spec, gathered_outputs, statuses=final_statuses)
diff --git a/datalayer_core/evals/report.py b/datalayer_core/evals/report.py
new file mode 100644
index 00000000..1f32e603
--- /dev/null
+++ b/datalayer_core/evals/report.py
@@ -0,0 +1,3329 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Real evaluation and reporting logic for Datalayer evals.
+
+This module hosts the evals report engine and the helper functions used by the
+CLI commands, the Python evals API, and the examples. The CLI command layer in
+``datalayer_core.cli.commands.evals`` imports from here so it can stay a thin
+Typer wrapper around this logic.
+"""
+
+from __future__ import annotations
+
+import csv
+import json
+import math
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Optional
+from urllib.parse import quote
+
+import typer
+from rich.console import Console
+from rich.table import Table
+
+from datalayer_core.client.client import DatalayerClient
+
+console = Console()  # module-level rich console instance
+
+WEB_APP_BASE_URL = "https://datalayer.ai"  # base of the web-app links built by the URL helpers in this module
+
+
+def _now_iso() -> str:  # current UTC time as ISO-8601 text, "+00:00" spelled as "Z"
+    return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
+
+
+def _timestamp_slug(raw_iso: str) -> str:
+    """Compact an ISO-8601 timestamp into a slug that always ends in ``Z``."""
+    # Stripping ":" first means a UTC offset can only appear as "+0000" here,
+    # so a single replacement covers both "+00:00" and "+0000" inputs.
+    compact = raw_iso.translate(str.maketrans("", "", "-:."))
+    compact = compact.replace("+0000", "Z")
+    return compact if compact.endswith("Z") else f"{compact}Z"
+
+
+def _status_style(status: str) -> str:
+    """Map a run status (case-insensitive) to a colour name; unknown -> ``white``."""
+    palette = (
+        ("green", ("completed", "success", "passed")),
+        ("yellow", ("running", "queued", "pending")),
+        ("red", ("failed", "error")),
+    )
+    key = status.lower()
+    return next((colour for colour, names in palette if key in names), "white")
+
+
+def _run_pass_rate(run: dict[str, Any]) -> float | None:
+    """Return ``run["metrics"]["pass_rate"]`` clamped to ``[0, 1]``, or ``None``.
+
+    ``None`` is returned when ``metrics`` is not a mapping or the value is not
+    a real number (booleans and NaN are rejected rather than rendered as rates).
+    """
+    metrics = run.get("metrics")
+    raw = metrics.get("pass_rate") if isinstance(metrics, dict) else None
+    if isinstance(raw, bool) or not isinstance(raw, (int, float)) or math.isnan(raw):
+        return None
+    return min(1.0, max(0.0, float(raw)))
+
+
+def _fmt_pct(raw: float | None) -> str:
+    """Format a fraction as a percentage with one decimal; ``None`` gives ``n/a``."""
+    percent = None if raw is None else raw * 100
+    return "n/a" if percent is None else f"{percent:.1f}%"
+
+
+def _parse_csv_values(raw: str | None) -> list[str]:
+    """Split a comma-separated string into unique, trimmed, non-empty items.
+
+    First-occurrence order is kept; ``None`` gives an empty list.
+    """
+    if raw is None:
+        return []
+    trimmed = (piece.strip() for piece in str(raw).split(","))
+    # dict keys keep insertion order, which de-duplicates while preserving
+    # the position of each item's first appearance.
+    unique = dict.fromkeys(piece for piece in trimmed if piece)
+    return list(unique)
+
+
+def _parse_evaluator_specs(raw_values: list[str], option_name: str) -> list[dict[str, Any]]:
+    """Decode JSON evaluator specs supplied as option values.
+
+    Every entry must be a JSON object with a non-blank string ``name``; an
+    ``arguments`` field, when present, must be an object and defaults to ``{}``.
+    Raises ``typer.BadParameter`` naming ``option_name`` and the 1-based entry.
+    """
+    specs: list[dict[str, Any]] = []
+    for position, text in enumerate(raw_values, start=1):
+        label = f"{option_name} entry #{position}"
+        try:
+            spec = json.loads(text)
+        except Exception as error:
+            raise typer.BadParameter(f"{label} is not valid JSON: {error}") from error
+        if not isinstance(spec, dict):
+            raise typer.BadParameter(f"{label} must be a JSON object")
+        spec_name = spec.get("name")
+        if not (isinstance(spec_name, str) and spec_name.strip()):
+            raise typer.BadParameter(f"{label} must include a non-empty string field 'name'")
+        # Give every accepted spec an "arguments" mapping.
+        if "arguments" not in spec:
+            spec = {**spec, "arguments": {}}
+        elif not isinstance(spec["arguments"], dict):
+            raise typer.BadParameter(f"{label} field 'arguments' must be an object when provided")
+        specs.append(spec)
+    return specs
+
+
+def _agentspec_details_url(agent_spec_id: str) -> str:
+    """Return the web-app settings URL for an agentspec id ("" when the id is blank)."""
+    spec_id = str(agent_spec_id or "").strip()
+    path = f"/settings/agentspecs/{quote(spec_id, safe='')}"
+    return f"{WEB_APP_BASE_URL}{path}" if spec_id else ""
+
+
+def _evalset_runs_url(evalset_id: str, run_environment: str) -> str:
+    """Build the experiments URL for an evalset ("" when the evalset id is blank)."""
+    evalset_part = quote(str(evalset_id or "").strip(), safe="")
+    env_part = quote(str(run_environment or "").strip(), safe="")
+    root = f"{WEB_APP_BASE_URL}/evals/experiments"
+    if not evalset_part:
+        return ""
+    if not env_part:
+        return f"{root}?evalset_id={evalset_part}"
+    return f"{root}/{env_part}/{evalset_part}"
+
+
+def _run_overlay_url(evalset_runs_url: str, run_id: str) -> str:
+    """Append a ``run`` query parameter to an evalset runs URL.
+
+    The run id is URL-quoted and joined with ``&`` when the base already
+    contains ``?``, with ``?`` otherwise. If either the base or the run id
+    is blank, the stripped base is returned unchanged.
+    """
+    page_url = str(evalset_runs_url or "").strip()
+    run_part = str(run_id or "").strip()
+    if page_url and run_part:
+        joiner = "?" if "?" not in page_url else "&"
+        return f"{page_url}{joiner}run={quote(run_part, safe='')}"
+    return page_url
+
+
+
+def _style_text(value: str, style: str | None, colorize: bool) -> str:
+    """Wrap ``value`` in ``[style]...[/style]`` markup when colorize is on and a style is set."""
+    wrap = bool(colorize and style)
+    return f"[{style}]{value}[/{style}]" if wrap else value
+
+
+def _compute_baseline_and_drift(runs: list[dict[str, Any]]) -> tuple[float | None, float | None, float | None]:
+    """Return ``(baseline, latest, drift)`` pass rates, or three ``None`` when no run has one."""
+    known = [rate for rate in map(_run_pass_rate, runs) if rate is not None]
+    if not known:
+        return None, None, None
+    # Baseline window: the first min(3, max(1, n // 2)) known pass rates.
+    window = min(3, max(1, len(known) // 2))
+    baseline = sum(known[:window]) / window
+    latest = known[-1]
+    return baseline, latest, latest - baseline
+
+
+def _analysis_scalar(name: str, metric: str, value: float | int | None) -> dict[str, Any]:
+    """Build a ``scalar`` analysis record holding one named metric value."""
+    record: dict[str, Any] = {"kind": "scalar"}
+    record["name"] = name
+    record["metric"] = metric
+    record["value"] = value
+    return record
+
+
+def _analysis_table(name: str, columns: list[str], rows: list[list[Any]]) -> dict[str, Any]:
+    """Build a ``table`` analysis record from column names and row values."""
+    record: dict[str, Any] = {"kind": "table"}
+    record["name"] = name
+    record["columns"] = columns
+    record["rows"] = rows
+    return record
+
+
+def _analysis_line(name: str, x: list[Any], y: list[float]) -> dict[str, Any]:
+    """Build a ``line`` analysis record.
+
+    ``x`` and ``y`` are stored as given (not copied or validated).
+    """
+    series = dict(kind="line", name=name, x=x, y=y)
+    return series
+
+
+def _build_experiment_report_analyses(
+    runs: list[dict[str, Any]],
+    consecutive_comparisons: list[dict[str, Any]],
+    *,
+    baseline: float | None,
+    latest: float | None,
+    drift: float | None,
+    latest_two_delta: float | None,
+    mean_pass: float | None,
+    stddev_pass: float | None,
+) -> list[dict[str, Any]]:
+    """Assemble the analysis records for a single experiment report.
+
+    The result holds, in order: six scalar records for the keyword
+    arguments, a "Run Pass Rates" table with one row per run, a
+    "Run Pass Rate Trend" line over the runs that have a pass rate, and a
+    "Consecutive Run Deltas" table built from ``consecutive_comparisons``.
+    """
+    # Headline numbers first, in a fixed order.
+    scalars = (
+        ("Latest Pass Rate", "latest_pass_rate", latest),
+        ("Baseline Pass Rate", "baseline_pass_rate", baseline),
+        ("Drift Delta", "drift_delta", drift),
+        ("Latest Two Delta", "latest_two_delta", latest_two_delta),
+        ("Mean Pass Rate", "mean_pass_rate", mean_pass),
+        ("Stddev Pass Rate", "stddev_pass_rate", stddev_pass),
+    )
+    analyses: list[dict[str, Any]] = [
+        _analysis_scalar(title, metric, value) for title, metric, value in scalars
+    ]
+
+    table_rows: list[list[Any]] = []
+    trend_labels: list[str] = []
+    trend_values: list[float] = []
+    for position, run in enumerate(runs):
+        identifier, state = str(run.get("id") or ""), str(run.get("status") or "")
+        rate = _run_pass_rate(run)
+        table_rows.append([position, identifier, state, rate])
+        if not isinstance(rate, (int, float)):
+            continue
+        # Runs without an id are labelled by their position on the trend line.
+        trend_labels.append(identifier or str(position))
+        trend_values.append(float(rate))
+    run_columns = ["index", "run_id", "status", "pass_rate"]
+    analyses.append(_analysis_table("Run Pass Rates", run_columns, table_rows))
+    analyses.append(_analysis_line("Run Pass Rate Trend", trend_labels, trend_values))
+
+    delta_fields = ("run_a_pass_rate", "run_b_pass_rate", "delta_pass_rate")
+    delta_rows: list[list[Any]] = [
+        [
+            str(item.get("run_a_id") or ""),
+            str(item.get("run_b_id") or ""),
+            *(item.get(field) for field in delta_fields),
+        ]
+        for item in consecutive_comparisons
+    ]
+    delta_columns = ["run_a_id", "run_b_id", *delta_fields]
+    analyses.append(_analysis_table("Consecutive Run Deltas", delta_columns, delta_rows))
+    return analyses
+
+
+def _build_evalset_report_analyses(experiments: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Assemble the analysis records for an evalset-level report.
+
+    Produces an experiment-count scalar, the mean of every numeric run
+    ``pass_rate`` across all experiments (``None`` when there is none), a
+    latest/baseline/drift table with one row per experiment, and a line of
+    latest pass rates for the experiments that have a numeric one.
+    """
+    summary_rows: list[list[Any]] = []
+    trend_names: list[str] = []
+    trend_rates: list[float] = []
+    pooled_rates: list[float] = []
+    for experiment in experiments:
+        label = str(experiment.get("name") or experiment.get("id") or "")
+        latest = experiment.get("latest_pass_rate")
+        summary_rows.append(
+            [label, latest, experiment.get("baseline_pass_rate"), experiment.get("drift_delta")]
+        )
+        if isinstance(latest, (int, float)):
+            trend_names.append(label)
+            trend_rates.append(float(latest))
+        # Pool the pass rates of every run that reports a numeric one.
+        pooled_rates.extend(
+            float(run["pass_rate"])
+            for run in experiment.get("runs") or []
+            if isinstance(run, dict) and isinstance(run.get("pass_rate"), (int, float))
+        )
+    overall_mean = sum(pooled_rates) / len(pooled_rates) if pooled_rates else None
+    columns = ["experiment", "latest_pass_rate", "baseline_pass_rate", "drift_delta"]
+    return [
+        _analysis_scalar("Experiment Count", "experiment_count", len(experiments)),
+        _analysis_scalar("Overall Mean Pass Rate", "overall_mean_pass_rate", overall_mean),
+        _analysis_table("Experiment Latest/Baseline", columns, summary_rows),
+        _analysis_line("Latest Pass Rate By Experiment", trend_names, trend_rates),
+    ]
+
+
+def _classify_legacy_failure(message: str) -> dict[str, Any]:
+    """Derive a structured failure cause from a plain error string.
+
+    The lower-cased message is matched against a fixed, ordered list of
+    known phrases to choose ``stage`` and ``type``; without a match they
+    stay ``"unknown"`` / ``"legacy_error"``. The stripped message is used as
+    both ``message`` and ``detail_excerpt``, and the first ``http(s)`` URL
+    found in it (minus trailing ``.,)``) is reported as ``execution_url``.
+    """
+    text = message.strip()
+    lowered = text.lower()
+
+    def _mentions(*phrases: str) -> bool:
+        return any(phrase in lowered for phrase in phrases)
+
+    # First matching rule wins, so the order below is significant.
+    if _mentions("all connection attempts failed", "connection refused", "request failed"):
+        stage, failure_type = "runtime_execution", "runtime_unreachable"
+    elif _mentions("returned http") or re.search(r"\bhttp\s*[45]\d\d\b", lowered):
+        stage, failure_type = "runtime_execution", "runtime_http_error"
+    elif _mentions("traceback"):
+        stage, failure_type = "runtime_execution", "runtime_traceback"
+    elif _mentions("no submitted code") or ("missing" in lowered and "code" in lowered):
+        stage, failure_type = "run_preparation", "missing_submitted_code"
+    elif _mentions("no interactive runtime url", "not configured"):
+        stage, failure_type = "runtime_resolution", "no_runtime_url"
+    else:
+        stage, failure_type = "unknown", "legacy_error"
+
+    cause: dict[str, Any] = {
+        "stage": stage,
+        "type": failure_type,
+        "message": text,
+        "detail_excerpt": text,
+    }
+    # Drop trailing ".", "," and ")" characters from the matched URL.
+    found = re.search(r"https?://[^\s]+", text)
+    execution_url = found.group(0).rstrip(".,)") if found else ""
+    if execution_url:
+        cause["execution_url"] = execution_url
+    return cause
+
+
+def _extract_failure_cause(run: dict[str, Any]) -> dict[str, Any] | None:
+    """Return the failure cause recorded on a run, or ``None`` if there is none.
+
+    A non-empty ``failure_cause`` dict under ``report`` (checked first) or
+    ``summary`` wins. Otherwise the first truthy value among
+    ``summary.failure_reason``, ``summary.execution_error`` and ``report.error``
+    is classified by ``_classify_legacy_failure`` when it is a non-blank string.
+    """
+    report = run.get("report") if isinstance(run.get("report"), dict) else {}
+    summary = run.get("summary") if isinstance(run.get("summary"), dict) else {}
+    for container in (report, summary):
+        structured = container.get("failure_cause")
+        if isinstance(structured, dict) and structured:
+            return structured
+    legacy = summary.get("failure_reason") or summary.get("execution_error")
+    legacy = legacy or report.get("error")
+    if isinstance(legacy, str) and legacy.strip():
+        return _classify_legacy_failure(legacy)
+    return None
+
+
+def _format_failure_cause(cause: dict[str, Any] | None) -> str:
+    """Format a failure cause as ``[type] message``.
+
+    Blank parts are left out; anything that is not a non-empty dict gives ``""``.
+    """
+    if not (isinstance(cause, dict) and cause):
+        return ""
+    kind = str(cause.get("type") or "").strip()
+    text = str(cause.get("message") or "").strip()
+    tag = f"[{kind}]" if kind else ""
+    # Joining only the non-empty pieces avoids a stray separator space.
+    return " ".join(piece for piece in (tag, text) if piece)
+
+
+def _failure_cause_detail_lines(cause: dict[str, Any]) -> list[str]:
+    """Expand a failure cause into markdown bullet lines.
+
+    Emits, in order and only for non-empty values: the message, the context
+    fields (stage, type, runtime pod/id, environment, execution URL), the
+    detail excerpt in a ``text`` code fence, and from ``diagnostics`` the
+    URLs, the id/URL lists and a table of connection attempts.
+    """
+
+    def _text(key: str) -> str:
+        return str(cause.get(key) or "").strip()
+
+    lines: list[str] = []
+    if _text("message"):
+        lines.append(f"- Message: {_text('message')}")
+    # Context fields are rendered as inline code, in this fixed order.
+    context_fields = {
+        "stage": "Stage",
+        "type": "Type",
+        "runtime_pod_name": "Runtime pod",
+        "runtime_id": "Runtime ID",
+        "environment_name": "Environment",
+        "execution_url": "Execution URL",
+    }
+    lines.extend(
+        f"- {label}: `{_text(key)}`" for key, label in context_fields.items() if _text(key)
+    )
+
+    excerpt = _text("detail_excerpt")
+    if excerpt:
+        # The excerpt goes into a fenced block, one list entry per text line.
+        lines += ["- Detail excerpt:", "", "```text", *(excerpt.splitlines() or [excerpt]), "```"]
+
+    diagnostics = cause.get("diagnostics")
+    if not (isinstance(diagnostics, dict) and diagnostics):
+        return lines
+    # Single-valued diagnostics first, then the list-valued ones.
+    for key, label in (("agent_runtimes_url", "Agent runtimes URL"), ("run_url", "Run URL")):
+        if diagnostics.get(key):
+            lines.append(f"- {label}: `{diagnostics.get(key)}`")
+    list_fields = (
+        ("route_ids", "Route IDs tried"),
+        ("discovered_agent_ids", "Discovered agent IDs"),
+        ("candidate_urls", "Candidate URLs"),
+    )
+    for key, label in list_fields:
+        entries = diagnostics.get(key)
+        if isinstance(entries, list) and entries:
+            lines.append(f"- {label}: " + ", ".join(f"`{entry}`" for entry in entries))
+
+    attempts = diagnostics.get("attempts")
+    if not (isinstance(attempts, list) and attempts):
+        return lines
+    lines.append("- Connection attempts:")
+    # Non-dict attempts are skipped; the table is omitted when none remain.
+    table_rows: list[list[str]] = [
+        [
+            str(attempt.get("url") or "-"),
+            "ok" if attempt.get("ok") else "failed",
+            "-" if attempt.get("status_code") is None else str(attempt.get("status_code")),
+            str(attempt.get("error") or "-"),
+        ]
+        for attempt in attempts
+        if isinstance(attempt, dict)
+    ]
+    if table_rows:
+        lines.append("")
+        headers = ["URL", "Result", "HTTP", "Error"]
+        alignment = ["left", "left", "right", "left"]
+        lines.extend(_markdown_table(headers, table_rows, alignment))
+    return lines
+
+
+def _run_detail_record(run: dict[str, Any]) -> dict[str, Any]:
+    """Build the detail record for one run.
+
+    Scalar fields go through ``str``; ``metrics``/``summary``/``report`` default to ``{}``.
+    """
+    def _dict_or_empty(key: str) -> dict[str, Any]:
+        return run.get(key) if isinstance(run.get(key), dict) else {}
+
+    usage = _extract_run_usage(run)
+    record: dict[str, Any] = {
+        key: str(run.get(key, "")) for key in ("id", "status", "created_at", "updated_at")
+    }
+    record["pass_rate"] = _run_pass_rate(run)
+    record.update({key: _dict_or_empty(key) for key in ("metrics", "summary", "report")})
+    record["usage"] = usage
+    record["failure_cause"] = _extract_failure_cause(run)
+    return record
+
+
+def _extract_experiment_agentspec(
+    experiment: dict[str, Any], runs: list[dict[str, Any]]
+) -> tuple[str, str]:
+    """Return ``(agent_spec_id, agent_spec_name)`` for an experiment.
+
+    Each value is the first non-blank string found when searching, in
+    order, the experiment ``config``, the experiment ``summary`` and then
+    every run ``summary``. Both the snake_case (``agent_spec_id``) and
+    camelCase (``agentSpecId``) spellings of each key are accepted.
+
+    Args:
+        experiment: Experiment payload; ``config`` and ``summary`` are only
+            consulted when they are dicts.
+        runs: Run payloads; entries that are not dicts, or whose ``summary``
+            is not a dict, are ignored.
+
+    Returns:
+        The stripped id and name. The name falls back to the id when no
+        name is found, and either value is ``""`` when nothing matches.
+    """
+    config = experiment.get("config") if isinstance(experiment.get("config"), dict) else {}
+    summary = experiment.get("summary") if isinstance(experiment.get("summary"), dict) else {}
+
+    run_summaries = [
+        run["summary"]
+        for run in runs
+        if isinstance(run, dict) and isinstance(run.get("summary"), dict)
+    ]
+
+    # Search scopes in priority order: experiment-level data first, then the
+    # run summaries in the order the runs were given.
+    scopes: list[dict[str, Any]] = [config, summary, *run_summaries]
+
+    def _lookup(snake_key: str, camel_key: str) -> str:
+        # Within a scope the snake_case key is tried before the camelCase
+        # one. _first_str applies the module's shared "first non-blank
+        # string" rule, replacing a hand-written loop per field.
+        candidates = [
+            scope.get(key)
+            for scope in scopes
+            for key in (snake_key, camel_key)
+        ]
+        return _first_str(*candidates)
+
+    agent_spec_id = _lookup("agent_spec_id", "agentSpecId")
+    agent_spec_name = _lookup("agent_spec_name", "agentSpecName")
+
+    # An agentspec without a name is reported under its id.
+    if not agent_spec_name:
+        agent_spec_name = agent_spec_id
+    return agent_spec_id, agent_spec_name
+
+
+def _first_str(*candidates: Any) -> str:
+    """Return the first candidate that is a non-blank string, stripped.
+
+    Non-string candidates are skipped; ``""`` is returned when none qualifies.
+    """
+    return next((c.strip() for c in candidates if isinstance(c, str) and c.strip()), "")
+
+
+def _normalize_tags(value: Any) -> list[str]:
+    """Normalize a tags value (list/tuple or comma-separated string) to a list.
+
+    Tags are stripped, blanks are dropped and duplicates removed while keeping
+    first-seen order. ``None`` items inside a list are skipped rather than being
+    stringified into a literal ``"None"`` tag. Any other input type gives ``[]``.
+    """
+    if isinstance(value, (list, tuple)):
+        raw_tags = [str(item) for item in value if item is not None]
+    elif isinstance(value, str):
+        raw_tags = value.split(",")
+    else:
+        return []
+    # dict.fromkeys de-duplicates while preserving insertion order.
+    return list(dict.fromkeys(tag.strip() for tag in raw_tags if tag.strip()))
+
+
+def _extract_experiment_agentspec_details(
+    experiment: dict[str, Any], runs: list[dict[str, Any]]
+) -> dict[str, Any]:
+    """Collect descriptive agentspec fields from experiment and run payloads.
+
+    Three kinds of scope are searched, in this order: the experiment
+    ``config``, the experiment ``summary`` and each run ``summary`` (only
+    when they are dicts). For every field the lookup order is:
+
+    1. inline spec objects, i.e. dicts stored under ``agent_spec``,
+       ``agentSpec`` or ``agentspec`` in any scope, read by plain field name;
+    2. prefixed keys on the scopes themselves: ``agent_spec_<field>`` and
+       ``agentSpec<Field>``.
+
+    Args:
+        experiment: Experiment payload; ``config`` and ``summary`` are used
+            only when they are dicts.
+        runs: Run payloads; entries that are not dicts, or whose ``summary``
+            is not a dict, are ignored.
+
+    Returns:
+        A dict with the string fields ``description``, ``version``,
+        ``model``, ``icon``, ``emoji`` and ``color`` (first non-blank string
+        found, else ``""``) and ``tags`` (first candidate that normalizes to
+        a non-empty list, else ``[]``).
+    """
+    config = experiment.get("config") if isinstance(experiment.get("config"), dict) else {}
+    summary = experiment.get("summary") if isinstance(experiment.get("summary"), dict) else {}
+    scopes: list[dict[str, Any]] = [config, summary]
+    scopes.extend(
+        run["summary"]
+        for run in runs
+        if isinstance(run, dict) and isinstance(run.get("summary"), dict)
+    )
+
+    # Inline spec objects, ordered by scope and then by key spelling.
+    embedded_specs: list[dict[str, Any]] = [
+        scope[key]
+        for scope in scopes
+        for key in ("agent_spec", "agentSpec", "agentspec")
+        if isinstance(scope.get(key), dict)
+    ]
+
+    def _candidates(field: str) -> list[Any]:
+        # Inline spec values come first so they win over prefixed scope keys;
+        # the consumers below pick the earliest usable candidate.
+        camel_key = f"agentSpec{field[0].upper()}{field[1:]}"
+        found: list[Any] = [spec.get(field) for spec in embedded_specs]
+        for scope in scopes:
+            found.append(scope.get(f"agent_spec_{field}"))
+            found.append(scope.get(camel_key))
+        return found
+
+    text_fields = ("description", "version", "model", "icon", "emoji", "color")
+    details: dict[str, Any] = {
+        field: _first_str(*_candidates(field)) for field in text_fields
+    }
+
+    # Each tags candidate goes through _normalize_tags; the first one that
+    # yields any tag is used.
+    details["tags"] = next(
+        (tags for tags in map(_normalize_tags, _candidates("tags")) if tags),
+        [],
+    )
+    return details
+
+
+def _merge_agentspec_details(target: dict[str, Any], details: dict[str, Any]) -> None:
+    """Fill blank agentspec fields (and empty ``tags``) of ``target`` from ``details``, in place."""
+    for field in ("description", "version", "model", "icon", "emoji", "color"):
+        incoming = details.get(field)
+        already_set = bool(str(target.get(field) or "").strip())
+        if isinstance(incoming, str) and incoming.strip() and not already_set:
+            target[field] = incoming.strip()
+    new_tags = details.get("tags")
+    current_tags = target.get("tags")
+    if isinstance(new_tags, list) and new_tags and not (isinstance(current_tags, list) and current_tags):
+        target["tags"] = list(new_tags)
+
+
+_AGENTSPEC_REGISTRY_LOOKUP: Any = None  # cached ``get_agent_spec`` attribute of the loaded module, if any
+_AGENTSPEC_REGISTRY_LOADED = False  # set to True by the first _load_agentspec_registry() call
+_AGENTSPEC_REGISTRY_MAP: dict[str, Any] | None = None  # cached catalog keyed by spec id
+
+
+def _load_agentspec_registry() -> tuple[dict[str, Any], Any]:
+    """Load the agent_runtimes agentspec catalog once and cache it.
+
+    Returns a tuple of ``(catalog_by_id, get_agent_spec)`` where the first is a
+    mapping built from ``list_agentspecs`` (the Python equivalent of the
+    in-app ``listAgentspecs``) keyed by both the full id and the id without a
+    trailing ``:version`` segment, and the second is a per-id lookup callable.
+    Either component may be empty/``None`` when ``agent_runtimes`` (or a given
+    API surface) is unavailable, so the report degrades gracefully.
+    """
+    global _AGENTSPEC_REGISTRY_LOADED, _AGENTSPEC_REGISTRY_LOOKUP, _AGENTSPEC_REGISTRY_MAP
+    if _AGENTSPEC_REGISTRY_LOADED:
+        return (_AGENTSPEC_REGISTRY_MAP or {}, _AGENTSPEC_REGISTRY_LOOKUP)
+    _AGENTSPEC_REGISTRY_LOADED = True  # set before loading, so a failed load is not retried on later calls
+
+    module = None
+    for module_name in ("agent_runtimes.specs.agents", "agent_runtimes"):  # first importable name wins
+        try:
+            module = __import__(module_name, fromlist=["*"])  # non-empty fromlist returns the named submodule
+            break
+        except Exception:  # best effort: any import failure just moves on to the next name
+            module = None
+    if module is None:
+        return ({}, None)  # the cached map stays None, so later calls also return ({}, None)
+
+    # Per-id lookup (handles version suffixes) is available on both the new
+    # and legacy package layouts.
+    _AGENTSPEC_REGISTRY_LOOKUP = getattr(module, "get_agent_spec", None)
+
+    # Build a full catalog map from the list accessor, mirroring the UI which
+    # calls listAgentspecs() and indexes the result by id.
+    catalog: dict[str, Any] = {}
+    list_fn = getattr(module, "list_agentspecs", None) or getattr(
+        module, "list_agent_specs", None
+    )
+    specs: list[Any] = []
+    if callable(list_fn):
+        try:
+            specs = list(list_fn() or [])
+        except Exception:  # a failing list accessor falls through to the dict registries below
+            specs = []
+    if not specs:
+        registry = getattr(module, "AGENTSPECS", None) or getattr(
+            module, "AGENT_SPECS", None
+        )
+        if isinstance(registry, dict):
+            specs = list(registry.values())
+    for spec in specs:
+        spec_id = str(getattr(spec, "id", "") or "").strip()  # read via attribute access; specs without ``id`` are skipped
+        if not spec_id:
+            continue
+        catalog[spec_id] = spec
+        base = spec_id.rpartition(":")[0]  # id without its last ":segment"; "" when the id has no ":"
+        if base and base not in catalog:  # never replace an existing entry with a base-id alias
+            catalog[base] = spec
+    _AGENTSPEC_REGISTRY_MAP = catalog
+    return (catalog, _AGENTSPEC_REGISTRY_LOOKUP)
+
+
+def _agentspec_registry_details(agent_spec_id: str) -> dict[str, Any]:
+    """Return display metadata for an agentspec id from the cached catalog.
+
+    Resolution order: exact id in the catalog, then the id without its last
+    ``:segment``, then the per-id lookup callable (errors from it are treated
+    as "not found"). The result holds the stripped string attributes ``name``,
+    ``description``, ``version``, ``model``, ``icon``, ``emoji`` and ``color``
+    (empty string when absent or not a string) plus normalised ``tags``. An
+    empty dict is returned for a blank id or when no spec is found.
+    """
+    spec_key = str(agent_spec_id or "").strip()
+    if not spec_key:
+        return {}
+
+    by_id, fetch_one = _load_agentspec_registry()
+    found = by_id.get(spec_key)
+    if found is None:
+        unversioned = spec_key.rpartition(":")[0]
+        found = by_id.get(unversioned) if unversioned else None
+    if found is None and callable(fetch_one):
+        try:
+            found = fetch_one(spec_key)
+        except Exception:
+            found = None
+    if found is None:
+        return {}
+
+    def _text(field: str) -> str:
+        # Non-string and whitespace-only attributes both collapse to "".
+        raw = getattr(found, field, None)
+        return raw.strip() if isinstance(raw, str) else ""
+
+    details: dict[str, Any] = {
+        field: _text(field)
+        for field in ("name", "description", "version", "model", "icon", "emoji", "color")
+    }
+    details["tags"] = _normalize_tags(getattr(found, "tags", None))
+    return details
+
+
+def _aggregate_evaluator_results(
+    samples: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Summarise ``evaluator_results`` samples per ``(name, scope)`` evaluator.
+
+    Non-dict samples are ignored. Each output entry reports how many samples
+    were seen (``runs``), how many had a truthy ``passed`` (``passed_runs``),
+    the mean of the numeric scores rounded to 4 places (``None`` when there are
+    none), and the score/passed/summary of the first sample seen for that
+    evaluator; callers are expected to pass samples newest first, so that first
+    sample is reported as the "latest". ``threshold``, ``observed``,
+    ``passed_cases`` and ``total_cases`` are copied from it when not ``None``.
+    Entries keep the order in which evaluators first appear.
+    """
+    # Dict insertion order doubles as first-seen order of the evaluators.
+    buckets: dict[tuple[str, str], dict[str, Any]] = {}
+    for sample in samples:
+        if not isinstance(sample, dict):
+            continue
+        name = str(sample.get("name") or "evaluator")
+        scope = str(sample.get("scope") or "evalset")
+        bucket = buckets.setdefault(
+            (name, scope),
+            {
+                "name": name,
+                "scope": scope,
+                "scores": [],
+                "passed_runs": 0,
+                "runs": 0,
+                "latest": sample,
+            },
+        )
+        bucket["runs"] += 1
+        raw_score = sample.get("score")
+        if isinstance(raw_score, (int, float)):
+            bucket["scores"].append(float(raw_score))
+        if sample.get("passed"):
+            bucket["passed_runs"] += 1
+
+    results: list[dict[str, Any]] = []
+    for bucket in buckets.values():
+        numeric_scores = bucket["scores"]
+        first_seen = bucket["latest"]
+        first_score = first_seen.get("score")
+
+        if numeric_scores:
+            mean_score = round(sum(numeric_scores) / len(numeric_scores), 4)
+        else:
+            mean_score = None
+        if isinstance(first_score, (int, float)):
+            latest_score = round(float(first_score), 4)
+        else:
+            latest_score = None
+
+        entry: dict[str, Any] = {
+            "name": bucket["name"],
+            "scope": bucket["scope"],
+            "runs": bucket["runs"],
+            "passed_runs": bucket["passed_runs"],
+            "mean_score": mean_score,
+            "latest_score": latest_score,
+            "latest_passed": bool(first_seen.get("passed")),
+            "summary": str(first_seen.get("summary") or ""),
+        }
+        for field in ("threshold", "observed", "passed_cases", "total_cases"):
+            extra = first_seen.get(field)
+            if extra is not None:
+                entry[field] = extra
+        results.append(entry)
+    return results
+
+
+def _report_data(
+    client: DatalayerClient,
+    evalset_id: str,
+    run_limit: int,
+    account_uid: Optional[str],
+) -> dict[str, Any]:
+    """Collect everything the evals report needs for one evalset.
+
+    Looks up the evalset record (``client.evals_list_evals`` with
+    ``q=evalset_id``, exact id match), lists its experiments, and for each
+    experiment fetches up to ``run_limit`` runs. Per experiment it records the
+    resolved agentspec, baseline/latest/drift pass rates, a comparison of the
+    first two fetched runs, deltas between consecutive runs, and the mean and
+    standard deviation of the run pass rates.
+
+    Args:
+        client: Client used for every ``evals_*`` call.
+        evalset_id: Id of the evalset to report on.
+        run_limit: Maximum number of runs requested per experiment.
+        account_uid: Forwarded unchanged to every client call.
+
+    Returns:
+        A dict with the keys ``evalset_id``, ``evalset_name``,
+        ``run_environment``, ``generated_at``, ``agentspecs``,
+        ``evalset_evaluators``, ``report_evaluators``, ``cases``,
+        ``report_analyses``, ``experiments`` and ``evaluator_results``.
+    """
+    evalset_record: dict[str, Any] = {}
+    # The listing is a search by ``q``; only an exact id match is accepted, and
+    # only within the first 200 results.
+    evalsets_payload = client.evals_list_evals(
+        q=evalset_id,
+        limit=200,
+        offset=0,
+        account_uid=account_uid,
+    )
+    for item in (evalsets_payload.get("evalsets") or []):
+        if isinstance(item, dict) and str(item.get("id") or "") == evalset_id:
+            evalset_record = item
+            break
+
+    # Only the first 200 experiments are requested; there is no pagination here.
+    experiments_payload = client.evals_list_experiments(
+        evalset_id=evalset_id,
+        limit=200,
+        offset=0,
+        account_uid=account_uid,
+    )
+    experiments = experiments_payload.get("experiments") or []
+
+    report: dict[str, Any] = {
+        "evalset_id": evalset_id,
+        "evalset_name": str(evalset_record.get("name") or ""),
+        "run_environment": str(evalset_record.get("run_environment") or ""),
+        "generated_at": _now_iso(),
+        "agentspecs": [],
+        "evalset_evaluators": [
+            item
+            for item in (evalset_record.get("evalset_evaluators") or [])
+            if isinstance(item, dict)
+        ],
+        "report_evaluators": [
+            item
+            for item in (evalset_record.get("report_evaluators") or [])
+            if isinstance(item, dict)
+        ],
+        "cases": [
+            case for case in (evalset_record.get("cases") or []) if isinstance(case, dict)
+        ],
+        "report_analyses": [],
+        "experiments": [],
+    }
+    # One record per agentspec id, in order of first appearance.
+    agentspec_by_id: dict[str, dict[str, Any]] = {}
+    # ``evaluator_results`` entries gathered from every run of every experiment.
+    evaluator_result_samples: list[dict[str, Any]] = []
+
+    # NOTE(review): each experiment is used as a dict without a type check.
+    for experiment in experiments:
+        experiment_id = str(experiment.get("id", ""))
+        experiment_name = str(experiment.get("name", experiment_id))
+
+        runs_payload = client.evals_list_runs(
+            experiment_id,
+            limit=run_limit,
+            offset=0,
+            account_uid=account_uid,
+        )
+        runs = runs_payload.get("runs") or []
+        # Gather the per-run evaluator results; runs without dict metrics are
+        # skipped for this purpose only.
+        for run in runs:
+            if not isinstance(run, dict):
+                continue
+            metrics = run.get("metrics")
+            if not isinstance(metrics, dict):
+                continue
+            samples = metrics.get("evaluator_results")
+            if isinstance(samples, list):
+                evaluator_result_samples.extend(
+                    sample for sample in samples if isinstance(sample, dict)
+                )
+        agent_spec_id, agent_spec_name = _extract_experiment_agentspec(experiment, runs)
+        registry_details = (
+            _agentspec_registry_details(agent_spec_id) if agent_spec_id else {}
+        )
+        # A name from the registry replaces the one extracted from the payloads.
+        registry_name = str(registry_details.get("name") or "").strip()
+        if registry_name:
+            agent_spec_name = registry_name
+        if agent_spec_id and agent_spec_id not in agentspec_by_id:
+            agentspec_by_id[agent_spec_id] = {
+                "id": agent_spec_id,
+                "name": agent_spec_name or agent_spec_id,
+                "experiments": 0,
+                "runs": 0,
+                "experiment_names": [],
+            }
+        if agent_spec_id:
+            record = agentspec_by_id[agent_spec_id]
+            record["experiments"] += 1
+            record["runs"] += len(runs)
+            if experiment_name:
+                names = record.setdefault("experiment_names", [])
+                if experiment_name not in names:
+                    names.append(experiment_name)
+            # The agent_runtimes catalog is authoritative; fall back to any
+            # metadata embedded in the experiment/run payloads for fields the
+            # catalog does not provide (or when the catalog is unavailable).
+            _merge_agentspec_details(record, registry_details)
+            _merge_agentspec_details(
+                record,
+                _extract_experiment_agentspec_details(experiment, runs),
+            )
+        # Falls back to the number of fetched runs when ``total`` is missing or 0.
+        total_runs = int(runs_payload.get("total") or len(runs))
+        baseline, latest, drift = _compute_baseline_and_drift(runs)
+
+        latest_two_delta: float | None = None
+        latest_two_run_ids: list[str] = []
+        latest_two_compare: dict[str, Any] | None = None
+        # runs[0] and runs[1] are treated as the two latest runs — assumes the
+        # API returns runs newest first; TODO confirm.
+        if len(runs) >= 2:
+            latest_two_run_ids = [str(runs[0].get("id", "")), str(runs[1].get("id", ""))]
+            compare_payload = client.evals_compare_runs(
+                latest_two_run_ids,
+                account_uid=account_uid,
+            )
+            compared_runs = compare_payload.get("runs") or []
+            compared_by_id = {
+                str(run.get("id", "")): run
+                for run in compared_runs
+                if isinstance(run, dict)
+            }
+            # Prefer the run objects from the compare payload; fall back to the
+            # listed runs when the compare payload does not contain an id.
+            run_a = compared_by_id.get(latest_two_run_ids[0], runs[0])
+            run_b = compared_by_id.get(latest_two_run_ids[1], runs[1])
+            pass_a = _run_pass_rate(run_a)
+            pass_b = _run_pass_rate(run_b)
+            if pass_a is not None and pass_b is not None:
+                latest_two_delta = pass_a - pass_b
+            latest_two_compare = {
+                "run_ids": latest_two_run_ids,
+                "run_a": _run_detail_record(run_a),
+                "run_b": _run_detail_record(run_b),
+                "delta_pass_rate": latest_two_delta,
+            }
+
+        # Delta between each fetched run and the next one in the list (A - B).
+        consecutive_comparisons: list[dict[str, Any]] = []
+        for idx in range(max(0, len(runs) - 1)):
+            run_a = runs[idx]
+            run_b = runs[idx + 1]
+            pass_a = _run_pass_rate(run_a)
+            pass_b = _run_pass_rate(run_b)
+            delta = None
+            if pass_a is not None and pass_b is not None:
+                delta = pass_a - pass_b
+            consecutive_comparisons.append(
+                {
+                    "run_a_id": str(run_a.get("id", "")),
+                    "run_b_id": str(run_b.get("id", "")),
+                    "run_a_status": str(run_a.get("status", "")),
+                    "run_b_status": str(run_b.get("status", "")),
+                    "run_a_pass_rate": pass_a,
+                    "run_b_pass_rate": pass_b,
+                    "delta_pass_rate": delta,
+                }
+            )
+
+        # NOTE(review): ``_run_pass_rate`` is evaluated twice per run here, and
+        # the numeric filter is applied again on the next line.
+        pass_rates = [
+            _run_pass_rate(run)
+            for run in runs
+            if isinstance(_run_pass_rate(run), (int, float))
+        ]
+        numeric_pass_rates = [float(value) for value in pass_rates if isinstance(value, (int, float))]
+        mean_pass = sum(numeric_pass_rates) / len(numeric_pass_rates) if numeric_pass_rates else None
+        stddev_pass = None
+        if numeric_pass_rates:
+            # Population standard deviation (divides by N, not N - 1).
+            variance = sum((value - mean_pass) ** 2 for value in numeric_pass_rates) / len(numeric_pass_rates)
+            stddev_pass = math.sqrt(variance)
+
+        report["experiments"].append(
+            {
+                "id": experiment_id,
+                "name": experiment_name,
+                "runs_total": total_runs,
+                "runs_fetched": len(runs),
+                "agent_spec_id": agent_spec_id,
+                "agent_spec_name": agent_spec_name,
+                "latest_pass_rate": latest,
+                "baseline_pass_rate": baseline,
+                "drift_delta": drift,
+                "latest_two_run_ids": latest_two_run_ids,
+                "latest_two_delta": latest_two_delta,
+                "latest_two_comparison": latest_two_compare,
+                "mean_pass_rate": mean_pass,
+                "stddev_pass_rate": stddev_pass,
+                "runs": [_run_detail_record(run) for run in runs],
+                "consecutive_comparisons": consecutive_comparisons,
+                "report_analyses": _build_experiment_report_analyses(
+                    runs,
+                    consecutive_comparisons,
+                    baseline=baseline,
+                    latest=latest,
+                    drift=drift,
+                    latest_two_delta=latest_two_delta,
+                    mean_pass=mean_pass,
+                    stddev_pass=stddev_pass,
+                ),
+            }
+        )
+    # Evalset-level fields that need every experiment to have been processed.
+    report["agentspecs"] = list(agentspec_by_id.values())
+    report["evaluator_results"] = _aggregate_evaluator_results(evaluator_result_samples)
+    report["report_analyses"] = _build_evalset_report_analyses(report["experiments"])
+    return report
+
+
+def _ascii_bar(
+    value: float | None,
+    width: int = 28,
+    *,
+    full_blocks: bool = True,
+    colorize: bool = False,
+) -> str:
+    """Draw ``value`` (clamped to 0..1) as a fixed-width horizontal bar.
+
+    Returns ``"-"`` for ``None``. The bar uses block glyphs, or ``#``/``.``
+    when ``full_blocks`` is false. With ``colorize`` the filled part is styled
+    green (>= 0.85), yellow (>= 0.75) or red, and the rest ``grey39``.
+    """
+    if value is None:
+        return "-"
+
+    bounded = max(0.0, min(1.0, float(value)))
+    glyph_on, glyph_off = ("█", "░") if full_blocks else ("#", ".")
+    cells_on = int(round(bounded * width))
+    lit = glyph_on * cells_on
+    unlit = glyph_off * (width - cells_on)
+
+    if colorize:
+        tone = "green" if bounded >= 0.85 else ("yellow" if bounded >= 0.75 else "red")
+        return _style_text(lit, tone, True) + _style_text(unlit, "grey39", True)
+    return lit + unlit
+
+
+def _fmt_pts(value: float) -> str:
+    """Format a 0..1 fraction as percentage points with one decimal place."""
+    points = value * 100
+    return format(points, ".1f")
+
+
+def _ascii_histogram(
+    values: list[float],
+    *,
+    bins: int = 8,
+    width: int = 22,
+    min_value: float | None = None,
+    max_value: float | None = None,
+    full_blocks: bool = True,
+    colorize: bool = False,
+    drift_palette: bool = False,
+) -> list[str]:
+    """Render ``values`` as a text histogram, one line per bin.
+
+    The range runs from ``min_value``/``max_value`` when numeric, otherwise
+    from the data; values outside it land in the first or last bin. At least
+    two bins are used and bar lengths are relative to the fullest bin. With
+    ``colorize`` and ``drift_palette`` a bin is red when it lies at or below
+    zero, green at or above zero and yellow when it straddles zero; without
+    ``drift_palette`` the colour depends on how full the bin is. Returns
+    ``["n/a"]`` for empty input.
+    """
+    if not values:
+        return ["n/a"]
+
+    lo = min_value if isinstance(min_value, (int, float)) else min(values)
+    hi = max_value if isinstance(max_value, (int, float)) else max(values)
+    if hi <= lo:
+        # Degenerate range: widen it slightly so the span is not zero.
+        hi = lo + 1e-9
+
+    bins = max(2, bins)
+    span = hi - lo
+    counts = [0] * bins
+    for sample in values:
+        slot = int(((sample - lo) / span) * bins)
+        counts[max(0, min(bins - 1, slot))] += 1
+
+    peak = max(counts) if counts else 1
+    glyph_on, glyph_off = ("█", "░") if full_blocks else ("#", ".")
+
+    rendered: list[str] = []
+    for slot, tally in enumerate(counts):
+        left = lo + (span * slot / bins)
+        right = lo + (span * (slot + 1) / bins)
+        share = (tally / peak) if peak > 0 else 0
+        cells_on = int(round(share * width))
+        lit = glyph_on * cells_on
+        unlit = glyph_off * (width - cells_on)
+        if not colorize:
+            bar = lit + unlit
+        else:
+            if drift_palette:
+                if right <= 0:
+                    tone = "red"
+                elif left >= 0:
+                    tone = "green"
+                else:
+                    tone = "yellow"
+            elif share >= 0.67:
+                tone = "cyan"
+            elif share >= 0.34:
+                tone = "blue"
+            else:
+                tone = "magenta"
+            bar = _style_text(lit, tone, True) + _style_text(unlit, "grey39", True)
+        rendered.append(
+            f"{_fmt_pts(left):>6} to {_fmt_pts(right):>6} pts |{bar}| {tally}"
+        )
+    return rendered
+
+
+def _fmt_delta(value: float | None, *, colorize: bool = False) -> str:
+    """Format a pass-rate delta as signed percentage points with a marker.
+
+    ``None`` becomes ``"n/a"``. Positive deltas get a green marker, negative a
+    red one and anything else a white marker with yellow text; the text is
+    passed through ``_style_text`` with the ``colorize`` flag.
+    """
+    if value is None:
+        return "n/a"
+    text = f"{value * 100:+.1f} pts"
+    if value > 0:
+        marker, tone = "🟢", "green"
+    elif value < 0:
+        marker, tone = "🔴", "red"
+    else:
+        marker, tone = "⚪", "yellow"
+    return f"{marker} {_style_text(text, tone, colorize)}"
+
+
+def _sparkline(values: list[float], *, colorize: bool = False) -> str:
+    """Render ``values`` as a one-line sparkline of eight-level block glyphs.
+
+    Values are scaled between their minimum and maximum; a flat series is drawn
+    with the second-highest glyph throughout. Empty input gives ``"n/a"``. With
+    ``colorize`` the whole line is styled by the last value: green (>= 0.85),
+    yellow (>= 0.75) or red.
+    """
+    if not values:
+        return "n/a"
+
+    ticks = "▁▂▃▄▅▆▇█"
+    top = len(ticks) - 1
+    lo = min(values)
+    hi = max(values)
+    if hi <= lo:
+        base = ticks[-2] * len(values)
+    else:
+        span = hi - lo
+        base = "".join(
+            ticks[max(0, min(top, int(round(((sample - lo) / span) * top))))]
+            for sample in values
+        )
+
+    if not colorize:
+        return base
+    last = values[-1]
+    tone = "green" if last >= 0.85 else ("yellow" if last >= 0.75 else "red")
+    return _style_text(base, tone, True)
+
+
+def _clamp_unit(value: float) -> float:
+    """Clamp ``value`` into the closed interval 0.0..1.0."""
+    capped = min(1.0, value)
+    return max(0.0, capped)
+
+
+def _heat_char(value: float) -> str:
+    """Map ``value`` (clamped to 0..1) onto one of four shade glyphs."""
+    shades = ("░", "▒", "▓", "█")
+    level = int(round(_clamp_unit(value) * (len(shades) - 1)))
+    return shades[level]
+
+
+def _fit_label(text: str, width: int = 20) -> str:
+    """Fit ``text`` to ``width`` characters.
+
+    Shorter text is padded on the right with spaces. Longer text is cut and
+    ends in ``...``, except when ``width`` is 3 or less, where it is simply
+    truncated. Falsy ``text`` is treated as an empty string.
+    """
+    label = str(text or "")
+    if len(label) > width:
+        return label[:width] if width <= 3 else label[: width - 3] + "..."
+    return label.ljust(width)
+
+
+def _ascii_passrate_heatmap(
+    experiments: list[dict[str, Any]],
+    *,
+    max_columns: int = 12,
+    colorize: bool = False,
+) -> list[str]:
+    """Render a text heatmap of run pass rates, one row per experiment.
+
+    Column ``rNN`` shows the NN-th fetched run of the experiment (``r01`` is
+    the first run in its ``runs`` list). A cell is a shade glyph for a numeric
+    ``pass_rate`` and ``·`` when there is no run or no numeric pass rate. Each
+    cell is padded to the width of its column label so the cells line up under
+    the header.
+
+    Args:
+        experiments: Experiment records carrying ``name`` and ``runs``.
+        max_columns: Number of run columns to draw (at least 1).
+        colorize: Style cells green (>= 0.85), yellow (>= 0.75) or red.
+
+    Returns:
+        Header, rule, one line per experiment and a legend; ``["n/a"]`` when
+        ``experiments`` is empty.
+    """
+    if not experiments:
+        return ["n/a"]
+
+    max_columns = max(1, max_columns)
+    column_labels = [f"r{idx:02d}" for idx in range(1, max_columns + 1)]
+    header = f"{'Experiment':<20} | " + " ".join(column_labels)
+    lines = [header, "-" * len(header)]
+
+    for experiment in experiments:
+        runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
+        cells: list[str] = []
+        for idx, column_label in enumerate(column_labels):
+            value: float | None = None
+            if idx < len(runs):
+                raw = runs[idx].get("pass_rate")
+                if isinstance(raw, (int, float)):
+                    value = float(raw)
+
+            if value is None:
+                token = "·"
+            else:
+                token = _heat_char(value)
+                if colorize:
+                    if value >= 0.85:
+                        token = _style_text(token, "green", True)
+                    elif value >= 0.75:
+                        token = _style_text(token, "yellow", True)
+                    else:
+                        token = _style_text(token, "red", True)
+
+            # Each cell shows exactly one visible glyph. Pad after styling so
+            # that whatever _style_text adds is not counted as column width.
+            cells.append(token + " " * (len(column_label) - 1))
+
+        label = _fit_label(str(experiment.get("name", "")), width=20)
+        # Drop the padding of the last cell to avoid trailing whitespace.
+        lines.append((f"{label} | " + " ".join(cells)).rstrip())
+
+    lines.append("Legend: low='░' .. high='█' (r01=latest fetched run, '·'=no run)")
+    return lines
+
+
+def _ascii_drift_heatmap(
+    experiments: list[dict[str, Any]],
+    *,
+    max_columns: int = 12,
+    colorize: bool = False,
+) -> list[str]:
+    """Render a text heatmap of consecutive pass-rate deltas per experiment.
+
+    Column ``dNN`` shows the NN-th entry of the experiment's
+    ``consecutive_comparisons``. A cell is a sign (``+`` for deltas >= 0,
+    ``-`` otherwise) followed by a shade glyph for the absolute delta, or
+    ``··`` when there is no comparison or no numeric delta. Each cell is padded
+    to the width of its column label so the cells line up under the header.
+
+    Args:
+        experiments: Experiment records carrying ``name`` and
+            ``consecutive_comparisons``.
+        max_columns: Number of delta columns to draw (at least 1).
+        colorize: Style cells green (delta > 0), red (< 0) or yellow (== 0).
+
+    Returns:
+        Header, rule, one line per experiment and a legend; ``["n/a"]`` when
+        ``experiments`` is empty.
+    """
+    if not experiments:
+        return ["n/a"]
+
+    max_columns = max(1, max_columns)
+    column_labels = [f"d{idx:02d}" for idx in range(1, max_columns + 1)]
+    header = f"{'Experiment':<20} | " + " ".join(column_labels)
+    lines = [header, "-" * len(header)]
+
+    for experiment in experiments:
+        comparisons = [
+            item for item in (experiment.get("consecutive_comparisons") or [])
+            if isinstance(item, dict)
+        ]
+        cells: list[str] = []
+        for idx, column_label in enumerate(column_labels):
+            delta: float | None = None
+            if idx < len(comparisons):
+                raw = comparisons[idx].get("delta_pass_rate")
+                if isinstance(raw, (int, float)):
+                    delta = float(raw)
+
+            if delta is None:
+                token = "··"
+            else:
+                sign = "+" if delta >= 0 else "-"
+                token = f"{sign}{_heat_char(abs(delta))}"
+                if colorize:
+                    if delta > 0:
+                        token = _style_text(token, "green", True)
+                    elif delta < 0:
+                        token = _style_text(token, "red", True)
+                    else:
+                        token = _style_text(token, "yellow", True)
+
+            # Each cell shows exactly two visible glyphs. Pad after styling so
+            # that whatever _style_text adds is not counted as column width.
+            cells.append(token + " " * max(0, len(column_label) - 2))
+
+        label = _fit_label(str(experiment.get("name", "")), width=20)
+        # Drop the padding of the last cell to avoid trailing whitespace.
+        lines.append((f"{label} | " + " ".join(cells)).rstrip())
+
+    lines.append("Legend: dNN are consecutive deltas (A-B), sign shows direction, magnitude uses '░'..'█', '··'=no comparison")
+    return lines
+
+
+def _pairwise_latest_deltas(experiments: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Compare the latest pass rate of every pair of experiments.
+
+    Experiments without a numeric ``latest_pass_rate`` are left out. Each pair
+    record holds both experiments' ids, names and agentspec id/name, the two
+    rates, ``delta`` (left minus right) and ``group``: ``within_agentspec``
+    when both share the same non-empty agentspec id, else ``cross_agentspec``.
+    Pairs are returned sorted by absolute delta, largest first.
+    """
+
+    def _spec_of(record: dict[str, Any]) -> tuple[str, str]:
+        # The agentspec name falls back to the id, then to an empty string.
+        spec_id = str(record.get("agent_spec_id") or "")
+        return spec_id, str(record.get("agent_spec_name") or spec_id or "")
+
+    found: list[dict[str, Any]] = []
+    count = len(experiments)
+    for left_pos in range(count):
+        left = experiments[left_pos]
+        left_latest = left.get("latest_pass_rate")
+        if not isinstance(left_latest, (int, float)):
+            continue
+        left_spec_id, left_spec_name = _spec_of(left)
+        for right_pos in range(left_pos + 1, count):
+            right = experiments[right_pos]
+            right_latest = right.get("latest_pass_rate")
+            if not isinstance(right_latest, (int, float)):
+                continue
+            right_spec_id, right_spec_name = _spec_of(right)
+            same_spec = bool(
+                left_spec_id and right_spec_id and left_spec_id == right_spec_id
+            )
+            found.append(
+                {
+                    "left_id": str(left.get("id", "")),
+                    "left": str(left.get("name", "")),
+                    "left_agent_spec_id": left_spec_id,
+                    "left_agent_spec_name": left_spec_name,
+                    "right_id": str(right.get("id", "")),
+                    "right": str(right.get("name", "")),
+                    "right_agent_spec_id": right_spec_id,
+                    "right_agent_spec_name": right_spec_name,
+                    "left_latest": float(left_latest),
+                    "right_latest": float(right_latest),
+                    "delta": float(left_latest) - float(right_latest),
+                    "group": "within_agentspec" if same_spec else "cross_agentspec",
+                }
+            )
+    return sorted(found, key=lambda pair: abs(pair["delta"]), reverse=True)
+
+
+def _markdown_table(headers: list[str], rows: list[list[str]], aligns: list[str]) -> list[str]:
+    """Render a padded Markdown pipe table.
+
+    The column count is taken from ``headers``. Rows are normalised to that
+    count: missing cells become empty strings and surplus cells are dropped.
+    Cells are converted with ``str`` so non-string values do not break the
+    width calculation. Columns without an entry in ``aligns`` are left-aligned,
+    and extra ``aligns`` entries are ignored, so the separator row always has
+    one part per header.
+
+    Args:
+        headers: Column titles.
+        rows: Table body, one list of cells per row.
+        aligns: ``"right"`` for right-aligned columns; anything else is left.
+
+    Returns:
+        The header line, the alignment separator line, then one line per row.
+    """
+    column_count = len(headers)
+    column_aligns = [
+        aligns[idx] if idx < len(aligns) else "left" for idx in range(column_count)
+    ]
+    table_rows = [
+        [str(cell) for cell in row[:column_count]] + [""] * (column_count - len(row))
+        for row in rows
+    ]
+
+    widths = [len(header) for header in headers]
+    for row in table_rows:
+        for idx, cell in enumerate(row):
+            widths[idx] = max(widths[idx], len(cell))
+
+    def _pad(cell: str, width: int, align: str) -> str:
+        if align == "right":
+            return cell.rjust(width)
+        return cell.ljust(width)
+
+    header_line = "| " + " | ".join(headers[idx].ljust(widths[idx]) for idx in range(column_count)) + " |"
+
+    sep_parts: list[str] = []
+    for idx, align in enumerate(column_aligns):
+        # A separator part is at least three characters wide.
+        width = max(3, widths[idx])
+        if align == "right":
+            sep_parts.append("-" * (width - 1) + ":")
+        else:
+            sep_parts.append(":" + "-" * (width - 1))
+    sep_line = "| " + " | ".join(sep_parts) + " |"
+
+    body_lines = [
+        "| " + " | ".join(_pad(row[idx], widths[idx], column_aligns[idx]) for idx in range(column_count)) + " |"
+        for row in table_rows
+    ]
+    return [header_line, sep_line, *body_lines]
+
+
+def _compact_json(value: Any, max_len: int = 140) -> str:
+    """Render ``value`` as a single-line string of at most ``max_len`` chars.
+
+    ``None`` becomes ``"-"``. Strings are used as they are; other values are
+    dumped as compact JSON with sorted keys, falling back to ``str(value)``
+    when that fails. Whitespace runs are collapsed to single spaces and longer
+    text is cut and ends in ``...``.
+    """
+    if value is None:
+        return "-"
+
+    if isinstance(value, str):
+        rendered = value
+    else:
+        try:
+            rendered = json.dumps(
+                value,
+                ensure_ascii=False,
+                separators=(",", ":"),
+                sort_keys=True,
+            )
+        except Exception:
+            rendered = str(value)
+
+    collapsed = " ".join(rendered.split())
+    return collapsed if len(collapsed) <= max_len else collapsed[: max_len - 3] + "..."
+
+
+def _aggregate_case_outcomes(
+    experiments: list[dict[str, Any]],
+) -> tuple[dict[str, dict[str, Any]], list[str]]:
+    """Tally per-case pass and score statistics over all fetched runs.
+
+    Reads ``metrics["case_results"]`` from each run of each experiment and
+    returns ``(case_stats, agentspec_names)``. ``case_stats`` maps a case name
+    (in order of first appearance) to ``{"runs", "passed", "score_sum",
+    "score_count", "by_spec"}``, where ``by_spec`` maps an agentspec label to
+    ``{"runs", "passed"}``. ``agentspec_names`` lists the distinct labels of
+    all experiments; the label is the agentspec name, else its id, else ``-``.
+    """
+    # Insertion order of this dict is the first-seen order of the case names.
+    per_case: dict[str, dict[str, Any]] = {}
+    labels: list[str] = []
+    for experiment in experiments:
+        label = str(
+            experiment.get("agent_spec_name")
+            or experiment.get("agent_spec_id")
+            or "-"
+        )
+        if label not in labels:
+            labels.append(label)
+
+        for run in experiment.get("runs") or []:
+            if not isinstance(run, dict):
+                continue
+            metrics = run.get("metrics")
+            if not isinstance(metrics, dict):
+                metrics = {}
+            results = metrics.get("case_results")
+            if not isinstance(results, list):
+                continue
+
+            for result in results:
+                if not isinstance(result, dict):
+                    continue
+                case_name = str(result.get("name") or "-")
+                tally = per_case.get(case_name)
+                if tally is None:
+                    tally = {
+                        "runs": 0,
+                        "passed": 0,
+                        "score_sum": 0.0,
+                        "score_count": 0,
+                        "by_spec": {},
+                    }
+                    per_case[case_name] = tally
+                gained = 1 if result.get("passed") else 0
+                tally["runs"] += 1
+                tally["passed"] += gained
+                raw_score = result.get("score")
+                if isinstance(raw_score, (int, float)):
+                    tally["score_sum"] += float(raw_score)
+                    tally["score_count"] += 1
+                per_spec = tally["by_spec"].setdefault(
+                    label, {"runs": 0, "passed": 0}
+                )
+                per_spec["runs"] += 1
+                per_spec["passed"] += gained
+    return per_case, labels
+
+
+def _per_case_outcome_lines(
+    experiments: list[dict[str, Any]],
+    *,
+    colorize: bool = False,
+) -> list[str]:
+    """Build the Markdown lines of the "Per-Case Outcomes" section.
+
+    The section has an overall table (runs, passed, pass rate and average score
+    per case) and, when more than one agentspec label is present, a second
+    table with the pass rate of each case per agentspec. When no case results
+    exist, a short explanatory note is emitted instead. ``colorize`` is
+    accepted but not used by this function.
+    """
+    out: list[str] = ["## Per-Case Outcomes", ""]
+    per_case, spec_labels = _aggregate_case_outcomes(experiments)
+    if not per_case:
+        out.append(
+            "No per-case results were recorded on the fetched runs. Runs that "
+            "store `case_results` in their metrics populate this section."
+        )
+        out.append("")
+        return out
+
+    out.append(
+        "Pass rate for each case across every fetched run (all experiments and "
+        "agentspecs combined). This reveals which cases are reliable and which "
+        "ones regress, instead of only the aggregate run pass rate."
+    )
+    out.append("")
+
+    summary_rows: list[list[str]] = []
+    for case_name, tally in per_case.items():
+        total = int(tally["runs"])
+        ok = int(tally["passed"])
+        rate = (ok / total) if total else None
+        if tally["score_count"]:
+            score_text = f"{tally['score_sum'] / tally['score_count']:.3f}"
+        else:
+            score_text = "n/a"
+        summary_rows.append(
+            [case_name, str(total), f"{ok}/{total}", _fmt_pct(rate), score_text]
+        )
+    out += _markdown_table(
+        ["Case", "Runs", "Passed", "Pass Rate", "Avg Score"],
+        summary_rows,
+        ["left", "right", "right", "right", "right"],
+    )
+    out.append("")
+
+    # The per-agentspec breakdown only adds information with several labels.
+    if len(spec_labels) > 1:
+        out.append("### Per-Case Pass Rate By Agentspec")
+        out.append("")
+        out.append(
+            "Compare how each case performs across agentspecs (for example "
+            "codemode vs no-codemode)."
+        )
+        out.append("")
+        matrix: list[list[str]] = []
+        for case_name, tally in per_case.items():
+            cells = [case_name]
+            for label in spec_labels:
+                bucket = tally["by_spec"].get(label)
+                if bucket and bucket.get("runs"):
+                    cells.append(_fmt_pct(bucket["passed"] / bucket["runs"]))
+                else:
+                    cells.append("n/a")
+            matrix.append(cells)
+        out += _markdown_table(
+            ["Case", *spec_labels],
+            matrix,
+            ["left"] + ["right"] * len(spec_labels),
+        )
+        out.append("")
+
+    return out
+
+
+def _report_analyses_lines(report: dict[str, Any]) -> list[str]:
+    """Build the appendix lines that dump ``report_analyses`` as a JSON block.
+
+    Only dict entries of ``report["report_analyses"]`` are included; they are
+    serialised with sorted keys and two-space indentation inside a fenced
+    ``json`` code block.
+    """
+    payload = [
+        entry for entry in (report.get("report_analyses") or []) if isinstance(entry, dict)
+    ]
+    return [
+        "## Appendix: Structured Report Analyses",
+        "",
+        "The JSON block below is rendered directly from the top-level "
+        "`report_analyses` payload.",
+        "",
+        "```json",
+        json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True),
+        "```",
+        "",
+    ]
+
+
+def _report_markdown(report: dict[str, Any], run_limit: int, *, colorize: bool = False) -> str:  # Render the report dict as one Markdown string: summary body first, appendices last.
+    evalset_id = str(report.get("evalset_id", ""))
+    run_environment = str(report.get("run_environment") or "")
+    generated_at = str(report.get("generated_at", ""))
+    experiments = [item for item in (report.get("experiments") or []) if isinstance(item, dict)]  # non-dict entries are dropped
+    agentspecs = [item for item in (report.get("agentspecs") or []) if isinstance(item, dict)]
+    cases = [item for item in (report.get("cases") or []) if isinstance(item, dict)]
+    case_by_name: dict[str, dict[str, Any]] = {}
+    representative_case_name: str | None = None
+    for case in cases:  # index cases by name; unnamed cases are skipped
+        name = str(case.get("name") or "")
+        if not name:
+            continue
+        if representative_case_name is None:  # the first named case becomes the representative
+            representative_case_name = name
+        if name not in case_by_name:  # on duplicate names the first occurrence wins
+            case_by_name[name] = case
+    evalset_runs_url = _evalset_runs_url(evalset_id, run_environment)
+
+    lines: list[str] = []
+    # Verbose, drill-down content is collected here and emitted under a single
+    # "# Appendices" heading at the end so the body stays scannable.
+    appendix_lines: list[str] = []
+    lines.append(f"# Evals Report: {evalset_id}")
+    lines.append("")
+    lines.append(f"- Generated at: {generated_at}")
+    lines.append(f"- Experiments: {len(experiments)}")
+    lines.append(f"- Agentspecs: {len(agentspecs)}")
+    lines.append(f"- Cases: {len(cases)}")
+    lines.append(
+        f"- Evalset evaluators: {len([item for item in (report.get('evalset_evaluators') or []) if isinstance(item, dict)])}"
+    )
+    lines.append(
+        f"- Report evaluators: {len([item for item in (report.get('report_evaluators') or []) if isinstance(item, dict)])}"
+    )
+    lines.append(f"- Run window per experiment: {run_limit}")
+    if evalset_runs_url:
+        lines.append(f"- Evalset run details: [Open in Datalayer]({evalset_runs_url})")
+    lines.append("")
+    lines.append(
+        "> The body summarises results (evaluators, per-case outcomes, drift, and "
+        "comparisons). Full configuration, cases, heatmaps, per-experiment timelines, "
+        "and per-run details are in the appendices at the end."
+    )
+    lines.append("")
+
+    lines.append("## Agentspec Coverage")
+    lines.append("")
+    if agentspecs:
+        agentspec_rows: list[list[str]] = []
+        for item in agentspecs:
+            agent_spec_id = str(item.get("id") or "")
+            agent_spec_link = _agentspec_details_url(agent_spec_id)
+            agentspec_rows.append(
+                [
+                    agent_spec_id,
+                    str(item.get("name") or item.get("id") or ""),
+                    str(item.get("model") or "-"),
+                    str(item.get("version") or "-"),
+                    str(int(item.get("experiments") or 0)),
+                    str(int(item.get("runs") or 0)),
+                    f"[Open]({agent_spec_link})" if agent_spec_link else "-",
+                ]
+            )
+        lines.extend(
+            _markdown_table(
+                [
+                    "Agentspec ID",
+                    "Agentspec",
+                    "Model",
+                    "Version",
+                    "Experiments",
+                    "Runs",
+                    "Details",
+                ],
+                agentspec_rows,
+                ["left", "left", "left", "left", "right", "right", "left"],
+            )
+        )
+        lines.append("")
+        appendix_lines.append("## Appendix: Agentspec Details")  # per-agentspec detail goes to the deferred appendix buffer
+        appendix_lines.append("")
+        for item in agentspecs:
+            agent_spec_id = str(item.get("id") or "")
+            agent_spec_link = _agentspec_details_url(agent_spec_id)
+            display_name = str(item.get("name") or agent_spec_id or "-")
+            emoji = str(item.get("emoji") or "").strip()
+            heading = f"{emoji} {display_name}".strip()  # strip() removes the leading space when there is no emoji
+            appendix_lines.append(f"### {heading}")
+            appendix_lines.append("")
+            appendix_lines.append(f"- ID: `{agent_spec_id or '-'}`")
+            description = str(item.get("description") or "").strip()
+            if description:  # optional fields below are only listed when non-empty
+                appendix_lines.append(f"- Description: {description}")
+            model = str(item.get("model") or "").strip()
+            if model:
+                appendix_lines.append(f"- Model: {model}")
+            version = str(item.get("version") or "").strip()
+            if version:
+                appendix_lines.append(f"- Version: {version}")
+            color = str(item.get("color") or "").strip()
+            if color:
+                appendix_lines.append(f"- Color: {color}")
+            tags = item.get("tags")
+            if isinstance(tags, list) and tags:
+                appendix_lines.append(f"- Tags: {', '.join(str(tag) for tag in tags)}")
+            experiment_names = item.get("experiment_names")
+            if isinstance(experiment_names, list) and experiment_names:
+                appendix_lines.append(
+                    f"- Experiments ({len(experiment_names)}): "
+                    + ", ".join(str(name) for name in experiment_names)
+                )
+            appendix_lines.append(
+                f"- Runs analysed: {int(item.get('runs') or 0)}"
+            )
+            if agent_spec_link:
+                appendix_lines.append(f"- Details: [Open in Datalayer]({agent_spec_link})")
+            appendix_lines.append("")
+    else:
+        lines.append("No agentspec metadata found in experiment/run payloads.")
+    lines.append("")  # NOTE(review): the table branch already appended a blank line, so it ends with two - confirm intended
+
+    evalset_evaluators = [
+        item for item in (report.get("evalset_evaluators") or []) if isinstance(item, dict)
+    ]
+    report_evaluators = [
+        item for item in (report.get("report_evaluators") or []) if isinstance(item, dict)
+    ]
+    evaluator_results = [
+        item for item in (report.get("evaluator_results") or []) if isinstance(item, dict)
+    ]
+    lines.append("## Evaluator Results")
+    lines.append("")
+    lines.append(
+        "Evalset-scoped evaluators run for each case; report-scoped evaluators run once "
+        "over the aggregated report. Scores are aggregated across all runs in the run "
+        "window (full evaluator configuration is in the appendix)."
+    )
+    lines.append("")
+    if evaluator_results:
+        lines.append(
+            "Runs Passed counts runs where the evaluator passed; Latest reflects the "
+            "most recent run."
+        )
+        lines.append("")
+        result_rows: list[list[str]] = []
+        for item in evaluator_results:
+            runs_count = int(item.get("runs") or 0)
+            passed_runs = int(item.get("passed_runs") or 0)
+            mean_score = item.get("mean_score")
+            latest_score = item.get("latest_score")
+            result_rows.append(
+                [
+                    str(item.get("name") or "-"),
+                    str(item.get("scope") or "-"),
+                    f"{passed_runs}/{runs_count}",
+                    f"{float(mean_score):.3f}" if isinstance(mean_score, (int, float)) else "-",
+                    f"{float(latest_score):.3f}" if isinstance(latest_score, (int, float)) else "-",
+                    "pass" if item.get("latest_passed") else "fail",  # NOTE(review): a missing/None latest_passed also renders as "fail"
+                    str(item.get("summary") or "-"),
+                ]
+            )
+        lines.extend(
+            _markdown_table(
+                ["Evaluator", "Scope", "Runs Passed", "Mean Score", "Latest Score", "Latest", "Summary"],
+                result_rows,
+                ["left", "left", "right", "right", "right", "left", "left"],
+            )
+        )
+    else:
+        lines.append(
+            "No evaluator results recorded. Runs that store `evaluator_results` "
+            "in their metrics populate this section."
+        )
+    lines.append("")
+
+    appendix_lines.append("## Appendix: Evaluator Configuration")  # appended unconditionally, so the appendix buffer is never empty
+    appendix_lines.append("")
+    appendix_lines.append(
+        "Evalset-level evaluators run for each case; report-level evaluators run once "
+        "after all cases. Evaluator names are resolved at runtime via the Pydantic "
+        "evaluator registries."
+    )
+    appendix_lines.append("")
+    appendix_lines.append("### Evalset Evaluators")
+    appendix_lines.append("")
+    if evalset_evaluators:
+        appendix_lines.append("```json")
+        appendix_lines.append(json.dumps(evalset_evaluators, ensure_ascii=False, indent=2, sort_keys=True))
+        appendix_lines.append("```")
+    else:
+        appendix_lines.append("No evalset-level evaluators configured.")
+    appendix_lines.append("")
+    appendix_lines.append("### Report Evaluators")
+    appendix_lines.append("")
+    if report_evaluators:
+        appendix_lines.append("```json")
+        appendix_lines.append(json.dumps(report_evaluators, ensure_ascii=False, indent=2, sort_keys=True))
+        appendix_lines.append("```")
+    else:
+        appendix_lines.append("No report-level evaluators configured.")
+    appendix_lines.append("")
+
+    appendix_lines.append("## Appendix: Evalset Cases")
+    appendix_lines.append("")
+    appendix_lines.append(f"{len(cases)} case(s) in this evalset.")
+    appendix_lines.append("")
+    if cases:
+        case_rows: list[list[str]] = []
+        for case in cases:
+            expected_output = case.get("expected_output")
+            if expected_output is None:  # fall back to the "expected" key when "expected_output" is absent/None
+                expected_output = case.get("expected")
+            case_rows.append(
+                [
+                    str(case.get("name") or "-"),
+                    str(case.get("id") or "-"),
+                    _compact_json(case.get("inputs")),
+                    _compact_json(expected_output),
+                    _compact_json(case.get("evaluators")),
+                    _compact_json(case.get("metadata")),
+                ]
+            )
+        appendix_lines.extend(
+            _markdown_table(
+                ["Case", "ID", "Inputs", "Expected Output", "Evaluators", "Metadata"],
+                case_rows,
+                ["left", "left", "left", "left", "left", "left"],
+            )
+        )
+    else:
+        appendix_lines.append("No cases returned for this evalset.")
+    appendix_lines.append("")
+
+    lines.extend(_per_case_outcome_lines(experiments, colorize=colorize))
+
+    lines.append("## Experiment Overview")
+    lines.append("")
+    overview_rows: list[list[str]] = []
+    for experiment in experiments:
+        runs_fetched = int(experiment.get("runs_fetched") or 0)
+        runs_total = int(experiment.get("runs_total") or 0)
+        overview_rows.append(
+            [
+                f"{experiment.get('name', '')}",
+                str(experiment.get('agent_spec_name') or experiment.get('agent_spec_id') or '-'),
+                f"{runs_fetched}/{runs_total}",
+                _fmt_pct(experiment.get('latest_pass_rate') if isinstance(experiment.get('latest_pass_rate'), (int, float)) else None),
+                _fmt_pct(experiment.get('baseline_pass_rate') if isinstance(experiment.get('baseline_pass_rate'), (int, float)) else None),
+                _fmt_delta(experiment.get('drift_delta') if isinstance(experiment.get('drift_delta'), (int, float)) else None, colorize=colorize),
+                _fmt_delta(experiment.get('latest_two_delta') if isinstance(experiment.get('latest_two_delta'), (int, float)) else None, colorize=colorize),
+            ]
+        )
+    lines.extend(
+        _markdown_table(
+            ["Experiment", "Agentspec", "Runs (fetched/total)", "Latest", "Baseline", "Drift", "Latest-2 Delta"],
+            overview_rows,
+            ["left", "left", "right", "right", "right", "right", "right"],
+        )
+    )
+    lines.append("")
+
+    appendix_lines.extend(_report_analyses_lines(report))
+
+    lines.append("## Comparison Combinations")
+    lines.append("")
+
+    ranked_latest = sorted(  # only experiments with a numeric latest_pass_rate are ranked
+        [item for item in experiments if isinstance(item.get("latest_pass_rate"), (int, float))],
+        key=lambda item: float(item.get("latest_pass_rate") or 0.0),
+        reverse=True,  # highest latest pass rate first
+    )
+    lines.append("### By Latest Pass Rate")
+    lines.append("")
+    latest_rows: list[list[str]] = []
+    for idx, item in enumerate(ranked_latest, start=1):
+        latest_rows.append([str(idx), f"{item.get('name', '')}", _fmt_pct(float(item.get('latest_pass_rate') or 0.0))])
+    lines.extend(_markdown_table(["Rank", "Experiment", "Latest"], latest_rows, ["right", "left", "right"]))
+    latest_values = [
+        float(item.get("latest_pass_rate"))
+        for item in ranked_latest
+        if isinstance(item.get("latest_pass_rate"), (int, float))
+    ]
+    lines.append("")
+    lines.append("Latest pass-rate histogram (pts):")
+    for hist_line in _ascii_histogram(
+        latest_values,
+        bins=8,
+        width=20,
+        min_value=0.0,
+        max_value=1.0,
+        full_blocks=True,
+        colorize=colorize,
+    ):
+        lines.append(f"`{hist_line}`")
+    lines.append("")
+
+    ranked_drift = sorted(  # ascending: most negative drift first
+        [item for item in experiments if isinstance(item.get("drift_delta"), (int, float))],
+        key=lambda item: float(item.get("drift_delta") or 0.0),
+    )
+    lines.append("### By Drift (Most Negative To Most Positive)")
+    lines.append("")
+    drift_rows: list[list[str]] = []
+    for idx, item in enumerate(ranked_drift, start=1):
+        drift_rows.append([str(idx), f"{item.get('name', '')}", _fmt_delta(float(item.get('drift_delta') or 0.0), colorize=colorize)])
+    lines.extend(_markdown_table(["Rank", "Experiment", "Drift"], drift_rows, ["right", "left", "right"]))
+    drift_values = [
+        float(item.get("drift_delta"))
+        for item in ranked_drift
+        if isinstance(item.get("drift_delta"), (int, float))
+    ]
+    lines.append("")
+    lines.append("Drift histogram (delta pts):")
+    for hist_line in _ascii_histogram(
+        drift_values,
+        bins=8,
+        width=20,
+        full_blocks=True,
+        colorize=colorize,
+        drift_palette=True,
+    ):
+        lines.append(f"`{hist_line}`")
+    lines.append("")
+
+    ranked_stability = sorted(  # ascending: lowest pass-rate standard deviation first
+        [item for item in experiments if isinstance(item.get("stddev_pass_rate"), (int, float))],
+        key=lambda item: float(item.get("stddev_pass_rate") or 0.0),
+    )
+    lines.append("### By Stability (Lowest Pass-Rate StdDev)")
+    lines.append("")
+    stability_rows: list[list[str]] = []
+    for idx, item in enumerate(ranked_stability, start=1):
+        stddev = item.get("stddev_pass_rate")
+        mean = item.get("mean_pass_rate")
+        stability_rows.append(
+            [
+                str(idx),
+                f"{item.get('name', '')}",
+                (f"{float(stddev) * 100:.2f} pts" if isinstance(stddev, (int, float)) else "n/a"),
+                (_fmt_pct(float(mean)) if isinstance(mean, (int, float)) else "n/a"),
+            ]
+        )
+    lines.extend(_markdown_table(["Rank", "Experiment", "StdDev", "Mean"], stability_rows, ["right", "left", "right", "right"]))
+    lines.append("")
+
+    pairwise = _pairwise_latest_deltas(experiments)
+    within_agentspec_pairs = [  # pairs are split by their "group" field
+        pair for pair in pairwise if str(pair.get("group") or "") == "within_agentspec"
+    ]
+    cross_agentspec_pairs = [
+        pair for pair in pairwise if str(pair.get("group") or "") == "cross_agentspec"
+    ]
+    lines.append("### Pairwise Latest-Pass Deltas")
+    lines.append("")
+    pair_rows: list[list[str]] = []
+    for pair in pairwise:
+        pair_rows.append(
+            [
+                f"{pair['left']} vs {pair['right']}",
+                _fmt_pct(pair['left_latest']),
+                _fmt_pct(pair['right_latest']),
+                _fmt_delta(pair['delta'], colorize=colorize),
+            ]
+        )
+    if not pairwise:  # keep the table shape with a single placeholder row
+        pair_rows.append(["n/a", "n/a", "n/a", "n/a"])
+    lines.extend(
+        _markdown_table(
+            ["Pair", "Left Latest", "Right Latest", "Delta (Left-Right)"],
+            pair_rows,
+            ["left", "right", "right", "right"],
+        )
+    )
+    pair_deltas = [float(pair["delta"]) for pair in pairwise if isinstance(pair.get("delta"), (int, float))]
+    lines.append("")
+    lines.append("Pairwise latest-delta histogram (pts):")
+    for hist_line in _ascii_histogram(
+        pair_deltas,
+        bins=8,
+        width=20,
+        full_blocks=True,
+        colorize=colorize,
+        drift_palette=True,
+    ):
+        lines.append(f"`{hist_line}`")
+    lines.append("")
+
+    lines.append("### Within-Agentspec Pairwise Latest-Pass Deltas")
+    lines.append("")
+    within_pair_rows: list[list[str]] = []
+    for pair in within_agentspec_pairs:
+        within_pair_rows.append(
+            [
+                f"{pair['left']} vs {pair['right']}",
+                str(pair.get('left_agent_spec_name') or pair.get('left_agent_spec_id') or '-'),
+                _fmt_pct(pair['left_latest']),
+                _fmt_pct(pair['right_latest']),
+                _fmt_delta(pair['delta'], colorize=colorize),
+            ]
+        )
+    if not within_pair_rows:
+        within_pair_rows.append(["n/a", "n/a", "n/a", "n/a", "n/a"])
+    lines.extend(
+        _markdown_table(
+            ["Pair", "Agentspec", "Left Latest", "Right Latest", "Delta (Left-Right)"],
+            within_pair_rows,
+            ["left", "left", "right", "right", "right"],
+        )
+    )
+    lines.append("")
+
+    lines.append("### Cross-Agentspec Pairwise Latest-Pass Deltas")
+    lines.append("")
+    cross_pair_rows: list[list[str]] = []
+    for pair in cross_agentspec_pairs:
+        cross_pair_rows.append(
+            [
+                f"{pair['left']} ({pair.get('left_agent_spec_name') or pair.get('left_agent_spec_id') or '-'}) vs {pair['right']} ({pair.get('right_agent_spec_name') or pair.get('right_agent_spec_id') or '-'})",
+                _fmt_pct(pair['left_latest']),
+                _fmt_pct(pair['right_latest']),
+                _fmt_delta(pair['delta'], colorize=colorize),
+            ]
+        )
+    if not cross_pair_rows:
+        cross_pair_rows.append(["n/a", "n/a", "n/a", "n/a"])
+    lines.extend(
+        _markdown_table(
+            ["Pair", "Left Latest", "Right Latest", "Delta (Left-Right)"],
+            cross_pair_rows,
+            ["left", "right", "right", "right"],
+        )
+    )
+    lines.append("")
+
+    appendix_lines.append("## Appendix: Heatmaps")
+    appendix_lines.append("")
+    appendix_lines.append("Pass-rate heatmap by experiment and run window:")
+    appendix_lines.append("")
+    appendix_lines.append("```text")
+    appendix_lines.extend(_ascii_passrate_heatmap(experiments, max_columns=12, colorize=False))  # heatmaps are never colourised, whatever `colorize` is
+    appendix_lines.append("```")
+    appendix_lines.append("")
+    appendix_lines.append("Consecutive delta heatmap (A-B) by experiment:")
+    appendix_lines.append("")
+    appendix_lines.append("```text")
+    appendix_lines.extend(_ascii_drift_heatmap(experiments, max_columns=12, colorize=False))
+    appendix_lines.append("```")
+    appendix_lines.append("")
+
+    lines.append("### Insight Highlights")
+    lines.append("")
+    best_latest = ranked_latest[0] if ranked_latest else None  # rankings above are reused: ends of each sorted list are the extremes
+    worst_latest = ranked_latest[-1] if ranked_latest else None
+    most_negative = ranked_drift[0] if ranked_drift else None
+    most_positive = ranked_drift[-1] if ranked_drift else None
+    most_stable = ranked_stability[0] if ranked_stability else None
+    if best_latest:
+        lines.append(
+            "- Top latest pass-rate: "
+            + f"{best_latest.get('name', '')} ({_fmt_pct(float(best_latest.get('latest_pass_rate') or 0.0))})."
+        )
+    if worst_latest:
+        lines.append(
+            "- Lowest latest pass-rate: "
+            + f"{worst_latest.get('name', '')} ({_fmt_pct(float(worst_latest.get('latest_pass_rate') or 0.0))})."
+        )
+    if most_positive:
+        drift_pos = float(most_positive.get("drift_delta") or 0.0)
+        lines.append(
+            "- Strongest positive drift: "
+            + f"{most_positive.get('name', '')} ({_fmt_delta(drift_pos, colorize=colorize)})."
+        )
+    if most_negative:
+        drift_neg = float(most_negative.get("drift_delta") or 0.0)
+        lines.append(
+            "- Strongest negative drift: "
+            + f"{most_negative.get('name', '')} ({_fmt_delta(drift_neg, colorize=colorize)})."
+        )
+    if most_stable:
+        std = most_stable.get("stddev_pass_rate")  # numeric by construction: ranked_stability filters on it
+        mean = most_stable.get("mean_pass_rate")
+        lines.append(
+            "- Stability leader: "
+            + f"{most_stable.get('name', '')} "
+            + f"(stddev={(float(std) * 100):.2f} pts, mean={_fmt_pct(float(mean)) if isinstance(mean, (int, float)) else 'n/a'})."
+        )
+
+    drift_neg_count = len([value for value in drift_values if value < 0])
+    drift_flat_count = len([value for value in drift_values if value == 0])
+    drift_pos_count = len([value for value in drift_values if value > 0])
+    total = max(1, drift_neg_count + drift_flat_count + drift_pos_count)  # floor of 1 avoids dividing by zero when there are no drift values
+    neg_meter = "█" * int(round((drift_neg_count / total) * 14))  # each meter is scaled to at most 14 blocks
+    flat_meter = "█" * int(round((drift_flat_count / total) * 14))
+    pos_meter = "█" * int(round((drift_pos_count / total) * 14))
+    neg_meter = neg_meter or "·"  # a dot stands in for a meter that rounded to zero blocks
+    flat_meter = flat_meter or "·"
+    pos_meter = pos_meter or "·"
+    lines.append("")
+    lines.append("Drift balance meter:")
+    lines.append(
+        "`NEG "
+        + _style_text(neg_meter, "red", colorize)
+        + f" ({drift_neg_count}) | FLAT "
+        + _style_text(flat_meter, "yellow", colorize)
+        + f" ({drift_flat_count}) | POS "
+        + _style_text(pos_meter, "green", colorize)
+        + f" ({drift_pos_count})`"
+    )
+    lines.append("")
+
+    appendix_lines.append("## Appendix: Per-Experiment Details")
+    appendix_lines.append("")
+    for experiment in experiments:
+        appendix_lines.append(f"### {experiment.get('name', '')}")
+        appendix_lines.append("")
+        agent_spec_id = str(experiment.get("agent_spec_id") or "")
+        agent_spec_label = str(experiment.get('agent_spec_name') or agent_spec_id or '-')
+        agent_spec_link = _agentspec_details_url(agent_spec_id)
+        if agent_spec_link:
+            appendix_lines.append(f"Agentspec: [{agent_spec_label}]({agent_spec_link})")
+        else:
+            appendix_lines.append(f"Agentspec: {agent_spec_label}")
+        if evalset_runs_url:
+            appendix_lines.append(f"Evalset run details: [Open run page]({evalset_runs_url})")
+        appendix_lines.append("")
+        appendix_lines.append("#### Run Timeline")
+        appendix_lines.append("")
+        run_rows: list[list[str]] = []
+        runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
+        for idx, run in enumerate(runs, start=1):
+            pass_rate = run.get("pass_rate") if isinstance(run.get("pass_rate"), (int, float)) else None
+            cause_text = _format_failure_cause(run.get("failure_cause"))
+            run_id = str(run.get('id', ''))
+            run_link = _run_overlay_url(evalset_runs_url, run_id)
+            run_rows.append(
+                [
+                    str(idx),
+                    (f"[{run_id}]({run_link})" if run_link and run_id else run_id),
+                    str(run.get('status', '')),
+                    _fmt_pct(float(pass_rate)) if isinstance(pass_rate, (int, float)) else 'n/a',
+                    f"`{_ascii_bar(float(pass_rate), full_blocks=True, colorize=colorize) if isinstance(pass_rate, (int, float)) else '-'}`",
+                    cause_text or "-",
+                ]
+            )
+        if not runs:  # placeholder row so the timeline table is never empty
+            run_rows.append(["1", "n/a", "n/a", "n/a", "`-`", "-"])
+        appendix_lines.extend(_markdown_table(["#", "Run ID", "Status", "Pass Rate", "ASCII Trend", "Failure Cause"], run_rows, ["right", "left", "left", "right", "left", "left"]))
+        appendix_lines.append("")
+        failure_rows: list[list[str]] = []
+        for idx, run in enumerate(runs, start=1):
+            cause = run.get("failure_cause")
+            if not isinstance(cause, dict) or not cause:
+                continue
+            detail = str(cause.get("detail_excerpt") or "").strip()
+            detail_single = " ".join(detail.split())  # collapse all whitespace (incl. newlines) to single spaces
+            if len(detail_single) > 240:  # cap the excerpt at 240 characters: 237 plus "..."
+                detail_single = detail_single[:237] + "..."
+            failure_rows.append(
+                [
+                    str(idx),
+                    str(run.get("id", "")),
+                    str(cause.get("stage") or "-"),
+                    str(cause.get("type") or "-"),
+                    str(cause.get("message") or "-"),
+                    detail_single or "-",
+                ]
+            )
+        if failure_rows:  # the failure section is omitted entirely when no run has a failure cause
+            appendix_lines.append("#### Failure Causes")
+            appendix_lines.append("")
+            appendix_lines.extend(
+                _markdown_table(
+                    ["#", "Run ID", "Stage", "Type", "Message", "Detail Excerpt"],
+                    failure_rows,
+                    ["right", "left", "left", "left", "left", "left"],
+                )
+            )
+            appendix_lines.append("")
+            for idx, run in enumerate(runs, start=1):
+                cause = run.get("failure_cause")
+                if not isinstance(cause, dict) or not cause:
+                    continue
+                detail_lines = _failure_cause_detail_lines(cause)
+                if not detail_lines:
+                    continue
+                appendix_lines.append(f"<details><summary>Run {idx} failure detail ({run.get('id', '')})</summary>")
+                appendix_lines.append("")
+                appendix_lines.extend(detail_lines)
+                appendix_lines.append("")
+                appendix_lines.append("</details>")
+                appendix_lines.append("")
+        timeline_values = [
+            float(run.get("pass_rate"))
+            for run in runs
+            if isinstance(run.get("pass_rate"), (int, float))
+        ]
+        appendix_lines.append(
+            "Pass-rate sparkline: "
+            + f"`{_sparkline(timeline_values, colorize=colorize) if timeline_values else 'n/a'}`"
+        )
+        appendix_lines.append("")
+
+        comparisons = [
+            item for item in (experiment.get("consecutive_comparisons") or [])
+            if isinstance(item, dict)
+        ]
+        appendix_lines.append("#### Consecutive Run Deltas (A-B)")
+        appendix_lines.append("")
+        comparison_rows: list[list[str]] = []
+        for item in comparisons:
+            run_a = item.get("run_a_pass_rate") if isinstance(item.get("run_a_pass_rate"), (int, float)) else None
+            run_b = item.get("run_b_pass_rate") if isinstance(item.get("run_b_pass_rate"), (int, float)) else None
+            delta = item.get("delta_pass_rate") if isinstance(item.get("delta_pass_rate"), (int, float)) else None
+            comparison_rows.append(
+                [
+                    str(item.get('run_a_id', '')),
+                    str(item.get('run_b_id', '')),
+                    _fmt_pct(float(run_a)) if isinstance(run_a, (int, float)) else 'n/a',
+                    _fmt_pct(float(run_b)) if isinstance(run_b, (int, float)) else 'n/a',
+                    _fmt_delta(float(delta), colorize=colorize) if isinstance(delta, (int, float)) else 'n/a',
+                ]
+            )
+        if not comparisons:
+            comparison_rows.append(["n/a", "n/a", "n/a", "n/a", "n/a"])
+        appendix_lines.extend(_markdown_table(["Run A", "Run B", "A Pass", "B Pass", "Delta"], comparison_rows, ["left", "left", "right", "right", "right"]))
+        appendix_lines.append("")
+
+    lines.append("## Notes")
+    lines.append("")
+    lines.append("- Drift is computed as latest - baseline.")
+    lines.append("- Baseline uses the first half of fetched runs (minimum 1, maximum 3).")
+    lines.append("- Latest-2 delta uses the latest two runs returned in the fetched window.")
+    lines.append("")
+
+    appendix_lines.extend(
+        _report_appendix_lines(
+            experiments,
+            evalset_runs_url,
+            case_by_name=case_by_name,
+            representative_case_name=representative_case_name,
+        )
+    )
+
+    if appendix_lines:  # NOTE(review): always true here, since several appendix headings above are appended unconditionally
+        lines.append("# Appendices")
+        lines.append("")
+        lines.append(
+            "Full configuration, cases, heatmaps, per-experiment timelines, and per-run "
+            "details are collected below to keep the summary above readable."
+        )
+        lines.append("")
+        lines.extend(appendix_lines)
+
+    return "\n".join(lines)  # no trailing newline is added after the last line
+
+
+def _appendix_metric_int(metrics: dict[str, Any], *keys: str) -> str:
+    """Return the first finite, non-bool numeric metric among ``keys`` as an int string, else "-"."""
+    for key in keys:
+        value = metrics.get(key)
+        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
+        if is_number and value == value and abs(value) != float("inf"):  # NaN/inf would make int() raise
+            return str(int(value))
+    return "-"
+
+
+def _appendix_metric_float(metrics: dict[str, Any], *keys: str) -> str:
+    """Format the first non-bool numeric metric among ``keys`` with three decimals, else "-"."""
+    for name in keys:
+        candidate = metrics.get(name)
+        is_number = isinstance(candidate, (int, float))
+        if is_number and not isinstance(candidate, bool):  # bool is an int subclass; exclude it
+            return f"{float(candidate):.3f}"
+    return "-"
+
+
+# Candidate paths mirror the in-app run-details overlay
+# (`getRunInteractionDetails` in AIEvals.tsx) so the report renders the same
+# prompt/output the UI shows.
+_PROMPT_CANDIDATE_PATHS: tuple[tuple[str, str], ...] = (  # (container key on the run, field inside it), in priority order
+    ("summary", "agent_prompt"),
+    ("summary", "sent_prompt"),
+    ("summary", "prompt"),
+    ("report", "agent_prompt"),
+    ("report", "sent_prompt"),
+    ("report", "prompt"),
+)
+
+_OUTPUT_CANDIDATE_PATHS: tuple[tuple[str, str], ...] = (  # same shape; _run_interaction_value returns the first non-None hit
+    ("summary", "agent_output"),
+    ("summary", "output"),
+    ("report", "agent_output"),
+    ("report", "output"),
+    ("report", "parsed"),
+    ("summary", "agent_output_text"),
+    ("report", "agent_output_text"),
+    ("report", "raw_excerpt"),
+)
+
+
+def _run_interaction_value(
+    run: dict[str, Any], paths: tuple[tuple[str, str], ...]
+) -> Any:
+    """Return the first value that is not ``None`` along the candidate paths, else ``None``."""
+    for section_name, attr in paths:
+        section = run.get(section_name)
+        if not isinstance(section, dict):
+            continue  # container missing or not a mapping: try the next path
+        if (found := section.get(attr)) is not None:
+            return found
+    return None
+
+
+def _format_display_value(value: Any) -> tuple[str, str]:
+    """Turn an arbitrary value into a ``(language, text)`` pair for a fenced block.
+
+    ``None`` becomes ``"(none)"`` and strings pass through, both tagged "text";
+    anything else is pretty-printed as "json", or ``str()``-ed if that fails.
+    """
+    if isinstance(value, str) or value is None:
+        return "text", "(none)" if value is None else value
+    try:
+        rendered = json.dumps(value, ensure_ascii=False, indent=2, sort_keys=True)
+    except Exception:
+        # Not JSON-serialisable: fall back to the plain string form.
+        return "text", str(value)
+    return "json", rendered
+
+
+def _fenced_block(language: str, text: str) -> list[str]:
+    """Emit a fenced code block whose fence (3+ backticks) is longer than any backtick run in ``text``."""
+    fence = next("`" * n for n in range(3, len(text) + 4) if "`" * n not in text)  # shortest safe fence
+    return [f"{fence}{language}", *(text or "(empty)").splitlines(), fence]
+
+
+def _extract_case_prompt(case_record: dict[str, Any] | None) -> Any:
+    """Pick the prompt-like field from a case's ``inputs`` dict.
+
+    Falls back to the whole ``inputs`` dict; ``None`` if there is no such dict.
+    """
+    payload = case_record.get("inputs") if isinstance(case_record, dict) else None
+    if not isinstance(payload, dict):
+        return None
+    preferred = ("prompt", "text", "query", "message")
+    hits = (payload[field] for field in preferred if payload.get(field) is not None)
+    return next(hits, payload)
+
+
+def _extract_case_prompt_from_result(case_result: dict[str, Any]) -> Any:
+    """Return the first prompt-like field of a case result that is not ``None``, else ``None``."""
+    fields = ("prompt", "input", "inputs", "case_input")
+    candidates = (case_result.get(field) for field in fields)
+    # Lazily scan in priority order and stop at the first populated field.
+    return next((found for found in candidates if found is not None), None)
+
+
+def _extract_case_output_from_result(case_result: dict[str, Any]) -> Any:
+    """Return the first output-like field of a case result that is not ``None``, else ``None``."""
+    fields = ("output", "actual_output", "response", "result")
+    candidates = (case_result.get(field) for field in fields)
+    # Lazily scan in priority order and stop at the first populated field.
+    return next((found for found in candidates if found is not None), None)
+
+
+def _is_synthetic_run(run: dict[str, Any]) -> bool:
+    """Tell whether a run is flagged synthetic in its summary, report, or agent output."""
+    summary = run.get("summary")
+    report = run.get("report")
+    summary = summary if isinstance(summary, dict) else {}
+    report = report if isinstance(report, dict) else {}
+    flagged = summary.get("synthetic") is True or report.get("synthetic") is True
+    agent_output = summary.get("agent_output")
+    if flagged or not isinstance(agent_output, dict):
+        return flagged
+    # The agent output may carry the marker as a boolean flag or as a mode string.
+    return agent_output.get("synthetic") is True or agent_output.get("mode") == "synthetic"
+
+
+def _synthetic_case_output(
+    run: dict[str, Any],
+    case_record: dict[str, Any] | None,
+    case_result: dict[str, Any],
+    *,
+    representative_case_name: str | None,
+    case_name: str,
+) -> Any:
+    """Derive a per-case output for a synthetic run.
+
+    The representative case reuses the run-level output; passed cases yield the
+    case's ``expected_output``; failed cases echo an input field or a placeholder.
+    """
+    placeholder = "(no usable answer — regressed run)"
+    record = case_record if isinstance(case_record, dict) else None
+    run_level_output = _run_interaction_value(run, _OUTPUT_CANDIDATE_PATHS)
+    if representative_case_name and case_name == representative_case_name:
+        return run_level_output
+    if case_result.get("passed"):
+        return record.get("expected_output") if record is not None and "expected_output" in record else None
+    inputs = record.get("inputs") if record is not None else None
+    if not isinstance(inputs, dict):
+        return placeholder
+    for field in ("text", "prompt", "query", "message"):
+        if inputs.get(field):
+            return inputs[field]
+    return placeholder
+
+
+def _run_detail_block_lines(
+    idx: int,
+    run: dict[str, Any],
+    case_by_name: dict[str, dict[str, Any]],
+    *,
+    representative_case_name: str | None,
+) -> list[str]:
+    """Render the full per-run detail shown by the in-app overlay.
+
+    Mirrors the run-details dialog in AIEvals.tsx: prompt sent, agent output
+    received, run summary, and run report.
+
+    Args:
+        idx: Position of the run used in the collapsible block title.
+        run: Run record; ``summary``, ``metrics`` and ``report`` are only used
+            when they are dicts.
+        case_by_name: Case records keyed by case name, used to look up the
+            prompt, expected output, metadata and evaluators of each case.
+        representative_case_name: Case that receives the run-level output
+            when the run is synthetic.
+
+    Returns:
+        Markdown lines forming one ``<details>`` block, ending with a blank
+        line.
+    """
+
+    def _dict_or_empty(value: Any) -> dict[str, Any]:
+        return value if isinstance(value, dict) else {}
+
+    def _titled_block(title: str, value: Any) -> list[str]:
+        # Bold heading, blank line, fenced payload, blank line.
+        lang, text = _format_display_value(value)
+        return [f"**{title}**", "", *_fenced_block(lang, text), ""]
+
+    def _score_text(score: Any) -> str:
+        return f"{float(score):.3f}" if isinstance(score, (int, float)) else "-"
+
+    def _usage_cell(value: Any) -> str:
+        # Numeric values (including 0) are real measurements and must stay
+        # visible; only missing/empty values collapse to the "-" placeholder.
+        if isinstance(value, (int, float)):
+            return str(value)
+        return str(value or "-")
+
+    run_id = str(run.get("id", "") or "")
+    status = str(run.get("status", "") or "unknown")
+    created = str(run.get("created_at", "") or "-")
+    pass_rate = run.get("pass_rate")
+    pass_text = (
+        _fmt_pct(float(pass_rate)) if isinstance(pass_rate, (int, float)) else "n/a"
+    )
+    summary = _dict_or_empty(run.get("summary"))
+
+    lines: list[str] = []
+    summary_label = run_id or f"run {idx}"
+    lines.append(
+        f"<details><summary>Run {idx} — {summary_label} "
+        f"(status: {status}, pass rate: {pass_text})</summary>"
+    )
+    lines.append("")
+    lines.append(f"- Run ID: `{run_id or '-'}`")
+    lines.append(f"- Status: {status}")
+    lines.append(f"- Pass rate: {pass_text}")
+    lines.append(f"- Created: {created}")
+    runtime_pod = str(summary.get("runtime_pod_name") or "").strip()
+    if runtime_pod:
+        lines.append(f"- Runtime: `{runtime_pod}`")
+    runtime_id = str(summary.get("runtime_id") or "").strip()
+    if runtime_id:
+        lines.append(f"- Runtime ID: `{runtime_id}`")
+    lines.append("")
+
+    case_results = _dict_or_empty(run.get("metrics")).get("case_results")
+    if isinstance(case_results, list) and case_results:
+        # Non-dict entries are ignored by both the table and the detail blocks.
+        case_dicts = [item for item in case_results if isinstance(item, dict)]
+
+        lines.append("**Per-Case Results**")
+        lines.append("")
+        case_rows: list[list[str]] = [
+            [
+                str(case_result.get("name") or "-"),
+                "✅ pass" if case_result.get("passed") else "❌ fail",
+                _score_text(case_result.get("score")),
+                str(case_result.get("category") or "-"),
+                str(case_result.get("difficulty") or "-"),
+            ]
+            for case_result in case_dicts
+        ]
+        if case_rows:
+            lines.extend(
+                _markdown_table(
+                    ["Case", "Result", "Score", "Category", "Difficulty"],
+                    case_rows,
+                    ["left", "left", "right", "left", "left"],
+                )
+            )
+            lines.append("")
+
+        lines.append("**Per-Case Prompts and Outputs**")
+        lines.append("")
+        synthetic_run = _is_synthetic_run(run)
+        for case_result in case_dicts:
+            case_name = str(case_result.get("name") or "-")
+            case_record = case_by_name.get(case_name)
+            prompt_value = _extract_case_prompt(case_record)
+            if prompt_value is None:
+                prompt_value = _extract_case_prompt_from_result(case_result)
+            if synthetic_run:
+                output_value = _synthetic_case_output(
+                    run,
+                    case_record,
+                    case_result,
+                    representative_case_name=representative_case_name,
+                    case_name=case_name,
+                )
+            else:
+                output_value = _extract_case_output_from_result(case_result)
+                if output_value is None:
+                    output_value = "(per-case output not captured for this run)"
+            record = _dict_or_empty(case_record)
+            result_text = "pass" if case_result.get("passed") else "fail"
+            score_text = _score_text(case_result.get("score"))
+            category_text = str(case_result.get("category") or "-")
+            difficulty_text = str(case_result.get("difficulty") or "-")
+            lines.append(
+                f"<details><summary>Case {case_name} ({result_text}, score: {score_text})</summary>"
+            )
+            lines.append("")
+            lines.append(f"- Category: {category_text}")
+            lines.append(f"- Difficulty: {difficulty_text}")
+            lines.append("")
+            lines.extend(_titled_block("Prompt", prompt_value))
+            lines.extend(_titled_block("Output", output_value))
+            lines.extend(_titled_block("Expected Output", record.get("expected_output")))
+            lines.extend(_titled_block("Case Metadata", record.get("metadata")))
+            lines.extend(_titled_block("Case Evaluators", record.get("evaluators")))
+            lines.append("</details>")
+            lines.append("")
+
+    lines.extend(
+        _titled_block("Prompt Sent", _run_interaction_value(run, _PROMPT_CANDIDATE_PATHS))
+    )
+    lines.extend(
+        _titled_block(
+            "Agent Output Received",
+            _run_interaction_value(run, _OUTPUT_CANDIDATE_PATHS),
+        )
+    )
+
+    usage = _extract_run_usage(run)
+    if usage:
+        lines.append("**Pydantic AI Usage**")
+        lines.append("")
+        preferred_keys = [
+            "source",
+            "provider",
+            "model",
+            "requests",
+            "prompt_tokens",
+            "completion_tokens",
+            "total_tokens",
+            "input_cached_tokens",
+            "tool_calls",
+            "duration_ms",
+            "credits_consumed",
+            "captured_at",
+            "reservation_id",
+            "runtime_pod_name",
+        ]
+        # Well-known keys first in a fixed order, then any extras sorted by name.
+        ordered_keys = [key for key in preferred_keys if key in usage]
+        ordered_keys.extend(
+            key
+            for key in sorted(str(k) for k in usage.keys())
+            if key not in preferred_keys
+        )
+        usage_rows: list[list[str]] = [
+            [key, _usage_cell(usage.get(key))] for key in ordered_keys
+        ]
+        if usage_rows:
+            lines.extend(_markdown_table(["Metric", "Value"], usage_rows, ["left", "left"]))
+            lines.append("")
+        usage_lang, usage_text = _format_display_value(usage)
+        lines.append("Raw usage payload:")
+        lines.append("")
+        lines.extend(_fenced_block(usage_lang, usage_text))
+        lines.append("")
+
+    lines.extend(_titled_block("Run Summary", summary))
+    lines.extend(_titled_block("Run Report", _dict_or_empty(run.get("report"))))
+
+    cause = run.get("failure_cause")
+    if isinstance(cause, dict) and cause:
+        detail_lines = _failure_cause_detail_lines(cause)
+        if detail_lines:
+            lines.append("**Failure Cause**")
+            lines.append("")
+            lines.extend(detail_lines)
+            lines.append("")
+
+    lines.append("</details>")
+    lines.append("")
+    return lines
+
+
+def _extract_run_usage(run: dict[str, Any]) -> dict[str, Any]:
+    """Locate the usage payload of a run and return it as a flat dict.
+
+    Locations are tried in this order and the first non-empty dict wins:
+
+    1. ``run["usage"]`` (usage already normalized onto the run record);
+    2. ``run["metrics"]`` under ``pydantic_ai_usage`` then ``usage``;
+    3. ``run["summary"]`` under ``pydantic_ai_usage`` then ``usage``;
+    4. ``run["report"]["usage"]``.
+
+    When a candidate itself contains a non-empty ``pydantic_ai_usage`` dict,
+    that nested dict is the base and the candidate's other entries only fill
+    in keys the nested dict does not define.
+
+    Returns:
+        A new dict with the usage data, or ``{}`` when nothing is found.
+    """
+
+    def _coerce_usage(candidate: Any) -> dict[str, Any]:
+        if not (isinstance(candidate, dict) and candidate):
+            return {}
+        nested = candidate.get("pydantic_ai_usage")
+        if not (isinstance(nested, dict) and nested):
+            return dict(candidate)
+        merged = dict(nested)
+        for key, value in candidate.items():
+            if key != "pydantic_ai_usage":
+                # Nested values take precedence over the outer ones.
+                merged.setdefault(str(key), value)
+        return merged
+
+    def _section(name: str) -> dict[str, Any]:
+        payload = run.get(name)
+        return payload if isinstance(payload, dict) else {}
+
+    # ``None`` stands for the run record itself.
+    search_plan = (
+        (None, ("usage",)),
+        ("metrics", ("pydantic_ai_usage", "usage")),
+        ("summary", ("pydantic_ai_usage", "usage")),
+        ("report", ("usage",)),
+    )
+    for section_name, keys in search_plan:
+        container = run if section_name is None else _section(section_name)
+        for key in keys:
+            found = _coerce_usage(container.get(key))
+            if found:
+                return found
+    return {}
+
+
+def _usage_pick(usage: dict[str, Any], *keys: str) -> Any:
+    """Return the first usable value found under any of ``keys``.
+
+    Keys are tried in the order given. A value is skipped when it is ``None``
+    or a string that is empty after stripping whitespace; any other value
+    (including ``0`` and ``False``) is returned as-is. ``None`` is returned
+    when no key yields a usable value.
+    """
+
+    def _is_blank(candidate: Any) -> bool:
+        return candidate is None or (
+            isinstance(candidate, str) and not candidate.strip()
+        )
+
+    return next(
+        (candidate for candidate in map(usage.get, keys) if not _is_blank(candidate)),
+        None,
+    )
+
+
+def _usage_number(value: Any) -> float | None:
+    """Convert a usage value to a finite float, or ``None`` when unusable.
+
+    Args:
+        value: Raw usage value. Ints, floats and numeric strings are accepted;
+            booleans are rejected even though ``bool`` is a subclass of
+            ``int``.
+
+    Returns:
+        The value as a float, or ``None`` when the value is a bool, an
+        unsupported type, a blank or non-numeric string, too large to be
+        represented as a float, or not finite (NaN / infinity). Rejecting
+        non-finite numbers keeps callers that round the result to an int from
+        raising.
+    """
+    import math
+
+    if isinstance(value, bool) or not isinstance(value, (int, float, str)):
+        return None
+    if isinstance(value, str):
+        value = value.strip()
+        if not value:
+            return None
+    try:
+        number = float(value)
+    except (ValueError, OverflowError):
+        # ValueError: non-numeric text; OverflowError: int too large for float.
+        return None
+    return number if math.isfinite(number) else None
+
+
+def _usage_total_tokens_value(usage: dict[str, Any]) -> str:
+    """Format the total token count of a usage payload for display.
+
+    The total is read from ``total_tokens``, ``totalTokens``, ``tokens_total``
+    or ``token_total``. When none of those yields a number, the total is
+    derived as prompt + completion tokens, provided both are available under
+    one of their accepted spellings.
+
+    Returns:
+        The total rounded to the nearest integer as a string, or ``"-"`` when
+        it cannot be determined.
+    """
+
+    def _first_number(*keys: str) -> float | None:
+        return _usage_number(_usage_pick(usage, *keys))
+
+    total = _first_number("total_tokens", "totalTokens", "tokens_total", "token_total")
+    if total is None:
+        prompt_tokens = _first_number(
+            "prompt_tokens", "promptTokens", "input_tokens", "inputTokens"
+        )
+        completion_tokens = _first_number(
+            "completion_tokens", "completionTokens", "output_tokens", "outputTokens"
+        )
+        # Both parts are required; a partial sum would under-report the total.
+        if prompt_tokens is None or completion_tokens is None:
+            return "-"
+        total = prompt_tokens + completion_tokens
+    return str(int(round(total)))
+
+
+def _usage_credits_value(usage: dict[str, Any]) -> str:
+    """Format the credits consumed by a run for display.
+
+    The amount is read from the first usable key among ``credits_consumed``,
+    ``creditsConsumed``, ``credits``, ``total_credits`` and ``cost_credits``.
+
+    Returns:
+        The amount with up to six decimal places and trailing zeros (and a
+        dangling decimal point) removed, or ``"-"`` when no numeric value is
+        available.
+    """
+    picked = _usage_pick(
+        usage,
+        "credits_consumed",
+        "creditsConsumed",
+        "credits",
+        "total_credits",
+        "cost_credits",
+    )
+    amount = _usage_number(picked)
+    if amount is None:
+        return "-"
+    fixed = "{:.6f}".format(amount)
+    # Strip zeros first, then the dot, so "100.000000" becomes "100".
+    return fixed.rstrip("0").rstrip(".")
+
+
+def _report_appendix_lines(
+    experiments: list[dict[str, Any]],
+    evalset_runs_url: str,
+    *,
+    case_by_name: dict[str, dict[str, Any]] | None = None,
+    representative_case_name: str | None = None,
+) -> list[str]:
+    """Render an appendix that lists every fetched run with its details.
+
+    Each Run ID links back to the experiments page with a ``run`` query
+    parameter, which opens the run-details overlay directly.
+
+    Args:
+        experiments: Experiment records; only dict entries of each
+            experiment's ``runs`` list are rendered, and experiments without
+            such runs are skipped.
+        evalset_runs_url: Base URL passed to ``_run_overlay_url`` to build the
+            per-run links.
+        case_by_name: Optional case records keyed by case name, forwarded to
+            the per-run detail renderer. ``None`` is treated as empty.
+        representative_case_name: Forwarded to the per-run detail renderer.
+
+    Returns:
+        Markdown lines: a heading and intro, then per experiment a summary
+        table followed by one collapsible block per run. A fallback sentence
+        is emitted when no experiment has any run.
+    """
+    lines: list[str] = [
+        "## Appendix: Run Details",
+        "",
+        (
+            "Per-run detail for every run fetched in the window above. "
+            "Each Run ID opens the run-details overlay directly in Datalayer, and "
+            "the collapsible blocks below reproduce the same prompt, agent output, "
+            "summary, and report shown by the in-app run-details dialog."
+        ),
+        "",
+    ]
+
+    any_runs = False
+    case_by_name = case_by_name or {}
+    for experiment in experiments:
+        runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
+        if not runs:
+            continue
+        any_runs = True
+        agent_spec_label = str(
+            experiment.get("agent_spec_name")
+            or experiment.get("agent_spec_id")
+            or "-"
+        )
+        lines.append(f"### {experiment.get('name', '')}")
+        lines.append("")
+        lines.append(f"Agentspec: {agent_spec_label}")
+        lines.append("")
+        run_rows: list[list[str]] = []
+        for idx, run in enumerate(runs, start=1):
+            metrics = run.get("metrics") if isinstance(run.get("metrics"), dict) else {}
+            usage = _extract_run_usage(run)
+            # ``or ""`` keeps an explicit ``None`` id from being rendered (and
+            # linked) as the literal text "None", matching the detail blocks.
+            run_id = str(run.get("id", "") or "")
+            run_link = _run_overlay_url(evalset_runs_url, run_id)
+            pass_rate = run.get("pass_rate")
+            passed = _appendix_metric_int(metrics, "passed", "passed_cases")
+            total = _appendix_metric_int(metrics, "total_cases", "total", "cases")
+            # Show the ratio as soon as either side is known.
+            cases_cell = (
+                f"{passed}/{total}" if passed != "-" or total != "-" else "-"
+            )
+            run_rows.append(
+                [
+                    str(idx),
+                    (f"[{run_id}]({run_link})" if run_link and run_id else (run_id or "-")),
+                    str(run.get("status", "") or "-"),
+                    _fmt_pct(float(pass_rate)) if isinstance(pass_rate, (int, float)) else "n/a",
+                    cases_cell,
+                    _appendix_metric_float(metrics, "avg_score", "average_score"),
+                    _usage_total_tokens_value(usage),
+                    _usage_credits_value(usage),
+                    str(run.get("created_at", "") or "-"),
+                    _format_failure_cause(run.get("failure_cause")) or "-",
+                ]
+            )
+        lines.extend(
+            _markdown_table(
+                [
+                    "#",
+                    "Run ID",
+                    "Status",
+                    "Pass Rate",
+                    "Cases (pass/total)",
+                    "Avg Score",
+                    "Total Tokens",
+                    "Credits",
+                    "Created",
+                    "Failure Cause",
+                ],
+                run_rows,
+                ["right", "left", "left", "right", "right", "right", "right", "right", "left", "left"],
+            )
+        )
+        lines.append("")
+        lines.append("#### Full Run Detail (as shown in the UI)")
+        lines.append("")
+        for idx, run in enumerate(runs, start=1):
+            lines.extend(
+                _run_detail_block_lines(
+                    idx,
+                    run,
+                    case_by_name,
+                    representative_case_name=representative_case_name,
+                )
+            )
+
+    if not any_runs:
+        lines.append("No runs were fetched for any experiment.")
+        lines.append("")
+
+    return lines
+
+
+def _write_report_csv(report: dict[str, Any], output_path: Path) -> None:
+    """Write the evals report as a flat CSV file.
+
+    One header row is written, followed by rows of four kinds, told apart by
+    the ``row_type`` column:
+
+    * ``experiment`` - one per experiment, with aggregate pass-rate stats;
+    * ``run`` - one per run of an experiment, with failure cause and usage;
+    * ``case`` - one per case result of a run, repeating the run's usage;
+    * ``evaluator`` - one per entry of ``report["evaluator_results"]``.
+
+    Columns a row does not supply are left empty by ``csv.DictWriter``.
+
+    Args:
+        report: Report payload; non-dict experiments, runs, case results and
+            evaluator results are ignored.
+        output_path: Destination file. Missing parent directories are created
+            and an existing file is overwritten.
+    """
+    experiments = [
+        entry for entry in (report.get("experiments") or []) if isinstance(entry, dict)
+    ]
+
+    fieldnames = [
+        "row_type",
+        "evalset_id",
+        "evalset_runs_url",
+        "agent_spec_id",
+        "agent_spec_name",
+        "agent_spec_url",
+        "experiment_id",
+        "experiment_name",
+        "run_index",
+        "run_id",
+        "run_status",
+        "run_pass_rate",
+        "runs_fetched",
+        "runs_total",
+        "baseline_pass_rate",
+        "latest_pass_rate",
+        "drift_delta",
+        "latest_two_delta",
+        "mean_pass_rate",
+        "stddev_pass_rate",
+        "failure_stage",
+        "failure_type",
+        "failure_message",
+        "usage_source",
+        "usage_provider",
+        "usage_model",
+        "usage_requests",
+        "usage_prompt_tokens",
+        "usage_completion_tokens",
+        "usage_total_tokens",
+        "usage_input_cached_tokens",
+        "usage_tool_calls",
+        "usage_duration_ms",
+        "usage_credits_consumed",
+        "usage_captured_at",
+        "case_name",
+        "case_status",
+        "case_score",
+        "case_category",
+        "case_difficulty",
+        "evaluator_name",
+        "evaluator_scope",
+        "evaluator_runs",
+        "evaluator_passed_runs",
+        "evaluator_mean_score",
+        "evaluator_latest_score",
+        "evaluator_latest_passed",
+        "generated_at",
+    ]
+    # Rows without per-run usage (experiment, evaluator) blank every usage column.
+    blank_usage = {name: "" for name in fieldnames if name.startswith("usage_")}
+
+    def _run_usage_fields(run: dict[str, Any]) -> dict[str, Any]:
+        usage = _extract_run_usage(run)
+        return {
+            "usage_source": _usage_pick(usage, "source"),
+            "usage_provider": _usage_pick(usage, "provider"),
+            "usage_model": _usage_pick(usage, "model"),
+            "usage_requests": _usage_pick(usage, "requests"),
+            "usage_prompt_tokens": _usage_pick(usage, "prompt_tokens", "promptTokens", "input_tokens", "inputTokens"),
+            "usage_completion_tokens": _usage_pick(usage, "completion_tokens", "completionTokens", "output_tokens", "outputTokens"),
+            "usage_total_tokens": _usage_total_tokens_value(usage),
+            "usage_input_cached_tokens": _usage_pick(usage, "input_cached_tokens", "inputCachedTokens"),
+            "usage_tool_calls": _usage_pick(usage, "tool_calls", "toolCalls"),
+            "usage_duration_ms": _usage_pick(usage, "duration_ms", "durationMs"),
+            "usage_credits_consumed": _usage_credits_value(usage),
+            "usage_captured_at": _usage_pick(usage, "captured_at", "capturedAt"),
+        }
+
+    def _experiment_identity(experiment: dict[str, Any]) -> dict[str, Any]:
+        # Columns identifying the experiment and its agentspec on every row.
+        agent_spec_id = str(experiment.get("agent_spec_id", ""))
+        return {
+            "agent_spec_id": agent_spec_id,
+            "agent_spec_name": str(experiment.get("agent_spec_name", "")),
+            "agent_spec_url": _agentspec_details_url(agent_spec_id),
+            "experiment_id": str(experiment.get("id", "")),
+            "experiment_name": str(experiment.get("name", "")),
+        }
+
+    def _experiment_stats(experiment: dict[str, Any]) -> dict[str, Any]:
+        # Aggregate columns repeated on experiment and run rows.
+        return {
+            "runs_fetched": int(experiment.get("runs_fetched") or 0),
+            "runs_total": int(experiment.get("runs_total") or 0),
+            "baseline_pass_rate": experiment.get("baseline_pass_rate"),
+            "latest_pass_rate": experiment.get("latest_pass_rate"),
+            "drift_delta": experiment.get("drift_delta"),
+            "latest_two_delta": experiment.get("latest_two_delta"),
+            "mean_pass_rate": experiment.get("mean_pass_rate"),
+            "stddev_pass_rate": experiment.get("stddev_pass_rate"),
+        }
+
+    def _run_identity(idx: int, run: dict[str, Any]) -> dict[str, Any]:
+        return {
+            "run_index": idx,
+            "run_id": str(run.get("id", "")),
+            "run_status": str(run.get("status", "")),
+            "run_pass_rate": run.get("pass_rate"),
+        }
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with output_path.open("w", encoding="utf-8", newline="") as stream:
+        writer = csv.DictWriter(stream, fieldnames=fieldnames)
+        writer.writeheader()
+        evalset_id = str(report.get("evalset_id", ""))
+        run_environment = str(report.get("run_environment") or "")
+        evalset_fields = {
+            "evalset_id": evalset_id,
+            "evalset_runs_url": _evalset_runs_url(evalset_id, run_environment),
+        }
+        generated_at = str(report.get("generated_at", ""))
+
+        for experiment in experiments:
+            writer.writerow(
+                {
+                    "row_type": "experiment",
+                    **evalset_fields,
+                    **_experiment_identity(experiment),
+                    "run_index": "",
+                    "run_id": "",
+                    "run_status": "",
+                    "run_pass_rate": "",
+                    **_experiment_stats(experiment),
+                    "failure_stage": "",
+                    "failure_type": "",
+                    "failure_message": "",
+                    **blank_usage,
+                    "generated_at": generated_at,
+                }
+            )
+            runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
+            for idx, run in enumerate(runs, start=1):
+                cause = run.get("failure_cause")
+                if not isinstance(cause, dict):
+                    cause = {}
+                usage_fields = _run_usage_fields(run)
+                writer.writerow(
+                    {
+                        "row_type": "run",
+                        **evalset_fields,
+                        **_experiment_identity(experiment),
+                        **_run_identity(idx, run),
+                        **_experiment_stats(experiment),
+                        "failure_stage": str(cause.get("stage", "")),
+                        "failure_type": str(cause.get("type", "")),
+                        "failure_message": str(cause.get("message", "")),
+                        **usage_fields,
+                        "generated_at": generated_at,
+                    }
+                )
+                run_metrics = run.get("metrics")
+                case_results = (
+                    run_metrics.get("case_results")
+                    if isinstance(run_metrics, dict)
+                    else None
+                )
+                if not isinstance(case_results, list):
+                    continue
+                for case_result in case_results:
+                    if not isinstance(case_result, dict):
+                        continue
+                    # Case rows carry the parent run's usage but no experiment
+                    # stats or failure columns.
+                    writer.writerow(
+                        {
+                            "row_type": "case",
+                            **evalset_fields,
+                            **_experiment_identity(experiment),
+                            **_run_identity(idx, run),
+                            "case_name": str(case_result.get("name", "")),
+                            "case_status": (
+                                "passed" if case_result.get("passed") else "failed"
+                            ),
+                            "case_score": case_result.get("score"),
+                            "case_category": str(case_result.get("category") or ""),
+                            "case_difficulty": str(case_result.get("difficulty") or ""),
+                            **usage_fields,
+                            "generated_at": generated_at,
+                        }
+                    )
+
+        for item in report.get("evaluator_results") or []:
+            if not isinstance(item, dict):
+                continue
+            writer.writerow(
+                {
+                    "row_type": "evaluator",
+                    **evalset_fields,
+                    "evaluator_name": str(item.get("name") or ""),
+                    "evaluator_scope": str(item.get("scope") or ""),
+                    "evaluator_runs": int(item.get("runs") or 0),
+                    "evaluator_passed_runs": int(item.get("passed_runs") or 0),
+                    "evaluator_mean_score": item.get("mean_score"),
+                    "evaluator_latest_score": item.get("latest_score"),
+                    "evaluator_latest_passed": bool(item.get("latest_passed")),
+                    **blank_usage,
+                    "generated_at": generated_at,
+                }
+            )
+
+
+def _print_report_console(report: dict[str, Any], run_limit: int) -> None:
+    evalset_id = str(report.get("evalset_id", ""))
+    run_environment = str(report.get("run_environment") or "")
+    generated_at = str(report.get("generated_at", ""))
+    experiments = [item for item in (report.get("experiments") or []) if isinstance(item, dict)]
+    agentspecs = [item for item in (report.get("agentspecs") or []) if isinstance(item, dict)]
+    evalset_runs_url = _evalset_runs_url(evalset_id, run_environment)
+
+    console.rule(f"[bold cyan]Evals Report[/bold cyan] {evalset_id}")
+    console.print(f"Generated at: {generated_at}")
+    console.print(f"Experiments: {len(experiments)} | Run window per experiment: {run_limit}")
+    if evalset_runs_url:
+        console.print(f"Evalset run details: {evalset_runs_url}")
+    console.print("")
+
+    if agentspecs:
+        agentspec_table = Table(title="Agentspec Coverage")
+        agentspec_table.add_column("Agentspec ID", style="cyan")
+        agentspec_table.add_column("Agentspec", style="white")
+        agentspec_table.add_column("Model", style="white")
+        agentspec_table.add_column("Version", style="white")
+        agentspec_table.add_column("Experiments", justify="right")
+        agentspec_table.add_column("Runs", justify="right")
+        for item in agentspecs:
+            agentspec_table.add_row(
+                str(item.get("id") or ""),
+                str(item.get("name") or item.get("id") or ""),
+                str(item.get("model") or "-"),
+                str(item.get("version") or "-"),
+                str(int(item.get("experiments") or 0)),
+                str(int(item.get("runs") or 0)),
+            )
+        console.print(agentspec_table)
+
+    overview = Table(title="Experiment Overview")
+    overview.add_column("Experiment", style="white")
+    overview.add_column("Agentspec", style="white")
+    overview.add_column("Runs", justify="right")
+    overview.add_column("Latest", justify="right")
+    overview.add_column("Baseline", justify="right")
+    overview.add_column("Drift", justify="right")
+    overview.add_column("Latest-2", justify="right")
+    for experiment in experiments:
+        overview.add_row(
+            str(experiment.get("name", "")),
+            str(experiment.get("agent_spec_name") or experiment.get("agent_spec_id") or "-"),
+            f"{int(experiment.get('runs_fetched') or 0)}/{int(experiment.get('runs_total') or 0)}",
+            _fmt_pct(experiment.get("latest_pass_rate") if isinstance(experiment.get("latest_pass_rate"), (int, float)) else None),
+            _fmt_pct(experiment.get("baseline_pass_rate") if isinstance(experiment.get("baseline_pass_rate"), (int, float)) else None),
+            _fmt_delta(experiment.get("drift_delta") if isinstance(experiment.get("drift_delta"), (int, float)) else None, colorize=True),
+            _fmt_delta(experiment.get("latest_two_delta") if isinstance(experiment.get("latest_two_delta"), (int, float)) else None, colorize=True),
+        )
+    console.print(overview)
+
+    ranked_latest = sorted(
+        [item for item in experiments if isinstance(item.get("latest_pass_rate"), (int, float))],
+        key=lambda item: float(item.get("latest_pass_rate") or 0.0),
+        reverse=True,
+    )
+    latest_table = Table(title="By Latest Pass Rate")
+    latest_table.add_column("Rank", justify="right", no_wrap=True)
+    latest_table.add_column("Experiment", style="white")
+    latest_table.add_column("Latest", justify="right", no_wrap=True)
+    for idx, item in enumerate(ranked_latest, start=1):
+        latest_table.add_row(str(idx), str(item.get("name", "")), _fmt_pct(float(item.get("latest_pass_rate") or 0.0)))
+    console.print(latest_table)
+    latest_values = [
+        float(item.get("latest_pass_rate"))
+        for item in ranked_latest
+        if isinstance(item.get("latest_pass_rate"), (int, float))
+    ]
+    console.print("Latest histogram:")
+    for hist_line in _ascii_histogram(
+        latest_values,
+        bins=8,
+        width=20,
+        min_value=0.0,
+        max_value=1.0,
+        full_blocks=True,
+        colorize=True,
+    ):
+        console.print(hist_line)
+
+    ranked_drift = sorted(
+        [item for item in experiments if isinstance(item.get("drift_delta"), (int, float))],
+        key=lambda item: float(item.get("drift_delta") or 0.0),
+    )
+    drift_table = Table(title="By Drift (Negative To Positive)")
+    drift_table.add_column("Rank", justify="right", no_wrap=True)
+    drift_table.add_column("Experiment", style="white")
+    drift_table.add_column("Drift", justify="right", no_wrap=True)
+    for idx, item in enumerate(ranked_drift, start=1):
+        drift_table.add_row(
+            str(idx),
+            str(item.get("name", "")),
+            _fmt_delta(float(item.get("drift_delta") or 0.0), colorize=True),
+        )
+    console.print(drift_table)
+    drift_values = [
+        float(item.get("drift_delta"))
+        for item in ranked_drift
+        if isinstance(item.get("drift_delta"), (int, float))
+    ]
+    console.print("Drift histogram:")
+    for hist_line in _ascii_histogram(
+        drift_values,
+        bins=8,
+        width=20,
+        full_blocks=True,
+        colorize=True,
+        drift_palette=True,
+    ):
+        console.print(hist_line)
+
+    pairwise = _pairwise_latest_deltas(experiments)
+    pairwise_table = Table(title="Pairwise Latest-Pass Deltas")
+    pairwise_table.add_column("Pair", style="white")
+    pairwise_table.add_column("Left", justify="right", no_wrap=True)
+    pairwise_table.add_column("Right", justify="right", no_wrap=True)
+    pairwise_table.add_column("Delta", justify="right", no_wrap=True)
+    for pair in pairwise:
+        pairwise_table.add_row(
+            f"{pair['left']} vs {pair['right']}",
+            _fmt_pct(pair["left_latest"]),
+            _fmt_pct(pair["right_latest"]),
+            _fmt_delta(pair["delta"], colorize=True),
+        )
+    if not pairwise:
+        pairwise_table.add_row("n/a", "n/a", "n/a", "n/a")
+    console.print(pairwise_table)
+
+    within_agentspec_pairs = [
+        pair for pair in pairwise if str(pair.get("group") or "") == "within_agentspec"
+    ]
+    cross_agentspec_pairs = [
+        pair for pair in pairwise if str(pair.get("group") or "") == "cross_agentspec"
+    ]
+
+    within_table = Table(title="Within-Agentspec Pairwise Latest-Pass Deltas")
+    within_table.add_column("Pair", style="white")
+    within_table.add_column("Agentspec", style="white")
+    within_table.add_column("Left", justify="right", no_wrap=True)
+    within_table.add_column("Right", justify="right", no_wrap=True)
+    within_table.add_column("Delta", justify="right", no_wrap=True)
+    for pair in within_agentspec_pairs:
+        within_table.add_row(
+            f"{pair['left']} vs {pair['right']}",
+            str(pair.get("left_agent_spec_name") or pair.get("left_agent_spec_id") or "-"),
+            _fmt_pct(pair["left_latest"]),
+            _fmt_pct(pair["right_latest"]),
+            _fmt_delta(pair["delta"], colorize=True),
+        )
+    if not within_agentspec_pairs:
+        within_table.add_row("n/a", "n/a", "n/a", "n/a", "n/a")
+    console.print(within_table)
+
+    cross_table = Table(title="Cross-Agentspec Pairwise Latest-Pass Deltas")
+    cross_table.add_column("Pair", style="white")
+    cross_table.add_column("Left", justify="right", no_wrap=True)
+    cross_table.add_column("Right", justify="right", no_wrap=True)
+    cross_table.add_column("Delta", justify="right", no_wrap=True)
+    for pair in cross_agentspec_pairs:
+        cross_table.add_row(
+            (
+                f"{pair['left']} ({pair.get('left_agent_spec_name') or pair.get('left_agent_spec_id') or '-'}) "
+                f"vs {pair['right']} ({pair.get('right_agent_spec_name') or pair.get('right_agent_spec_id') or '-'})"
+            ),
+            _fmt_pct(pair["left_latest"]),
+            _fmt_pct(pair["right_latest"]),
+            _fmt_delta(pair["delta"], colorize=True),
+        )
+    if not cross_agentspec_pairs:
+        cross_table.add_row("n/a", "n/a", "n/a", "n/a")
+    console.print(cross_table)
+
+    console.print("[bold]Pass-rate heatmap (r01=latest fetched run):[/bold]")
+    for line in _ascii_passrate_heatmap(experiments, max_columns=12, colorize=True):
+        console.print(line)
+    console.print("[bold]Consecutive delta heatmap (A-B):[/bold]")
+    for line in _ascii_drift_heatmap(experiments, max_columns=12, colorize=True):
+        console.print(line)
+
+    if ranked_latest:
+        console.print(
+            "[bold]Insight:[/bold] top latest "
+            f"[green]{ranked_latest[0].get('name', '')}[/green] "
+            f"({_fmt_pct(float(ranked_latest[0].get('latest_pass_rate') or 0.0))})"
+        )
+    if ranked_drift:
+        console.print(
+            "[bold]Insight:[/bold] strongest drift "
+            f"{ranked_drift[-1].get('name', '')} "
+            f"({_fmt_delta(float(ranked_drift[-1].get('drift_delta') or 0.0), colorize=True)})"
+        )
+    console.print("")
+
+    for experiment in experiments:
+        console.print("")
+        console.print(f"[bold]Run Timeline:[/bold] {experiment.get('name', '')}")
+        run_table = Table()
+        run_table.add_column("#", justify="right", style="cyan", no_wrap=True)
+        run_table.add_column("Run ID", style="white", no_wrap=True)
+        run_table.add_column("Status", no_wrap=True)
+        run_table.add_column("Pass Rate", justify="right", no_wrap=True)
+        run_table.add_column("Trend", style="white", no_wrap=True)
+        run_table.add_column("Failure Cause", style="red", overflow="fold")
+
+        runs = [run for run in (experiment.get("runs") or []) if isinstance(run, dict)]
+        for idx, run in enumerate(runs, start=1):
+            status_value = str(run.get("status", ""))
+            pass_rate = float(run.get("pass_rate")) if isinstance(run.get("pass_rate"), (int, float)) else None
+            cause_text = _format_failure_cause(run.get("failure_cause"))
+            run_table.add_row(
+                str(idx),
+                str(run.get("id", "")),
+                f"[{_status_style(status_value)}]{status_value}[/{_status_style(status_value)}]",
+                _fmt_pct(pass_rate),
+                _ascii_bar(pass_rate, width=28, full_blocks=True, colorize=True) if pass_rate is not None else "-",
+                cause_text or "-",
+            )
+        if not runs:
+            run_table.add_row("1", "n/a", "n/a", "n/a", "-", "-")
+        console.print(run_table)
+
+        for idx, run in enumerate(runs, start=1):
+            cause = run.get("failure_cause")
+            if not isinstance(cause, dict) or not cause:
+                continue
+            console.print(
+                f"[red bold]Run {idx} failure:[/red bold] "
+                f"[red]{str(cause.get('message') or 'Unknown failure.')}[/red]"
+            )
+            for key, label in (
+                ("stage", "stage"),
+                ("type", "type"),
+                ("execution_url", "execution url"),
+            ):
+                value = str(cause.get(key) or "").strip()
+                if value:
+                    console.print(f"    {label}: {value}")
+            diagnostics = cause.get("diagnostics")
+            if isinstance(diagnostics, dict):
+                for key, label in (
+                    ("agent_runtimes_url", "agent runtimes url"),
+                    ("run_url", "run url"),
+                ):
+                    value = diagnostics.get(key)
+                    if value:
+                        console.print(f"    {label}: {value}")
+                candidate_urls = diagnostics.get("candidate_urls")
+                if isinstance(candidate_urls, list) and candidate_urls:
+                    console.print(f"    candidate urls: {', '.join(str(u) for u in candidate_urls)}")
+                attempts = diagnostics.get("attempts")
+                if isinstance(attempts, list) and attempts:
+                    for attempt in attempts:
+                        if not isinstance(attempt, dict):
+                            continue
+                        outcome = "ok" if attempt.get("ok") else "failed"
+                        console.print(
+                            f"    attempt: {attempt.get('url', '')} -> {outcome} "
+                            f"{attempt.get('error') or ''}".rstrip()
+                        )
+            detail = str(cause.get("detail_excerpt") or "").strip()
+            if detail:
+                console.print(f"    detail: {detail}")
+
+        deltas_table = Table(title="Consecutive Run Deltas")
+        deltas_table.add_column("Run A", style="white", no_wrap=True)
+        deltas_table.add_column("Run B", style="white", no_wrap=True)
+        deltas_table.add_column("A Pass", justify="right", no_wrap=True)
+        deltas_table.add_column("B Pass", justify="right", no_wrap=True)
+        deltas_table.add_column("Delta", justify="right", no_wrap=True)
+        comparisons = [
+            item for item in (experiment.get("consecutive_comparisons") or [])
+            if isinstance(item, dict)
+        ]
+        for item in comparisons:
+            run_a = item.get("run_a_pass_rate") if isinstance(item.get("run_a_pass_rate"), (int, float)) else None
+            run_b = item.get("run_b_pass_rate") if isinstance(item.get("run_b_pass_rate"), (int, float)) else None
+            delta = item.get("delta_pass_rate") if isinstance(item.get("delta_pass_rate"), (int, float)) else None
+            deltas_table.add_row(
+                str(item.get("run_a_id", "")),
+                str(item.get("run_b_id", "")),
+                _fmt_pct(float(run_a)) if isinstance(run_a, (int, float)) else "n/a",
+                _fmt_pct(float(run_b)) if isinstance(run_b, (int, float)) else "n/a",
+                _fmt_delta(float(delta), colorize=True) if isinstance(delta, (int, float)) else "n/a",
+            )
+        if not comparisons:
+            deltas_table.add_row("n/a", "n/a", "n/a", "n/a", "n/a")
+        console.print(deltas_table)
+
+
+def iter_report_runs(report: dict[str, Any]) -> list[tuple[str, dict[str, Any]]]:
+    """Flatten a report into ``(experiment_name, run)`` pairs.
+
+    Walks the ``experiments -> runs`` nesting of a structured eval report.
+    Non-dict experiments and runs are skipped; an experiment is labelled by
+    its ``name``, else its ``id``, else the empty string.
+    """
+    experiments = [
+        entry for entry in (report.get("experiments") or []) if isinstance(entry, dict)
+    ]
+    return [
+        (str(entry.get("name") or entry.get("id") or ""), run)
+        for entry in experiments
+        for run in (entry.get("runs") or [])
+        if isinstance(run, dict)
+    ]
+
+
+def collect_report_failures(report: dict[str, Any]) -> dict[str, Any]:
+    """Summarise every failing run found in an eval report.
+
+    A run is failing when its status is ``failed``/``error`` or it carries a
+    non-empty ``failure_cause`` mapping. Returns the failing-run count, the
+    failing-status count, a per-type tally and one record per failing run
+    (detail excerpts are whitespace-collapsed and capped at 300 characters).
+    """
+    failing_statuses = {"failed", "error"}
+    records: list[dict[str, Any]] = []
+    per_type: dict[str, int] = {}
+    status_failures = 0
+
+    for experiment_name, run in iter_report_runs(report):
+        status = str(run.get("status") or "").strip().lower()
+        raw_cause = run.get("failure_cause")
+        # Non-dict causes are ignored; ``info`` is always safe to ``.get`` on.
+        info = raw_cause if isinstance(raw_cause, dict) else {}
+        has_failed_status = status in failing_statuses
+        if not has_failed_status and not info:
+            continue
+        status_failures += int(has_failed_status)
+
+        failure_type = str(info.get("type") or "unknown")
+        per_type[failure_type] = per_type.get(failure_type, 0) + 1
+
+        # Collapse all whitespace runs so the excerpt fits on one line.
+        excerpt = " ".join(str(info.get("detail_excerpt") or "").split())
+        if len(excerpt) > 300:
+            excerpt = excerpt[:297] + "..."
+
+        record = {
+            "experiment": experiment_name,
+            "run_id": str(run.get("id") or ""),
+            "status": status or "unknown",
+            "stage": str(info.get("stage") or "-"),
+            "type": failure_type,
+            "message": str(info.get("message") or "-"),
+            "detail_excerpt": excerpt or "-",
+            "execution_url": str(info.get("execution_url") or ""),
+        }
+        records.append(record)
+
+    return {
+        "failed_run_count": len(records),
+        "failed_status_runs": status_failures,
+        "type_counts": per_type,
+        "failures": records,
+    }
+
+
+def average_latest_pass_rate(report: dict[str, Any]) -> float | None:
+    """Mean ``latest_pass_rate`` across experiments; ``None`` when none is numeric."""
+    rates: list[float] = []
+    for entry in report.get("experiments") or []:
+        if not isinstance(entry, dict):
+            continue
+        rate = entry.get("latest_pass_rate")
+        # Experiments without a numeric rate are left out of the average.
+        if isinstance(rate, (int, float)):
+            rates.append(float(rate))
+    return sum(rates) / len(rates) if rates else None
diff --git a/datalayer_core/evals/runner.py b/datalayer_core/evals/runner.py
new file mode 100644
index 00000000..6c0fb578
--- /dev/null
+++ b/datalayer_core/evals/runner.py
@@ -0,0 +1,713 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Reusable evalset execution runner.
+
+This module hosts the end-to-end "execute an evalset spec against one or more
+agentspecs" workflow so that examples, the GitHub Action, and any other
+integration can launch real eval runs without re-implementing the orchestration
+(create evalset -> launch cloud runtime(s) -> run each case through the agent ->
+grade outputs -> persist runs -> teardown runtimes).
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import uuid
+from typing import Any, Callable, Optional
+from urllib.parse import urlparse
+
+from datalayer_core.agents import (
+    LocalAgentRuntime,
+    compute_time_reservation_minutes,
+    create_cloud_agent_runtime,
+    ensure_local_agent,
+    resolve_environment_burning_rate,
+    start_local_agent_runtime,
+    teardown_agent_execution_resources,
+)
+from datalayer_core.agents.agent_local import (
+    run_cloud_agent_chat,
+    run_local_agent_chat,
+    runtime_route_candidates,
+    wait_for_local_runtime,
+)
+from datalayer_core.client.client import DatalayerClient
+from datalayer_core.evals.evals import now_iso, timestamp_slug, write_eval_reports
+from datalayer_core.evals.evaluators import evaluate_evalset
+
+DEFAULT_ENVIRONMENT_NAME = "ai-agents-env"  # default ``environment_name`` of execute_evalset_spec
+DEFAULT_AGENT_NAME = "default"  # default ``agent_name`` of execute_evalset_spec
+DEFAULT_LOCAL_AGENT_BASE_URL = "http://localhost:8765"  # default ``local_agent_base_url``
+# Default per-request timeout (seconds) for a single agent chat call. Bounding
+# each call guarantees a hung agent cannot block the run forever: the call is
+# aborted, the case is marked failed, execution continues, and the enclosing
+# runner always tears down its cloud runtimes before returning.
+DEFAULT_REQUEST_TIMEOUT_SECONDS = 180
+
+
+def _case_prompt(case: dict[str, Any]) -> str:
+    """Return the prompt text carried by an evalset case's ``inputs``."""
+    payload = case.get("inputs")
+    if isinstance(payload, str):
+        return payload
+    if not isinstance(payload, dict):
+        return ""
+    # First non-blank string among the well-known prompt keys wins.
+    candidates = (payload.get(name) for name in ("prompt", "text", "query", "message"))
+    found = next((c for c in candidates if isinstance(c, str) and c.strip()), None)
+    # No usable key: fall back to the whole mapping serialized as JSON.
+    return found if found is not None else json.dumps(payload, ensure_ascii=True)
+
+
+def _compose_case_prompt(case: dict[str, Any], *, preamble: str = "") -> str:
+    """Return the case prompt, optionally prefixed by a shared preamble.
+
+    A non-blank ``preamble`` is placed ahead of the case prompt under an
+    ``Input:`` marker; when either part is empty the other is returned alone.
+    """
+    body = _case_prompt(case)
+    header = str(preamble or "").strip()
+    if header and body:
+        return f"{header}\n\nInput:\n{body}"
+    # At most one part is non-empty here: without a preamble the case
+    # prompt is returned as-is, otherwise the preamble stands alone.
+    return body if not header else header
+
+
+def _extract_text(payload: Any) -> str:
+    """Reduce an agent output payload to a plain-text answer."""
+    if isinstance(payload, str):
+        return payload
+    if isinstance(payload, dict):
+        # Prefer ``text`` over ``message``; only string values qualify.
+        for field in ("text", "message"):
+            candidate = payload.get(field)
+            if isinstance(candidate, str):
+                return candidate
+    # Anything else (including dicts without a usable field) is JSON-encoded.
+    return json.dumps(payload, ensure_ascii=True)
+
+
+def _usage_number(value: Any) -> float | None:
+    """Coerce a usage value to a finite ``float``; ``None`` when not numeric."""
+    # bool is an int subclass; a flag is never a usage count.
+    if isinstance(value, bool) or not isinstance(value, (int, float, str)):
+        return None
+    try:
+        # float() tolerates surrounding whitespace; blank text raises ValueError.
+        number = float(value)
+    except (ValueError, OverflowError):
+        return None
+    # NaN/inf would crash the int(round(...)) aggregation downstream, so
+    # treat them as missing. NaN is the only float unequal to itself.
+    is_finite = number == number and abs(number) != float("inf")
+    return number if is_finite else None
+
+
+def _usage_pick_number(usage: dict[str, Any], *keys: str) -> float | None:
+    """Return the first of ``keys`` whose value in ``usage`` parses as a number."""
+    parsed = (_usage_number(usage.get(name)) for name in keys)
+    # Lazily evaluated, so parsing stops at the first hit.
+    first_hit = next((number for number in parsed if number is not None), None)
+    return first_hit
+
+
+def _extract_case_usage(chat_result: dict[str, Any]) -> dict[str, Any]:
+    """Copy the usage dict from a chat result (top-level first, then ``output``)."""
+    output = chat_result.get("output")
+    nested_source = output if isinstance(output, dict) else {}
+    nested = nested_source.get("pydantic_ai_usage") or nested_source.get("usage")
+    for found in (chat_result.get("usage"), nested):
+        if isinstance(found, dict) and found:
+            return dict(found)
+    return {}
+
+
+def _merge_run_usage(aggregate: dict[str, Any], case_usage: dict[str, Any]) -> dict[str, Any]:
+    """Fold one case's usage payload into the run-level ``aggregate`` in place.
+
+    Numeric counters are looked up under several spelling aliases, added to
+    whatever ``aggregate`` already holds and stored under a canonical
+    snake_case key: ``credits_consumed`` keeps six decimals, every other
+    counter is rounded to an ``int``. When no total token count is reported
+    but both prompt and completion counts are, their sum is used.
+
+    Descriptive fields (``provider``, ``model``, ...) keep the first
+    non-blank value seen and are never overwritten by later cases.
+
+    Returns ``aggregate`` itself; an empty ``case_usage`` leaves it untouched.
+    """
+    if not case_usage:
+        return aggregate
+
+    # Canonical counter name -> accepted aliases, in lookup priority order.
+    # ``total_tokens`` may additionally be derived from its parts further down.
+    counter_aliases = {
+        "prompt_tokens": (
+            "prompt_tokens",
+            "promptTokens",
+            "input_tokens",
+            "inputTokens",
+        ),
+        "completion_tokens": (
+            "completion_tokens",
+            "completionTokens",
+            "output_tokens",
+            "outputTokens",
+        ),
+        "total_tokens": (
+            "total_tokens",
+            "totalTokens",
+            "tokens_total",
+            "token_total",
+        ),
+        "input_cached_tokens": (
+            "input_cached_tokens",
+            "inputCachedTokens",
+            "cached_input_tokens",
+            "cachedInputTokens",
+        ),
+        "tool_calls": (
+            "tool_calls",
+            "toolCalls",
+            "tool_call_count",
+            "toolCallCount",
+        ),
+        "requests": (
+            "requests",
+            "request_count",
+            "requestCount",
+        ),
+        "duration_ms": (
+            "duration_ms",
+            "durationMs",
+            "latency_ms",
+            "latencyMs",
+        ),
+        "credits_consumed": (
+            "credits_consumed",
+            "creditsConsumed",
+            "credits",
+            "total_credits",
+            "cost_credits",
+        ),
+    }
+
+    # Resolve every counter before touching ``aggregate``.
+    resolved = {
+        name: _usage_pick_number(case_usage, *aliases)
+        for name, aliases in counter_aliases.items()
+    }
+    prompt_count = resolved["prompt_tokens"]
+    completion_count = resolved["completion_tokens"]
+    # Derive a missing total only when both of its parts were reported.
+    if resolved["total_tokens"] is None:
+        if prompt_count is not None and completion_count is not None:
+            resolved["total_tokens"] = prompt_count + completion_count
+
+    for name, amount in resolved.items():
+        if amount is None:
+            continue
+        running = (_usage_number(aggregate.get(name)) or 0.0) + amount
+        if name == "credits_consumed":
+            # Credits are fractional; keep six decimals.
+            aggregate[name] = round(running, 6)
+        else:
+            aggregate[name] = int(round(running))
+
+    # Descriptive fields: first non-blank value wins.
+    descriptive_fields = (
+        "source",
+        "provider",
+        "model",
+        "billable_account_kind",
+        "billable_account_uid",
+        "requester_kind",
+        "requester_uid",
+        "captured_at",
+        "timestamp",
+    )
+    for name in descriptive_fields:
+        label = case_usage.get(name)
+        if label is None or (isinstance(label, str) and not label.strip()):
+            continue
+        # setdefault leaves an already-recorded value alone.
+        aggregate.setdefault(name, label)
+    return aggregate
+
+
+def execute_evalset_spec(
+    client: DatalayerClient,
+    *,
+    spec: dict[str, Any],
+    agentspec_ids: list[str],
+    run_evalset: bool = True,
+    create_report: bool = False,
+    run_limit: int = 1,
+    run_environment: str = "sdk",
+    environment_name: str = DEFAULT_ENVIRONMENT_NAME,
+    account_uid: Optional[str] = None,
+    credits_limit: float = 100.0,
+    evalset_name: Optional[str] = None,
+    backend_run_environment: str = "sdk",
+    launch_source: str = "datalayer-core",
+    agent_name: str = DEFAULT_AGENT_NAME,
+    execution_target: str = "cloud",
+    local_agent_base_url: str = DEFAULT_LOCAL_AGENT_BASE_URL,
+    auto_start_local_agent_runtime: bool = False,
+    local_agent_log_level: str = "info",
+    request_timeout_seconds: int = DEFAULT_REQUEST_TIMEOUT_SECONDS,
+    log: Optional[Callable[[str], None]] = print,
+) -> dict[str, Any]:
+    """Execute an evalset spec against one or more agentspecs and persist runs.
+
+    Creates an evalset from ``spec``, runs every case through each agentspec
+    ``run_limit`` times against either a cloud runtime (one per agentspec) or a
+    local ``agent-runtimes`` server, grades the outputs with the evals API, and
+    stores one run record per execution. Execution resources (cloud runtimes or
+    the local agent registration/server) are always torn down before returning,
+    including on error.
+
+    Parameters
+    ----------
+    client : DatalayerClient
+        An authenticated client.
+    spec : dict[str, Any]
+        Evalset spec (as loaded by :func:`load_evalset_spec`).
+    agentspec_ids : list[str]
+        Agentspec ids to execute. One experiment is created per id (plus one
+        cloud runtime per id when ``execution_target='cloud'``).
+    run_evalset : bool
+        Whether to execute the evalset after creating it. Defaults to ``True``.
+        When ``False``, this function only creates the evalset and returns.
+    create_report : bool
+        Whether to generate markdown/CSV reports via
+        :func:`write_eval_reports` before returning. Defaults to ``False``.
+    run_limit : int
+        Number of runs to create per experiment (minimum 1).
+    run_environment : str
+        Run-environment label stored on run summaries (for example ``sdk``).
+    environment_name : str
+        Runtime environment to launch cloud agents in (cloud only).
+    account_uid : Optional[str]
+        Optional billable account UID context.
+    credits_limit : float
+        Target credits budget used to size each cloud runtime reservation.
+    evalset_name : Optional[str]
+        Optional explicit evalset name. Defaults to a timestamped name derived
+        from the spec name.
+    backend_run_environment : str
+        ``run_environment`` value persisted on the created evalset.
+    launch_source : str
+        ``launch_source`` recorded on experiments and runs.
+    agent_name : str
+        Agent route/name used when contacting the runtime.
+    execution_target : str
+        ``cloud`` (default) launches one cloud runtime per agentspec; ``local``
+        executes against a local ``agent-runtimes`` server.
+    local_agent_base_url : str
+        Base URL of the local ``agent-runtimes`` server (local only). Ignored
+        when ``auto_start_local_agent_runtime`` starts a new server.
+    auto_start_local_agent_runtime : bool
+        When ``execution_target='local'``, start a local ``agent-runtimes``
+        server on a free port and tear it down afterwards. When ``False``, the
+        runner first attempts to reuse ``local_agent_base_url`` and will
+        auto-start a local runtime only if that server is unreachable.
+    local_agent_log_level : str
+        Log level for an auto-started local ``agent-runtimes`` server.
+    request_timeout_seconds : int
+        Per-request timeout (seconds) for a single agent chat call. When an
+        agent does not respond within this window the call is aborted, the case
+        is recorded as failed, and execution continues with the next case. This
+        bounds hung agents without killing legitimately slow multi-agentspec
+        runs. Defaults to ``180`` (3 minutes per call).
+    log : Optional[Callable[[str], None]]
+        Optional logging callback (defaults to ``print``; pass ``None`` to
+        silence progress output).
+
+    Returns
+    -------
+    dict[str, Any]
+        ``{"evalset_id", "evalset_name", "experiment_ids", "run_ids", "view_url"}``
+        plus optional report paths when ``create_report=True``.
+
+    Raises
+    ------
+    ValueError
+        If ``agentspec_ids`` is empty, the spec has no cases, or
+        ``execution_target`` is not ``cloud``/``local``.
+    RuntimeError
+        If the platform returns an unexpected create response or a cloud
+        runtime is missing its ingress/pod.
+    """
+
+    def _emit(message: str) -> None:
+        if log is not None:
+            log(message)
+
+    target = str(execution_target or "").strip().lower()
+    if target not in {"cloud", "local"}:
+        raise ValueError(
+            f"execution_target must be 'cloud' or 'local', got {execution_target!r}."
+        )
+
+    normalized_specs: list[str] = []
+    for value in agentspec_ids:
+        spec_id = str(value or "").strip()
+        if spec_id and spec_id not in normalized_specs:
+            normalized_specs.append(spec_id)
+    if not normalized_specs:
+        raise ValueError("agentspec_ids must contain at least one agentspec id.")
+
+    cases = [item for item in (spec.get("cases") or []) if isinstance(item, dict)]
+    if not cases:
+        raise ValueError("Evalset spec has no cases; cannot execute real runs.")
+
+    metadata = spec.get("metadata") if isinstance(spec.get("metadata"), dict) else {}
+    run_mode = str(spec.get("kind") or "batch").strip().lower() or "batch"
+    if run_mode not in {"batch", "interactive"}:
+        raise ValueError(
+            f"Evalset spec kind must be 'batch' or 'interactive', got {run_mode!r}."
+        )
+    prompt_preamble = str(metadata.get("prompt_preamble") or "").strip()
+
+    run_limit = max(1, int(run_limit))
+
+    case_request_timeout = max(1, int(request_timeout_seconds))
+
+    resolved_name = str(
+        evalset_name
+        or f"{str(spec.get('name') or 'evalset')}-{run_environment}-{timestamp_slug(now_iso())}"
+    )
+    evalset_payload = client.evals_create_eval_from_spec(
+        spec=spec,
+        name=resolved_name,
+        run_environment=backend_run_environment,
+        kind=run_mode,
+        account_uid=account_uid,
+    )
+    evalset_id = str((evalset_payload.get("evalset") or {}).get("id") or "")
+    if not evalset_id:
+        raise RuntimeError(f"Unable to create evalset from spec: {evalset_payload}")
+    _emit(f"Created evalset: {evalset_id} ({resolved_name})")
+
+    ui_base = str(os.environ.get("DATALAYER_UI_URL") or "http://localhost:3063").strip().rstrip("/")
+    view_url = f"{ui_base}/evals/experiments/{run_environment}/{evalset_id}"
+
+    result: dict[str, Any] = {
+        "evalset_id": evalset_id,
+        "evalset_name": resolved_name,
+        "experiment_ids": [],
+        "run_ids": [],
+        "view_url": view_url,
+    }
+
+    if not bool(run_evalset):
+        _emit(f"Skipped eval execution for evalset: {evalset_id}")
+        if bool(create_report):
+            reports = write_eval_reports(
+                client,
+                evalset_id,
+                account_uid=account_uid,
+            )
+            result["report_markdown_path"] = str(reports.get("markdown_path") or "")
+            if reports.get("csv_path") is not None:
+                result["report_csv_path"] = str(reports.get("csv_path") or "")
+        return result
+
+    experiment_ids: list[str] = []
+    run_ids: list[str] = []
+    runtimes_by_spec: dict[str, Any] = {}
+    local_runtime: Optional[LocalAgentRuntime] = None
+    local_base_url = str(local_agent_base_url or DEFAULT_LOCAL_AGENT_BASE_URL)
+    token = str(client._get_token() or "")
+    try:
+        if target == "cloud":
+            for spec_id in normalized_specs:
+                burning_rate = resolve_environment_burning_rate(
+                    client, environment_name
+                )
+                reservation_minutes = compute_time_reservation_minutes(
+                    credits_limit=credits_limit,
+                    burning_rate=burning_rate,
+                )
+                runtime = create_cloud_agent_runtime(
+                    client,
+                    environment_name=environment_name,
+                    name=f"evals-{spec_id}-{uuid.uuid4().hex[:8]}",
+                    agent_spec_id=spec_id,
+                    time_reservation=reservation_minutes,
+                    billable_account_uid=account_uid,
+                )
+                runtimes_by_spec[spec_id] = runtime
+                _emit(
+                    f"Launched runtime for agentspec {spec_id}: "
+                    f"pod={getattr(runtime, 'pod_name', '')} "
+                    f"runtime_id={getattr(runtime, 'uid', '')}"
+                )
+        elif target == "local":
+            local_host = urlparse(local_base_url).hostname or "127.0.0.1"
+            should_start_local_runtime = bool(auto_start_local_agent_runtime)
+            if not should_start_local_runtime:
+                try:
+                    wait_for_local_runtime(local_base_url, timeout_seconds=2)
+                except Exception:
+                    should_start_local_runtime = True
+                    _emit(
+                        "No local agent-runtimes server reachable at "
+                        f"{local_base_url}; starting one automatically."
+                    )
+            if should_start_local_runtime:
+                local_runtime = start_local_agent_runtime(
+                    agent_spec_id=normalized_specs[0],
+                    agent_name=agent_name,
+                    host=local_host,
+                    log_level=local_agent_log_level,
+                    disable_tool_approvals=True,
+                )
+                local_base_url = local_runtime.base_url
+                _emit(f"Started local agent-runtimes server at {local_base_url}")
+
+        for spec_id in normalized_specs:
+            experiment_payload = client.evals_create_experiment(
+                name=f"evals-{spec_id}-{timestamp_slug(now_iso())}",
+                evalset_id=evalset_id,
+                description="Eval execution via datalayer-core runner.",
+                status="running",
+                config={
+                    "run_mode": run_mode,
+                    "execution_target": target,
+                    "agent_spec_id": spec_id,
+                    "environment_name": environment_name,
+                },
+                summary={
+                    "launch_source": launch_source,
+                    "run_environment": run_environment,
+                    "agent_spec_id": spec_id,
+                },
+                account_uid=account_uid,
+            )
+            experiment_id = str(
+                (experiment_payload.get("experiment") or {}).get("id") or ""
+            )
+            if not experiment_id:
+                raise RuntimeError(
+                    f"Unable to create experiment: {experiment_payload}"
+                )
+            experiment_ids.append(experiment_id)
+
+            ingress = ""
+            pod_name = ""
+            runtime_id = ""
+            if target == "cloud":
+                runtime = runtimes_by_spec[spec_id]
+                ingress = str(getattr(runtime, "ingress", "") or "").strip()
+                pod_name = str(getattr(runtime, "pod_name", "") or "").strip()
+                runtime_id = str(getattr(runtime, "uid", "") or "").strip()
+                if not ingress or not pod_name:
+                    raise RuntimeError(
+                        f"Runtime missing ingress/pod for agentspec {spec_id}"
+                    )
+            else:
+                ensure_local_agent(
+                    base_url=local_base_url,
+                    agent_name=agent_name,
+                    token=token,
+                    agent_spec_id=spec_id,
+                    disable_tool_approvals=True,
+                )
+                _emit(
+                    f"Using local agent execution at {local_base_url.rstrip('/')} "
+                    f"(agent: {agent_name}, agentspec: {spec_id})."
+                )
+
+            for run_index in range(run_limit):
+                outputs: list[dict[str, Any]] = []
+                full_outputs: list[dict[str, Any]] = []
+                case_statuses: list[str] = []
+                case_prompts: list[Any] = []
+                aggregated_usage: dict[str, Any] = {}
+                failed_cases = 0
+                failure_causes: list[dict[str, Any]] = []
+
+                for case in cases:
+                    prompt = _compose_case_prompt(case, preamble=prompt_preamble)
+                    case_prompts.append(prompt)
+                    if target == "cloud":
+                        chat_result = run_cloud_agent_chat(
+                            ingress=ingress,
+                            token=token,
+                            prompt=prompt,
+                            route_candidates=runtime_route_candidates(
+                                agent_name=agent_name,
+                                agent_spec_id=spec_id,
+                                pod_name=pod_name,
+                            ),
+                            timeout=case_request_timeout,
+                        )
+                    else:
+                        chat_result = run_local_agent_chat(
+                            base_url=local_base_url,
+                            agent_name=agent_name,
+                            token=token,
+                            prompt=prompt,
+                            timeout=case_request_timeout,
+                        )
+                    status = str(chat_result.get("status") or "completed").strip().lower()
+                    case_statuses.append(status)
+                    output_payload = chat_result.get("output") or {}
+                    outputs.append({"text": _extract_text(output_payload)})
+                    full_outputs.append(
+                        output_payload
+                        if isinstance(output_payload, dict)
+                        else {"text": _extract_text(output_payload)}
+                    )
+                    if status in {"failed", "error"}:
+                        failed_cases += 1
+                        failure = chat_result.get("failure_cause")
+                        if isinstance(failure, dict):
+                            failure_causes.append(failure)
+
+                    case_usage = _extract_case_usage(chat_result)
+                    aggregated_usage = _merge_run_usage(aggregated_usage, case_usage)
+
+                metrics = evaluate_evalset(spec, outputs, statuses=case_statuses)
+                # Persist per-case prompts/outputs onto the graded case results so
+                # the report can render the actual agent interaction instead of
+                # "(per-case output not captured for this run)".
+                case_results = metrics.get("case_results")
+                if isinstance(case_results, list):
+                    for idx, case_result in enumerate(case_results):
+                        if not isinstance(case_result, dict):
+                            continue
+                        if idx < len(case_prompts):
+                            case_result["prompt"] = case_prompts[idx]
+                        if idx < len(full_outputs):
+                            case_result["output"] = full_outputs[idx]
+
+                interaction = [
+                    {
+                        "case": str(cases[idx].get("name") or f"case-{idx + 1}"),
+                        "status": case_statuses[idx] if idx < len(case_statuses) else None,
+                        "prompt": case_prompts[idx] if idx < len(case_prompts) else None,
+                        "output": full_outputs[idx] if idx < len(full_outputs) else None,
+                    }
+                    for idx in range(len(cases))
+                ]
+
+                run_status = "failed" if failed_cases > 0 else "completed"
+                if target == "cloud":
+                    # Surface the runtime pod name and runtime id on every
+                    # failure cause so the report's failure-cause block (and UI)
+                    # can show which runtime produced the failure for easier
+                    # debugging.
+                    for cause in failure_causes:
+                        cause.setdefault("runtime_pod_name", pod_name)
+                        if runtime_id:
+                            cause.setdefault("runtime_id", runtime_id)
+                summary: dict[str, Any] = {
+                    "launch_source": launch_source,
+                    "run_mode": run_mode,
+                    "run_environment": run_environment,
+                    "execution_target": target,
+                    "agent_spec_id": spec_id,
+                    "case_failures": failed_cases,
+                    "run_index": run_index + 1,
+                    "agent_prompt": [item["prompt"] for item in interaction],
+                    "agent_output": [item["output"] for item in interaction],
+                }
+                if target == "cloud":
+                    summary["runtime_pod_name"] = pod_name
+                    if runtime_id:
+                        summary["runtime_id"] = runtime_id
+                else:
+                    summary["local_agent_base_url"] = local_base_url
+                    summary["local_agent_id"] = agent_name
+                if failure_causes:
+                    summary["failure_cause"] = failure_causes[0]
+                report = {
+                    "note": f"real agent execution via datalayer-core runner ({run_mode})",
+                    "interaction": interaction,
+                    "failure_causes": failure_causes,
+                }
+                if aggregated_usage:
+                    metrics = {
+                        **metrics,
+                        "pydantic_ai_usage": aggregated_usage,
+                    }
+                    summary["usage"] = {"pydantic_ai_usage": aggregated_usage}
+                    report["usage"] = {"pydantic_ai_usage": aggregated_usage}
+                if target == "cloud":
+                    report["runtime_pod_name"] = pod_name
+                    if runtime_id:
+                        report["runtime_id"] = runtime_id
+                else:
+                    report["local_agent_base_url"] = local_base_url
+                    report["local_agent_id"] = agent_name
+
+                run_payload = client.evals_create_run(
+                    experiment_id,
+                    status=run_status,
+                    metrics=metrics,
+                    summary=summary,
+                    report=report,
+                    account_uid=account_uid,
+                )
+                run_id = str((run_payload.get("run") or {}).get("id") or "")
+                if not run_id:
+                    raise RuntimeError(f"Unable to create run: {run_payload}")
+                run_ids.append(run_id)
+                _emit(
+                    f"Created run {run_index + 1}/{run_limit} for agentspec="
+                    f"{spec_id} experiment={experiment_id}: {run_id}"
+                )
+
+        _emit(f"Executed evalset: {evalset_id}")
+        result["experiment_ids"] = experiment_ids
+        result["run_ids"] = run_ids
+        if bool(create_report):
+            reports = write_eval_reports(
+                client,
+                evalset_id,
+                account_uid=account_uid,
+            )
+            result["report_markdown_path"] = str(reports.get("markdown_path") or "")
+            if reports.get("csv_path") is not None:
+                result["report_csv_path"] = str(reports.get("csv_path") or "")
+        return result
+    finally:
+        if target == "cloud":
+            for spec_id, runtime in runtimes_by_spec.items():
+                pod_name = str(getattr(runtime, "pod_name", "") or "").strip()
+                cleanup = teardown_agent_execution_resources(
+                    client,
+                    execution_target="cloud",
+                    cloud_runtime_or_pod_name=pod_name,
+                    token=token,
+                )
+                if cleanup.get("cloud_runtime_terminated"):
+                    _emit(f"Terminated runtime for agentspec {spec_id}: {pod_name}")
+                else:
+                    _emit(
+                        "Warning: runtime termination unconfirmed for agentspec "
+                        f"{spec_id}: {pod_name}"
+                    )
+        else:
+            cleanup = teardown_agent_execution_resources(
+                client,
+                execution_target="local",
+                local_base_url=local_base_url,
+                local_agent_name=agent_name,
+                token=token,
+                local_runtime=local_runtime,
+            )
+            if cleanup.get("local_agent_deleted"):
+                _emit(f"Terminated local agent registration: {agent_name}")
+            if cleanup.get("local_runtime_terminated"):
+                _emit("Stopped auto-started local agent-runtimes server.")
+
diff --git a/datalayer_core/mixins/__init__.py b/datalayer_core/mixins/__init__.py
index 8370f351..8980b223 100644
--- a/datalayer_core/mixins/__init__.py
+++ b/datalayer_core/mixins/__init__.py
@@ -5,7 +5,7 @@
 from .sandbox_snapshots import SandboxSnapshotsMixin
 from .runtimes import RuntimesMixin
 from .secrets import SecretsMixin
-from .tokens import TokensMixin
+from .api_keys import ApiKeysMixin
 from .usage import UsageMixin
 from .whoami import WhoamiAppMixin
 
@@ -15,7 +15,7 @@
     "SandboxSnapshotsMixin",
     "RuntimesMixin",
     "SecretsMixin",
-    "TokensMixin",
+    "ApiKeysMixin",
     "UsageMixin",
     "WhoamiAppMixin",
 ]
diff --git a/datalayer_core/mixins/tokens.py b/datalayer_core/mixins/api_keys.py
similarity index 55%
rename from datalayer_core/mixins/tokens.py
rename to datalayer_core/mixins/api_keys.py
index e5810e03..01239e05 100644
--- a/datalayer_core/mixins/tokens.py
+++ b/datalayer_core/mixins/api_keys.py
@@ -3,22 +3,22 @@
 
 from typing import Any, Union
 
-from datalayer_core.models.token import TokenType
+from datalayer_core.models.api_key import ApiKeyType
 from datalayer_core.utils import btoa
 
 
-class TokensCreateMixin:
-    """Mixin for creating tokens in Datalayer."""
+class ApiKeysCreateMixin:
+    """Mixin for creating API keys in Datalayer."""
 
-    def _create_token(
+    def _create_api_key(
         self,
         name: str,
         description: str,
         expiration_date: int = 0,
-        token_type: Union[str, TokenType] = TokenType.USER,
+        api_key_type: Union[str, ApiKeyType] = ApiKeyType.SECRET,
     ) -> dict[str, Any]:
         """
-        Create a Token with the given parameters.
+        Create an API key with the given parameters.
 
         Parameters
         ----------
@@ -27,10 +27,10 @@ def _create_token(
         description : str
             Description of the secret.
         expiration_date : float
-            Expiration date of the token.
-        token_type : str, TokenType
-            Variant or type of the token. Defaults to "user_token".
-            Type of the token (e.g., "user").
+            Expiration date of the API key.
+        api_key_type : str, ApiKeyType
+            Variant sent as the ``variant`` field of the request body. Defaults to "secret".
+            Known variants: secret, publishable, restricted, temporary.
 
         Returns
         -------
@@ -40,14 +40,14 @@ def _create_token(
         body = {
             "name": name,
             "description": btoa(description),
-            "variant": token_type.value
-            if isinstance(token_type, TokenType)
-            else token_type,
+            "variant": api_key_type.value
+            if isinstance(api_key_type, ApiKeyType)
+            else api_key_type,
             "expiration_date": expiration_date,
         }
         try:
             response = self._fetch(  # type: ignore
-                "{}/api/iam/v1/tokens".format(self.urls.iam_url),  # type: ignore
+                "{}/api/iam/v1/api-keys".format(self.urls.iam_url),  # type: ignore
                 method="POST",
                 json=body,
             )
@@ -56,17 +56,17 @@ def _create_token(
             return {"success": False, "message": str(e)}
 
 
-class TokensDeleteMixin:
-    """Mixin for deleting tokens in Datalayer."""
+class ApiKeysDeleteMixin:
+    """Mixin for deleting API keys in Datalayer."""
 
-    def _delete_token(self, token_uid: str) -> dict[str, Any]:
+    def _delete_api_key(self, api_key_uid: str) -> dict[str, Any]:
         """
-        Delete a token by its unique identifier.
+        Delete an API key by its unique identifier.
 
         Parameters
         ----------
-        token_uid : str
-            Unique identifier of the token to delete.
+        api_key_uid : str
+            Unique identifier of the API key to delete.
 
         Returns
         -------
@@ -75,7 +75,7 @@ def _delete_token(self, token_uid: str) -> dict[str, Any]:
         """
         try:
             response = self._fetch(  # type: ignore
-                "{}/api/iam/v1/tokens/{}".format(self.urls.iam_url, token_uid),  # type: ignore
+                "{}/api/iam/v1/api-keys/{}".format(self.urls.iam_url, api_key_uid),  # type: ignore
                 method="DELETE",
             )
             return response.json()
@@ -83,21 +83,21 @@ def _delete_token(self, token_uid: str) -> dict[str, Any]:
             return {"success": False, "message": str(e)}
 
 
-class TokensListMixin:
-    """Mixin class for listing tokens."""
+class ApiKeysListMixin:
+    """Mixin class for listing API keys."""
 
-    def _list_tokens(self) -> dict[str, Any]:
+    def _list_api_keys(self) -> dict[str, Any]:
         """
-        List all tokens in the Datalayer environment.
+        List all API keys in the Datalayer environment.
 
         Returns
         -------
         dict[str, Any]
-            Dictionary containing tokens information.
+            Dictionary containing API key information.
         """
         try:
             response = self._fetch(  # type: ignore
-                "{}/api/iam/v1/tokens".format(self.urls.iam_url),  # type: ignore
+                "{}/api/iam/v1/api-keys".format(self.urls.iam_url),  # type: ignore
                 method="GET",
             )
             return response.json()
@@ -105,5 +105,5 @@ def _list_tokens(self) -> dict[str, Any]:
             return {"sucess": False, "error": str(e)}
 
 
-class TokensMixin(TokensCreateMixin, TokensDeleteMixin, TokensListMixin):
-    """A mixin that combines create, delete, and list functionalities for tokens."""
+class ApiKeysMixin(ApiKeysCreateMixin, ApiKeysDeleteMixin, ApiKeysListMixin):
+    """A mixin that combines create, delete, and list functionalities for API keys."""
diff --git a/datalayer_core/mixins/evals.py b/datalayer_core/mixins/evals.py
index 6cc27043..519f7852 100644
--- a/datalayer_core/mixins/evals.py
+++ b/datalayer_core/mixins/evals.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -5,6 +8,7 @@
 
 from __future__ import annotations
 
+import os
 from typing import Any, Optional
 
 
@@ -16,13 +20,20 @@ def _evals_request(
         path: str,
         *,
         method: str,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
         params: Optional[dict[str, Any]] = None,
         json_body: Optional[dict[str, Any]] = None,
     ) -> dict[str, Any]:
         query: dict[str, Any] = dict(params or {})
-        if account_uid:
-            query["account_uid"] = account_uid
+        resolved_account_uid = (
+            billable_account_uid
+            or account_uid
+            or os.environ.get("DATALAYER_ACCOUNT_UID")
+            or os.environ.get("DATALAYER_BILLABLE_ACCOUNT_UID")
+        )
+        if resolved_account_uid:
+            query["account_uid"] = resolved_account_uid
         response = self._fetch(  # type: ignore
             f"{self.urls.ai_agents_url}/api/ai-agents/v1/evals{path}",  # type: ignore
             method=method,
@@ -39,6 +50,7 @@ def evals_list_evals(
         q: Optional[str] = None,
         limit: int = 50,
         offset: int = 0,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         params: dict[str, Any] = {"limit": limit, "offset": offset}
@@ -52,6 +64,7 @@ def evals_list_evals(
             "/evalsets",
             method="GET",
             params=params,
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -63,9 +76,12 @@ def evals_create_eval(
         run_environment: str = "sdk",
         kind: str = "batch",
         schema: Optional[dict[str, Any]] = None,
+        evalset_evaluators: Optional[list[dict[str, Any]]] = None,
+        report_evaluators: Optional[list[dict[str, Any]]] = None,
         tags: Optional[list[str]] = None,
         metadata: Optional[dict[str, Any]] = None,
         cases: Optional[list[dict[str, Any]]] = None,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         body = {
@@ -74,6 +90,8 @@ def evals_create_eval(
             "run_environment": run_environment,
             "kind": kind,
             "schema": schema or {},
+            "evalset_evaluators": evalset_evaluators or [],
+            "report_evaluators": report_evaluators or [],
             "tags": tags or [],
             "metadata": metadata or {},
             "cases": cases or [],
@@ -82,6 +100,59 @@ def evals_create_eval(
             "/evalsets",
             method="POST",
             json_body=body,
+            billable_account_uid=billable_account_uid,
+            account_uid=account_uid,
+        )
+
+    def evals_create_eval_from_spec(
+        self,
+        *,
+        spec: dict[str, Any],
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        run_environment: Optional[str] = None,
+        kind: Optional[str] = None,
+        billable_account_uid: Optional[str] = None,
+        account_uid: Optional[str] = None,
+    ) -> dict[str, Any]:
+        """
+        Create an evalset from a declarative ``spec`` dict via ``evals_create_eval``.
+
+        ``name``, ``description``, ``run_environment`` and ``kind`` override the
+        matching ``spec`` keys unless ``None``. Raises ``ValueError`` if ``spec``
+        is not a dict or the resolved name is blank.
+        """
+        if not isinstance(spec, dict):
+            raise ValueError("spec must be a JSON object")
+
+        def _text(override: Optional[str], key: str, default: str = "") -> str:
+            # An explicit (non-None) argument wins over the spec entry.
+            return str(override if override is not None else spec.get(key) or default)
+
+        def _items(key: str) -> list[Any]:
+            # Non-list values are ignored: iterating a str would split it into
+            # characters and iterating a scalar would raise TypeError.
+            value = spec.get(key)
+            return list(value) if isinstance(value, (list, tuple)) else []
+
+        resolved_name = _text(name, "name").strip()
+        if not resolved_name:
+            raise ValueError("spec.name is required when name is not provided")
+        schema = spec.get("schema")
+        metadata = spec.get("metadata")
+
+        return self.evals_create_eval(
+            name=resolved_name,
+            description=_text(description, "description"),
+            run_environment=_text(run_environment, "run_environment", "sdk"),
+            kind=_text(kind, "kind", "batch"),
+            schema=schema if isinstance(schema, dict) else {},
+            evalset_evaluators=[e for e in _items("evalset_evaluators") if isinstance(e, dict)],
+            report_evaluators=[e for e in _items("report_evaluators") if isinstance(e, dict)],
+            tags=[str(tag) for tag in _items("tags") if str(tag).strip()],
+            metadata=metadata if isinstance(metadata, dict) else {},
+            cases=[case for case in _items("cases") if isinstance(case, dict)],
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -89,14 +160,42 @@ def evals_delete_eval(
         self,
         evalset_id: str,
         *,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         return self._evals_request(
             f"/evalsets/{evalset_id}",
             method="DELETE",
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
+    def evals_set_eval_public(
+        self,
+        evalset_id: str,
+        *,
+        is_public: bool,
+        billable_account_uid: Optional[str] = None,
+        account_uid: Optional[str] = None,
+    ) -> dict[str, Any]:
+        """PATCH ``/evalsets/{evalset_id}/public`` with ``is_public`` coerced to ``bool``."""
+        endpoint = f"/evalsets/{evalset_id}/public"
+        visibility = {"is_public": bool(is_public)}
+        return self._evals_request(
+            endpoint, method="PATCH", json_body=visibility,
+            billable_account_uid=billable_account_uid, account_uid=account_uid,
+        )
+
+    def evals_get_public_eval(
+        self,
+        evalset_id: str,
+    ) -> dict[str, Any]:
+        """Fetch ``/public/evalsets/{evalset_id}`` via ``_fetch`` and return the decoded JSON."""
+        public_url = (
+            f"{self.urls.ai_agents_url}/api/ai-agents/v1/evals/public/evalsets/{evalset_id}"  # type: ignore
+        )
+        return self._fetch(public_url, method="GET").json()  # type: ignore
+
     def evals_list_experiments(
         self,
         *,
@@ -104,6 +203,7 @@ def evals_list_experiments(
         status: Optional[str] = None,
         limit: int = 50,
         offset: int = 0,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         params: dict[str, Any] = {"limit": limit, "offset": offset}
@@ -115,6 +215,7 @@ def evals_list_experiments(
             "/experiments",
             method="GET",
             params=params,
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -128,6 +229,7 @@ def evals_create_experiment(
         config: Optional[dict[str, Any]] = None,
         summary: Optional[dict[str, Any]] = None,
         tags: Optional[list[str]] = None,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         body = {
@@ -143,6 +245,7 @@ def evals_create_experiment(
             "/experiments",
             method="POST",
             json_body=body,
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -150,11 +253,13 @@ def evals_delete_experiment(
         self,
         experiment_id: str,
         *,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         return self._evals_request(
             f"/experiments/{experiment_id}",
             method="DELETE",
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -164,12 +269,14 @@ def evals_list_runs(
         *,
         limit: int = 50,
         offset: int = 0,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         return self._evals_request(
             f"/experiments/{experiment_id}/runs",
             method="GET",
             params={"limit": limit, "offset": offset},
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -183,6 +290,7 @@ def evals_create_run(
         metrics: Optional[dict[str, Any]] = None,
         summary: Optional[dict[str, Any]] = None,
         report: Optional[dict[str, Any]] = None,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         body: dict[str, Any] = {
@@ -199,6 +307,7 @@ def evals_create_run(
             f"/experiments/{experiment_id}/runs",
             method="POST",
             json_body=body,
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -206,11 +315,13 @@ def evals_get_run(
         self,
         run_id: str,
         *,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         return self._evals_request(
             f"/runs/{run_id}",
             method="GET",
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -218,12 +329,14 @@ def evals_compare_runs(
         self,
         run_ids: list[str],
         *,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         return self._evals_request(
             "/runs/compare",
             method="POST",
             json_body={"run_ids": run_ids},
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -239,6 +352,7 @@ def evals_create_live_event(
         passed: Optional[bool] = None,
         attributes: Optional[dict[str, Any]] = None,
         created_at: Optional[str] = None,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         body: dict[str, Any] = {
@@ -262,6 +376,7 @@ def evals_create_live_event(
             "/live/events",
             method="POST",
             json_body=body,
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -270,12 +385,14 @@ def evals_list_live_targets(
         *,
         window: str = "24h",
         limit: int = 50,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         return self._evals_request(
             "/live/targets",
             method="GET",
             params={"window": window, "limit": limit},
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
 
@@ -288,6 +405,7 @@ def evals_list_live_events(
         evaluator_name: Optional[str] = None,
         limit: int = 50,
         offset: int = 0,
+        billable_account_uid: Optional[str] = None,
         account_uid: Optional[str] = None,
     ) -> dict[str, Any]:
         params: dict[str, Any] = {
@@ -303,5 +421,6 @@ def evals_list_live_events(
             "/live/events",
             method="GET",
             params=params,
+            billable_account_uid=billable_account_uid,
             account_uid=account_uid,
         )
\ No newline at end of file
diff --git a/datalayer_core/mixins/ray.py b/datalayer_core/mixins/ray.py
index 7de8b647..815cfc8e 100644
--- a/datalayer_core/mixins/ray.py
+++ b/datalayer_core/mixins/ray.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -30,7 +33,7 @@ def _ray_request(
         prefixes = self._get_ray_api_prefixes()
         prefix = prefixes[0]
         response = self._fetch(  # type: ignore
-            f"{self.urls.ray_url}{prefix}{path}",  # type: ignore
+            f"{self.urls.runtimes_url}{prefix}{path}",  # type: ignore
             method=method,
             params=params,
             json=json_body,
diff --git a/datalayer_core/mixins/runtimes.py b/datalayer_core/mixins/runtimes.py
index e721f3e0..4d049820 100644
--- a/datalayer_core/mixins/runtimes.py
+++ b/datalayer_core/mixins/runtimes.py
@@ -4,6 +4,7 @@
 """Runtime management module for Datalayer Core."""
 
 import logging
+import os
 import sys
 import time
 from typing import Any, Optional
@@ -69,6 +70,15 @@ def _create_runtime(
             "environment_name": environment_name,
         }
 
+        resolved_billable_account_uid = (
+            billable_account_uid
+            or os.environ.get("DATALAYER_ACCOUNT_UID")
+            or os.environ.get("DATALAYER_BILLABLE_ACCOUNT_UID")
+        )
+        resolved_billable_account_handle = (
+            billable_account_handle or os.environ.get("DATALAYER_ACCOUNT_HANDLE")
+        )
+
         if given_name:
             body["given_name"] = given_name
 
@@ -118,12 +128,12 @@ def _create_runtime(
             if agent_spec:
                 body["agent_spec"] = agent_spec
 
-            if billable_account_uid:
-                body["billable_account_uid"] = billable_account_uid
+            if resolved_billable_account_uid:
+                body["billable_account_uid"] = resolved_billable_account_uid
             if billable_account_type:
                 body["billable_account_type"] = billable_account_type
-            if billable_account_handle:
-                body["billable_account_handle"] = billable_account_handle
+            if resolved_billable_account_handle:
+                body["billable_account_handle"] = resolved_billable_account_handle
 
             runtime_url = "{}/api/runtimes/v1/runtimes".format(self.urls.runtimes_url)  # type: ignore
             logger.debug(
diff --git a/datalayer_core/models/__init__.py b/datalayer_core/models/__init__.py
index c128d2c2..053da4a4 100644
--- a/datalayer_core/models/__init__.py
+++ b/datalayer_core/models/__init__.py
@@ -21,6 +21,7 @@
     HealthResponseData,
     ModelsResponseData,
 )
+from .api_key import ApiKeyModel, ApiKeyType
 from .base import (
     BaseResponse,
     DataResponse,
@@ -83,10 +84,11 @@
 from .runtime import RuntimeModel
 from .sandbox_snapshot import SandboxSnapshotModel
 from .secret import SecretModel, SecretVariant
-from .token import TokenModel, TokenType
 
 __all__ = [
+    "ApiKeyModel",
+    "ApiKeyType",
     "BaseResponse",
     "ChatMessage",
     "ChatRequest",
     "ChatResponseData",
@@ -146,9 +148,6 @@
     "TeamListResponseData",
     "TeamMemberModel",
     "TeamRequest",
-    "TokenModel",
-    "TokenModel",
-    "TokenType",
     "UsageData",
     "User",
     "UserModel",
diff --git a/datalayer_core/models/api_key.py b/datalayer_core/models/api_key.py
new file mode 100644
index 00000000..c6801264
--- /dev/null
+++ b/datalayer_core/models/api_key.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""
+API key models for Datalayer.
+
+Provides data structures for API key management in Datalayer environments.
+"""
+
+from enum import Enum
+from typing import Any, Dict, Union
+
+from pydantic import BaseModel, Field
+
+
+class ApiKeyType(str, Enum):
+    """
+    Enum for API key variants.
+
+    Members subclass ``str``, so each one compares equal to its lowercase value.
+    """
+
+    SECRET = "secret"
+    PUBLISHABLE = "publishable"
+    RESTRICTED = "restricted"
+    TEMPORARY = "temporary"
+
+
+class ApiKeyModel(BaseModel):
+    """
+    Pydantic model representing an API key in Datalayer.
+
+    ``uid``, ``name`` and ``description`` are required; ``api_key_type``
+    defaults to ``ApiKeyType.SECRET`` and ``kwargs`` to an empty dict.
+    """
+
+    uid: str = Field(..., description="Unique identifier for the API key")
+    name: str = Field(..., description="Name of the API key")
+    description: str = Field(..., description="Description of the API key")
+    api_key_type: Union[str, ApiKeyType] = Field(
+        default=ApiKeyType.SECRET,
+        description="Type of the API key (secret, publishable, restricted, temporary)",
+    )
+    kwargs: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional keyword arguments"
+    )
+
+    def __repr__(self) -> str:
+        """Return a short representation showing only ``uid`` and ``name``."""
+        return f"ApiKeyModel(uid='{self.uid}', name='{self.name}')"
diff --git a/datalayer_core/models/token.py b/datalayer_core/models/token.py
deleted file mode 100644
index 084deda0..00000000
--- a/datalayer_core/models/token.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) 2023-2025 Datalayer, Inc.
-# Distributed under the terms of the Modified BSD License.
-
-"""
-Token models for Datalayer.
-
-Provides data structures for token management in Datalayer environments.
-"""
-
-from enum import Enum
-from typing import Any, Dict, Union
-
-from pydantic import BaseModel, Field
-
-
-class TokenType(str, Enum):
-    """Enum for token variants."""
-
-    USER = "user_token"
-
-
-class TokenModel(BaseModel):
-    """
-    Pydantic model representing a token in Datalayer.
-    """
-
-    uid: str = Field(..., description="Unique identifier for the token")
-    name: str = Field(..., description="Name of the token")
-    description: str = Field(..., description="Description of the token")
-    token_type: Union[str, TokenType] = Field(
-        default=TokenType.USER,
-        description='Type of the token (e.g., "user", "admin")',
-    )
-    kwargs: Dict[str, Any] = Field(
-        default_factory=dict, description="Additional keyword arguments"
-    )
-
-    def __repr__(self) -> str:
-        return f"TokenModel(uid='{self.uid}', name='{self.name}')"
diff --git a/datalayer_core/runtimes/__init__.py b/datalayer_core/runtimes/__init__.py
deleted file mode 100644
index f7d0007b..00000000
--- a/datalayer_core/runtimes/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright (c) 2023-2025 Datalayer, Inc.
-# Distributed under the terms of the Modified BSD License.
diff --git a/datalayer_core/runtimes/sandbox_snapshot.py b/datalayer_core/runtimes/sandbox_snapshot.py
deleted file mode 100644
index a02198eb..00000000
--- a/datalayer_core/runtimes/sandbox_snapshot.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (c) 2023-2025 Datalayer, Inc.
-# Distributed under the terms of the Modified BSD License.
-
-"""
-Snapshot services for Datalayer.
-
-Provides code sandbox snapshot management and operations in Datalayer environments.
-"""
-
-import uuid
-from typing import Any, List, Optional, Tuple
-
-from datalayer_core.models.sandbox_snapshot import SandboxSnapshotModel
-
-
-def create_snapshot(name: Optional[str], description: Optional[str]) -> Tuple[str, str]:
-    """
-    Create snapshot name and description with defaults.
-
-    Parameters
-    ----------
-    name : Optional[str]
-        Name for the snapshot, or None for auto-generated name.
-    description : Optional[str]
-        Description for the snapshot, or None for auto-generated description.
-
-    Returns
-    -------
-    Tuple[str, str]
-        Tuple of (name, description) strings.
-    """
-    uid = uuid.uuid4()
-    if name is None:
-        name = f"snapshot-{uid}"
-
-    if description is None:
-        description = f"snapshot-{uid}"
-
-    return name, description
-
-
-def as_code_sandbox_snapshots(response: dict[str, Any]) -> List["SandboxSnapshotModel"]:
-    """
-    Parse API response and create SandboxSnapshot objects.
-
-    Parameters
-    ----------
-    response : dict[str, Any]
-        API response dictionary containing snapshots data.
-
-    Returns
-    -------
-    List[SandboxSnapshot]
-        List of SandboxSnapshot objects parsed from the response.
-    """
-    snapshot_objects = []
-    if response["success"]:
-        snapshots = response["snapshots"]
-        for snapshot in snapshots:
-            snapshot_objects.append(
-                SandboxSnapshotModel(
-                    uid=snapshot["uid"],
-                    name=snapshot["name"],
-                    description=snapshot["description"],
-                    environment=snapshot["environment"],
-                    metadata=snapshot["metadata"],
-                )
-            )
-    return snapshot_objects
diff --git a/datalayer_core/sandboxes/__init__.py b/datalayer_core/sandboxes/__init__.py
new file mode 100644
index 00000000..e5072289
--- /dev/null
+++ b/datalayer_core/sandboxes/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Code sandbox utilities for Datalayer."""
+
+from datalayer_core.sandboxes.code_sandbox_snapshots import (
+    as_code_sandbox_snapshots,
+    create_snapshot,
+)
+
+__all__ = [
+    "as_code_sandbox_snapshots",
+    "create_snapshot",
+]
diff --git a/datalayer_core/sandboxes/code_sandbox_snapshots.py b/datalayer_core/sandboxes/code_sandbox_snapshots.py
new file mode 100644
index 00000000..5acdaae0
--- /dev/null
+++ b/datalayer_core/sandboxes/code_sandbox_snapshots.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Snapshot services for Datalayer code sandboxes."""
+
+import uuid
+from typing import Any, Optional, Tuple
+
+from datalayer_core.models.sandbox_snapshot import SandboxSnapshotModel
+
+
+def create_snapshot(name: Optional[str], description: Optional[str]) -> Tuple[str, str]:
+    """
+    Create snapshot name and description with defaults.
+
+    Parameters
+    ----------
+    name : Optional[str]
+        Name for the snapshot, or None for auto-generated name.
+    description : Optional[str]
+        Description for the snapshot, or None for auto-generated description.
+
+    Returns
+    -------
+    Tuple[str, str]
+        Tuple of (name, description) strings.
+    """
+    uid = uuid.uuid4()
+    if name is None:
+        name = f"snapshot-{uid}"
+
+    if description is None:
+        description = f"snapshot-{uid}"
+
+    return name, description
+
+
+def as_code_sandbox_snapshots(
+    response: dict[str, Any],
+) -> list[SandboxSnapshotModel]:
+    """
+    Parse API response and create SandboxSnapshotModel objects.
+
+    Parameters
+    ----------
+    response : dict[str, Any]
+        API response dictionary containing snapshots data.
+
+    Returns
+    -------
+    list[SandboxSnapshotModel]
+        Models parsed from ``response["snapshots"]``; empty if ``success`` is falsy.
+    """
+    snapshot_objects: list[SandboxSnapshotModel] = []
+    if response["success"]:
+        snapshots = response["snapshots"]
+        for snapshot in snapshots:
+            snapshot_objects.append(
+                SandboxSnapshotModel(
+                    uid=snapshot["uid"],
+                    name=snapshot["name"],
+                    description=snapshot["description"],
+                    environment=snapshot["environment"],
+                    metadata=snapshot["metadata"],
+                )
+            )
+    return snapshot_objects
diff --git a/datalayer_core/templates/index.html b/datalayer_core/templates/index.html
index 3b3c720f..0f422224 100644
--- a/datalayer_core/templates/index.html
+++ b/datalayer_core/templates/index.html
@@ -13,10 +13,10 @@
         "version": "{{ datalayer_version }}",
         "runUrl": "{{ run_url }}",
         "iamRunUrl": "{{ iam_url }}",
-        "jupyterRunUrl": {{ run_url }}",
+        "jupyterRunUrl": "{{ run_url }}",
         "jupyterServerUrl": "http://localhost:8888{{ base_url }}",
-        "jupyterServerToken": "{{ token }}"
-        "jupyterServerless": "true",
+        "jupyterServerToken": "{{ token }}",
+        "jupyterServerless": "true"
       }
     </script>
     <script id="jupyter-config-data" type="application/json">
diff --git a/datalayer_core/tests/test_cli.py b/datalayer_core/tests/test_cli.py
index 9083a986..b9253b7f 100644
--- a/datalayer_core/tests/test_cli.py
+++ b/datalayer_core/tests/test_cli.py
@@ -74,8 +74,8 @@ def test_cli(args: List[str], expected_output: str) -> None:
         # TODO Disabled for now, we need to create a stable test account
         #        (["snapshots", "list", "--token", TEST_DATALAYER_API_KEY], "Snapshots"),
         #        (["snapshots", "ls", "--token", TEST_DATALAYER_API_KEY], "Snapshots"),
-        (["tokens", "list", "--token", TEST_DATALAYER_API_KEY], "Tokens"),
-        (["tokens", "ls", "--token", TEST_DATALAYER_API_KEY], "Tokens"),
+        (["api-keys", "list", "--token", TEST_DATALAYER_API_KEY], "API Keys"),
+        (["api-keys", "ls", "--token", TEST_DATALAYER_API_KEY], "API Keys"),
         (["whoami", "--token", TEST_DATALAYER_API_KEY], "User:"),
         (["logout"], "Stored token cleared"),
     ],
diff --git a/datalayer_core/tests/test_cli_exec_examples.py b/datalayer_core/tests/test_cli_exec_examples.py
new file mode 100644
index 00000000..d1dced37
--- /dev/null
+++ b/datalayer_core/tests/test_cli_exec_examples.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+"""Unit tests for datalayer exec example file generators."""
+
+from pathlib import Path
+
+from datalayer_core.cli.commands.exec import (
+    _create_example_notebook_file,
+    _create_example_python_file,
+)
+
+
+def test_create_example_python_file() -> None:
+    path = _create_example_python_file()
+    try:
+        assert path.exists()
+        assert path.suffix == ".py"
+        content = path.read_text(encoding="utf-8")
+        assert "--example-py" in content
+    finally:
+        path.unlink(missing_ok=True)
+
+
+def test_create_example_notebook_file() -> None:
+    path = _create_example_notebook_file()
+    try:
+        assert path.exists()
+        assert path.suffix == ".ipynb"
+        content = path.read_text(encoding="utf-8")
+        assert "--example-notebook" in content
+        assert '"cells"' in content
+    finally:
+        path.unlink(missing_ok=True)
diff --git a/datalayer_core/tests/test_cli_main.py b/datalayer_core/tests/test_cli_main.py
index fe12f845..44b48432 100644
--- a/datalayer_core/tests/test_cli_main.py
+++ b/datalayer_core/tests/test_cli_main.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
diff --git a/datalayer_core/tests/test_client.py b/datalayer_core/tests/test_client.py
index 532fa133..753d7d37 100644
--- a/datalayer_core/tests/test_client.py
+++ b/datalayer_core/tests/test_client.py
@@ -185,9 +185,9 @@ def test_profile() -> None:
     not bool(TEST_DATALAYER_API_KEY),
     reason="TEST_DATALAYER_API_KEY is not set, skipping secret tests.",
 )
-def test_tokens_list() -> None:
+def test_api_keys_list() -> None:
     """
-    Test the listing of tokens
+    Test the listing of API keys.
     """
     client = DatalayerClient(token=TEST_DATALAYER_API_KEY)
-    assert client.list_tokens()
+    assert isinstance(client.list_api_keys(), list)
diff --git a/datalayer_core/tests/test_evals_report_csv_usage.py b/datalayer_core/tests/test_evals_report_csv_usage.py
new file mode 100644
index 00000000..25d1189d
--- /dev/null
+++ b/datalayer_core/tests/test_evals_report_csv_usage.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+import csv
+from pathlib import Path
+from typing import Any, Optional
+
+from datalayer_core.evals.report import _report_appendix_lines, _write_report_csv
+
+# Fully populated usage payload shared by the tests below (copied per use).
+_RECONCILED_USAGE: dict[str, Any] = {
+    "source": "reconciled",
+    "provider": "openai",
+    "model": "gpt-test",
+    "requests": 2,
+    "prompt_tokens": 100,
+    "completion_tokens": 20,
+    "total_tokens": 120,
+    "credits_consumed": 0.15,
+}
+
+
+def _build_report(
+    metrics_extra: Optional[dict[str, Any]] = None,
+    run_extra: Optional[dict[str, Any]] = None,
+) -> dict[str, Any]:
+    """
+    Build a one-experiment, one-run report payload.
+
+    ``metrics_extra`` entries are placed before ``case_results`` in the run's
+    ``metrics``; ``run_extra`` entries are added to the run after ``metrics``.
+    """
+    run: dict[str, Any] = {
+        "id": "run-1",
+        "status": "completed",
+        "pass_rate": 1.0,
+        "failure_cause": {},
+        "metrics": {
+            **(metrics_extra or {}),
+            "case_results": [
+                {
+                    "name": "case-1",
+                    "passed": True,
+                    "score": 1.0,
+                    "category": "basic",
+                    "difficulty": "easy",
+                }
+            ],
+        },
+        **(run_extra or {}),
+    }
+    return {
+        "evalset_id": "evalset-1",
+        "run_environment": "ui",
+        "generated_at": "2026-06-20T00:00:00Z",
+        "experiments": [
+            {
+                "id": "exp-1",
+                "name": "Experiment One",
+                "agent_spec_id": "agent-1",
+                "agent_spec_name": "Agent One",
+                "runs_fetched": 1,
+                "runs_total": 1,
+                "baseline_pass_rate": 1.0,
+                "latest_pass_rate": 1.0,
+                "drift_delta": 0.0,
+                "latest_two_delta": 0.0,
+                "mean_pass_rate": 1.0,
+                "stddev_pass_rate": 0.0,
+                "runs": [run],
+            }
+        ],
+        "evaluator_results": [],
+    }
+
+
+def _write_and_read_rows(
+    report: dict[str, Any], tmp_path: Path
+) -> list[dict[str, Any]]:
+    """Write ``report`` with ``_write_report_csv`` and return the parsed CSV rows."""
+    output_path = tmp_path / "report.csv"
+    _write_report_csv(report, output_path)
+
+    with output_path.open("r", encoding="utf-8", newline="") as stream:
+        return list(csv.DictReader(stream))
+
+
+def _first_row(rows: list[dict[str, Any]], row_type: str) -> dict[str, Any]:
+    """Return the first row whose ``row_type`` column equals ``row_type``."""
+    return next(row for row in rows if row.get("row_type") == row_type)
+
+
+def test_write_report_csv_includes_usage_columns_and_values(tmp_path: Path) -> None:
+    """Usage under ``metrics.pydantic_ai_usage`` is written to run and case rows."""
+    report = _build_report(
+        metrics_extra={"pydantic_ai_usage": dict(_RECONCILED_USAGE)}
+    )
+    rows = _write_and_read_rows(report, tmp_path)
+
+    assert rows
+
+    run_row = _first_row(rows, "run")
+    case_row = _first_row(rows, "case")
+
+    assert run_row["usage_source"] == "reconciled"
+    assert run_row["usage_provider"] == "openai"
+    assert run_row["usage_model"] == "gpt-test"
+    assert run_row["usage_requests"] == "2"
+    assert run_row["usage_prompt_tokens"] == "100"
+    assert run_row["usage_completion_tokens"] == "20"
+    assert run_row["usage_total_tokens"] == "120"
+    assert run_row["usage_credits_consumed"] == "0.15"
+
+    assert case_row["usage_total_tokens"] == "120"
+    assert case_row["usage_provider"] == "openai"
+
+
+def test_write_report_csv_falls_back_to_report_usage_when_metrics_usage_missing(
+    tmp_path: Path,
+) -> None:
+    """Usage comes from ``report.usage.pydantic_ai_usage`` when ``metrics`` has none."""
+    usage = {"pydantic_ai_usage": dict(_RECONCILED_USAGE)}
+    report = _build_report(run_extra={"report": {"usage": usage}})
+    rows = _write_and_read_rows(report, tmp_path)
+
+    run_row = _first_row(rows, "run")
+    case_row = _first_row(rows, "case")
+
+    assert run_row["usage_source"] == "reconciled"
+    assert run_row["usage_total_tokens"] == "120"
+    assert run_row["usage_credits_consumed"] == "0.15"
+    assert case_row["usage_provider"] == "openai"
+
+
+def test_write_report_csv_supports_direct_usage_alias_keys(tmp_path: Path) -> None:
+    """A run-level ``usage`` dict with camelCase keys fills the usage columns."""
+    report = _build_report(
+        run_extra={
+            "usage": {
+                "provider": "openai",
+                "promptTokens": 90,
+                "completionTokens": 30,
+                "creditsConsumed": 0.2,
+            }
+        }
+    )
+    rows = _write_and_read_rows(report, tmp_path)
+
+    run_row = _first_row(rows, "run")
+    assert run_row["usage_provider"] == "openai"
+    assert run_row["usage_prompt_tokens"] == "90"
+    assert run_row["usage_completion_tokens"] == "30"
+    assert run_row["usage_total_tokens"] == "120"
+    assert run_row["usage_credits_consumed"] == "0.2"
+
+
+def test_report_appendix_run_table_reads_direct_report_usage_payload() -> None:
+    """The appendix run table shows values from a flat ``report.usage`` payload."""
+    experiments = [
+        {
+            "name": "exp-1",
+            "agent_spec_name": "Agent One",
+            "runs": [
+                {
+                    "id": "run-1",
+                    "status": "completed",
+                    "pass_rate": 1.0,
+                    "created_at": "2026-06-24T15:56:22Z",
+                    "failure_cause": {},
+                    "metrics": {
+                        "passed": 5,
+                        "total_cases": 5,
+                        "avg_score": 1.0,
+                    },
+                    "report": {
+                        "usage": {
+                            "total_tokens": 321,
+                            "credits_consumed": 0.42,
+                        }
+                    },
+                }
+            ],
+        }
+    ]
+
+    lines = _report_appendix_lines(
+        experiments,
+        evalset_runs_url="",
+        case_by_name={},
+        representative_case_name=None,
+    )
+    content = "\n".join(lines)
+
+    assert "| Total Tokens | Credits |" in content
+    assert "321" in content
+    assert "0.42" in content
diff --git a/datalayer_core/tests/test_evals_usage_capture.py b/datalayer_core/tests/test_evals_usage_capture.py
new file mode 100644
index 00000000..0ee2dfc6
--- /dev/null
+++ b/datalayer_core/tests/test_evals_usage_capture.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+# Copyright (c) 2023-2026 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
+import pytest
+
+from datalayer_core.agents.agent_local import extract_vercel_stream_usage
+from datalayer_core.evals.runner import _merge_run_usage
+
+
+def test_extract_vercel_stream_usage_prefers_token_payload() -> None:
+    """Usage values come from the metadata event that carries a ``usage`` payload."""
+    raw = "\n".join(
+        [
+            'data: {"type":"start"}',
+            'data: {"type":"message-metadata","messageMetadata":{"pydantic_ai":{"timestamp":"2026-06-24T12:00:00Z"}}}',
+            'data: {"type":"message-metadata","messageMetadata":{"pydantic_ai":{"provider":"bedrock","model":"claude","usage":{"prompt_tokens":12,"completion_tokens":5,"total_tokens":17,"credits_consumed":0.00034}}}}',
+            'data: [DONE]',
+        ]
+    )
+
+    usage = extract_vercel_stream_usage(raw)
+
+    assert usage["provider"] == "bedrock"
+    assert usage["model"] == "claude"
+    assert usage["prompt_tokens"] == 12
+    assert usage["completion_tokens"] == 5
+    assert usage["total_tokens"] == 17
+    assert usage["credits_consumed"] == 0.00034
+
+
+def test_merge_run_usage_normalizes_aliases_and_sums() -> None:
+    """Alias keys and numeric strings are normalized and summed across merges."""
+    aggregate: dict[str, object] = {}
+
+    aggregate = _merge_run_usage(
+        aggregate,
+        {
+            "provider": "bedrock",
+            "prompt_tokens": 10,
+            "completion_tokens": 3,
+            "credits_consumed": 0.0002,
+        },
+    )
+    aggregate = _merge_run_usage(
+        aggregate,
+        {
+            "input_tokens": "4",
+            "output_tokens": "2",
+            "credits": "0.0003",
+        },
+    )
+
+    assert aggregate["provider"] == "bedrock"
+    assert aggregate["prompt_tokens"] == 14
+    assert aggregate["completion_tokens"] == 5
+    assert aggregate["total_tokens"] == 19
+    # Summed floats: compare with a tolerance rather than bit-for-bit equality.
+    assert aggregate["credits_consumed"] == pytest.approx(0.0005)
diff --git a/datalayer_core/tests/test_ray.py b/datalayer_core/tests/test_ray.py
index 3b2b193f..0233da83 100644
--- a/datalayer_core/tests/test_ray.py
+++ b/datalayer_core/tests/test_ray.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -19,7 +22,7 @@ def json(self):
 
 class _FakeRayClient(RayMixin):
     def __init__(self):
-        self.urls = DatalayerURLs.from_environment(ray_url="https://ray.example")
+        self.urls = DatalayerURLs.from_environment(runtimes_url="https://ray.example")
         self.calls = []
 
     def _fetch(self, url: str, **kwargs):
@@ -27,16 +30,16 @@ def _fetch(self, url: str, **kwargs):
         return _FakeResponse({"success": True, "url": url, "kwargs": kwargs})
 
 
-def test_urls_resolve_ray_url_from_environment(monkeypatch):
-    monkeypatch.setenv("DATALAYER_RAY_URL", "https://ray-from-env.example/")
+def test_urls_resolve_runtimes_url_from_environment(monkeypatch):
+    monkeypatch.setenv("DATALAYER_RUNTIMES_URL", "https://runtimes-from-env.example/")
     urls = DatalayerURLs.from_environment()
-    assert urls.ray_url == "https://ray-from-env.example"
+    assert urls.runtimes_url == "https://runtimes-from-env.example"
 
 
-def test_urls_resolve_ray_url_from_default(monkeypatch):
-    monkeypatch.delenv("DATALAYER_RAY_URL", raising=False)
+def test_urls_resolve_runtimes_url_from_default(monkeypatch):
+    monkeypatch.delenv("DATALAYER_RUNTIMES_URL", raising=False)
     urls = DatalayerURLs.from_environment()
-    assert urls.ray_url == "https://prod1.datalayer.run"
+    assert urls.runtimes_url == "https://r1.datalayer.run"
 
 
 def test_ray_mixin_job_logs_and_events_paths():
diff --git a/datalayer_core/tests/test_usage.py b/datalayer_core/tests/test_usage.py
index 4d0d5786..c736ce0e 100644
--- a/datalayer_core/tests/test_usage.py
+++ b/datalayer_core/tests/test_usage.py
@@ -1,3 +1,6 @@
+# Copyright (c) 2023-2025 Datalayer, Inc.
+# Distributed under the terms of the Modified BSD License.
+
 # Copyright (c) 2023-2026 Datalayer, Inc.
 # Distributed under the terms of the Modified BSD License.
 
@@ -30,6 +33,11 @@
 )
 
 
+def _is_insufficient_credits_error(exc: Exception) -> bool:
+    message = str(exc).lower()
+    return "insufficient_credits" in message or "insufficient credits" in message
+
+
 def _build_test_client() -> DatalayerClient:
     return DatalayerClient(
         token=TEST_DATALAYER_API_KEY,
@@ -198,13 +206,18 @@ def test_usage_matrix_creation_reservation_and_history(account_case: str) -> Non
     runtime_name = f"test_usage_{account_case}_{uuid.uuid4().hex[:8]}"
 
     try:
-        runtime = client.create_runtime(
-            name=runtime_name,
-            time_reservation=1,
-            billable_account_uid=account["uid"],
-            billable_account_type=account["kind"],
-            billable_account_handle=account["handle"] or None,
-        )
+        try:
+            runtime = client.create_runtime(
+                name=runtime_name,
+                time_reservation=1,
+                billable_account_uid=account["uid"],
+                billable_account_type=account["kind"],
+                billable_account_handle=account["handle"] or None,
+            )
+        except RuntimeError as exc:
+            if account_case == "team" and _is_insufficient_credits_error(exc):
+                pytest.skip("Team account has insufficient credits for runtime launch in this environment.")
+            raise
 
         # Creation coverage.
         assert runtime.pod_name, "Runtime pod_name should be set after creation"
diff --git a/datalayer_core/utils/notebook.py b/datalayer_core/utils/notebook.py
index f91ddddb..2117900d 100644
--- a/datalayer_core/utils/notebook.py
+++ b/datalayer_core/utils/notebook.py
@@ -32,6 +32,10 @@ def get_cells(filepath: Path) -> t.Iterator[tuple[Optional[str], str]]:
             return
         for cell in nb.cells:
             if cell.cell_type == "code":
-                yield cell.id, cell.source
+                # Some notebooks do not include a cell id; keep execution robust.
+                cell_id = getattr(cell, "id", None)
+                if cell_id is None and isinstance(cell, dict):
+                    cell_id = cell.get("id")
+                yield (str(cell_id) if cell_id is not None else None), cell.source
     else:
         yield None, filepath.read_text(encoding="utf-8")
diff --git a/datalayer_core/utils/urls.py b/datalayer_core/utils/urls.py
index f51f7cc2..9f8f3835 100644
--- a/datalayer_core/utils/urls.py
+++ b/datalayer_core/utils/urls.py
@@ -8,6 +8,7 @@
 """
 
 import os
+from dataclasses import asdict
 from dataclasses import dataclass
 from typing import Optional
 
@@ -34,8 +35,6 @@
 
 DEFAULT_DATALAYER_AI_INFERENCE_URL = DEFAULT_DATALAYER_RUN_URL
 
-DEFAULT_DATALAYER_RAY_URL = DEFAULT_DATALAYER_RUN_URL
-
 DEFAULT_DATALAYER_MCP_SERVERS_URL = DEFAULT_DATALAYER_RUN_URL
 
 DEFAULT_DATALAYER_OTEL_URL = DEFAULT_DATALAYER_RUN_URL
@@ -48,6 +47,8 @@
 
 DEFAULT_DATALAYER_SUPPORT_URL = DEFAULT_DATALAYER_RUN_URL
 
+DEFAULT_DATALAYER_SCHEDULER_URL = DEFAULT_DATALAYER_RUN_URL
+
 
 @dataclass
 class DatalayerURLs:
@@ -87,8 +88,8 @@ class DatalayerURLs:
         The Datalayer support service URL
     mcp_server_url : str
         The Datalayer MCP server service URL
-    ray_url : str
-        The Datalayer Ray service URL
+    scheduler_url : str
+        The Datalayer scheduler service URL
     """
 
     run_url: str
@@ -105,7 +106,7 @@ class DatalayerURLs:
     status_url: str
     support_url: str
     mcp_server_url: str
-    ray_url: str
+    scheduler_url: str
 
     @classmethod
     def from_environment(
@@ -124,7 +125,7 @@ def from_environment(
         status_url: Optional[str] = None,
         support_url: Optional[str] = None,
         mcp_server_url: Optional[str] = None,
-        ray_url: Optional[str] = None,
+        scheduler_url: Optional[str] = None,
     ) -> "DatalayerURLs":
         """
         Create DatalayerURLs instance from environment variables and parameters.
@@ -173,9 +174,9 @@ def from_environment(
         mcp_server_url : Optional[str]
             Override for the MCP server URL. If None, will check DATALAYER_MCP_SERVER_URL env var
             then fallback to DEFAULT_DATALAYER_MCP_SERVER_URL.
-        ray_url : Optional[str]
-            Override for the Ray URL. If None, will check DATALAYER_RAY_URL env var
-            then fallback to DEFAULT_DATALAYER_RAY_URL.
+        scheduler_url : Optional[str]
+            Override for the scheduler URL. If None, will check DATALAYER_SCHEDULER_URL env var
+            then fallback to DEFAULT_DATALAYER_SCHEDULER_URL.
 
         Returns
         -------
@@ -285,11 +286,11 @@ def from_environment(
             or base_url_for_services
             or DEFAULT_DATALAYER_MCP_SERVERS_URL
         )
-        resolved_ray_url = (
-            ray_url
-            or os.environ.get("DATALAYER_RAY_URL")
+        resolved_scheduler_url = (
+            scheduler_url
+            or os.environ.get("DATALAYER_SCHEDULER_URL")
             or base_url_for_services
-            or DEFAULT_DATALAYER_RAY_URL
+            or DEFAULT_DATALAYER_SCHEDULER_URL
         )
 
         # Strip trailing slashes for consistency
@@ -307,7 +308,7 @@ def from_environment(
         resolved_status_url = resolved_status_url.rstrip("/")
         resolved_support_url = resolved_support_url.rstrip("/")
         resolved_mcp_server_url = resolved_mcp_server_url.rstrip("/")
-        resolved_ray_url = resolved_ray_url.rstrip("/")
+        resolved_scheduler_url = resolved_scheduler_url.rstrip("/")
 
         return cls(
             run_url=resolved_run_url,
@@ -324,7 +325,7 @@ def from_environment(
             status_url=resolved_status_url,
             support_url=resolved_support_url,
             mcp_server_url=resolved_mcp_server_url,
-            ray_url=resolved_ray_url,
+            scheduler_url=resolved_scheduler_url,
         )
 
     def __post_init__(self) -> None:
@@ -343,4 +344,46 @@ def __post_init__(self) -> None:
         self.status_url = self.status_url.rstrip("/")
         self.support_url = self.support_url.rstrip("/")
         self.mcp_server_url = self.mcp_server_url.rstrip("/")
-        self.ray_url = self.ray_url.rstrip("/")
+        self.scheduler_url = self.scheduler_url.rstrip("/")
+
+    def as_dict(self) -> dict[str, str]:
+        """Return all resolved service URLs as a dictionary."""
+        return asdict(self)
+
+    @classmethod
+    def get_all_urls(
+        cls,
+        run_url: Optional[str] = None,
+        iam_url: Optional[str] = None,
+        runtimes_url: Optional[str] = None,
+        spacer_url: Optional[str] = None,
+        library_url: Optional[str] = None,
+        manager_url: Optional[str] = None,
+        ai_agents_url: Optional[str] = None,
+        ai_inference_url: Optional[str] = None,
+        otel_url: Optional[str] = None,
+        growth_url: Optional[str] = None,
+        success_url: Optional[str] = None,
+        status_url: Optional[str] = None,
+        support_url: Optional[str] = None,
+        mcp_server_url: Optional[str] = None,
+        scheduler_url: Optional[str] = None,
+    ) -> dict[str, str]:
+        """Resolve and return all service URLs with optional overrides."""
+        return cls.from_environment(
+            run_url=run_url,
+            iam_url=iam_url,
+            runtimes_url=runtimes_url,
+            spacer_url=spacer_url,
+            library_url=library_url,
+            manager_url=manager_url,
+            ai_agents_url=ai_agents_url,
+            ai_inference_url=ai_inference_url,
+            otel_url=otel_url,
+            growth_url=growth_url,
+            success_url=success_url,
+            status_url=status_url,
+            support_url=support_url,
+            mcp_server_url=mcp_server_url,
+            scheduler_url=scheduler_url,
+        ).as_dict()
diff --git a/docs/docs/index.mdx b/docs/docs/index.mdx
index 0719b661..33e6c404 100644
--- a/docs/docs/index.mdx
+++ b/docs/docs/index.mdx
@@ -4,18 +4,202 @@ title: Datalayer Core
 slug: /
 ---
 
-import HomepageRow1 from '@site/src/components/HomepageRow1';
-import HomepageRow2 from '@site/src/components/HomepageRow2';
-import HomepageRow3 from '@site/src/components/HomepageRow3';
-import DocCardList from '@theme/DocCardList';
-import { useDocsSidebar } from '@docusaurus/plugin-content-docs/client';
-
 # Datalayer Core
 
-Datalayer Core is a solution to scale AI without losing the user's local productivity.
+Datalayer Core provides TypeScript and Python clients plus a CLI for authentication, runtime management, notebooks/documents, eval workflows, and operational APIs.
+
+This page is the main API entrypoint and consolidates the previous API reference content.
+
+## Installation
+
+```bash
+npm install @datalayer/core
+```
+
+## Initialize The TypeScript Client
+
+```typescript
+import { DatalayerClient } from '@datalayer/core/client';
+import { DEFAULT_SERVICE_URLS } from '@datalayer/core/api/constants';
+
+const client = new DatalayerClient({
+	token: 'bearer-token-123',
+	iamRunUrl: DEFAULT_SERVICE_URLS.IAM,
+	runtimesRunUrl: DEFAULT_SERVICE_URLS.RUNTIMES,
+	spacerRunUrl: DEFAULT_SERVICE_URLS.SPACER,
+});
+```
+
+## Handlers Pattern
+
+Use lifecycle handlers for logging, UX state, and centralized error handling.
+
+```typescript
+const client = new DatalayerClient({
+	token: 'bearer-token-123',
+	handlers: {
+		beforeCall: async (methodName, args) => {
+			console.log(`[Client] ${methodName}`, args);
+		},
+		afterCall: async (methodName) => {
+			console.log(`[Client] ${methodName} completed`);
+		},
+		onError: async (methodName, error) => {
+			console.error(`[Client] ${methodName} failed`, error);
+		},
+	},
+});
+```
+
+## Authentication
+
+```typescript
+const user = await client.whoami();
+console.log(user.uid, user.email);
+
+await client.login('new-bearer-token');
+const credits = await client.getCredits();
+const iamHealth = await client.checkIAMHealth();
+await client.logout();
+```
+
+## Runtime Management
+
+`createRuntime` currently takes the requested duration in minutes (not raw credits) and converts it using the environment burn rate.
+
+```typescript
+const environments = await client.listEnvironments();
+
+const runtime = await client.createRuntime(
+	'ai-agents-env',
+	'notebook',
+	'my-runtime',
+	30,
+);
+
+await runtime.waitUntilReady(60_000);
+
+const reused = await client.ensureRuntime(
+	'ai-agents-env',
+	50,
+	true,
+	60_000,
+	true,
+);
+
+const snapshot = await client.createSnapshot(
+	runtime.podName,
+	'checkpoint-1',
+	'Before major change',
+	false,
+);
+
+await client.deleteSnapshot(snapshot.uid);
+await client.deleteRuntime(runtime.podName);
+
+const runtimesHealth = await client.checkRuntimesHealth();
+```
+
+## Notebook And Document Management
+
+```typescript
+const spaces = await client.getMySpaces();
+const space = spaces[0];
+
+const notebook = await client.createNotebook(
+	space.uid,
+	'Analysis Notebook',
+	'Q4 analysis',
+);
+
+const updatedNotebook = await client.updateNotebook(
+	notebook.id,
+	'Analysis Notebook v2',
+	'Updated description',
+);
+
+const lexical = await client.createLexical(
+	space.uid,
+	'Project Notes',
+	'Working notes',
+);
+
+await client.updateLexical(lexical.id, 'Project Notes v2', 'Refined notes');
+
+const items = await client.getSpaceItems(space.uid);
+await client.deleteSpaceItem(notebook.id);
+await client.deleteSpaceItem(lexical.id);
+
+const spacerHealth = await client.checkSpacerHealth();
+```
+
+## Secrets And Datasources
+
+```typescript
+const secret = await client.createSecret({
+	name: 'AWS_ACCESS_KEY_ID',
+	description: 'Bedrock access key',
+	value: '***',
+});
+
+await client.updateSecret(secret.id, { description: 'Updated description' });
+await client.listSecrets();
+await client.deleteSecret(secret.id);
+
+const ds = await client.createDatasource({
+	name: 'warehouse',
+	variant: 'postgres',
+	configuration: { host: 'db.internal', port: 5432 },
+});
+
+await client.getDatasource(ds.id);
+```
+
+## Model Classes
+
+Client methods return rich model instances with helpers:
+
+- `RuntimeDTO`: `waitUntilReady`, `getState`, `createSnapshot`, `delete`
+- `CodeSandboxSnapshotDTO`: `getStatus`, `getSize`, `restore`, `delete`
+- `NotebookDTO` / `LexicalDTO`: metadata + content helper methods
+- `SpaceDTO` / `ProjectDTO`: item composition and workspace-level operations
+
+## Error Handling
+
+```typescript
+try {
+	const runtime = await client.createRuntime('ai-agents-env', 'notebook', 'r1', 10);
+	await runtime.waitUntilReady(30_000);
+} catch (error) {
+	console.error('Runtime provisioning failed', error);
+}
+```
+
+## Best Practices
+
+- Prefer environment variables or secure token storage over hard-coded tokens.
+- Reuse runtimes when possible (`ensureRuntime`) to reduce startup overhead.
+- Persist checkpoints before destructive steps.
+- Use handlers for observability and UX state transitions.
+- Validate service health (`checkIAMHealth`, `checkRuntimesHealth`, `checkSpacerHealth`) in production workflows.
+
+## CLI Quick Reference
+
+The CLI supports global overrides and auth defaults:
+
+```bash
+datalayer --api-key "$DATALAYER_API_KEY" --ai-agents-url https://... evals ls
+```
+
+Key command groups:
+
+- `auth`, `login`, `logout`, `whoami`
+- `agents`, `envs`, `secrets`, `exec`, `console`
+- `evals`, `usage`, `otel`, `api-keys`, `subscription`, `plans`
+
+## Related
 
-<DocCardList items={useDocsSidebar().items.filter(i => (i.label !== 'Datalayer Core'))} />
+- API source examples: formerly `API.md` in the repository root, now consolidated into this page
+- CLI reference: Datalayer platform docs
+- Package README: repository root
 
-<HomepageRow1 />
-<HomepageRow2 />
-<HomepageRow3 />
diff --git a/docs/docs/python/APIKeys/_category_.yml b/docs/docs/python/APIKeys/_category_.yml
new file mode 100644
index 00000000..aba13a12
--- /dev/null
+++ b/docs/docs/python/APIKeys/_category_.yml
@@ -0,0 +1,2 @@
+label: "API Keys"
+position: 5
diff --git a/docs/docs/python/APIKeys/index.mdx b/docs/docs/python/APIKeys/index.mdx
new file mode 100644
index 00000000..e11621cd
--- /dev/null
+++ b/docs/docs/python/APIKeys/index.mdx
@@ -0,0 +1,169 @@
+---
+title: API Keys
+---
+
+import DocCardList from '@theme/DocCardList';
+
+The API Keys module provides authentication credential management for the Datalayer platform. API keys are used for API authentication, service-to-service communication, and secure access to Datalayer resources with configurable expiration and access control.
+
+## Overview
+
+API keys in Datalayer provide secure, time-limited access to platform resources. They serve as authentication credentials for API calls, runtime access, and service integrations. All API keys are cryptographically secure and can be configured with expiration dates for enhanced security.
+
+### Key Features
+
+- **Secure Authentication**: Cryptographically secure API keys for API access
+- **Configurable Expiration**: Set custom expiration dates for API key lifecycle management
+- **Access Control**: Role-based access control integrated with API key permissions
+- **API Integration**: Seamless integration with Datalayer APIs and services
+- **Audit Trail**: Complete audit logging of API key creation, usage, and deletion
+
+## API Key Class
+
+The `ApiKey` model represents an authentication API key with the following attributes:
+
+- **uid**: Unique identifier for the API key
+- **name**: Human-readable name of the API key
+- **description**: Description of the API key's purpose
+- **token_type**: Type of API key (e.g. `secret`)
+
+
+## Creating API Keys
+
+### Basic API Key Creation
+
+Create a simple secret API key for API authentication:
+
+```python
+from datalayer_core import DatalayerClient
+
+client = DatalayerClient()
+token_response = client.create_token(
+    name="API_ACCESS_TOKEN",
+    description="API key for automated API access"
+)
+
+print(f"API key created: {token_response['name']}")
+print(f"API key value: {token_response['token']}")  # Store securely!
+print(f"API key UID: {token_response['uid']}")
+```
+
+### API Key with Expiration
+
+Create API keys with custom expiration dates:
+
+```python
+from datalayer_core import DatalayerClient
+import time
+
+client = DatalayerClient()
+
+# Create API key that expires in 30 days
+expiration_timestamp = int(time.time()) + (30 * 24 * 60 * 60)  # 30 days from now
+
+token_response = client.create_token(
+    name="TEMPORARY_ACCESS_TOKEN",
+    description="30-day temporary access API key",
+    expiration_date=expiration_timestamp
+)
+
+print(f"API key created with expiration: {token_response['name']}")
+print(f"Expires at: {time.ctime(expiration_timestamp)}")
+```
+
+### Typed API Key Creation
+
+Create API keys with specific types:
+
+```python
+from datalayer_core import DatalayerClient
+from datalayer_core.models import ApiKeyType
+
+client = DatalayerClient()
+
+# Create secret API key (default type)
+user_token = client.create_token(
+    name="USER_API_TOKEN",
+    description="User API access key",
+    token_type=ApiKeyType.SECRET
+)
+
+print(f"Created {user_token['type']} API key: {user_token['name']}")
+```
+
+## Listing and Managing API Keys
+
+### List All API Keys
+
+View all API keys associated with your account:
+
+```python
+from datalayer_core import DatalayerClient
+import time
+
+client = DatalayerClient()
+tokens = client.list_tokens()
+
+print(f"Found {len(tokens)} API keys:")
+for token in tokens:
+    print(f"  - {token.name}")
+    print(f"    UID: {token.uid}")
+    print(f"    Type: {token.token_type}")
+    print(f"    Description: {token.description}")
+    print("---")
+```
+
+### Search API Keys
+
+Find API keys by name or description:
+
+```python
+from datalayer_core import DatalayerClient
+
+def find_tokens(client, search_term):
+    """Find API keys by name or description."""
+    all_tokens = client.list_tokens()
+    matching_tokens = []
+    
+    for token in all_tokens:
+        if (search_term.lower() in token.name.lower() or 
+            search_term.lower() in token.description.lower()):
+            matching_tokens.append(token)
+    
+    return matching_tokens
+```
+
+## Deleting API Keys
+
+### Delete Single API Key
+
+Remove an API key by UID or API key object:
+
+```python
+from datalayer_core import DatalayerClient
+
+client = DatalayerClient()
+
+# Delete by UID
+success = client.delete_token("token-uid-12345")
+print(f"API key deletion successful: {success}")
+
+# Delete by API key object
+tokens = client.list_tokens()
+if tokens:
+    old_token = tokens[0]  # Delete the first API key
+    success = client.delete_token(old_token)
+    print(f"Deleted API key '{old_token.name}': {success}")
+```
+
+## Notes
+
+- **Security**: All API keys are cryptographically secure and should be treated as sensitive credentials
+- **Storage**: Never hardcode API keys in source code; use secure storage mechanisms
+- **Expiration**: Always set appropriate expiration dates for enhanced security
+- **Rotation**: Implement regular API key rotation as part of security best practices
+- **Monitoring**: Regularly audit and monitor API key usage for security compliance
+- **Access Control**: API keys inherit the permissions of the user who created them
+- **API Usage**: API keys can be used for both Client authentication and direct API calls
+- **Cleanup**: Regularly clean up expired and unused API keys to maintain security hygiene
+
diff --git a/docs/docs/python/Tokens/_category_.yml b/docs/docs/python/Tokens/_category_.yml
deleted file mode 100644
index 71d43cc7..00000000
--- a/docs/docs/python/Tokens/_category_.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-label: "Tokens"
-position: 5
diff --git a/docs/docs/python/Tokens/index.mdx b/docs/docs/python/Tokens/index.mdx
deleted file mode 100644
index 5b04fa2a..00000000
--- a/docs/docs/python/Tokens/index.mdx
+++ /dev/null
@@ -1,169 +0,0 @@
----
-title: Tokens
----
-
-import DocCardList from '@theme/DocCardList';
-
-The Tokens module provides authentication token management for the Datalayer platform. Tokens are used for API authentication, service-to-service communication, and secure access to Datalayer resources with configurable expiration and access control.
-
-## Overview
-
-Tokens in Datalayer provide secure, time-limited access to platform resources. They serve as authentication credentials for API calls, runtime access, and service integrations. All tokens are cryptographically secure and can be configured with expiration dates for enhanced security.
-
-### Key Features
-
-- **Secure Authentication**: Cryptographically secure tokens for API access
-- **Configurable Expiration**: Set custom expiration dates for token lifecycle management
-- **Access Control**: Role-based access control integrated with token permissions
-- **API Integration**: Seamless integration with Datalayer APIs and services
-- **Audit Trail**: Complete audit logging of token creation, usage, and deletion
-
-## Token Class
-
-The `Token` class represents an authentication token with the following attributes:
-
-- **uid**: Unique identifier for the token
-- **name**: Human-readable name of the token
-- **description**: Description of the token's purpose
-- **token_type**: Type of token (e.g. `user_token`)
-
-
-## Creating Tokens
-
-### Basic Token Creation
-
-Create a simple user token for API authentication:
-
-```python
-from datalayer_core import DatalayerClient
-
-client = DatalayerClient()
-token_response = client.create_token(
-    name="API_ACCESS_TOKEN",
-    description="Token for automated API access"
-)
-
-print(f"Token created: {token_response['name']}")
-print(f"Token value: {token_response['token']}")  # Store securely!
-print(f"Token UID: {token_response['uid']}")
-```
-
-### Token with Expiration
-
-Create tokens with custom expiration dates:
-
-```python
-from datalayer_core import DatalayerClient
-import time
-
-client = DatalayerClient()
-
-# Create token that expires in 30 days
-expiration_timestamp = int(time.time()) + (30 * 24 * 60 * 60)  # 30 days from now
-
-token_response = client.create_token(
-    name="TEMPORARY_ACCESS_TOKEN",
-    description="30-day temporary access token",
-    expiration_date=expiration_timestamp
-)
-
-print(f"Token created with expiration: {token_response['name']}")
-print(f"Expires at: {time.ctime(expiration_timestamp)}")
-```
-
-### Typed Token Creation
-
-Create tokens with specific types:
-
-```python
-from datalayer_core import DatalayerClient
-from datalayer_core.tokens import TokenType
-
-client = DatalayerClient()
-
-# Create user token (default type)
-user_token = client.create_token(
-    name="USER_API_TOKEN",
-    description="User API access token",
-    token_type=TokenType.USER
-)
-
-print(f"Created {user_token['type']} token: {user_token['name']}")
-```
-
-## Listing and Managing Tokens
-
-### List All Tokens
-
-View all tokens associated with your account:
-
-```python
-from datalayer_core import DatalayerClient
-import time
-
-client = DatalayerClient()
-tokens = client.list_tokens()
-
-print(f"Found {len(tokens)} tokens:")
-for token in tokens:
-    print(f"  - {token.name}")
-    print(f"    UID: {token.uid}")
-    print(f"    Type: {token.token_type}")
-    print(f"    Description: {token.description}")
-    print("---")
-```
-
-### Search Tokens
-
-Find tokens by name or description:
-
-```python
-from datalayer_core import DatalayerClient
-
-def find_tokens(client, search_term):
-    """Find tokens by name or description."""
-    all_tokens = client.list_tokens()
-    matching_tokens = []
-    
-    for token in all_tokens:
-        if (search_term.lower() in token.name.lower() or 
-            search_term.lower() in token.description.lower()):
-            matching_tokens.append(token)
-    
-    return matching_tokens
-```
-
-## Deleting Tokens
-
-### Delete Single Token
-
-Remove a token by UID or Token object:
-
-```python
-from datalayer_core import DatalayerClient
-
-client = DatalayerClient()
-
-# Delete by UID
-success = client.delete_token("token-uid-12345")
-print(f"Token deletion successful: {success}")
-
-# Delete by Token object
-tokens = client.list_tokens()
-if tokens:
-    old_token = tokens[0]  # Delete the first token
-    success = client.delete_token(old_token)
-    print(f"Deleted token '{old_token.name}': {success}")
-```
-
-## Notes
-
-- **Security**: All tokens are cryptographically secure and should be treated as sensitive credentials
-- **Storage**: Never hardcode tokens in source code; use secure storage mechanisms
-- **Expiration**: Always set appropriate expiration dates for enhanced security
-- **Rotation**: Implement regular token rotation as part of security best practices
-- **Monitoring**: Regularly audit and monitor token usage for security compliance
-- **Access Control**: Tokens inherit the permissions of the user who created them
-- **API Usage**: Tokens can be used for both Client authentication and direct API calls
-- **Cleanup**: Regularly clean up expired and unused tokens to maintain security hygiene
-
diff --git a/examples/nextjs/README.md b/examples/nextjs/README.md
index d6a8f7f3..36d3f5b5 100644
--- a/examples/nextjs/README.md
+++ b/examples/nextjs/README.md
@@ -32,7 +32,7 @@ This example showcases:
 
 - Node.js 20+
 - npm
-- Datalayer account and API token ([Create one here](https://datalayer.app/settings/iam/tokens))
+- Datalayer account and API key ([Create one here](https://datalayer.app/settings/iam/api-keys))
 
 ## Installation
 
@@ -88,7 +88,7 @@ This example showcases:
 ### Environments Page (`/environments`)
 
 - View all available compute environments
-- See environment specifications (language, description)
+- See environment specifications (language, description)
 - Visual icons for each environment type
 
 ### Viewer Page (`/viewer`)
diff --git a/examples/nextjs/src/app/welcome/page.tsx b/examples/nextjs/src/app/welcome/page.tsx
index 44fffd5a..ef3e94ac 100644
--- a/examples/nextjs/src/app/welcome/page.tsx
+++ b/examples/nextjs/src/app/welcome/page.tsx
@@ -171,21 +171,21 @@ export default function WelcomePage() {
             }}
           >
             <Text as="p" sx={{ fontSize: 1, color: 'fg.muted', mb: 2 }}>
-              Don&apos;t have a token yet?
+              Don&apos;t have an API key yet?
             </Text>
             <Link
-              href="https://datalayer.app/settings/iam/tokens"
+              href="https://datalayer.app/settings/iam/api-keys"
               target="_blank"
               sx={{ fontSize: 1 }}
             >
-              <LinkExternalIcon size={16} /> Create a token on Datalayer
+              <LinkExternalIcon size={16} /> Create an API key on Datalayer
             </Link>
           </Box>
         </Box>
 
         <Box sx={{ textAlign: 'center', mt: 4 }}>
           <Text as="p" sx={{ fontSize: 0, color: 'fg.subtle' }}>
-            Your token is stored locally and used to authenticate with
+            Your API key is stored locally and used to authenticate with
             Datalayer&apos;s API
           </Text>
         </Box>
diff --git a/package.json b/package.json
index 690c61c2..ecad2177 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@datalayer/core",
-  "version": "1.0.24",
+  "version": "1.0.25",
   "type": "module",
   "workspaces": [
     ".",
diff --git a/pyproject.toml b/pyproject.toml
index b375249c..8657a42a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ classifiers = [
 ]
 # TODO Unpin keyring https://github.com/datalayer/core/issues/36
 dependencies = [
+    "agent_runtimes>=1.0.10",
     "httpx",
     "jupyter-console",
     "jupyter-kernel-client",
@@ -35,7 +36,7 @@ dependencies = [
     "mcp",
     "pydantic-settings",
     "pydantic[email]",
-    "pyyaml>=6.0",  # Require newer PyYAML that builds on Python 3.12+
+    "pyyaml>=6.0",
     "questionary",
     "requests>=2.33.0",
     "pygments>=2.20.0",
@@ -100,12 +101,16 @@ path = "datalayer_core/__version__.py"
 [tool.hatch.build.targets.sdist]
 artifacts = [
     "datalayer_core/assets/about.md",
+    "datalayer_core/templates/*.html",
+    "datalayer_core/static/**",
 ]
 exclude = [".github", "binder"]
 
 [tool.hatch.build.targets.wheel]
 artifacts = [
     "datalayer_core/assets/about.md",
+    "datalayer_core/templates/*.html",
+    "datalayer_core/static/**",
 ]
 
 [tool.hatch.build.targets.wheel.shared-data]
diff --git a/src/api/constants.ts b/src/api/constants.ts
index b90f6431..c0e312d4 100644
--- a/src/api/constants.ts
+++ b/src/api/constants.ts
@@ -11,6 +11,7 @@ export const API_BASE_PATHS = {
   IAM: '/api/iam/v1',
   OTEL: '/api/otel/v1',
   RUNTIMES: '/api/runtimes/v1',
+  SCHEDULER: '/api/scheduler/v1',
   SPACER: '/api/spacer/v1',
 } as const;
 
@@ -26,6 +27,8 @@ export const DEFAULT_SERVICE_URLS = {
   OTEL: 'https://prod1.datalayer.run',
   /** Default URL for Runtimes service */
   RUNTIMES: 'https://r1.datalayer.run',
+  /** Default URL for Scheduler (cron schedules and runs) service */
+  SCHEDULER: 'https://prod1.datalayer.run',
   /** Default URL for Spacer (workspaces and collaboration) service */
   SPACER: 'https://prod1.datalayer.run',
 } as const;
diff --git a/src/api/index.ts b/src/api/index.ts
index 6a464843..c3d63093 100644
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -24,6 +24,7 @@ export type { IRequestDatalayerAPIOptions } from './DatalayerApi';
 export * as iam from './iam';
 export * as otel from './otel';
 export * as runtimes from './runtimes';
+export * as scheduler from './scheduler';
 export * as spacer from './spacer';
 
 /**
diff --git a/src/api/runtimes/checkpoints.ts b/src/api/runtimes/checkpoints.ts
index eae229d3..6067266b 100644
--- a/src/api/runtimes/checkpoints.ts
+++ b/src/api/runtimes/checkpoints.ts
@@ -30,9 +30,9 @@ export interface RuntimeCheckpointData {
   description: string;
   /** Runtime that was checkpointed */
   runtime_uid: string;
-  /** Agent spec identifier (e.g. "mocks/monitor-sales-kpis") */
+  /** Agentspec identifier (e.g. "mocks/monitor-sales-kpis") */
   agent_spec_id: string;
-  /** Full agent spec payload */
+  /** Full agentspec payload */
   agentspec: Record<string, any>;
   /** Additional metadata */
   metadata: Record<string, any>;
@@ -58,9 +58,9 @@ export interface CreateRuntimeCheckpointRequest {
   name?: string;
   /** Checkpoint description */
   description?: string;
-  /** Agent spec identifier */
+  /** Agentspec identifier */
   agentspec_id?: string;
-  /** Full agent spec payload to persist */
+  /** Full agentspec payload to persist */
   agentspec?: Record<string, any>;
   /** Additional metadata */
   metadata?: Record<string, any>;
diff --git a/src/api/runtimes/runtimes.ts b/src/api/runtimes/runtimes.ts
index 748fa9a1..63d23302 100644
--- a/src/api/runtimes/runtimes.ts
+++ b/src/api/runtimes/runtimes.ts
@@ -254,9 +254,9 @@ export interface PauseRuntimeBody {
   name?: string;
   /** Checkpoint description */
   description?: string;
-  /** Agent spec identifier */
+  /** Agentspec identifier */
   agent_spec_id?: string;
-  /** Full agent spec payload to persist with the checkpoint */
+  /** Full agentspec payload to persist with the checkpoint */
   agentspec?: Record<string, any>;
   /** Additional metadata */
   metadata?: Record<string, any>;
@@ -305,7 +305,7 @@ export interface ResumeRuntimeBody {
   checkpoint_mode?: 'criu' | 'light';
   /** Explicit checkpoint identifier */
   checkpoint_id?: string;
-  /** Agent spec identifier (required by the operator for restore) */
+  /** Agentspec identifier (required by the operator for restore) */
   agent_spec_id?: string;
   /** Specific checkpoint timestamp to restore from */
   checkpoint_timestamp?: string;
diff --git a/src/api/scheduler/index.ts b/src/api/scheduler/index.ts
new file mode 100644
index 00000000..9f3cfba4
--- /dev/null
+++ b/src/api/scheduler/index.ts
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+/**
+ * Scheduler API module for the Datalayer platform.
+ *
+ * @module api/scheduler
+ */
+
+export * from './schedules';
diff --git a/src/api/scheduler/schedules.ts b/src/api/scheduler/schedules.ts
new file mode 100644
index 00000000..c4f0de3d
--- /dev/null
+++ b/src/api/scheduler/schedules.ts
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+/**
+ * Scheduler schedules API functions for the Datalayer platform.
+ *
+ * Provides functions for listing, creating, updating, and disabling cron schedules that
+ * trigger automated notebook executions.
+ *
+ * @module api/scheduler/schedules
+ */
+
+import { requestDatalayerAPI } from '../DatalayerApi';
+import { API_BASE_PATHS, DEFAULT_SERVICE_URLS } from '../constants';
+
+/**
+ * Raw schedule document as returned by the scheduler service.
+ *
+ * Fields follow the Solr suffix convention (`_s`, `_b`, `_dt`, `_i`).
+ */
+export interface ScheduleDoc {
+  uid?: string;
+  type_s?: string;
+  notebook_uid_s?: string;
+  owner_uid_s?: string;
+  cron_expression_s?: string;
+  preset_s?: string;
+  enabled_b?: boolean;
+  [key: string]: unknown;
+}
+
+/**
+ * Request payload to create or update a notebook schedule.
+ */
+export interface UpsertScheduleRequest {
+  /** Target notebook uid. */
+  notebookUid: string;
+  /** Cron expression (e.g. `* * * * *`). */
+  cronExpression: string;
+  /** Optional preset identifier (e.g. `every-minute`, `hourly`, `daily`, `custom`). */
+  preset?: string;
+  /** Whether the schedule is enabled. Defaults to `true`. */
+  enabled?: boolean;
+}
+
+/**
+ * Request payload to update an existing schedule by uid.
+ */
+export interface UpdateScheduleRequest {
+  cronExpression?: string;
+  preset?: string;
+  enabled?: boolean;
+}
+
+/**
+ * Response shape for listing schedules.
+ */
+export interface ListSchedulesResponse {
+  success: boolean;
+  message: string;
+  schedules: ScheduleDoc[];
+}
+
+/**
+ * Response shape for a single schedule mutation.
+ */
+export interface ScheduleResponse {
+  success: boolean;
+  message: string;
+  schedule: ScheduleDoc;
+}
+
+/**
+ * List the schedules owned by the authenticated user.
+ * @param token - Authentication token
+ * @param baseUrl - Base URL for the scheduler service (defaults to production)
+ * @param includeDisabled - Include disabled schedules in the result
+ * @returns Promise resolving to the list of schedules
+ */
+export const listSchedules = async (
+  token: string,
+  baseUrl: string = DEFAULT_SERVICE_URLS.SCHEDULER,
+  includeDisabled: boolean = false,
+): Promise<ListSchedulesResponse> => {
+  const query = includeDisabled ? '?includeDisabled=true' : '';
+  return requestDatalayerAPI<ListSchedulesResponse>({
+    url: `${baseUrl}${API_BASE_PATHS.SCHEDULER}/schedules${query}`,
+    method: 'GET',
+    token,
+  });
+};
+
+/**
+ * Create or update the schedule for a notebook.
+ * @param token - Authentication token
+ * @param data - Schedule configuration
+ * @param baseUrl - Base URL for the scheduler service (defaults to production)
+ * @returns Promise resolving to the upserted schedule
+ */
+export const upsertSchedule = async (
+  token: string,
+  data: UpsertScheduleRequest,
+  baseUrl: string = DEFAULT_SERVICE_URLS.SCHEDULER,
+): Promise<ScheduleResponse> => {
+  return requestDatalayerAPI<ScheduleResponse>({
+    url: `${baseUrl}${API_BASE_PATHS.SCHEDULER}/schedules`,
+    method: 'POST',
+    token,
+    body: {
+      enabled: true,
+      preset: 'custom',
+      ...data,
+    },
+  });
+};
+
+/**
+ * Update an existing schedule by uid.
+ * @param token - Authentication token
+ * @param scheduleUid - The schedule uid
+ * @param data - Fields to update
+ * @param baseUrl - Base URL for the scheduler service (defaults to production)
+ * @returns Promise resolving to the updated schedule
+ */
+export const updateSchedule = async (
+  token: string,
+  scheduleUid: string,
+  data: UpdateScheduleRequest,
+  baseUrl: string = DEFAULT_SERVICE_URLS.SCHEDULER,
+): Promise<ScheduleResponse> => {
+  return requestDatalayerAPI<ScheduleResponse>({
+    url: `${baseUrl}${API_BASE_PATHS.SCHEDULER}/schedules/${scheduleUid}`,
+    method: 'PUT',
+    token,
+    body: data,
+  });
+};
+
+/**
+ * Disable a schedule by uid.
+ * @param token - Authentication token
+ * @param scheduleUid - The schedule uid
+ * @param baseUrl - Base URL for the scheduler service (defaults to production)
+ * @returns Promise resolving to the disabled schedule
+ */
+export const disableSchedule = async (
+  token: string,
+  scheduleUid: string,
+  baseUrl: string = DEFAULT_SERVICE_URLS.SCHEDULER,
+): Promise<ScheduleResponse> => {
+  return requestDatalayerAPI<ScheduleResponse>({
+    url: `${baseUrl}${API_BASE_PATHS.SCHEDULER}/schedules/${scheduleUid}/disable`,
+    method: 'POST',
+    token,
+  });
+};
diff --git a/src/components/avatars/UserAvatar.tsx b/src/components/avatars/UserAvatar.tsx
new file mode 100644
index 00000000..313ed720
--- /dev/null
+++ b/src/components/avatars/UserAvatar.tsx
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+/**
+ * UserAvatar – Single source of truth for rendering a user's avatar.
+ *
+ * When a real (non-Gravatar) avatar URL is available it renders a
+ * {@link DLAvatar}. Otherwise it falls back to a themed, colormoded
+ * {@link AlienIcon} placeholder so every consumer (profile, sidebar,
+ * principal overlay, …) shares the same default look.
+ */
+import { AlienIcon } from '@datalayer/icons-react';
+import { Box, useColorPalette } from '@datalayer/primer-addons';
+import { DLAvatar } from './DLAvatar';
+
+/**
+ * Decides whether `url` should be rendered as an avatar image: it must be
+ * non-empty and must not start with the `www.gravatar.com` avatar prefix.
+ */
+export function hasRealAvatar(url?: string): boolean {
+  const gravatarPrefix = 'https://www.gravatar.com/avatar';
+
+  // Two cases are rejected: an absent or empty URL (nothing to render), and
+  // any URL under the Gravatar avatar prefix (treated as a placeholder).
+  const isPlaceholder = !url || url.startsWith(gravatarPrefix);
+
+  return !isPlaceholder;
+}
+
+export type UserAvatarProps = {
+  avatarUrl?: string;
+  /** Avatar edge length in pixels. Defaults to 100. */
+  size?: number;
+  /** Render with rounded square corners instead of a circle. Defaults to true. */
+  square?: boolean;
+  /** Fallback icon size. Defaults to ~48% of `size`. */
+  iconSize?: number;
+};
+
+export const UserAvatar = ({
+  avatarUrl,
+  size = 100,
+  square = true,
+  iconSize,
+}: UserAvatarProps): JSX.Element => {
+  const { primary: iconColor } = useColorPalette();
+  if (!hasRealAvatar(avatarUrl)) {
+    const glyphSize = iconSize ?? Math.round(size * 0.48);
+    return (
+      <Box
+        sx={{
+          width: size,
+          height: size,
+          borderRadius: square ? 2 : '50%',
+          bg: 'accent.subtle',
+          display: 'flex',
+          alignItems: 'center',
+          justifyContent: 'center',
+          '--datalayer-icon-fg': iconColor,
+        }}
+      >
+        <AlienIcon size={glyphSize} themed colormoded />
+      </Box>
+    );
+  }
+  return <DLAvatar square={square} src={avatarUrl} size={size} />;
+};
+
+export default UserAvatar;
diff --git a/src/components/avatars/index.ts b/src/components/avatars/index.ts
index f2565be2..7822e59e 100644
--- a/src/components/avatars/index.ts
+++ b/src/components/avatars/index.ts
@@ -5,4 +5,5 @@
 
 export * from './BoringAvatar';
 export * from './DLAvatar';
+export * from './UserAvatar';
 export * from './UserProfileAvatar';
diff --git a/src/components/billing/BillableAccountSelect.tsx b/src/components/billing/BillableAccountSelect.tsx
index 81466562..32903f7f 100644
--- a/src/components/billing/BillableAccountSelect.tsx
+++ b/src/components/billing/BillableAccountSelect.tsx
@@ -1,3 +1,8 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
 /*
  * Copyright (c) 2023-2026 Datalayer, Inc.
  * Distributed under the terms of the Modified BSD License.
diff --git a/src/components/billing/index.ts b/src/components/billing/index.ts
index c6ecd8b9..a5362bad 100644
--- a/src/components/billing/index.ts
+++ b/src/components/billing/index.ts
@@ -1,3 +1,8 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
 /*
  * Copyright (c) 2023-2026 Datalayer, Inc.
  * Distributed under the terms of the Modified BSD License.
diff --git a/src/components/checkout/StripeCheckout.tsx b/src/components/checkout/StripeCheckout.tsx
index e647dc07..e0f9b4c4 100644
--- a/src/components/checkout/StripeCheckout.tsx
+++ b/src/components/checkout/StripeCheckout.tsx
@@ -942,6 +942,37 @@ export function StripeCheckout({
     void startSubscriptionCheckout(pendingSubscriptionPlan);
   }, [pendingSubscriptionPlan, startSubscriptionCheckout]);
 
+  // Auto-open the subscription checkout when the page is opened with
+  // `?upgrade` or `?action=upgrade` (e.g. from the Plan Overview
+  // "Upgrade to Team Plan" CTA).
+  useEffect(() => {
+    if (typeof window === 'undefined') {
+      return;
+    }
+    if (autoActionTriggeredRef.current) {
+      return;
+    }
+    if (isPaidSubscription || isCancellationScheduled) {
+      return;
+    }
+    try {
+      const params = new URLSearchParams(window.location.search);
+      const action = String(params.get('action') || '').toLowerCase();
+      const shouldAutoUpgrade = params.has('upgrade') || action === 'upgrade';
+      if (shouldAutoUpgrade && pendingSubscriptionPlan) {
+        autoActionTriggeredRef.current = true;
+        startPendingSubscriptionCheckout();
+      }
+    } catch (_error) {
+      // Ignore malformed URLs.
+    }
+  }, [
+    isPaidSubscription,
+    isCancellationScheduled,
+    pendingSubscriptionPlan,
+    startPendingSubscriptionCheckout,
+  ]);
+
   const openPortal = useCallback((url?: string) => {
     if (!url) {
       return;
@@ -1157,6 +1188,54 @@ export function StripeCheckout({
               Cancel pending plan change
             </Button>
           </Box>
+          {cancelViewOpen && (
+            <Box
+              sx={{
+                marginTop: 'var(--stack-gap-normal)',
+                border: '1px solid',
+                borderColor: 'border.default',
+                borderRadius: 'var(--borderRadius-medium)',
+                backgroundColor: 'canvas.subtle',
+                padding: 'var(--stack-padding-normal)',
+                display: 'grid',
+                gap: 'var(--stack-gap-condensed)',
+              }}
+            >
+              <Text as="h4" sx={{ fontWeight: 'bold' }}>
+                Cancel pending plan change
+              </Text>
+              <Text as="p" sx={{ color: 'fg.muted' }}>
+                This pending plan change will be canceled immediately.
+              </Text>
+              <Box
+                sx={{
+                  display: 'flex',
+                  gap: 'var(--stack-gap-condensed)',
+                  flexWrap: 'wrap',
+                }}
+              >
+                <Button
+                  variant="danger"
+                  onClick={() => void onConfirmCancelSubscription()}
+                  disabled={isCancelActionPending}
+                  leadingVisual={() =>
+                    isCancelActionPending ? <Spinner size="small" /> : undefined
+                  }
+                >
+                  {isCancelActionPending
+                    ? 'Canceling pending plan change...'
+                    : 'Confirm cancel pending plan change'}
+                </Button>
+                <Button
+                  variant="default"
+                  onClick={onAbortCancelView}
+                  disabled={isCancelActionPending}
+                >
+                  Keep pending plan change
+                </Button>
+              </Box>
+            </Box>
+          )}
         </>
       ) : !isPaidSubscription ? (
         <>
@@ -1226,7 +1305,7 @@ export function StripeCheckout({
           >
             {subscriptionPaymentIntentMutation.isPending
               ? 'Preparing Team plan checkout...'
-              : 'Update to Team Plan'}
+              : 'Upgrade to Team Plan'}
           </Button>
         </>
       ) : null}
diff --git a/src/components/display/LiveRelativeTime.tsx b/src/components/display/LiveRelativeTime.tsx
index 7aa84eec..bfe4dec5 100644
--- a/src/components/display/LiveRelativeTime.tsx
+++ b/src/components/display/LiveRelativeTime.tsx
@@ -3,33 +3,10 @@
  * Distributed under the terms of the Modified BSD License.
  */
 
-import { useEffect, useMemo, useState } from 'react';
-import { formatRelativeTime } from '../../utils';
-
-type ILiveRelativeTimeProps = {
-  value?: Date | string | number;
-  refreshIntervalMs?: number;
-  fallback?: string;
-};
-
-/**
- * Display a live-updating relative time label (e.g. "5m ago").
+/*
+ * Copyright (c) 2023-2026 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
  */
-export function LiveRelativeTime({
-  value,
-  refreshIntervalMs = 1000,
-  fallback = '—',
-}: ILiveRelativeTimeProps): JSX.Element {
-  const [now, setNow] = useState(() => new Date());
-
-  useEffect(() => {
-    const timer = window.setInterval(() => {
-      setNow(new Date());
-    }, refreshIntervalMs);
-    return () => window.clearInterval(timer);
-  }, [refreshIntervalMs]);
-
-  const label = useMemo(() => formatRelativeTime(value, now), [value, now]);
 
-  return <>{label ?? fallback}</>;
-}
+// Backward compatibility export. Use ../../components/time/LiveRelativeTime.
+export { LiveRelativeTime } from '../time/LiveRelativeTime';
diff --git a/src/components/index.ts b/src/components/index.ts
index d292b021..2c774990 100644
--- a/src/components/index.ts
+++ b/src/components/index.ts
@@ -5,5 +5,8 @@
 
 export * from './auth';
 export * from './billing';
+export * from './scheduler';
 export * from './sharing';
 export * from './sparklines';
+export * from './time';
+export * from './timeline';
diff --git a/src/components/principal/Principal.tsx b/src/components/principal/Principal.tsx
index a4607f5e..fc4765f8 100644
--- a/src/components/principal/Principal.tsx
+++ b/src/components/principal/Principal.tsx
@@ -14,6 +14,7 @@
 import * as React from 'react';
 import { Box } from '@datalayer/primer-addons';
 import { useCache } from '../../hooks';
+import { useIAMStore } from '../../state';
 import { PrincipalAvatar, PrincipalAvatarKind } from './PrincipalAvatar';
 import { PrincipalDetailsOverlay } from './PrincipalDetailsOverlay';
 
@@ -54,20 +55,87 @@ export const Principal: React.FC<PrincipalProps> = ({
   square = false,
   sx,
 }) => {
-  const { useUser, useOrganization } = useCache();
+  const {
+    useUser,
+    useOrganization,
+    useUserPublicProfileByHandle,
+    useOrganizationPublicProfileByHandle,
+  } = useCache();
+
+  // When no user is authenticated (anonymous visitor, e.g. public pages), the
+  // authenticated `useUser` / `useOrganization` endpoints return 401, which
+  // triggers a global logout + redirect to sign-in. For anonymous visitors we
+  // resolve the principal through the public-by-handle endpoints instead, which
+  // are anonymous-accessible and therefore never redirect.
+  const { user: authenticatedUser } = useIAMStore();
+  const isAnonymous = !authenticatedUser;
+
+  const principalHandle = String(
+    principal.handle || principal.accountHandle || '',
+  ).trim();
 
   const hydratedUserQuery = useUser(
-    principal.kind === 'user' ? String(principal.uid || '') : '',
+    !isAnonymous && principal.kind === 'user'
+      ? String(principal.uid || '')
+      : '',
   );
   const hydratedOrgQuery = useOrganization(
-    principal.kind === 'organization' ? String(principal.uid || '') : '',
+    !isAnonymous && principal.kind === 'organization'
+      ? String(principal.uid || '')
+      : '',
+  );
+  const hydratedPublicUserQuery = useUserPublicProfileByHandle(
+    isAnonymous && principal.kind === 'user' ? principalHandle : '',
   );
+  const hydratedPublicOrgQuery = useOrganizationPublicProfileByHandle(
+    isAnonymous && principal.kind === 'organization' ? principalHandle : '',
+  );
+
+  // Normalise the public (snake_case) profile payloads into the same camelCase
+  // shape the authenticated entities expose, so downstream logic is uniform.
+  const normalizedPublicUser = React.useMemo(() => {
+    const profile = hydratedPublicUserQuery.data as any;
+    if (!profile) {
+      return undefined;
+    }
+    const displayName = String(
+      profile.display_name ||
+        [profile.first_name, profile.last_name].filter(Boolean).join(' ') ||
+        '',
+    ).trim();
+    return {
+      displayName,
+      firstName: profile.first_name,
+      lastName: profile.last_name,
+      handle: profile.handle,
+      avatarUrl: profile.avatar_url,
+      origin: profile.origin,
+      email: profile.email,
+    };
+  }, [hydratedPublicUserQuery.data]);
+
+  const normalizedPublicOrg = React.useMemo(() => {
+    const profile = hydratedPublicOrgQuery.data as any;
+    if (!profile) {
+      return undefined;
+    }
+    return {
+      displayName: String(profile.display_name || profile.name || '').trim(),
+      name: profile.name,
+      handle: profile.handle,
+      avatarUrl: profile.avatar_url,
+    };
+  }, [hydratedPublicOrgQuery.data]);
 
   const hydratedEntity =
     principal.kind === 'user'
-      ? hydratedUserQuery.data
+      ? isAnonymous
+        ? normalizedPublicUser
+        : hydratedUserQuery.data
       : principal.kind === 'organization'
-        ? hydratedOrgQuery.data
+        ? isAnonymous
+          ? normalizedPublicOrg
+          : hydratedOrgQuery.data
         : undefined;
 
   const hydratedDisplayName =
diff --git a/src/components/principal/PrincipalAvatar.tsx b/src/components/principal/PrincipalAvatar.tsx
index b28b7ab9..9aef7589 100644
--- a/src/components/principal/PrincipalAvatar.tsx
+++ b/src/components/principal/PrincipalAvatar.tsx
@@ -3,10 +3,9 @@
  * Distributed under the terms of the Modified BSD License.
  */
 
-import { Box, useColorPalette } from '@datalayer/primer-addons';
+import { Box } from '@datalayer/primer-addons';
 import { OrganizationIcon, PeopleIcon } from '@primer/octicons-react';
-import { AlienIcon } from '@datalayer/icons-react';
-import { DLAvatar } from '../avatars';
+import { UserAvatar } from '../avatars';
 
 export type PrincipalAvatarKind = 'user' | 'team' | 'organization';
 
@@ -18,16 +17,6 @@ export type PrincipalAvatarProps = {
   square?: boolean;
 };
 
-function hasRealAvatar(url?: string): boolean {
-  if (!url) {
-    return false;
-  }
-  if (url.startsWith('https://www.gravatar.com/avatar')) {
-    return false;
-  }
-  return true;
-}
-
 function getFallbackIconSize(size: number): number {
   return Math.max(12, Math.round(size * 0.62));
 }
@@ -39,14 +28,13 @@ export function PrincipalAvatar({
   size = 20,
   square = false,
 }: PrincipalAvatarProps): JSX.Element {
-  const palette = useColorPalette();
-  if (kind === 'user' && hasRealAvatar(avatarUrl)) {
+  if (kind === 'user') {
     return (
-      <DLAvatar
-        src={avatarUrl}
-        alt={alt || 'User'}
+      <UserAvatar
+        avatarUrl={avatarUrl}
         size={size}
         square={square}
+        iconSize={getFallbackIconSize(size)}
       />
     );
   }
@@ -54,27 +42,6 @@ export function PrincipalAvatar({
   const iconSize = getFallbackIconSize(size);
   const borderRadius = square ? 2 : '50%';
 
-  if (kind === 'user') {
-    return (
-      <Box
-        sx={{
-          width: size,
-          height: size,
-          display: 'inline-flex',
-          alignItems: 'center',
-          justifyContent: 'center',
-          bg: 'accent.subtle',
-          borderRadius,
-          overflow: 'hidden',
-          '--datalayer-icon-fg': palette.primary,
-        }}
-        aria-label={alt || 'User'}
-      >
-        <AlienIcon size={iconSize} />
-      </Box>
-    );
-  }
-
   const Icon = kind === 'team' ? PeopleIcon : OrganizationIcon;
 
   return (
diff --git a/src/components/runtimes/RuntimeLauncherDialog.tsx b/src/components/runtimes/RuntimeLauncherDialog.tsx
index 7b088b2c..745d8446 100644
--- a/src/components/runtimes/RuntimeLauncherDialog.tsx
+++ b/src/components/runtimes/RuntimeLauncherDialog.tsx
@@ -43,7 +43,7 @@ import {
 const NOT_AVAILABLE_INIT_RETRY = 10_000;
 
 /**
- * Number of trials in case of unavailable kernels
+ * Number of trials in case of unavailable code sandboxes
  */
 const NOT_AVAILABLE_RETRIES = 5;
 
diff --git a/src/components/scheduler/ScheduleMenu.tsx b/src/components/scheduler/ScheduleMenu.tsx
new file mode 100644
index 00000000..099ac39f
--- /dev/null
+++ b/src/components/scheduler/ScheduleMenu.tsx
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+import { useEffect, useMemo, useState } from 'react';
+import {
+  ActionList,
+  ActionMenu,
+  Box,
+  Button,
+  Dialog,
+  FormControl,
+  IconButton,
+  Spinner,
+  Text,
+  TextInput,
+} from '@primer/react';
+import { ClockIcon, TrashIcon, type Icon } from '@primer/octicons-react';
+import {
+  disableSchedule,
+  updateSchedule,
+  upsertSchedule,
+  type ScheduleDoc,
+} from '../../api/scheduler/schedules';
+import { DEFAULT_SERVICE_URLS } from '../../api/constants';
+
+/**
+ * Supported schedule presets.
+ */
+export type SchedulePreset = 'every-minute' | 'hourly' | 'daily' | 'custom';
+
+/**
+ * Props for the self-contained {@link ScheduleMenu} component.
+ *
+ * The component performs the scheduler service calls internally. Provide a
+ * `token` plus either a `notebookUid` or a `scheduleUid`; the scheduler
+ * service `baseUrl` is optional and configurable via props.
+ */
+export type ScheduleMenuProps = {
+  /** Target notebook uid to schedule (used to upsert when no scheduleUid is given). */
+  notebookUid?: string;
+  /**
+   * Existing schedule uid to update in place. When provided, changes are
+   * persisted with a PUT update instead of a notebook upsert.
+   */
+  scheduleUid?: string;
+  /** Authentication token used for scheduler API calls. */
+  token?: string;
+  /** Base URL of the scheduler service. Defaults to the production service. */
+  baseUrl?: string;
+  /** Pass `false` to force-disable the menu. When omitted, the menu is enabled if a token and a notebook or schedule uid are present. */
+  enabled?: boolean;
+  /** Icon shown on the anchor button (replaced by a spinner while saving). */
+  icon?: Icon;
+  /** Optional color for the anchor icon. */
+  iconColor?: string;
+  /** Accessible label for the anchor button. */
+  ariaLabel?: string;
+  /** Initial preset selection. NOTE(review): ScheduleMenu does not currently read this prop. */
+  initialPreset?: SchedulePreset;
+  /** Initial cron expression; when blank the menu starts as "Not scheduled". */
+  initialCronExpression?: string;
+  /** Called after a schedule is successfully saved. */
+  onSaved?: (schedule: ScheduleDoc) => void;
+  /** Called after a schedule is successfully deleted/disabled. */
+  onDeleted?: (schedule: ScheduleDoc) => void;
+  /** Called when saving or deleting a schedule fails. */
+  onError?: (error: unknown) => void;
+};
+
+const SCHEDULE_PRESETS: Array<{
+  id: Exclude<SchedulePreset, 'custom'>;
+  label: string;
+  cron: string;
+}> = [
+  { id: 'every-minute', label: 'Every minute', cron: '* * * * *' },
+  { id: 'hourly', label: 'Every hour', cron: '0 * * * *' },
+  { id: 'daily', label: 'Every day', cron: '0 0 * * *' },
+];
+
+const DEFAULT_CRON_EXPRESSION = '* * * * *';
+
+const presetFromCron = (cronExpression: string): SchedulePreset => {
+  // Exact match on the trimmed expression; anything else counts as custom.
+  const match = SCHEDULE_PRESETS.find(p => p.cron === cronExpression.trim());
+  return match?.id ?? 'custom';
+};
+
+/**
+ * Self-contained schedule menu: pick a preset or type a cron expression; it is
+ * persisted with `updateSchedule` when `scheduleUid` is set, otherwise with
+ * `upsertSchedule` for `notebookUid`. Deleting calls `disableSchedule`.
+ *
+ * A spinner replaces the anchor icon while saving. "Apply Cron" is disabled
+ * for a blank expression. NOTE(review): `initialPreset` is currently unused.
+ */
+export const ScheduleMenu = ({
+  notebookUid,
+  scheduleUid,
+  token,
+  baseUrl = DEFAULT_SERVICE_URLS.SCHEDULER,
+  enabled,
+  icon = ClockIcon,
+  iconColor,
+  ariaLabel = 'Schedule execution',
+  initialPreset = 'every-minute',
+  initialCronExpression,
+  onSaved,
+  onDeleted,
+  onError,
+}: ScheduleMenuProps) => {
+  const providedInitialCron = (initialCronExpression || '').trim();
+  const hasProvidedInitialCron = providedInitialCron.length > 0;
+  const initialCron = hasProvidedInitialCron
+    ? providedInitialCron
+    : DEFAULT_CRON_EXPRESSION;
+  const [cronExpression, setCronExpression] = useState<string>(initialCron);
+  const [hasExplicitCron, setHasExplicitCron] = useState(
+    hasProvidedInitialCron,
+  );
+  const [isSaving, setIsSaving] = useState(false);
+  const [isDeleting, setIsDeleting] = useState(false);
+  const [isDeleteDialogOpen, setIsDeleteDialogOpen] = useState(false);
+  const [deleteConfirmUid, setDeleteConfirmUid] = useState('');
+
+  useEffect(() => {
+    const nextProvidedCron = (initialCronExpression || '').trim();
+    const nextHasExplicitCron = nextProvidedCron.length > 0;
+    setHasExplicitCron(nextHasExplicitCron);
+    setCronExpression(
+      nextHasExplicitCron ? nextProvidedCron : DEFAULT_CRON_EXPRESSION,
+    );
+  }, [initialCronExpression, scheduleUid, notebookUid]);
+
+  const preset = useMemo(
+    () => (hasExplicitCron ? presetFromCron(cronExpression) : 'custom'),
+    [cronExpression, hasExplicitCron],
+  );
+
+  const isEnabled =
+    (enabled ?? true) &&
+    Boolean(token) &&
+    Boolean(notebookUid || scheduleUid) &&
+    !isSaving &&
+    !isDeleting;
+
+  const selectedPresetDescription = useMemo(() => {
+    if (!hasExplicitCron) {
+      return 'Not scheduled';
+    }
+    const selected = SCHEDULE_PRESETS.find(
+      item => item.cron === cronExpression.trim(),
+    );
+    if (selected) {
+      return `${selected.label} (${selected.cron})`;
+    }
+    return `Custom (${cronExpression || 'Cron expression not set'})`;
+  }, [cronExpression, hasExplicitCron]);
+
+  const saveSchedule = async (nextPreset: SchedulePreset, nextCron: string) => {
+    const cron = nextCron.trim();
+    if (!token || (!notebookUid && !scheduleUid) || !cron) {
+      return;
+    }
+    setIsSaving(true);
+    try {
+      let schedule: ScheduleDoc;
+      if (scheduleUid) {
+        const response = await updateSchedule(
+          token,
+          scheduleUid,
+          {
+            cronExpression: cron,
+            preset: nextPreset,
+          },
+          baseUrl,
+        );
+        schedule = response.schedule;
+      } else {
+        const response = await upsertSchedule(
+          token,
+          {
+            notebookUid: notebookUid as string,
+            cronExpression: cron,
+            preset: nextPreset,
+            enabled: true,
+          },
+          baseUrl,
+        );
+        schedule = response.schedule;
+      }
+      onSaved?.(schedule);
+    } catch (error) {
+      onError?.(error);
+    } finally {
+      setIsSaving(false);
+    }
+  };
+
+  const selectPreset = (nextPreset: Exclude<SchedulePreset, 'custom'>) => {
+    const next = SCHEDULE_PRESETS.find(item => item.id === nextPreset);
+    if (!next) {
+      return;
+    }
+    setHasExplicitCron(true);
+    setCronExpression(next.cron);
+    void saveSchedule(nextPreset, next.cron);
+  };
+
+  const applyCustomCron = () => {
+    const value = cronExpression.trim();
+    const inferredPreset = presetFromCron(value);
+    void saveSchedule(inferredPreset, value);
+  };
+
+  const performDeleteSchedule = async () => {
+    if (!token || !scheduleUid) {
+      return;
+    }
+    setIsDeleting(true);
+    try {
+      const response = await disableSchedule(token, scheduleUid, baseUrl);
+      setIsDeleteDialogOpen(false);
+      setDeleteConfirmUid('');
+      onDeleted?.(response.schedule);
+    } catch (error) {
+      onError?.(error);
+    } finally {
+      setIsDeleting(false);
+    }
+  };
+
+  return (
+    <>
+      <ActionMenu>
+        <ActionMenu.Anchor>
+          <IconButton
+            icon={isSaving ? () => <Spinner size="small" /> : icon}
+            variant="invisible"
+            sx={iconColor ? { color: iconColor } : undefined}
+            aria-label={ariaLabel}
+            title={
+              isSaving
+                ? `${ariaLabel} (saving…)`
+                : isEnabled
+                  ? ariaLabel
+                  : `${ariaLabel} (disabled)`
+            }
+          />
+        </ActionMenu.Anchor>
+        <ActionMenu.Overlay width="medium" sx={{ minWidth: 300 }}>
+          <Box
+            sx={{
+              px: 3,
+              pt: 3,
+              pb: 2,
+              borderBottom: '1px solid',
+              borderColor: 'border.default',
+            }}
+          >
+            <Text as="p" sx={{ fontWeight: 600 }}>
+              Schedule
+            </Text>
+          </Box>
+          <ActionList selectionVariant="single" showDividers>
+            {SCHEDULE_PRESETS.map(option => (
+              <ActionList.Item
+                key={option.id}
+                selected={preset === option.id}
+                disabled={!isEnabled}
+                onSelect={() => selectPreset(option.id)}
+              >
+                {option.label}
+                <ActionList.Description variant="block">
+                  {option.cron}
+                </ActionList.Description>
+              </ActionList.Item>
+            ))}
+          </ActionList>
+          <Box
+            sx={{ p: 3, borderTop: '1px solid', borderColor: 'border.default' }}
+          >
+            <Text as="p" sx={{ fontSize: 0, color: 'fg.muted', mb: 2 }}>
+              Cron Expression
+            </Text>
+            <TextInput
+              value={cronExpression}
+              onChange={event => {
+                const value = event.currentTarget.value;
+                setHasExplicitCron(value.trim().length > 0);
+                setCronExpression(value);
+              }}
+              placeholder="* * * * *"
+              aria-label="Cron expression"
+              block
+              disabled={!isEnabled}
+            />
+            <Box sx={{ mt: 2, display: 'flex', gap: 2, flexWrap: 'wrap' }}>
+              <Button
+                size="small"
+                onClick={applyCustomCron}
+                disabled={!isEnabled || cronExpression.trim().length === 0}
+              >
+                Apply Cron
+              </Button>
+              {scheduleUid ? (
+                <Button
+                  size="small"
+                  variant="danger"
+                  leadingVisual={TrashIcon}
+                  onClick={() => setIsDeleteDialogOpen(true)}
+                  disabled={!isEnabled}
+                >
+                  Delete Schedule
+                </Button>
+              ) : null}
+            </Box>
+            <Text as="p" sx={{ mt: 2, fontSize: 0, color: 'fg.muted' }}>
+              Current: {selectedPresetDescription}
+            </Text>
+          </Box>
+        </ActionMenu.Overlay>
+      </ActionMenu>
+      {isDeleteDialogOpen && scheduleUid ? (
+        <Dialog
+          title="Delete schedule"
+          onClose={() => {
+            if (isDeleting) {
+              return;
+            }
+            setIsDeleteDialogOpen(false);
+            setDeleteConfirmUid('');
+          }}
+          width="medium"
+        >
+          <Box sx={{ display: 'grid', gap: 3 }}>
+            <Text>
+              This action will disable the schedule and remove planned runs.
+              Type{' '}
+              <Text as="span" sx={{ fontWeight: 700 }}>
+                {scheduleUid}
+              </Text>{' '}
+              to confirm.
+            </Text>
+            <FormControl>
+              <FormControl.Label>Schedule UID</FormControl.Label>
+              <TextInput
+                value={deleteConfirmUid}
+                onChange={event =>
+                  setDeleteConfirmUid(event.currentTarget.value)
+                }
+                placeholder={scheduleUid}
+                autoFocus
+              />
+            </FormControl>
+            <Box sx={{ display: 'flex', justifyContent: 'flex-end', gap: 2 }}>
+              <Button
+                variant="default"
+                onClick={() => {
+                  setIsDeleteDialogOpen(false);
+                  setDeleteConfirmUid('');
+                }}
+                disabled={isDeleting}
+              >
+                Cancel
+              </Button>
+              <Button
+                variant="danger"
+                onClick={() => void performDeleteSchedule()}
+                disabled={isDeleting || deleteConfirmUid !== scheduleUid}
+              >
+                Delete schedule
+              </Button>
+            </Box>
+          </Box>
+        </Dialog>
+      ) : null}
+    </>
+  );
+};
+
+export default ScheduleMenu;
diff --git a/src/views/iam-tokens/index.ts b/src/components/scheduler/index.ts
similarity index 54%
rename from src/views/iam-tokens/index.ts
rename to src/components/scheduler/index.ts
index 933a0869..19bfa586 100644
--- a/src/views/iam-tokens/index.ts
+++ b/src/components/scheduler/index.ts
@@ -3,6 +3,4 @@
  * Distributed under the terms of the Modified BSD License.
  */
 
-export * from './IAMTokenEdit';
-export * from './IAMTokenNew';
-export * from './IAMTokens';
+export * from './ScheduleMenu';
diff --git a/src/components/sharing/SharingEditor.tsx b/src/components/sharing/SharingEditor.tsx
index dc6fe7e2..d8ebcecd 100644
--- a/src/components/sharing/SharingEditor.tsx
+++ b/src/components/sharing/SharingEditor.tsx
@@ -1,3 +1,8 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
 /*
  * Copyright (c) 2023-2026 Datalayer, Inc.
  * Distributed under the terms of the Modified BSD License.
diff --git a/src/components/time/LiveRelativeTime.tsx b/src/components/time/LiveRelativeTime.tsx
new file mode 100644
index 00000000..052b6989
--- /dev/null
+++ b/src/components/time/LiveRelativeTime.tsx
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+/*
+ * Copyright (c) 2023-2026 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+import { useEffect, useMemo, useState } from 'react';
+import { Tooltip } from '@primer/react';
+import { formatDateTimeDetails, formatRelativeTime } from '../../utils';
+
+type ILiveRelativeTimeProps = {
+  value?: Date | string | number;
+  refreshIntervalMs?: number;
+  fallback?: string;
+  showTooltip?: boolean;
+};
+
+/**
+ * Display a live-updating relative time label (e.g. "5m ago", "in 2m").
+ *
+ * When `showTooltip` is true and details are available, a Primer tooltip shows
+ * them on a focusable button; otherwise the label is rendered as plain text.
+ */
+export function LiveRelativeTime({
+  value,
+  refreshIntervalMs = 30_000,
+  fallback = '—',
+  showTooltip = true,
+}: ILiveRelativeTimeProps): JSX.Element {
+  const [now, setNow] = useState(() => new Date());
+
+  useEffect(() => {
+    const timer = window.setInterval(() => {
+      setNow(new Date());
+    }, refreshIntervalMs);
+    return () => window.clearInterval(timer);
+  }, [refreshIntervalMs]);
+
+  const label = useMemo(() => formatRelativeTime(value, now), [value, now]);
+  const tooltip = useMemo(() => formatDateTimeDetails(value), [value]);
+  // Without a tooltip a button would only be an inert extra tab stop.
+  if (!showTooltip || !tooltip) {
+    return <>{label ?? fallback}</>;
+  }
+  return (
+    <Tooltip text={tooltip}>
+      <button
+        type="button"
+        aria-label={tooltip}
+        style={{
+          border: 'none',
+          background: 'transparent',
+          padding: 0,
+          margin: 0,
+          font: 'inherit',
+          color: 'inherit',
+          cursor: 'inherit',
+          lineHeight: 'inherit',
+        }}
+      >
+        {label ?? fallback}
+      </button>
+    </Tooltip>
+  );
+}
diff --git a/src/components/time/index.ts b/src/components/time/index.ts
new file mode 100644
index 00000000..a3653af5
--- /dev/null
+++ b/src/components/time/index.ts
@@ -0,0 +1,11 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+/*
+ * Copyright (c) 2023-2026 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+export * from './LiveRelativeTime';
diff --git a/src/components/timeline/Timeline.tsx b/src/components/timeline/Timeline.tsx
new file mode 100644
index 00000000..7874c70b
--- /dev/null
+++ b/src/components/timeline/Timeline.tsx
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2023-2026 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+import { type ReactNode } from 'react';
+import { Box, Text } from '@primer/react';
+
+export type TimelineStatus =
+  | 'done'
+  | 'current'
+  | 'pending'
+  | 'failed'
+  | 'neutral'; // fallback used by Timeline when an item has no status
+
+export type TimelineItem = {
+  id: string; // used as the React key of the rendered item
+  title: string; // bold heading shown under the marker
+  timestamp?: string; // passed to renderTimestamp, else shown raw or as 'n/a'
+  status?: TimelineStatus; // selects the marker, connector and title colors
+  subtitle?: ReactNode; // optional extra line under the timestamp
+};
+
+export type TimelineProps = {
+  items: TimelineItem[]; // rendered in array order, first item on the left
+  renderTimestamp?: (value?: string) => ReactNode; // custom timestamp renderer
+};
+
+const statusStyles: Record<
+  TimelineStatus,
+  {
+    dotBackground: string; // sx `bg` of the marker dot
+    dotBorder: string; // sx `borderColor` of the marker dot
+    lineColor: string; // sx `bg` of the connector to the next item
+    titleColor: string; // sx `color` of the item title
+  }
+> = {
+  done: {
+    dotBackground: 'success.emphasis',
+    dotBorder: 'success.emphasis',
+    lineColor: 'success.muted',
+    titleColor: 'fg.default',
+  },
+  current: {
+    dotBackground: 'attention.emphasis',
+    dotBorder: 'attention.emphasis',
+    lineColor: 'attention.muted',
+    titleColor: 'fg.default',
+  },
+  pending: {
+    dotBackground: 'canvas.default',
+    dotBorder: 'border.default',
+    lineColor: 'border.default',
+    titleColor: 'fg.muted',
+  },
+  failed: {
+    dotBackground: 'danger.emphasis',
+    dotBorder: 'danger.emphasis',
+    lineColor: 'danger.muted',
+    titleColor: 'danger.fg',
+  },
+  neutral: {
+    dotBackground: 'accent.emphasis',
+    dotBorder: 'accent.emphasis',
+    lineColor: 'accent.muted',
+    titleColor: 'fg.default',
+  },
+};
+
+/**
+ * Metro-style horizontal timeline: one marker per item, joined by connectors.
+ *
+ * Items are laid out in array order starting from the left, so callers pick
+ * the direction (for example: newest to oldest). Renders nothing when empty.
+ */
+export const Timeline = ({ items, renderTimestamp }: TimelineProps) => {
+  if (items.length === 0) {
+    return null;
+  }
+
+  const lastIndex = items.length - 1;
+  // The outer box scrolls horizontally; the inner row keeps its natural width.
+  return (
+    <Box sx={{ overflowX: 'auto', pb: 1 }}>
+      <Box
+        sx={{
+          display: 'flex',
+          alignItems: 'stretch',
+          minWidth: 'max-content',
+        }}
+      >
+        {items.map(({ id, title, timestamp, status, subtitle }, index) => {
+          const palette = statusStyles[status || 'neutral'];
+          // The final item has no connector and sizes to its content.
+          const isLast = index === lastIndex;
+          // A caller-supplied renderer wins; otherwise show the raw value.
+          const timestampNode = renderTimestamp
+            ? renderTimestamp(timestamp)
+            : timestamp || 'n/a';
+          return (
+            <Box
+              key={id}
+              sx={{
+                display: 'flex',
+                flexDirection: 'column',
+                minWidth: 220,
+                flex: isLast ? '0 0 auto' : '0 0 220px',
+                pr: isLast ? 1 : 0,
+              }}
+            >
+              {/* Marker row: status dot, plus a connector to the next item. */}
+              <Box
+                sx={{
+                  display: 'flex',
+                  alignItems: 'center',
+                  minHeight: 28,
+                  mb: 2,
+                }}
+              >
+                <Box
+                  sx={{
+                    width: 16,
+                    height: 16,
+                    borderRadius: '50%',
+                    border: '2px solid',
+                    borderColor: palette.dotBorder,
+                    bg: palette.dotBackground,
+                    boxShadow: 'inset 0 0 0 1px rgba(255,255,255,0.2)',
+                  }}
+                />
+                {isLast ? null : (
+                  <Box
+                    sx={{
+                      flex: 1,
+                      height: 4,
+                      ml: 2,
+                      mr: 2,
+                      borderRadius: 999,
+                      bg: palette.lineColor,
+                    }}
+                  />
+                )}
+              </Box>
+              <Text
+                as="p"
+                sx={{ fontWeight: 600, color: palette.titleColor, mb: 1 }}
+              >
+                {title}
+              </Text>
+              <Text
+                as="p"
+                sx={{
+                  fontSize: 0,
+                  color: 'fg.muted',
+                  mb: subtitle ? 1 : 0,
+                }}
+              >
+                {timestampNode}
+              </Text>
+              {subtitle ? (
+                <Text as="p" sx={{ fontSize: 0, color: 'fg.subtle' }}>
+                  {subtitle}
+                </Text>
+              ) : null}
+            </Box>
+          );
+        })}
+      </Box>
+    </Box>
+  );
+};
diff --git a/src/components/timeline/index.ts b/src/components/timeline/index.ts
new file mode 100644
index 00000000..c1bc8bcb
--- /dev/null
+++ b/src/components/timeline/index.ts
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2023-2026 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+export * from './Timeline';
diff --git a/src/hooks/useCache.ts b/src/hooks/useCache.ts
index 4aced5dc..5ec33f27 100644
--- a/src/hooks/useCache.ts
+++ b/src/hooks/useCache.ts
@@ -638,7 +638,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       path: raw_dataset.s3_path_s,
       cdnUrl: raw_dataset.cdn_url_s,
       creationDate: new Date(raw_dataset.creation_ts_dt),
-      public: raw_dataset.public_b ?? false,
+      public: raw_dataset.is_public_b ?? false,
       lastPublicationDate: raw_dataset.creation_ts_dt
         ? new Date(raw_dataset.creation_ts_dt)
         : undefined,
@@ -662,7 +662,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       description: cl.description_t,
       source: cl.source_t,
       creationDate: new Date(cl.creation_ts_dt),
-      public: cl.public_b ?? false,
+      public: cl.is_public_b ?? false,
       lastPublicationDate: cl.last_publication_ts_dt
         ? new Date(cl.last_publication_ts_dt)
         : undefined,
@@ -689,7 +689,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       nbformat: raw_notebook.model_s
         ? JSON.parse(raw_notebook.model_s)
         : undefined,
-      public: raw_notebook.public_b ?? false,
+      public: raw_notebook.is_public_b ?? false,
       creationDate: new Date(raw_notebook.creation_ts_dt),
       lastUpdateDate: raw_notebook.last_update_ts_dt
         ? new Date(raw_notebook.last_update_ts_dt)
@@ -717,7 +717,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       name: doc.name_t,
       description: doc.description_t,
       model: doc.model_s ? JSON.parse(doc.model_s) : undefined,
-      public: doc.public_b ?? false,
+      public: doc.is_public_b ?? false,
       creationDate: new Date(doc.creation_ts_dt),
       lastUpdateDate: doc.last_update_ts_dt
         ? new Date(doc.last_update_ts_dt)
@@ -744,7 +744,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       name: raw_lesson.name_t,
       description: raw_lesson.description_t,
       nbformat: raw_lesson.model_s ? JSON.parse(raw_lesson.model_s) : undefined,
-      public: raw_lesson.public_b ?? false,
+      public: raw_lesson.is_public_b ?? false,
       creationDate: new Date(raw_lesson.creation_ts_dt),
       lastUpdateDate: raw_lesson.last_update_ts_dt
         ? new Date(raw_lesson.last_update_ts_dt)
@@ -776,7 +776,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       codeQuestion: ex.code_question_t,
       codeSolution: ex.code_solution_t,
       codeTest: ex.code_test_t,
-      public: ex.public_b ?? false,
+      public: ex.is_public_b ?? false,
       creationDate: new Date(ex.creation_ts_dt),
       lastUpdateDate: ex.last_update_ts_dt
         ? new Date(ex.last_update_ts_dt)
@@ -820,7 +820,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       nbformat: raw_assignment.model_s
         ? JSON.parse(raw_assignment.model_s)
         : undefined,
-      public: raw_assignment.public_b ?? false,
+      public: raw_assignment.is_public_b ?? false,
       creationDate: new Date(raw_assignment.creation_ts_dt),
       lastUpdateDate: raw_assignment.last_update_ts_dt
         ? new Date(raw_assignment.last_update_ts_dt)
@@ -864,6 +864,25 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
     };
   };
 
+  // Convert a raw evalset record into an item object tagged type 'evalset'.
+  // Missing timestamps become undefined and non-array tags become [];
+  // the owner is resolved through toItemOwner.
+  const toEvalset = (raw_evalset: any): any => {
+    const owner = toItemOwner(raw_evalset);
+    const toDate = (ts: any) => (ts ? new Date(ts) : undefined);
+    return {
+      id: raw_evalset.uid,
+      type: 'evalset',
+      name: raw_evalset.name_t,
+      description: raw_evalset.description_t,
+      public: raw_evalset.is_public_b ?? false,
+      creationDate: toDate(raw_evalset.creation_ts_dt),
+      lastUpdateDate: toDate(raw_evalset.last_update_ts_dt),
+      tags: Array.isArray(raw_evalset.tags_ss) ? raw_evalset.tags_ss : [],
+      owner,
+    };
+  };
+
   const toItem = (item: any): any => {
     if (!item.type_s) {
       console.error('No type_s found on item', item);
@@ -886,6 +905,8 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
         return toNotebook(item);
       case 'page':
         return toPage(item);
+      case 'evalset':
+        return toEvalset(item);
       default:
         return {};
     }
@@ -2638,11 +2659,14 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       queryKey: queryKeys.tokens.all(),
       queryFn: async () => {
         const resp = await requestDatalayer({
-          url: `${configuration.iamRunUrl}/api/iam/v1/tokens`,
+          url: `${configuration.iamRunUrl}/api/iam/v1/api-keys`,
           method: 'GET',
         });
-        if (resp.success && resp.tokens) {
-          const tokens = resp.tokens
+        const tokenItems = asArray(
+          resp?.apiKeys ?? resp?.api_keys ?? resp?.tokens,
+        );
+        if (resp.success && tokenItems.length > 0) {
+          const tokens = tokenItems
             .map((t: unknown) => {
               const token = toToken(t);
               if (token) {
@@ -2670,7 +2694,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
     return useMutation({
       mutationFn: async (token: Omit<IIAMToken, 'id' | 'value'>) => {
         const resp = await requestDatalayer({
-          url: `${configuration.iamRunUrl}/api/iam/v1/tokens`,
+          url: `${configuration.iamRunUrl}/api/iam/v1/api-keys`,
           method: 'POST',
           body: {
             ...token,
@@ -2678,8 +2702,9 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
           },
         });
         // Transform the token in the response
-        if (resp.success && resp.token) {
-          resp.token = toToken(resp.token);
+        const tokenPayload = resp?.apiKey ?? resp?.api_key ?? resp?.token;
+        if (resp.success && tokenPayload) {
+          resp.token = toToken(tokenPayload);
         }
         return resp;
       },
@@ -3057,11 +3082,12 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       queryKey: queryKeys.tokens.detail(tokenId),
       queryFn: async () => {
         const resp = await requestDatalayer({
-          url: `${configuration.iamRunUrl}/api/iam/v1/tokens/${tokenId}`,
+          url: `${configuration.iamRunUrl}/api/iam/v1/api-keys/${tokenId}`,
           method: 'GET',
         });
-        if (resp.success && resp.token) {
-          return toToken(resp.token);
+        const tokenPayload = resp?.apiKey ?? resp?.api_key ?? resp?.token;
+        if (resp.success && tokenPayload) {
+          return toToken(tokenPayload);
         }
         return null;
       },
@@ -3077,14 +3103,15 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
     return useMutation({
       mutationFn: async (token: IIAMToken) => {
         return requestDatalayer({
-          url: `${configuration.iamRunUrl}/api/iam/v1/tokens/${token.id}`,
+          url: `${configuration.iamRunUrl}/api/iam/v1/api-keys/${token.id}`,
           method: 'PUT',
           body: { ...token },
         });
       },
       onSuccess: resp => {
-        if (resp.success && resp.token) {
-          const tok = toToken(resp.token);
+        const tokenPayload = resp?.apiKey ?? resp?.api_key ?? resp?.token;
+        if (resp.success && tokenPayload) {
+          const tok = toToken(tokenPayload);
           if (tok) {
             queryClient.setQueryData(queryKeys.tokens.detail(tok.id), tok);
           }
@@ -3101,7 +3128,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
     return useMutation({
       mutationFn: async (tokenId: string) => {
         return requestDatalayer({
-          url: `${configuration.iamRunUrl}/api/iam/v1/tokens/${tokenId}`,
+          url: `${configuration.iamRunUrl}/api/iam/v1/api-keys/${tokenId}`,
           method: 'DELETE',
         });
       },
@@ -4317,7 +4344,8 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       mutationFn: async (itemId: string) => {
         return requestDatalayer({
           url: `${configuration.libraryRunUrl}/api/library/v1/items/${itemId}/public`,
-          method: 'PUT',
+          method: 'PATCH',
+          body: { is_public: true },
         });
       },
       onSuccess: () => {
@@ -4333,8 +4361,9 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
     return useMutation({
       mutationFn: async (itemId: string) => {
         return requestDatalayer({
-          url: `${configuration.libraryRunUrl}/api/library/v1/items/${itemId}/private`,
-          method: 'PUT',
+          url: `${configuration.libraryRunUrl}/api/library/v1/items/${itemId}/public`,
+          method: 'PATCH',
+          body: { is_public: false },
         });
       },
       onSuccess: () => {
@@ -6978,18 +7007,32 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
     return useMutation({
       mutationFn: async ({
         q,
-        types = ['notebook', 'document', 'lesson'],
+        types = ['notebook', 'document', 'cell', 'lesson', 'evalset'],
         max = 100,
       }: {
         q?: string;
         types?: string[];
         max?: number;
       }) => {
+        const normalizedTypes = Array.from(
+          new Set(
+            (types || [])
+              .map(type =>
+                String(type || '')
+                  .trim()
+                  .toLowerCase(),
+              )
+              .filter(Boolean)
+              .map(type => (type === 'eval' ? 'evalset' : type)),
+          ),
+        );
         const queryString = Object.entries({
           q: q || '*',
-          types: types.join(' '),
+          types: (normalizedTypes.length > 0
+            ? normalizedTypes
+            : ['notebook', 'document', 'cell', 'lesson', 'evalset']
+          ).join(' '),
           max: max.toString(),
-          public: 'true',
         })
           .map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
           .join('&');
@@ -7831,10 +7874,38 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       queryKey: queryKeys.items.public(),
       queryFn: async () => {
         const resp = await requestDatalayer({
-          url: `${configuration.libraryRunUrl}/api/library/v1/items/public`,
+          url: `${configuration.libraryRunUrl}/api/library/v1/search?q=*&max=-1`,
           method: 'GET',
         });
-        return resp;
+        return {
+          ...resp,
+          items: (resp.items || [])
+            .map((item: any) => toItem(item))
+            .filter(Boolean),
+        };
+      },
+      ...options,
+    });
+  };
+
+  /**
+   * Get the current user's own public items (publications).
+   * @param options - Query options
+   */
+  const usePublications = (options?: UseQueryOptions<unknown, Error>) => {
+    return useQuery({
+      queryKey: [...queryKeys.items.all(), 'publications'] as const,
+      queryFn: async () => {
+        const resp = await requestDatalayer({
+          url: `${configuration.libraryRunUrl}/api/library/v1/publications`,
+          method: 'GET',
+        });
+        return {
+          ...resp,
+          items: (resp.items || [])
+            .map((item: any) => toItem(item))
+            .filter(Boolean),
+        };
       },
       ...options,
     });
@@ -7852,10 +7923,15 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
     return useMutation({
       mutationFn: async () => {
         const resp = await requestDatalayer({
-          url: `${configuration.libraryRunUrl}/api/library/v1/items/public`,
+          url: `${configuration.libraryRunUrl}/api/library/v1/search?q=*&max=-1`,
           method: 'GET',
         });
-        return resp;
+        return {
+          ...resp,
+          items: (resp.items || [])
+            .map((item: any) => toItem(item))
+            .filter(Boolean),
+        };
       },
       onSuccess: () => {
         queryClient.invalidateQueries({ queryKey: queryKeys.items.public() });
@@ -8223,6 +8299,9 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
       },
       onSuccess: () => {
         queryClient.invalidateQueries({ queryKey: queryKeys.items.public() });
+        queryClient.invalidateQueries({
+          queryKey: [...queryKeys.items.all(), 'publications'] as const,
+        });
       },
       ...options,
     });
@@ -8911,6 +8990,7 @@ export const useCache = ({ loginRoute = '/login' }: CacheProps = {}) => {
     useMakeItemPrivate,
     useSearchPublicItems,
     usePublicItems,
+    usePublications,
     useRefreshPublicItems,
     useRefreshSpaceItems,
     useClearCachedPublicItems,
diff --git a/src/hooks/useProjectStore.ts b/src/hooks/useProjectStore.ts
index 0b9bb896..fb3f67df 100644
--- a/src/hooks/useProjectStore.ts
+++ b/src/hooks/useProjectStore.ts
@@ -23,7 +23,7 @@ export type ProjectRuntimeEntry = {
   agentName?: string;
   /** Agent runtime status (running, starting, terminated, etc.). */
   agentStatus?: string;
-  /** The agent spec ID used to create the runtime. */
+  /** The agentspec ID used to create the runtime. */
   agentSpecId?: string;
 };
 
diff --git a/src/hooks/useProjects.ts b/src/hooks/useProjects.ts
index a26bae81..c99c1a53 100644
--- a/src/hooks/useProjects.ts
+++ b/src/hooks/useProjects.ts
@@ -72,7 +72,7 @@ export type ProjectData = {
   isPublic: boolean;
   /** Attached agent (runtime) pod name, if any */
   attachedAgentPodName?: string;
-  /** Attached agent spec ID (e.g. 'data-acquisition'), if any */
+  /** Attached agentspec ID (e.g. 'data-acquisition'), if any */
   attachedAgentSpecId?: string;
 };
 
@@ -84,7 +84,7 @@ export type CreateProjectRequest = {
   name: string;
   /** Project description */
   description?: string;
-  /** Agent spec to attach (creates agent on project creation) */
+  /** Agentspec to attach (creates agent on project creation) */
   agentSpecId?: string;
 };
 
diff --git a/src/models/IAMToken.ts b/src/models/IAMToken.ts
index 02b69686..ae3a2be9 100644
--- a/src/models/IAMToken.ts
+++ b/src/models/IAMToken.ts
@@ -14,7 +14,11 @@ export const asToken = (s: any): IIAMToken => {
   };
 };
 
-export type IIAMTokenVariant = 'user_token';
+export type IIAMTokenVariant =
+  | 'secret'
+  | 'publishable'
+  | 'restricted'
+  | 'temporary';
 
 export type IIAMToken = {
   id: string;
diff --git a/src/models/ProjectDTO.ts b/src/models/ProjectDTO.ts
index 490f011c..ed578e25 100644
--- a/src/models/ProjectDTO.ts
+++ b/src/models/ProjectDTO.ts
@@ -119,7 +119,7 @@ export class ProjectDTO {
     return this._data.attached_agent_pod_name_s || undefined;
   }
 
-  /** Attached agent spec ID (e.g., 'data-acquisition'), if any. */
+  /** Attached agentspec ID (e.g., 'data-acquisition'), if any. */
   get attachedAgentSpecId(): string | undefined {
     this._checkDeleted();
     return this._data.attached_agent_spec_id_s || undefined;
diff --git a/src/state/substates/CoreState.ts b/src/state/substates/CoreState.ts
index 3d95dc54..b4703317 100644
--- a/src/state/substates/CoreState.ts
+++ b/src/state/substates/CoreState.ts
@@ -6,7 +6,7 @@
 import { createStore } from 'zustand/vanilla';
 import { useStore } from 'zustand';
 import type { IDatalayerCoreConfig } from '../../config/Configuration';
-import { configLogger } from '../../utils/logger';
+import { configLogger } from '../../utils/Logger';
 
 let loadConfigurationFromServer = true;
 
diff --git a/src/utils/Date.ts b/src/utils/Date.ts
index 6fd8d75f..0ae6feb6 100644
--- a/src/utils/Date.ts
+++ b/src/utils/Date.ts
@@ -95,25 +95,66 @@ export const formatRelativeTime = (
     return typeof value === 'string' ? value : undefined;
   }
 
-  const diffMs = Math.max(0, now.getTime() - ts);
-  const seconds = Math.floor(diffMs / 1000);
+  const diffMs = ts - now.getTime();
+  const inFuture = diffMs > 0;
+  const absSeconds = Math.floor(Math.abs(diffMs) / 1000);
 
-  if (seconds < 60) return 'just now';
+  if (absSeconds < 30) return inFuture ? 'in a few seconds' : 'just now';
 
-  const minutes = Math.floor(seconds / 60);
-  if (minutes < 60) return `${minutes}m ago`;
+  const withDirection = (value: number, unit: string): string => {
+    return inFuture ? `in ${value}${unit}` : `${value}${unit} ago`;
+  };
+
+  if (absSeconds < 60) return withDirection(absSeconds, 's');
+
+  const minutes = Math.floor(absSeconds / 60);
+  if (minutes < 60) return withDirection(minutes, 'm');
 
   const hours = Math.floor(minutes / 60);
-  if (hours < 24) return `${hours}h ago`;
+  if (hours < 24) return withDirection(hours, 'h');
 
   const days = Math.floor(hours / 24);
-  if (days < 7) return `${days}d ago`;
+  if (days < 7) return withDirection(days, 'd');
 
   const weeks = Math.floor(days / 7);
-  if (weeks < 52) return `${weeks}w ago`;
+  if (weeks < 52) return withDirection(weeks, 'w');
 
   const years = Math.floor(days / 365);
-  return `${years}y ago`;
+  return withDirection(years, 'y');
+};
+
+/**
+ * Format a date as "<ISO 8601 UTC> • <localized date/time> (<time zone>)".
+ *
+ * Returns undefined when `value` is missing or is a non-string that does not
+ * resolve to a valid date; an unparseable string is returned unchanged.
+ * The localized part and the zone name use the runtime's defaults.
+ *
+ * Example:
+ * "2026-06-08T16:15:32.531Z • Jun 8, 2026, 6:15:32 PM CEST (Europe/Paris)"
+ */
+export const formatDateTimeDetails = (
+  value?: Date | string | number,
+): string | undefined => {
+  if (value === null || value === undefined) {
+    return undefined;
+  }
+
+  // Strings and epoch numbers go through the same Date constructor.
+  const parsed = value instanceof Date ? value : new Date(value);
+  if (Number.isNaN(parsed.getTime())) {
+    return typeof value === 'string' ? value : undefined;
+  }
+
+  // `undefined` locale: fall back to the runtime's default locale.
+  const formatter = new Intl.DateTimeFormat(undefined, {
+    dateStyle: 'medium',
+    timeStyle: 'long',
+  });
+  const timeZone = formatter.resolvedOptions().timeZone || 'local';
+  const localized = formatter.format(parsed);
+
+  return `${parsed.toISOString()} • ${localized} (${timeZone})`;
+};
 
 /**
diff --git a/src/utils/logger.ts b/src/utils/Logger.ts
similarity index 100%
rename from src/utils/logger.ts
rename to src/utils/Logger.ts
diff --git a/src/utils/index.ts b/src/utils/index.ts
index 31de484e..9ad4f63c 100644
--- a/src/utils/index.ts
+++ b/src/utils/index.ts
@@ -20,6 +20,7 @@ export * from './Ids';
 export * from './Jwt';
 export * from './Jupyter';
 export * from './Lazy';
+export * from './Logger';
 export * from './Msc';
 export * from './Name';
 export * from './Notebook';
diff --git a/src/views/iam-tokens/IAMTokenEdit.tsx b/src/views/api-keys/APIKeyEdit.tsx
similarity index 73%
rename from src/views/iam-tokens/IAMTokenEdit.tsx
rename to src/views/api-keys/APIKeyEdit.tsx
index a7439e85..445a4854 100644
--- a/src/views/iam-tokens/IAMTokenEdit.tsx
+++ b/src/views/api-keys/APIKeyEdit.tsx
@@ -17,7 +17,7 @@ import {
 } from '@primer/react';
 import { Box } from '@datalayer/primer-addons';
 import { BoringAvatar } from '../../components/avatars';
-import { IIAMToken as AnyToken } from '../../models';
+import { IIAMToken as IAPIKey } from '../../models';
 import { useCache, useNavigate, useToast } from '../../hooks';
 import { useRunStore } from '../../state';
 
@@ -33,29 +33,36 @@ interface FormData {
   description: string;
 }
 
-export type IAMTokenEditProps = {
-  /** Route to navigate after delete. Defaults to '/settings/iam/tokens'. */
-  tokensListRoute?: string;
+export type APIKeyEditProps = {
+  /** Route to navigate after delete. Defaults to '/settings/iam/api-keys'. */
+  apiKeysListRoute?: string;
+  /** Whether to render the local edit title header. Defaults to true. */
+  showTitle?: boolean;
 };
 
-export const IAMTokenEdit = ({
-  tokensListRoute = '/settings/iam/tokens',
-}: IAMTokenEditProps = {}) => {
-  const { tokenId } = useParams();
+export const APIKeyEdit = ({
+  apiKeysListRoute = '/settings/iam/api-keys',
+  showTitle = true,
+}: APIKeyEditProps = {}) => {
+  const { tokenId: apiKeyId } = useParams();
   const runStore = useRunStore();
   const navigate = useNavigate();
   const { enqueueToast } = useToast();
-  const { useUpdateToken, useToken, useDeleteToken } = useCache();
+  const {
+    useUpdateToken: useUpdateAPIKey,
+    useToken: useAPIKey,
+    useDeleteToken: useDeleteAPIKey,
+  } = useCache();
 
-  const getTokenQuery = useToken(tokenId!);
-  const updateTokenMutation = useUpdateToken();
-  const deleteTokenMutation = useDeleteToken();
+  const getAPIKeyQuery = useAPIKey(apiKeyId!);
+  const updateAPIKeyMutation = useUpdateAPIKey();
+  const deleteAPIKeyMutation = useDeleteAPIKey();
 
-  const [token, setToken] = useState<AnyToken>();
+  const [apiKey, setAPIKey] = useState<IAPIKey>();
   const [formValues, setFormValues] = useState<FormData>({
-    name: token?.name!,
+    name: apiKey?.name!,
     nameConfirm: '',
-    description: token?.description!,
+    description: apiKey?.description!,
   });
   const [validationResult, setValidationResult] = useState<ValidationData>({
     name: undefined,
@@ -63,12 +70,12 @@ export const IAMTokenEdit = ({
     description: undefined,
   });
   useEffect(() => {
-    if (getTokenQuery.data) {
-      const token = getTokenQuery.data;
-      setToken(token);
-      setFormValues({ ...token, nameConfirm: '' });
+    if (getAPIKeyQuery.data) {
+      const key = getAPIKeyQuery.data;
+      setAPIKey(key);
+      setFormValues({ ...key, nameConfirm: '' });
     }
-  }, [getTokenQuery.data]);
+  }, [getAPIKeyQuery.data]);
   const nameNameChange = (event: React.ChangeEvent<HTMLInputElement>) => {
     setFormValues(prevFormValues => ({
       ...prevFormValues,
@@ -98,7 +105,7 @@ export const IAMTokenEdit = ({
           : formValues.name.length > 2
             ? true
             : false,
-      nameConfirm: formValues.nameConfirm === token?.name ? true : false,
+      nameConfirm: formValues.nameConfirm === apiKey?.name ? true : false,
       description:
         formValues.description === undefined
           ? undefined
@@ -107,20 +114,20 @@ export const IAMTokenEdit = ({
             : false,
     });
   }, [formValues]);
-  const nameSubmit = async () => {
+  const submitAPIKey = async () => {
     runStore.layout().showBackdrop();
-    const updatedToken = {
-      ...token!,
+    const updatedAPIKey = {
+      ...apiKey!,
       name: formValues.name,
       description: formValues.description,
     };
-    updateTokenMutation.mutate(updatedToken, {
+    updateAPIKeyMutation.mutate(updatedAPIKey, {
       onSuccess: (resp: any) => {
         if (resp.success) {
-          enqueueToast('The token is successfully updated.', {
+          enqueueToast('The API key was updated successfully.', {
             variant: 'success',
           });
-          setToken(updatedToken);
+          setAPIKey(updatedAPIKey);
         }
       },
       onSettled: () => {
@@ -128,23 +135,23 @@ export const IAMTokenEdit = ({
       },
     });
   };
-  const handleDelete = async () => {
-    runStore.layout().showBackdrop('Deleting the token...');
-    deleteTokenMutation.mutate(token!.id, {
+  const deleteAPIKey = async () => {
+    runStore.layout().showBackdrop('Deleting the API key...');
+    deleteAPIKeyMutation.mutate(apiKey!.id, {
       onSuccess: (resp: any) => {
         if (resp.success) {
-          enqueueToast('The token is successfully deleted.', {
+          enqueueToast('The API key was deleted successfully.', {
             variant: 'success',
           });
-          navigate(tokensListRoute);
+          navigate(apiKeysListRoute);
         } else {
-          enqueueToast(resp.message || 'Failed to delete token.', {
+          enqueueToast(resp.message || 'Failed to delete API key.', {
             variant: 'error',
           });
         }
       },
       onError: () => {
-        enqueueToast('Failed to delete token.', { variant: 'error' });
+        enqueueToast('Failed to delete API key.', { variant: 'error' });
       },
       onSettled: () => {
         runStore.layout().hideBackdrop();
@@ -153,21 +160,23 @@ export const IAMTokenEdit = ({
   };
   return (
     <>
-      <PageHeader>
-        <Heading sx={{ fontSize: 3 }}>API Key</Heading>
-      </PageHeader>
+      {showTitle && (
+        <PageHeader>
+          <Heading sx={{ fontSize: 3 }}>API Key</Heading>
+        </PageHeader>
+      )}
       <Box display="flex">
         <Box>
           <BoringAvatar
-            displayName={token?.name}
+            displayName={apiKey?.name}
             size={100}
             style={{ paddingRight: 10 }}
           />
           <Text as="h2" sx={{ paddingTop: 3 }}>
-            {token?.name}
+            {apiKey?.name}
           </Text>
           <Box mt={3}>
-            <Label size="large">{token?.variant}</Label>
+            <Label size="large">{apiKey?.variant}</Label>
           </Box>
         </Box>
         <Box ml={10}>
@@ -203,7 +212,7 @@ export const IAMTokenEdit = ({
               <FormControl.Label>Expiration date</FormControl.Label>
               <TextInput
                 block
-                value={token?.expirationDate.toLocaleDateString()}
+                value={apiKey?.expirationDate.toLocaleDateString()}
                 onChange={nameNameChange}
                 disabled
               />
@@ -212,9 +221,9 @@ export const IAMTokenEdit = ({
               variant="primary"
               disabled={!validationResult.name || !validationResult.description}
               sx={{ marginTop: 3 }}
-              onClick={nameSubmit}
+              onClick={submitAPIKey}
             >
-              Update token
+              Update API key
             </Button>
           </Box>
           <Box sx={{ marginTop: 3 }}>
@@ -245,7 +254,7 @@ export const IAMTokenEdit = ({
                 <Text
                   sx={{ fontSize: 1, fontWeight: 'bold', color: 'danger.fg' }}
                 >
-                  Confirm the token name to delete
+                  Confirm the API key name to delete
                 </Text>
                 <FormControl>
                   <TextInput
@@ -261,9 +270,9 @@ export const IAMTokenEdit = ({
               <Button
                 variant="danger"
                 disabled={!validationResult.nameConfirm}
-                onClick={handleDelete}
+                onClick={deleteAPIKey}
               >
-                Delete token
+                Delete API key
               </Button>
             </Box>
           </Box>
@@ -273,4 +282,4 @@ export const IAMTokenEdit = ({
   );
 };
 
-export default IAMTokenEdit;
+export default APIKeyEdit;
diff --git a/src/views/iam-tokens/IAMTokenNew.tsx b/src/views/api-keys/APIKeyNew.tsx
similarity index 64%
rename from src/views/iam-tokens/IAMTokenNew.tsx
rename to src/views/api-keys/APIKeyNew.tsx
index 409f9f91..f085c070 100644
--- a/src/views/iam-tokens/IAMTokenNew.tsx
+++ b/src/views/api-keys/APIKeyNew.tsx
@@ -18,11 +18,14 @@ import { Box } from '@datalayer/primer-addons';
 import { CopyIcon } from '@primer/octicons-react';
 import { Calendar, defaultCalendarStrings } from '@fluentui/react';
 import { useCache, useNavigate, useToast } from '../../hooks';
-import { IIAMToken, IIAMTokenVariant } from '../../models';
+import {
+  IIAMToken as IAPIKey,
+  IIAMTokenVariant as IAPIKeyVariant,
+} from '../../models';
 import { useRunStore } from '../../state';
 
 interface FormData {
-  variant: IIAMTokenVariant;
+  variant: IAPIKeyVariant;
   name?: string;
   description?: string;
   expirationDate?: Date;
@@ -35,28 +38,28 @@ interface ValidationData {
   expirationDate?: boolean;
 }
 
-export type IAMTokenNewProps = {
-  /** Route to navigate when clicking "List my Tokens". Defaults to '/settings/iam/tokens'. */
-  tokensListRoute?: string;
+export type APIKeyNewProps = {
+  /** Route to navigate when clicking "List my API keys". Defaults to '/settings/iam/api-keys'. */
+  apiKeysListRoute?: string;
   /** Whether to render the "New API Key" title header in create mode. Defaults to true. */
   showTitle?: boolean;
 };
 
-export const IAMTokenNew = ({
-  tokensListRoute = '/settings/iam/tokens',
+export const APIKeyNew = ({
+  apiKeysListRoute = '/settings/iam/api-keys',
   showTitle = true,
-}: IAMTokenNewProps = {}) => {
+}: APIKeyNewProps = {}) => {
   const runStore = useRunStore();
-  const { useCreateToken } = useCache();
-  const createTokenMutation = useCreateToken();
+  const { useCreateToken: useCreateAPIKey } = useCache();
+  const createAPIKeyMutation = useCreateAPIKey();
 
   const navigate = useNavigate();
   const { enqueueToast } = useToast();
   const [today, _] = useState<Date>(new Date());
-  const [showToken, setShowToken] = useState(false);
-  const [token, setToken] = useState<IIAMToken>();
+  const [showAPIKey, setShowAPIKey] = useState(false);
+  const [apiKey, setAPIKey] = useState<IAPIKey>();
   const [formValues, setFormValues] = useState<FormData>({
-    variant: 'user_token',
+    variant: 'secret',
     name: undefined,
     description: undefined,
     expirationDate: undefined,
@@ -70,7 +73,7 @@ export const IAMTokenNew = ({
   const valueVariantChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
     setFormValues(prevFormValues => ({
       ...prevFormValues,
-      variant: event.target.value as IIAMTokenVariant,
+      variant: event.target.value as IAPIKeyVariant,
     }));
   };
   const valueNameChange = (event: React.ChangeEvent<HTMLInputElement>) => {
@@ -116,9 +119,9 @@ export const IAMTokenNew = ({
             : false,
     });
   }, [formValues]);
-  const valueSubmit = () => {
-    runStore.layout().showBackdrop('Creating an token...');
-    createTokenMutation.mutate(
+  const submitAPIKey = () => {
+    runStore.layout().showBackdrop('Creating an API key...');
+    createAPIKeyMutation.mutate(
       {
         name: formValues.name!,
         variant: formValues.variant,
@@ -129,8 +132,8 @@ export const IAMTokenNew = ({
         onSuccess: (resp: any) => {
           if (resp.success && resp.token) {
             enqueueToast(resp.message, { variant: 'success' });
-            setToken(resp.token);
-            setShowToken(true);
+            setAPIKey(resp.token);
+            setShowAPIKey(true);
           }
         },
         onSettled: () => {
@@ -141,51 +144,94 @@ export const IAMTokenNew = ({
   };
   return (
     <Box>
-      {showToken ? (
+      {showAPIKey ? (
         <>
           <PageHeader>
             <PageHeader.TitleArea variant="large">
-              <PageHeader.Title>Your API Key is created</PageHeader.Title>
+              <PageHeader.Title>Your API Key Is Created</PageHeader.Title>
             </PageHeader.TitleArea>
           </PageHeader>
-          <Box>
-            <Text>
-              Take note of the API Key value, you will not be able to see it
-              after.
+
+          <Box
+            sx={{
+              border: '1px solid',
+              borderColor: 'accent.muted',
+              borderRadius: 2,
+              p: 3,
+              bg: 'canvas.subtle',
+              mb: 3,
+            }}
+          >
+            <Text sx={{ fontWeight: 600, color: 'accent.fg' }}>Important</Text>
+            <Text as="p" sx={{ mt: 1, color: 'fg.muted' }}>
+              Save this API key now. You will not be able to see the full value
+              again after leaving this page.
             </Text>
           </Box>
-          <Box>
-            <Text>Name: {token?.name}</Text>
-          </Box>
-          <Box>
-            <Text>Description: {token?.description}</Text>
-          </Box>
-          <Box>
-            <Text>Expiration date: {token?.expirationDate.toISOString()}</Text>
-          </Box>
-          <Box>
-            <Text mb={2}>Value: </Text>
-            <Box display="flex" sx={{ alignItems: 'center', gap: 2 }}>
+
+          <Box
+            sx={{
+              border: '1px solid',
+              borderColor: 'border.default',
+              borderRadius: 2,
+              p: 3,
+              bg: 'canvas.default',
+            }}
+          >
+            <Box
+              display="grid"
+              gridTemplateColumns="minmax(120px, 180px) 1fr"
+              sx={{ rowGap: 2, columnGap: 3, mb: 3 }}
+            >
+              <Text sx={{ color: 'fg.muted' }}>Name</Text>
+              <Text sx={{ fontWeight: 600 }}>{apiKey?.name || '-'}</Text>
+
+              <Text sx={{ color: 'fg.muted' }}>Description</Text>
+              <Text>{apiKey?.description || '-'}</Text>
+
+              <Text sx={{ color: 'fg.muted' }}>Expiration date</Text>
+              <Text>
+                {apiKey?.expirationDate
+                  ? `${apiKey.expirationDate.toLocaleString()} (${apiKey.expirationDate.toISOString()})`
+                  : '-'}
+              </Text>
+            </Box>
+
+            <Text sx={{ color: 'fg.muted', mb: 2 }}>API key value</Text>
+            <Box
+              display="flex"
+              sx={{
+                alignItems: 'flex-start',
+                gap: 2,
+              }}
+            >
               <Text
                 as="code"
                 sx={{
-                  color: 'fg.onEmphasis',
-                  bg: 'neutral.emphasis',
+                  color: 'fg.default',
+                  bg: 'canvas.inset',
+                  border: '1px solid',
+                  borderColor: 'border.default',
+                  borderRadius: 2,
                   p: 2,
                   overflowWrap: 'anywhere',
+                  fontSize: 0,
+                  lineHeight: '20px',
                   flex: 1,
+                  maxHeight: 140,
+                  overflowY: 'auto',
                 }}
               >
-                {token?.value}
+                {apiKey?.value}
               </Text>
               <IconButton
-                aria-label="Copy token to clipboard"
+                aria-label="Copy API key to clipboard"
                 icon={CopyIcon}
                 size="small"
                 onClick={() => {
-                  if (token?.value) {
-                    navigator.clipboard.writeText(token.value);
-                    enqueueToast('Token copied to clipboard', {
+                  if (apiKey?.value) {
+                    navigator.clipboard.writeText(apiKey.value);
+                    enqueueToast('API key copied to clipboard', {
                       variant: 'success',
                     });
                   }
@@ -193,8 +239,9 @@ export const IAMTokenNew = ({
               />
             </Box>
           </Box>
+
           <Box mt={3}>
-            <Button onClick={e => navigate(tokensListRoute, e)}>
+            <Button onClick={e => navigate(apiKeysListRoute, e)}>
               List my API Keys
             </Button>
           </Box>
@@ -212,16 +259,25 @@ export const IAMTokenNew = ({
             <Box>
               <Box sx={{ label: { marginTop: 2 } }}>
                 <FormControl required>
-                  <FormControl.Label>Token type</FormControl.Label>
+                  <FormControl.Label>Type</FormControl.Label>
                   <Select
                     name="type"
                     value={formValues.variant}
                     onChange={valueVariantChange}
                   >
-                    <Select.Option value="user_token">User Token</Select.Option>
+                    <Select.Option value="secret">Secret</Select.Option>
+                    <Select.Option value="publishable" disabled>
+                      Publishable (coming soon)
+                    </Select.Option>
+                    <Select.Option value="restricted" disabled>
+                      Restricted (coming soon)
+                    </Select.Option>
+                    <Select.Option value="temporary" disabled>
+                      Temporary (coming soon)
+                    </Select.Option>
                   </Select>
                   <FormControl.Caption>
-                    Pick the most appropriate token type.
+                    Secret is currently available. Additional API key types are coming soon.
                   </FormControl.Caption>
                 </FormControl>
                 <FormControl required>
@@ -284,10 +340,10 @@ export const IAMTokenNew = ({
                   sx={{ marginTop: 2 }}
                   onClick={e => {
                     e.preventDefault();
-                    valueSubmit();
+                    submitAPIKey();
                   }}
                 >
-                  Create a token
+                  Create an API key
                 </Button>
               </Box>
             </Box>
@@ -298,4 +354,4 @@ export const IAMTokenNew = ({
   );
 };
 
-export default IAMTokenNew;
+export default APIKeyNew;
diff --git a/src/views/api-keys/APIKeys.tsx b/src/views/api-keys/APIKeys.tsx
new file mode 100644
index 00000000..e7bb8440
--- /dev/null
+++ b/src/views/api-keys/APIKeys.tsx
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+import { useState, useEffect, useRef } from 'react';
+import {
+  PageLayout,
+  Button,
+  IconButton,
+  TextInput,
+  Text,
+  Label,
+  RelativeTime,
+} from '@primer/react';
+import {
+  Blankslate,
+  Dialog,
+  PageHeader,
+  Table,
+  DataTable,
+} from '@primer/react/experimental';
+import { Box } from '@datalayer/primer-addons';
+import { EditIcon } from '@datalayer/icons-react';
+import { TrashIcon } from '@primer/octicons-react';
+import { IIAMToken as IAPIKey } from '../../models';
+import { useCache, useNavigate, useToast } from '../../hooks';
+
+export type APIKeysProps = {
+  /** Route to navigate when clicking "New API Key" button. Defaults to '/new/api-key'. */
+  newAPIKeyRoute?: string;
+  /** Base route for the API keys list (used for edit navigation). Defaults to current relative path. */
+  apiKeysListRoute?: string;
+  /** Whether to display view titles/headings. Defaults to true. */
+  showTitle?: boolean;
+  /** Whether to render the "New API Key" button in this component header. Defaults to true. */
+  showNewButton?: boolean;
+};
+
+/**
+ * Table listing the API keys returned by the `useTokens` cache query, with
+ * per-row edit and delete actions.
+ *
+ * Deleting a key opens a dialog in which the key name must be retyped before
+ * the `useDeleteToken` mutation is fired. While no keys are held in local
+ * state (including before the query has produced data) a blankslate is shown
+ * instead of the table.
+ *
+ * @param apiKeysListRoute - Optional base route; the key id is appended to it
+ *   for edit navigation. When omitted, the bare id is used as the target.
+ * @param showTitle - Whether to render the blankslate heading and the table
+ *   title/subtitle. Defaults to true.
+ */
+const APIKeysTable = ({
+  apiKeysListRoute,
+  showTitle = true,
+}: {
+  apiKeysListRoute?: string;
+  showTitle?: boolean;
+}) => {
+  const { useTokens: useAPIKeys, useDeleteToken: useDeleteAPIKey } = useCache();
+  const { enqueueToast } = useToast();
+
+  const getAPIKeysQuery = useAPIKeys();
+  const deleteAPIKeyMutation = useDeleteAPIKey();
+
+  const navigate = useNavigate();
+  const [apiKeys, setApiKeys] = useState<IAPIKey[]>([]);
+  const [deletingAPIKey, setDeletingAPIKey] = useState<IAPIKey | null>(null);
+  const [deleteNameConfirm, setDeleteNameConfirm] = useState('');
+  // NOTE(review): this ref is attached to every row's delete button below but
+  // is never read in this file - confirm whether it is still needed.
+  const returnFocusRef = useRef(null);
+  // The typed confirmation must equal the name of the key pending deletion.
+  // Both sides are trimmed so a stored name with surrounding whitespace can
+  // still be confirmed.
+  const isDeleteConfirmed =
+    deletingAPIKey !== null &&
+    deleteNameConfirm.trim() === deletingAPIKey.name.trim();
+  // Dismisses the delete dialog and clears the typed confirmation.
+  const closeDeleteDialog = () => {
+    setDeletingAPIKey(null);
+    setDeleteNameConfirm('');
+  };
+  // Mirror the query result into local state, dropping falsy entries.
+  useEffect(() => {
+    if (getAPIKeysQuery.data) {
+      const normalized = getAPIKeysQuery.data.filter(
+        (apiKey): apiKey is IAPIKey => Boolean(apiKey),
+      );
+      setApiKeys(normalized);
+    }
+  }, [getAPIKeysQuery.data]);
+  // Fires the delete mutation once the confirmation matches, then reports the
+  // outcome with a toast and closes the dialog whatever the result.
+  const confirmDeleteAPIKey = () => {
+    if (!deletingAPIKey) return;
+    if (!isDeleteConfirmed) {
+      enqueueToast(
+        'Please type the API key name exactly to confirm deletion.',
+        {
+          variant: 'error',
+        },
+      );
+      return;
+    }
+    deleteAPIKeyMutation.mutate(deletingAPIKey.id, {
+      onSuccess: (resp: { success?: boolean; message?: string }) => {
+        if (resp?.success) {
+          enqueueToast(`API key "${deletingAPIKey.name}" deleted.`, {
+            variant: 'success',
+          });
+        } else {
+          enqueueToast(resp?.message || 'Failed to delete API key.', {
+            variant: 'error',
+          });
+        }
+      },
+      onError: () => {
+        enqueueToast('Failed to delete API key.', { variant: 'error' });
+      },
+      onSettled: closeDeleteDialog,
+    });
+  };
+  return apiKeys.length === 0 ? (
+    <Blankslate border spacious>
+      {showTitle && <Blankslate.Heading>API Keys</Blankslate.Heading>}
+      <Blankslate.Description>
+        <Text sx={{ textAlign: 'center' }}>No API Keys found.</Text>
+      </Blankslate.Description>
+    </Blankslate>
+  ) : (
+    <>
+      <Table.Container>
+        {showTitle && (
+          <>
+            <Table.Title as="h2" id="api-keys">
+              API Keys
+            </Table.Title>
+            <Table.Subtitle as="p" id="api-keys-subtitle">
+              Your API keys.
+            </Table.Subtitle>
+          </>
+        )}
+        <DataTable
+          // The referenced ids only exist when the title block is rendered.
+          aria-labelledby={showTitle ? 'api-keys' : undefined}
+          aria-describedby={showTitle ? 'api-keys-subtitle' : undefined}
+          data={apiKeys}
+          columns={[
+            {
+              header: 'Type',
+              field: 'variant',
+              renderCell: apiKey => <Label>{apiKey.variant}</Label>,
+            },
+            {
+              header: 'Name',
+              field: 'name',
+              rowHeader: true,
+            },
+            {
+              header: 'Description',
+              field: 'description',
+            },
+            {
+              header: 'Expiration date',
+              field: 'expirationDate',
+              renderCell: apiKey => (
+                <RelativeTime date={new Date(apiKey.expirationDate)} />
+              ),
+            },
+            {
+              header: '',
+              field: 'id',
+              renderCell: apiKey => (
+                <Box display="flex" sx={{ gap: 1 }}>
+                  <IconButton
+                    icon={EditIcon}
+                    aria-label="Edit"
+                    size="small"
+                    variant="invisible"
+                    onClick={e =>
+                      navigate(
+                        apiKeysListRoute
+                          ? `${apiKeysListRoute}/${apiKey.id}`
+                          : `${apiKey.id}`,
+                        e,
+                      )
+                    }
+                  />
+                  <IconButton
+                    ref={returnFocusRef}
+                    icon={TrashIcon}
+                    aria-label="Delete"
+                    size="small"
+                    variant="invisible"
+                    sx={{ color: 'danger.fg' }}
+                    onClick={() => {
+                      setDeletingAPIKey(apiKey);
+                      setDeleteNameConfirm('');
+                    }}
+                  />
+                </Box>
+              ),
+            },
+          ]}
+        />
+      </Table.Container>
+      {deletingAPIKey && (
+        <Dialog
+          title="Delete API key"
+          onClose={closeDeleteDialog}
+          footerButtons={[
+            {
+              buttonType: 'default',
+              content: 'Cancel',
+              onClick: closeDeleteDialog,
+            },
+            {
+              buttonType: 'danger',
+              content: 'Delete',
+              disabled: !isDeleteConfirmed,
+              onClick: event => {
+                if (!event.defaultPrevented) {
+                  event.preventDefault();
+                  confirmDeleteAPIKey();
+                }
+              },
+            },
+          ]}
+        >
+          Are you sure you want to delete the API key{' '}
+          <strong>{deletingAPIKey.name}</strong>? This action cannot be undone.
+          <Text sx={{ mt: 3, display: 'block', color: 'fg.muted' }}>
+            Type <strong>{deletingAPIKey.name}</strong> to confirm deletion.
+          </Text>
+          <TextInput
+            block
+            value={deleteNameConfirm}
+            onChange={e => setDeleteNameConfirm(e.target.value)}
+            placeholder="Retype API key name"
+            sx={{ mt: 2 }}
+            autoFocus
+          />
+        </Dialog>
+      )}
+    </>
+  );
+};
+
+/**
+ * Page-level API keys view: an optional header (title and "New API Key"
+ * button) above the API keys table.
+ *
+ * @param newAPIKeyRoute - Route navigated to by the "New API Key" button.
+ *   Defaults to '/new/api-key'.
+ * @param apiKeysListRoute - Forwarded to the table as the base route for edit
+ *   navigation.
+ * @param showTitle - Whether to render the page title and the table headings.
+ *   Defaults to true.
+ * @param showNewButton - Whether to render the "New API Key" button. Defaults
+ *   to true.
+ */
+export const APIKeys = ({
+  newAPIKeyRoute = '/new/api-key',
+  apiKeysListRoute,
+  showTitle = true,
+  showNewButton = true,
+}: APIKeysProps = {}) => {
+  const navigate = useNavigate();
+  return (
+    <PageLayout
+      containerWidth="full"
+      padding="normal"
+      style={{ overflow: 'visible', minHeight: 'calc(100vh - 45px)' }}
+    >
+      {showTitle || showNewButton ? (
+        <PageLayout.Header>
+          <PageHeader>
+            {showTitle && (
+              <PageHeader.TitleArea variant="large">
+                <PageHeader.Title>API Keys</PageHeader.Title>
+              </PageHeader.TitleArea>
+            )}
+            {showNewButton && (
+              <PageHeader.Actions>
+                <Button
+                  size="small"
+                  variant="primary"
+                  onClick={e => navigate(newAPIKeyRoute, e)}
+                >
+                  New API Key
+                </Button>
+              </PageHeader.Actions>
+            )}
+          </PageHeader>
+        </PageLayout.Header>
+      ) : null}
+      <PageLayout.Content>
+        <Box>
+          <APIKeysTable
+            apiKeysListRoute={apiKeysListRoute}
+            showTitle={showTitle}
+          />
+        </Box>
+      </PageLayout.Content>
+    </PageLayout>
+  );
+};
+
+export default APIKeys;
diff --git a/src/views/api-keys/APIKeysStandalone.tsx b/src/views/api-keys/APIKeysStandalone.tsx
new file mode 100644
index 00000000..bc46e5e2
--- /dev/null
+++ b/src/views/api-keys/APIKeysStandalone.tsx
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+import { useState, useEffect, useRef } from 'react';
+import {
+  PageLayout,
+  Button,
+  IconButton,
+  TextInput,
+  Text,
+  Label,
+  RelativeTime,
+} from '@primer/react';
+import {
+  Blankslate,
+  Dialog,
+  PageHeader,
+  Table,
+  DataTable,
+} from '@primer/react/experimental';
+import { Box } from '@datalayer/primer-addons';
+import { EditIcon } from '@datalayer/icons-react';
+import { TrashIcon } from '@primer/octicons-react';
+import { IIAMToken as IAPIKey } from '../../models';
+import { useCache, useNavigate, useToast } from '../../hooks';
+
+/**
+ * Table listing the API keys returned by the `useTokens` cache query, with
+ * per-row edit and delete actions.
+ *
+ * Edit navigates to the bare key id. Deleting a key opens a dialog in which
+ * the key name must be retyped before the `useDeleteToken` mutation is fired.
+ * While no keys are held in local state (including before the query has
+ * produced data) a blankslate is shown instead of the table.
+ */
+const APIKeysTable = () => {
+  const { useTokens: useAPIKeys, useDeleteToken: useDeleteAPIKey } = useCache();
+  const { enqueueToast } = useToast();
+
+  const getAPIKeysQuery = useAPIKeys();
+  const deleteAPIKeyMutation = useDeleteAPIKey();
+
+  const navigate = useNavigate();
+  const [apiKeys, setApiKeys] = useState<IAPIKey[]>([]);
+  const [deletingAPIKey, setDeletingAPIKey] = useState<IAPIKey | null>(null);
+  const [deleteNameConfirm, setDeleteNameConfirm] = useState('');
+  // NOTE(review): this ref is attached to every row's delete button below but
+  // is never read in this file - confirm whether it is still needed.
+  const returnFocusRef = useRef(null);
+  // The typed confirmation must equal the name of the key pending deletion.
+  // Both sides are trimmed so a stored name with surrounding whitespace can
+  // still be confirmed.
+  const isDeleteConfirmed =
+    deletingAPIKey !== null &&
+    deleteNameConfirm.trim() === deletingAPIKey.name.trim();
+  // Dismisses the delete dialog and clears the typed confirmation.
+  const closeDeleteDialog = () => {
+    setDeletingAPIKey(null);
+    setDeleteNameConfirm('');
+  };
+  // Mirror the query result into local state, dropping falsy entries.
+  useEffect(() => {
+    if (getAPIKeysQuery.data) {
+      const normalized = getAPIKeysQuery.data.filter(
+        (apiKey): apiKey is IAPIKey => Boolean(apiKey),
+      );
+      setApiKeys(normalized);
+    }
+  }, [getAPIKeysQuery.data]);
+  // Fires the delete mutation once the confirmation matches, then reports the
+  // outcome with a toast and closes the dialog whatever the result.
+  const confirmDeleteAPIKey = () => {
+    if (!deletingAPIKey) return;
+    if (!isDeleteConfirmed) {
+      enqueueToast(
+        'Please type the API key name exactly to confirm deletion.',
+        {
+          variant: 'error',
+        },
+      );
+      return;
+    }
+    deleteAPIKeyMutation.mutate(deletingAPIKey.id, {
+      onSuccess: (resp: { success?: boolean; message?: string }) => {
+        if (resp?.success) {
+          enqueueToast(`API key "${deletingAPIKey.name}" deleted.`, {
+            variant: 'success',
+          });
+        } else {
+          enqueueToast(resp?.message || 'Failed to delete API key.', {
+            variant: 'error',
+          });
+        }
+      },
+      onError: () => {
+        enqueueToast('Failed to delete API key.', { variant: 'error' });
+      },
+      onSettled: closeDeleteDialog,
+    });
+  };
+  return apiKeys.length === 0 ? (
+    <Blankslate border spacious>
+      <Blankslate.Heading>API Keys</Blankslate.Heading>
+      <Blankslate.Description>
+        <Text sx={{ textAlign: 'center' }}>No API Keys found.</Text>
+      </Blankslate.Description>
+    </Blankslate>
+  ) : (
+    <>
+      <Table.Container>
+        <Table.Title as="h2" id="api-keys">
+          API Keys
+        </Table.Title>
+        <Table.Subtitle as="p" id="api-keys-subtitle">
+          Your API keys.
+        </Table.Subtitle>
+        <DataTable
+          aria-labelledby="api-keys"
+          aria-describedby="api-keys-subtitle"
+          data={apiKeys}
+          columns={[
+            {
+              header: 'Type',
+              field: 'variant',
+              renderCell: apiKey => <Label>{apiKey.variant}</Label>,
+            },
+            {
+              header: 'Name',
+              field: 'name',
+              rowHeader: true,
+            },
+            {
+              header: 'Description',
+              field: 'description',
+            },
+            {
+              header: 'Expiration date',
+              field: 'expirationDate',
+              renderCell: apiKey => (
+                <RelativeTime date={new Date(apiKey.expirationDate)} />
+              ),
+            },
+            {
+              header: '',
+              field: 'id',
+              renderCell: apiKey => (
+                <Box display="flex" sx={{ gap: 1 }}>
+                  <IconButton
+                    icon={EditIcon}
+                    aria-label="Edit"
+                    size="small"
+                    variant="invisible"
+                    onClick={e => navigate(`${apiKey.id}`, e)}
+                  />
+                  <IconButton
+                    ref={returnFocusRef}
+                    icon={TrashIcon}
+                    aria-label="Delete"
+                    size="small"
+                    variant="invisible"
+                    sx={{ color: 'danger.fg' }}
+                    onClick={() => {
+                      setDeletingAPIKey(apiKey);
+                      setDeleteNameConfirm('');
+                    }}
+                  />
+                </Box>
+              ),
+            },
+          ]}
+        />
+      </Table.Container>
+      {deletingAPIKey && (
+        <Dialog
+          title="Delete API key"
+          onClose={closeDeleteDialog}
+          footerButtons={[
+            {
+              buttonType: 'default',
+              content: 'Cancel',
+              onClick: closeDeleteDialog,
+            },
+            {
+              buttonType: 'danger',
+              content: 'Delete',
+              disabled: !isDeleteConfirmed,
+              onClick: event => {
+                if (!event.defaultPrevented) {
+                  event.preventDefault();
+                  confirmDeleteAPIKey();
+                }
+              },
+            },
+          ]}
+        >
+          Are you sure you want to delete the API key{' '}
+          <strong>{deletingAPIKey.name}</strong>? This action cannot be undone.
+          <Text sx={{ mt: 3, display: 'block', color: 'fg.muted' }}>
+            Type <strong>{deletingAPIKey.name}</strong> to confirm deletion.
+          </Text>
+          <TextInput
+            block
+            value={deleteNameConfirm}
+            onChange={e => setDeleteNameConfirm(e.target.value)}
+            placeholder="Retype API key name"
+            sx={{ mt: 2 }}
+            autoFocus
+          />
+        </Dialog>
+      )}
+    </>
+  );
+};
+
+/**
+ * Standalone API keys page: a header with the title and a "New API key" button
+ * that navigates to '/api-keys/new', above the API keys table.
+ */
+export const APIKeysStandalone = () => {
+  const navigate = useNavigate();
+  return (
+    <PageLayout
+      containerWidth="full"
+      padding="normal"
+      style={{ overflow: 'visible', minHeight: 'calc(100vh - 45px)' }}
+    >
+      <PageLayout.Header>
+        <PageHeader>
+          <PageHeader.TitleArea variant="large">
+            <PageHeader.Title>API Keys</PageHeader.Title>
+          </PageHeader.TitleArea>
+          <PageHeader.Actions>
+            <Button
+              size="small"
+              variant="primary"
+              onClick={e => navigate('/api-keys/new', e)}
+            >
+              New API key
+            </Button>
+          </PageHeader.Actions>
+        </PageHeader>
+      </PageLayout.Header>
+      <PageLayout.Content>
+        <Box>
+          <APIKeysTable />
+        </Box>
+      </PageLayout.Content>
+    </PageLayout>
+  );
+};
+
+export default APIKeysStandalone;
diff --git a/src/views/api-keys/index.ts b/src/views/api-keys/index.ts
new file mode 100644
index 00000000..9c7d133a
--- /dev/null
+++ b/src/views/api-keys/index.ts
@@ -0,0 +1,8 @@
+/*
+ * Copyright (c) 2023-2025 Datalayer, Inc.
+ * Distributed under the terms of the Modified BSD License.
+ */
+
+export * from './APIKeyEdit';
+export * from './APIKeyNew';
+export * from './APIKeys';
diff --git a/src/views/iam-tokens/IAMTokens.tsx b/src/views/iam-tokens/IAMTokens.tsx
deleted file mode 100644
index 4c5f79ec..00000000
--- a/src/views/iam-tokens/IAMTokens.tsx
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (c) 2023-2025 Datalayer, Inc.
- * Distributed under the terms of the Modified BSD License.
- */
-
-import { useState, useEffect, useRef } from 'react';
-import {
-  PageLayout,
-  Button,
-  IconButton,
-  Text,
-  Label,
-  RelativeTime,
-  ConfirmationDialog,
-} from '@primer/react';
-import {
-  Blankslate,
-  PageHeader,
-  Table,
-  DataTable,
-} from '@primer/react/experimental';
-import { Box } from '@datalayer/primer-addons';
-import { EditIcon } from '@datalayer/icons-react';
-import { TrashIcon } from '@primer/octicons-react';
-import { IIAMToken } from '../../models';
-import { useCache, useNavigate, useToast } from '../../hooks';
-
-export type IAMTokensProps = {
-  /** Route to navigate when clicking "New API Key" button. Defaults to '/new/token'. */
-  newTokenRoute?: string;
-  /** Base route for the tokens list (used for edit navigation). Defaults to current relative path. */
-  tokensListRoute?: string;
-  /** Whether to display view titles/headings. Defaults to true. */
-  showTitle?: boolean;
-  /** Whether to render the "New API Key" button in this component header. Defaults to true. */
-  showNewButton?: boolean;
-};
-
-const TokensTable = ({
-  tokensListRoute,
-  showTitle = true,
-}: {
-  tokensListRoute?: string;
-  showTitle?: boolean;
-}) => {
-  const { useTokens, useDeleteToken } = useCache();
-  const { enqueueToast } = useToast();
-
-  const getTokensQuery = useTokens();
-  const deleteTokenMutation = useDeleteToken();
-
-  const navigate = useNavigate();
-  const [tokens, setTokens] = useState<IIAMToken[]>([]);
-  const [deletingToken, setDeletingToken] = useState<IIAMToken | null>(null);
-  const returnFocusRef = useRef(null);
-  useEffect(() => {
-    if (getTokensQuery.data) {
-      setTokens(getTokensQuery.data);
-    }
-  }, [getTokensQuery.data]);
-  const handleDeleteConfirm = () => {
-    if (!deletingToken) return;
-    deleteTokenMutation.mutate(deletingToken.id, {
-      onSuccess: (resp: any) => {
-        if (resp.success) {
-          enqueueToast(`Token "${deletingToken.name}" deleted.`, {
-            variant: 'success',
-          });
-        } else {
-          enqueueToast(resp.message || 'Failed to delete token.', {
-            variant: 'error',
-          });
-        }
-      },
-      onError: () => {
-        enqueueToast('Failed to delete token.', { variant: 'error' });
-      },
-      onSettled: () => setDeletingToken(null),
-    });
-  };
-  return tokens.length === 0 ? (
-    <Blankslate border spacious>
-      {showTitle && <Blankslate.Heading>API Keys</Blankslate.Heading>}
-      <Blankslate.Description>
-        <Text sx={{ textAlign: 'center' }}>No API Keys found.</Text>
-      </Blankslate.Description>
-    </Blankslate>
-  ) : (
-    <>
-      <Table.Container>
-        {showTitle && (
-          <>
-            <Table.Title as="h2" id="tokens">
-              API Keys
-            </Table.Title>
-            <Table.Subtitle as="p" id="tokens-subtitle">
-              Your tokens.
-            </Table.Subtitle>
-          </>
-        )}
-        <DataTable
-          aria-labelledby="teams"
-          aria-describedby="teams-subtitle"
-          data={tokens}
-          columns={[
-            {
-              header: 'Type',
-              field: 'variant',
-              renderCell: token => <Label>{token.variant}</Label>,
-            },
-            {
-              header: 'Name',
-              field: 'name',
-              rowHeader: true,
-            },
-            {
-              header: 'Description',
-              field: 'description',
-            },
-            {
-              header: 'Expiration date',
-              field: 'expirationDate',
-              renderCell: token => (
-                <RelativeTime date={new Date(token.expirationDate)} />
-              ),
-            },
-            {
-              header: '',
-              field: 'id',
-              renderCell: token => (
-                <Box display="flex" sx={{ gap: 1 }}>
-                  <IconButton
-                    icon={EditIcon}
-                    aria-label="Edit"
-                    size="small"
-                    variant="invisible"
-                    onClick={e =>
-                      navigate(
-                        tokensListRoute
-                          ? `${tokensListRoute}/${token.id}`
-                          : `${token.id}`,
-                        e,
-                      )
-                    }
-                  />
-                  <IconButton
-                    ref={returnFocusRef}
-                    icon={TrashIcon}
-                    aria-label="Delete"
-                    size="small"
-                    variant="invisible"
-                    sx={{ color: 'danger.fg' }}
-                    onClick={() => setDeletingToken(token)}
-                  />
-                </Box>
-              ),
-            },
-          ]}
-        />
-      </Table.Container>
-      {deletingToken && (
-        <ConfirmationDialog
-          title="Delete token"
-          onClose={gesture => {
-            if (gesture === 'confirm') handleDeleteConfirm();
-            else setDeletingToken(null);
-          }}
-          confirmButtonContent="Delete"
-          confirmButtonType="danger"
-        >
-          Are you sure you want to delete the token{' '}
-          <strong>{deletingToken.name}</strong>? This action cannot be undone.
-        </ConfirmationDialog>
-      )}
-    </>
-  );
-};
-
-export const IAMTokens = ({
-  newTokenRoute = '/new/token',
-  tokensListRoute,
-  showTitle = true,
-  showNewButton = true,
-}: IAMTokensProps = {}) => {
-  const navigate = useNavigate();
-  return (
-    <PageLayout
-      containerWidth="full"
-      padding="normal"
-      style={{ overflow: 'visible', minHeight: 'calc(100vh - 45px)' }}
-    >
-      {showTitle || showNewButton ? (
-        <PageLayout.Header>
-          <PageHeader>
-            {showTitle && (
-              <PageHeader.TitleArea variant="large">
-                <PageHeader.Title>API Keys</PageHeader.Title>
-              </PageHeader.TitleArea>
-            )}
-            {showNewButton && (
-              <PageHeader.Actions>
-                <Button
-                  size="small"
-                  variant="primary"
-                  onClick={e => navigate(newTokenRoute, e)}
-                >
-                  New API Key
-                </Button>
-              </PageHeader.Actions>
-            )}
-          </PageHeader>
-        </PageLayout.Header>
-      ) : null}
-      <PageLayout.Content>
-        <Box>
-          <TokensTable
-            tokensListRoute={tokensListRoute}
-            showTitle={showTitle}
-          />
-        </Box>
-      </PageLayout.Content>
-    </PageLayout>
-  );
-};
-
-export default IAMTokens;
diff --git a/src/views/iam-tokens/Tokens.tsx b/src/views/iam-tokens/Tokens.tsx
deleted file mode 100644
index 1663983f..00000000
--- a/src/views/iam-tokens/Tokens.tsx
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2023-2025 Datalayer, Inc.
- * Distributed under the terms of the Modified BSD License.
- */
-
-import { useState, useEffect, useRef } from 'react';
-import {
-  PageLayout,
-  Button,
-  IconButton,
-  Text,
-  Label,
-  RelativeTime,
-  ConfirmationDialog,
-} from '@primer/react';
-import {
-  Blankslate,
-  PageHeader,
-  Table,
-  DataTable,
-} from '@primer/react/experimental';
-import { Box } from '@datalayer/primer-addons';
-import { EditIcon } from '@datalayer/icons-react';
-import { TrashIcon } from '@primer/octicons-react';
-import { IIAMToken } from '../../models';
-import { useCache, useNavigate, useToast } from '../../hooks';
-
-const TokensTable = () => {
-  const { useTokens, useDeleteToken } = useCache();
-  const { enqueueToast } = useToast();
-
-  const getTokensQuery = useTokens();
-  const deleteTokenMutation = useDeleteToken();
-
-  const navigate = useNavigate();
-  const [tokens, setTokens] = useState<IIAMToken[]>([]);
-  const [deletingToken, setDeletingToken] = useState<IIAMToken | null>(null);
-  const returnFocusRef = useRef(null);
-  useEffect(() => {
-    if (getTokensQuery.data) {
-      setTokens(getTokensQuery.data);
-    }
-  }, [getTokensQuery.data]);
-  const handleDeleteConfirm = () => {
-    if (!deletingToken) return;
-    deleteTokenMutation.mutate(deletingToken.id, {
-      onSuccess: (resp: any) => {
-        if (resp.success) {
-          enqueueToast(`Token "${deletingToken.name}" deleted.`, {
-            variant: 'success',
-          });
-        } else {
-          enqueueToast(resp.message || 'Failed to delete token.', {
-            variant: 'error',
-          });
-        }
-      },
-      onError: () => {
-        enqueueToast('Failed to delete token.', { variant: 'error' });
-      },
-      onSettled: () => setDeletingToken(null),
-    });
-  };
-  return tokens.length === 0 ? (
-    <Blankslate border spacious>
-      <Blankslate.Heading>Tokens</Blankslate.Heading>
-      <Blankslate.Description>
-        <Text sx={{ textAlign: 'center' }}>No Tokens found.</Text>
-      </Blankslate.Description>
-    </Blankslate>
-  ) : (
-    <>
-      <Table.Container>
-        <Table.Title as="h2" id="tokens">
-          Tokens
-        </Table.Title>
-        <Table.Subtitle as="p" id="tokens-subtitle">
-          Your tokens.
-        </Table.Subtitle>
-        <DataTable
-          aria-labelledby="teams"
-          aria-describedby="teams-subtitle"
-          data={tokens}
-          columns={[
-            {
-              header: 'Type',
-              field: 'variant',
-              renderCell: token => <Label>{token.variant}</Label>,
-            },
-            {
-              header: 'Name',
-              field: 'name',
-              rowHeader: true,
-            },
-            {
-              header: 'Description',
-              field: 'description',
-            },
-            {
-              header: 'Expiration date',
-              field: 'expirationDate',
-              renderCell: token => (
-                <RelativeTime date={new Date(token.expirationDate)} />
-              ),
-            },
-            {
-              header: '',
-              field: 'id',
-              renderCell: token => (
-                <Box display="flex" sx={{ gap: 1 }}>
-                  <IconButton
-                    icon={EditIcon}
-                    aria-label="Edit"
-                    size="small"
-                    variant="invisible"
-                    onClick={e => navigate(`${token.id}`, e)}
-                  />
-                  <IconButton
-                    ref={returnFocusRef}
-                    icon={TrashIcon}
-                    aria-label="Delete"
-                    size="small"
-                    variant="invisible"
-                    sx={{ color: 'danger.fg' }}
-                    onClick={() => setDeletingToken(token)}
-                  />
-                </Box>
-              ),
-            },
-          ]}
-        />
-      </Table.Container>
-      {deletingToken && (
-        <ConfirmationDialog
-          title="Delete token"
-          onClose={gesture => {
-            if (gesture === 'confirm') handleDeleteConfirm();
-            else setDeletingToken(null);
-          }}
-          confirmButtonContent="Delete"
-          confirmButtonType="danger"
-        >
-          Are you sure you want to delete the token{' '}
-          <strong>{deletingToken.name}</strong>? This action cannot be undone.
-        </ConfirmationDialog>
-      )}
-    </>
-  );
-};
-
-export const Tokens = () => {
-  const navigate = useNavigate();
-  return (
-    <PageLayout
-      containerWidth="full"
-      padding="normal"
-      style={{ overflow: 'visible', minHeight: 'calc(100vh - 45px)' }}
-    >
-      <PageLayout.Header>
-        <PageHeader>
-          <PageHeader.TitleArea variant="large">
-            <PageHeader.Title>Tokens</PageHeader.Title>
-          </PageHeader.TitleArea>
-          <PageHeader.Actions>
-            <Button
-              size="small"
-              variant="primary"
-              onClick={e => navigate('/tokens/new', e)}
-            >
-              New token
-            </Button>
-          </PageHeader.Actions>
-        </PageHeader>
-      </PageLayout.Header>
-      <PageLayout.Content>
-        <Box>
-          <TokensTable />
-        </Box>
-      </PageLayout.Content>
-    </PageLayout>
-  );
-};
-
-export default Tokens;