diff --git a/.github/workflows/test_toolbox_dsl.yml b/.github/workflows/test_toolbox_dsl.yml index 0946fca0..73faeadb 100644 --- a/.github/workflows/test_toolbox_dsl.yml +++ b/.github/workflows/test_toolbox_dsl.yml @@ -1,5 +1,5 @@ -# Unit tests for projects/core/dsl (task decorators, execute_tasks, failure/always/skip). -name: Toolbox DSL tests +# Python tests for repo-managed suites discovered via pyproject testpaths. +name: Python test suites on: pull_request: @@ -29,9 +29,9 @@ jobs: run: | set -o errexit python -m pip install --upgrade pip - python -m pip install pytest pyyaml jinja2 jsonpath_ng + python -m pip install .[dev] - - name: Run projects/core/tests + - name: Run pytest suites run: | set -o errexit # Tree + docstrings (what is being tested), then execute with one line per test + result. diff --git a/projects/core/dsl/log.py b/projects/core/dsl/log.py index b5c911de..dc28ffab 100644 --- a/projects/core/dsl/log.py +++ b/projects/core/dsl/log.py @@ -16,20 +16,17 @@ def setup_clean_logger(name: str): logger = logging.getLogger(name) logger.setLevel(logging.INFO) - # Only configure if not already configured if not logger.handlers: - # Create console handler with clean format console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) console_handler.setFormatter(logging.Formatter("%(message)s")) logger.addHandler(console_handler) - logger.propagate = False # Don't propagate to root logger + logger.propagate = False return logger -# Configure clean logging for DSL operations logger = setup_clean_logger("DSL") @@ -45,30 +42,23 @@ def log_task_header(task_name: str, task_doc: str, rel_filename: str, line_no: i def log_execution_banner(function_args: dict = None, log_file: str = None): """Log the execution banner with function info and arguments""" - # Get the caller's filename and function name for the header frame = inspect.currentframe() - caller_frame = ( - frame.f_back.f_back - ) # Go back 2 frames (this func -> execute_tasks -> actual caller) + caller_frame = frame.f_back.f_back filename = caller_frame.f_code.co_filename rel_filename = _get_forge_relative_path(filename) - - # Use parent directory name as function name for toolbox operations function_name = _get_toolbox_function_name(filename) - # Print execution header logger.info("") logger.info("===============================================================================") logger.info(f"| FILE: {rel_filename}") logger.info(f"| COMMAND: {function_name}") if function_args: - # Display arguments in YAML format logger.info("| ARGUMENTS:") for key, value in function_args.items(): - if key == "function_args": # Skip the function_args parameter itself + if key == "function_args": continue if value is None: continue @@ -83,19 +73,13 @@ def log_execution_banner(function_args: dict = None, log_file: str = None): def log_completion_banner(function_args: dict = None, status: str = "SUCCESS"): """Log the completion banner with function info and completion status""" - # Get the caller's filename and function name for the header frame = inspect.currentframe() - caller_frame = ( - frame.f_back.f_back - ) # Go back 2 frames (this func -> execute_tasks -> actual caller) + caller_frame = frame.f_back.f_back filename = caller_frame.f_code.co_filename rel_filename = _get_forge_relative_path(filename) - - # Use parent directory name as function name for toolbox operations function_name = _get_toolbox_function_name(filename) - # Print completion header logger.info("") 
logger.info("===============================================================================") logger.info(f"| {rel_filename}") @@ -115,8 +99,4 @@ def _get_forge_relative_path(filename): def _get_toolbox_function_name(filename): """Extract toolbox function name from file path (parent directory name)""" - filename_path = Path(filename) - - # For paths like projects/llm_d/toolbox/capture_isvc_state/main.py - # Return the parent directory name: capture_isvc_state - return filename_path.parent.name + return Path(filename).parent.name diff --git a/projects/core/dsl/runtime.py b/projects/core/dsl/runtime.py index d05d91ac..4b986805 100644 --- a/projects/core/dsl/runtime.py +++ b/projects/core/dsl/runtime.py @@ -16,7 +16,13 @@ from projects.core.library.run import SignalError from .context import create_task_parameters -from .log import log_completion_banner, log_execution_banner, logger +from .log import ( + _get_forge_relative_path, + _get_toolbox_function_name, + log_completion_banner, + log_execution_banner, + logger, +) from .script_manager import get_script_manager # Import from task.py to avoid circular imports @@ -471,18 +477,3 @@ def _generate_restart_script(function_args: dict, caller_frame, meta_dir): os.chmod(restart_file, 0o755) logger.debug(f"Generated restart script: {restart_file}") - - -def _get_forge_relative_path(filename): - """Get file path relative to FORGE home directory (forge root)""" - - return Path(filename).relative_to(env.FORGE_HOME) - - -def _get_toolbox_function_name(filename): - """Extract toolbox function name from file path (parent directory name)""" - filename_path = Path(filename) - - # For paths like projects/llm_d/toolbox/capture_isvc_state/main.py - # Return the parent directory name: capture_isvc_state - return filename_path.parent.name diff --git a/projects/core/library/config.py b/projects/core/library/config.py index d84005c4..b8e2c79b 100644 --- a/projects/core/library/config.py +++ b/projects/core/library/config.py @@ -472,8 +472,6 @@ def init(orchestration_dir, *, apply_config_overrides=True): project = Config(config_path) - env.ARTIFACT_DIR / VARIABLE_OVERRIDES_FILENAME - if not apply_config_overrides: logger.info( "config.init: running with 'apply_config_overrides=False', " @@ -489,3 +487,20 @@ def init(orchestration_dir, *, apply_config_overrides=True): project.apply_config_overrides() project.apply_presets_from_project_args() project.apply_config_overrides() # reapply so that the value overrides are applied last + + +def reload(orchestration_dir, *, apply_config_overrides=True): + global project + + project = None + + artifact_config = env.ARTIFACT_DIR / "config.yaml" + if artifact_config.exists(): + artifact_config.unlink() + + presets_applied = env.ARTIFACT_DIR / "presets_applied" + if presets_applied.exists(): + presets_applied.unlink() + + init(orchestration_dir, apply_config_overrides=apply_config_overrides) + return project diff --git a/projects/llm_d/README.md b/projects/llm_d/README.md index f254277f..82a108ac 100644 --- a/projects/llm_d/README.md +++ b/projects/llm_d/README.md @@ -1,304 +1,25 @@ -# Skeleton Project +# llm_d -This is a template/skeleton project that demonstrates how to create a new project within the **FORGE** test harness framework. +`llm_d` is the Forge project for validating downstream llm-d on RHOAI. 
-## Overview +The current implementation is intentionally narrow: -This skeleton shows the essential structure and patterns for building projects that comply with FORGE's constitutional principles: +- target only downstream `LLMInferenceService` +- keep the public interface compatible with current Fournos phase execution +- use checked-in config chunks and manifests instead of a large mutable config surface -- **CI-First Testing**: Structured phases ensure consistent CI integration -- **Observable Measurements**: Command execution logging and timing -- **Reproducible Results**: Deterministic operations with clear success/failure -- **Scale-Aware Design**: Efficient synchronous operations -- **AI Platform Specificity**: OpenShift AI focused testing patterns +Configuration layout: -## Project Structure +- project config chunk: [`orchestration/config.d/project.yaml`](./orchestration/config.d/project.yaml) +- config chunks: [`orchestration/config.d`](./orchestration/config.d) +- presets: [`orchestration/presets.d`](./orchestration/presets.d) +- manifests: [`orchestration/manifests`](./orchestration/manifests) -``` -skeleton/ -├── orchestration/ -│ └── ci.py # Main CI script with Click-based CLI -├── README.md # This documentation -├── config.yaml # Project configuration (optional) -├── tests/ # Test scripts and data (optional) -└── scripts/ # Helper scripts (optional) -``` +Main entrypoints: -## Quick Start - -### 1. Run Individual Phases - -```bash -# From the FORGE root directory - -# Prepare environment -./run_ci skeleton ci prepare - -# Run tests -./run_ci skeleton ci test - -# Clean up -./run_ci skeleton ci cleanup -``` - -### 2. Development Options - -```bash -# Verbose output -./run_ci skeleton ci --verbose test - -# See all available commands -./run_ci skeleton ci --help -``` - -## Creating Your Own Project - -### Step 1: Copy Skeleton - -```bash -cp -r projects/skeleton projects/your-project-name -cd projects/your-project-name -``` - -### Step 2: Customize - -1. **Update `orchestration/ci.py`**: - - Change `self.project_name` to your project name - - Replace placeholder `echo` commands with actual test logic - - Update the CLI description and help text - -2. **Update `README.md`**: - - Document your project's purpose and usage - - Add specific setup instructions - -3. 
**Add configuration** (optional): - - Create `config.yaml` for project-specific settings - - Reference it in your CI script - -### Step 3: Implement Test Logic - -Replace the example `echo` commands with your actual test logic: - -#### Prepare Phase -```python -def prepare(self): - self.log("Starting prepare phase...") - - # Example: Install dependencies - if not self.execute_command( - "oc apply -f manifests/setup.yaml", - "Deploy setup resources" - ): - return 1 - - # Example: Validate environment - if not self.execute_command( - "oc get nodes", - "Check cluster nodes" - ): - return 1 - - self.log("Prepare phase completed!", "success") - return 0 -``` - -#### Test Phase -```python -def test(self): - self.log("Starting test phase...") - - # Example: Run performance tests - if not self.execute_command( - "python scripts/performance_test.py --config config.yaml", - "Running performance tests" - ): - return 1 - - # Example: Run functional tests - if not self.execute_command( - "pytest tests/ -v", - "Running functional tests" - ): - return 1 - - self.log("Test phase completed!", "success") - return 0 -``` - -#### Cleanup Phase -```python -def cleanup(self): - self.log("Starting cleanup phase...") - - # Example: Remove test resources - self.execute_command( - "oc delete -f manifests/", - "Cleanup test resources" - ) - - # Example: Generate reports - self.execute_command( - "python scripts/generate_report.py", - "Generate final report" - ) - - self.log("Cleanup phase completed!", "success") - return 0 -``` - -## Key Patterns - -### 1. Phase Structure - -Each project should implement these standard phases: -- **prepare**: Set up environment and dependencies -- **test**: Execute main testing logic -- **cleanup**: Clean up resources and finalize - -### 2. Command Execution - -Use the `execute_command` method for consistent execution and logging: - -```python -# Basic command execution -success = self.execute_command("your-command", "Description") -if not success: - return 1 # Exit with error - -# Command with complex logic -result = self.execute_command( - "kubectl get pods -o json", - "Check pod status" -) -``` - -### 3. Error Handling - -Always check command results and handle failures appropriately: - -```python -if not self.execute_command("critical-command", "Critical step"): - self.log("Critical step failed!", "error") - return 1 # Exit with error code - -# Cleanup commands can be non-critical -self.execute_command("cleanup-command", "Optional cleanup") -# Continue regardless of success -``` - -### 4. Logging - -Use the logging methods for consistent output: - -```python -self.log("Starting operation", "info") # ℹ️ [project] Starting operation -self.log("Operation completed", "success") # ✅ [project] Operation completed -self.log("Warning occurred", "warning") # ⚠️ [project] Warning occurred -self.log("Error occurred", "error") # ❌ [project] Error occurred -``` - -### 5. 
Verbose Mode - -The framework automatically handles verbose mode: - -```python -# In verbose mode, command details are automatically shown -# Your execute_command calls will show: -# - Command being executed -# - Command output (if any) -# - Execution duration -``` - -## Click CLI Structure - -The skeleton uses Click groups to organize commands: - -```python -@click.group() -@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output') -@click.pass_context -def cli(ctx, verbose): - """Project CI Operations for FORGE.""" - ctx.ensure_object(types.SimpleNamespace) - ctx.obj.verbose = verbose - ctx.obj.runner = YourProjectTestRunner(verbose) - -@cli.command() -@click.pass_context -def prepare(ctx): - """Prepare phase - Set up environment and dependencies.""" - runner = ctx.obj.runner - exit_code = runner.prepare() - sys.exit(exit_code) -``` - -## Best Practices - -### 1. Constitutional Compliance - -- ✅ **CI-First**: Design for automated execution without user interaction -- ✅ **Observable**: Log important events and command execution -- ✅ **Reproducible**: Use deterministic operations and clear error codes -- ✅ **Scale-Aware**: Keep operations efficient and focused -- ✅ **AI Platform Specific**: Focus on OpenShift AI scenarios and tooling - -### 2. Error Handling - -- Always validate prerequisites in prepare phase -- Check command results and fail fast on errors -- Provide meaningful error messages with context -- Clean up resources even when tests fail (use try/except if needed) - -### 3. Command Design - -- Make commands idempotent when possible -- Use meaningful descriptions for all execute_command calls -- Test commands locally before adding to CI -- Consider timeouts for long-running operations - -### 4. Configuration - -- Keep project configuration in `config.yaml` or environment variables -- Make tests configurable for different environments -- Document all configuration options -- Use sensible defaults - -## Testing the Skeleton - -```bash -# Test individual phases -./run_ci skeleton ci prepare -./run_ci skeleton ci test -./run_ci skeleton ci cleanup - -# Test with verbose output -./run_ci skeleton ci --verbose prepare - -# See all available commands -./run_ci skeleton ci --help -``` - -## Integration with CI Systems - -The skeleton is designed for easy CI integration: - -```bash -# In your CI pipeline -./run_ci your-project ci prepare || exit 1 -./run_ci your-project ci test || exit 1 -./run_ci your-project ci cleanup # Always run cleanup -``` - -## Next Steps - -1. **Study the Code**: Review `orchestration/ci.py` to understand the patterns -2. **Copy and Customize**: Create your own project based on this skeleton -3. **Implement Tests**: Replace placeholder `echo` commands with real test logic -4. **Test Integration**: Verify your project works with the run_ci entrypoint -5. 
**Add Documentation**: Document your specific test scenarios and setup - -## Support - -- Review other projects in `projects/` for more examples -- Check the main FORGE documentation -- Study the run_ci entrypoint code in `projects/core/ci_entrypoint/` +- CI phase wrapper: [`orchestration/ci.py`](./orchestration/ci.py) +- CLI wrapper: [`orchestration/cli.py`](./orchestration/cli.py) +- Shared runtime/config loader: [`runtime/llmd_runtime.py`](./runtime/llmd_runtime.py) +- Toolbox prepare command: [`toolbox/prepare/main.py`](./toolbox/prepare/main.py) +- Toolbox test command: [`toolbox/test/main.py`](./toolbox/test/main.py) +- Toolbox cleanup command: [`toolbox/cleanup/main.py`](./toolbox/cleanup/main.py) diff --git a/projects/llm_d/orchestration/ci.py b/projects/llm_d/orchestration/ci.py old mode 100755 new mode 100644 index 7623510f..bc5ae6f4 --- a/projects/llm_d/orchestration/ci.py +++ b/projects/llm_d/orchestration/ci.py @@ -4,13 +4,46 @@ """ +import os import types import click -import prepare_llmd -import test_llmd from projects.core.library import ci as ci_lib +from projects.llm_d.runtime import llmd_runtime, phase_inputs +from projects.llm_d.toolbox.cleanup.main import run as cleanup_toolbox_run +from projects.llm_d.toolbox.prepare.main import run as prepare_toolbox_run +from projects.llm_d.toolbox.test.main import run as test_toolbox_run + + +def init_runtime() -> None: + llmd_runtime.init() + + +def load_runtime_configuration(): + return llmd_runtime.load_run_configuration( + requested_preset=os.environ.get("FORGE_PRESET"), + raw_overrides=os.environ.get("FORGE_CONFIG_OVERRIDES"), + job_name=os.environ.get("FORGE_JOB_NAME"), + ) + + +def run_prepare_phase() -> int: + config = load_runtime_configuration() + inputs_file = phase_inputs.write_prepare_inputs(config) + return prepare_toolbox_run(inputs_file=str(inputs_file)) + + +def run_test_phase() -> int: + config = load_runtime_configuration() + inputs_file = phase_inputs.write_test_inputs(config) + return test_toolbox_run(inputs_file=str(inputs_file)) + + +def run_cleanup_phase() -> int: + config = load_runtime_configuration() + inputs_file = phase_inputs.write_cleanup_inputs(config) + return cleanup_toolbox_run(inputs_file=str(inputs_file)) @click.group() @@ -19,31 +52,31 @@ def main(ctx): """LLM-D Project CI Operations for FORGE.""" ctx.ensure_object(types.SimpleNamespace) - test_llmd.init() + init_runtime() @main.command() @click.pass_context @ci_lib.safe_ci_command -def prepare(ctx): +def prepare(ctx) -> int: """Prepare phase - Set up environment and dependencies.""" - return prepare_llmd.prepare() + return run_prepare_phase() @main.command() @click.pass_context @ci_lib.safe_ci_command -def test(ctx): +def test(ctx) -> int: """Test phase - Execute the main testing logic.""" - return test_llmd.test() + return run_test_phase() @main.command() @click.pass_context @ci_lib.safe_ci_command -def pre_cleanup(ctx): +def pre_cleanup(ctx) -> int: """Cleanup phase - Clean up resources and finalize.""" - return prepare_llmd.cleanup() + return run_cleanup_phase() if __name__ == "__main__": diff --git a/projects/llm_d/orchestration/cli.py b/projects/llm_d/orchestration/cli.py old mode 100755 new mode 100644 index def09477..fdb84fa9 --- a/projects/llm_d/orchestration/cli.py +++ b/projects/llm_d/orchestration/cli.py @@ -1,63 +1,88 @@ #!/usr/bin/env python3 -""" -LLM-D Project CLI Operations -""" import logging -import sys +import os import types import click -import prepare_llmd -import test_llmd from projects.core.library.cli import 
safe_cli_command +from projects.llm_d.runtime import llmd_runtime, phase_inputs +from projects.llm_d.toolbox.cleanup.main import run as cleanup_toolbox_run +from projects.llm_d.toolbox.prepare.main import run as prepare_toolbox_run +from projects.llm_d.toolbox.test.main import run as test_toolbox_run logger = logging.getLogger(__name__) +def init_runtime() -> None: + llmd_runtime.init() + + +def load_runtime_configuration(): + return llmd_runtime.load_run_configuration( + requested_preset=os.environ.get("FORGE_PRESET"), + raw_overrides=os.environ.get("FORGE_CONFIG_OVERRIDES"), + job_name=os.environ.get("FORGE_JOB_NAME"), + ) + + +def run_prepare_phase() -> int: + config = load_runtime_configuration() + inputs_file = phase_inputs.write_prepare_inputs(config) + return prepare_toolbox_run(inputs_file=str(inputs_file)) + + +def run_test_phase() -> int: + config = load_runtime_configuration() + inputs_file = phase_inputs.write_test_inputs(config) + return test_toolbox_run(inputs_file=str(inputs_file)) + + +def run_cleanup_phase() -> int: + config = load_runtime_configuration() + inputs_file = phase_inputs.write_cleanup_inputs(config) + return cleanup_toolbox_run(inputs_file=str(inputs_file)) + + @click.group() @click.pass_context def main(ctx): - """LLM-D Project CI Operations for FORGE.""" + """LLM-D Project CLI Operations for FORGE.""" ctx.ensure_object(types.SimpleNamespace) - test_llmd.init() + init_runtime() @main.command() @click.pass_context @safe_cli_command -def prepare(ctx): +def prepare(ctx) -> int: """Prepare phase - Set up environment and dependencies.""" - exit_code = prepare_llmd.prepare() - sys.exit(exit_code) + return run_prepare_phase() @main.command() @click.pass_context @safe_cli_command -def test(ctx): +def test(ctx) -> int: """Test phase - Execute the main testing logic.""" - exit_code = test_llmd.test() - sys.exit(exit_code) + return run_test_phase() @main.command() @click.pass_context @safe_cli_command -def pre_cleanup(ctx): +def pre_cleanup(ctx) -> int: """Cleanup phase - Clean up resources and finalize.""" - exit_code = prepare_llmd.cleanup() - sys.exit(exit_code) + return run_cleanup_phase() @main.command() @click.pass_context @safe_cli_command -def post_cleanup(ctx): +def post_cleanup(ctx) -> int: """Cleanup phase - Clean up resources and finalize.""" - exit_code = prepare_llmd.cleanup() - sys.exit(exit_code) + return run_cleanup_phase() if __name__ == "__main__": diff --git a/projects/llm_d/orchestration/config.d/model_cache.yaml b/projects/llm_d/orchestration/config.d/model_cache.yaml new file mode 100644 index 00000000..eae01772 --- /dev/null +++ b/projects/llm_d/orchestration/config.d/model_cache.yaml @@ -0,0 +1,25 @@ +enabled: true +marker_filename: .forge-model-cache.json + +pvc: + name_prefix: llm-d-model + size: 15Gi + access_mode: ReadWriteOnce + storage_class_name: null + model_directory_name: model + +download: + wait_timeout_seconds: 7200 + poll_interval_seconds: 15 + pod_image_pull_policy: IfNotPresent + +hf: + downloader_image: registry.access.redhat.com/ubi9/python-311 + token_secret_name: null + token_secret_key: token + +oci: + extractor_image: registry.redhat.io/openshift4/ose-cli:v4.19 + registry_auth_secret_name: null + registry_auth_secret_key: .dockerconfigjson + image_path: / diff --git a/projects/llm_d/orchestration/config.d/models.yaml b/projects/llm_d/orchestration/config.d/models.yaml new file mode 100644 index 00000000..4334cf4a --- /dev/null +++ b/projects/llm_d/orchestration/config.d/models.yaml @@ -0,0 +1,32 @@ +qwen3-0-6b: + 
served_model_name: Qwen/Qwen3-0.6B + uri: hf://Qwen/Qwen3-0.6B + cache: + pvc_size: 10Gi + access_mode: ReadWriteOnce + resources: + requests: + cpu: "4" + memory: 16Gi + nvidia.com/gpu: "1" + limits: + cpu: "4" + memory: 16Gi + nvidia.com/gpu: "1" + +llama-3-1-8b-instruct-fp8: + served_model_name: llama-3-1-8b-instruct-fp8 + uri: oci://registry.redhat.io/rhelai1/modelcar-llama-3-1-8b-instruct-fp8-dynamic:1.5 + cache: + pvc_size: 40Gi + access_mode: ReadWriteOnce + oci_image_path: / + resources: + requests: + cpu: "4" + memory: 8Gi + nvidia.com/gpu: "1" + limits: + cpu: "4" + memory: 8Gi + nvidia.com/gpu: "1" diff --git a/projects/llm_d/orchestration/config.d/platform.yaml b/projects/llm_d/orchestration/config.d/platform.yaml new file mode 100644 index 00000000..6f823eba --- /dev/null +++ b/projects/llm_d/orchestration/config.d/platform.yaml @@ -0,0 +1,86 @@ +cluster: + minimum_openshift_version: "4.19.9" + namespace: + name: forge-llm-d + prefix: llm-d + max_length: 63 + cleanup_timeout_seconds: 900 + gpu_node_label_selector: nvidia.com/gpu.present=true + nfd_gpu_detection_labels: + - feature.node.kubernetes.io/pci-10de.present + - feature.node.kubernetes.io/pci-0302_10de.present + - feature.node.kubernetes.io/pci-0300_10de.present + +operators: + openshift-cert-manager-operator: + display_name: OpenShift Cert Manager + namespace: openshift-cert-manager-operator + channel: stable-v1.18 + source: redhat-operators + wait_timeout_seconds: 900 + leader-worker-set: + display_name: Leader Worker Set + namespace: openshift-lws + channel: stable + source: redhat-operators + wait_timeout_seconds: 900 + nfd: + display_name: Node Feature Discovery + namespace: openshift-nfd + channel: stable + source: redhat-operators + wait_timeout_seconds: 900 + bootstrap_crd: nodefeaturediscoveries.nfd.openshift.io + bootstrap_manifest: manifests/nfd-nodefeaturediscovery.yaml + gpu-operator-certified: + display_name: NVIDIA GPU Operator + namespace: nvidia-gpu-operator + channel: stable + source: certified-operators + wait_timeout_seconds: 1800 + bootstrap_crd: clusterpolicies.nvidia.com + bootstrap_manifest: manifests/gpu-clusterpolicy.yaml + rhods-operator: + display_name: Red Hat OpenShift AI + namespace: redhat-ods-operator + channel: stable-3.x + source: redhat-operators + wait_timeout_seconds: 1800 + +rhoai: + namespace: redhat-ods-applications + datasciencecluster_name: default-dsc + datasciencecluster_template: manifests/datasciencecluster.yaml + wait_timeout_seconds: 1800 + required_crds_before_dsc: + - datascienceclusters.datasciencecluster.opendatahub.io + required_crds_after_dsc: + - llminferenceservices.serving.kserve.io + +gateway: + namespace: openshift-ingress + name: openshift-ai-inference + gateway_class_name: data-science-gateway-class + status_address_name: gateway-external + create_if_missing: true + manifest_template: manifests/gateway.yaml + wait_timeout_seconds: 600 + +inference_service: + name: llm-d + template: manifests/llminferenceservice.yaml + workload_deployment_name_suffix: -kserve + pod_appearance_timeout_seconds: 600 + ready_timeout_seconds: 1800 + delete_timeout_seconds: 900 + +artifacts: + capture_namespace_events: true + +smoke: + job_name: llm-d-smoke + client_image: curlimages/curl:8.11.1 + endpoint_path: /v1/completions + request_retries: 30 + request_retry_delay_seconds: 10 + request_timeout_seconds: 60 diff --git a/projects/llm_d/orchestration/config.d/project.yaml b/projects/llm_d/orchestration/config.d/project.yaml new file mode 100644 index 00000000..f957c25d --- 
/dev/null +++ b/projects/llm_d/orchestration/config.d/project.yaml @@ -0,0 +1,2 @@ +name: llm_d +args: [] diff --git a/projects/llm_d/orchestration/config.d/runtime.yaml b/projects/llm_d/orchestration/config.d/runtime.yaml new file mode 100644 index 00000000..c8715ccb --- /dev/null +++ b/projects/llm_d/orchestration/config.d/runtime.yaml @@ -0,0 +1,8 @@ +default_preset: smoke +allowed_override_keys: + - namespace +selected_preset: smoke +model_key: qwen3-0-6b +scheduler_profile_key: approximate +smoke_request_key: default +benchmark_key: null diff --git a/projects/llm_d/orchestration/config.d/scheduler_profiles.yaml b/projects/llm_d/orchestration/config.d/scheduler_profiles.yaml new file mode 100644 index 00000000..b3bca162 --- /dev/null +++ b/projects/llm_d/orchestration/config.d/scheduler_profiles.yaml @@ -0,0 +1,9 @@ +approximate: + config_path: scheduler_profiles/approximate.yaml + +precise: + config_path: scheduler_profiles/precise.yaml + +# Compatibility alias for earlier llm_d presets. +approximate-prefix-cache: + config_path: scheduler_profiles/approximate.yaml diff --git a/projects/llm_d/orchestration/config.d/workloads.yaml b/projects/llm_d/orchestration/config.d/workloads.yaml new file mode 100644 index 00000000..1ce9bdc6 --- /dev/null +++ b/projects/llm_d/orchestration/config.d/workloads.yaml @@ -0,0 +1,19 @@ +smoke_requests: + default: + prompt: San Francisco is a + max_tokens: 50 + temperature: 0.7 + +benchmarks: + short: + job_name: guidellm-benchmark + image: ghcr.io/vllm-project/guidellm:v0.5.4 + pvc_size: 1Gi + timeout_seconds: 900 + rate: 1 + args: + backend_type: openai_http + rate_type: concurrent + max_seconds: 120 + sample_requests: 20 + data: prompt_tokens=256,output_tokens=128 diff --git a/projects/llm_d/orchestration/config.yaml b/projects/llm_d/orchestration/config.yaml deleted file mode 100644 index e7367e8f..00000000 --- a/projects/llm_d/orchestration/config.yaml +++ /dev/null @@ -1,230 +0,0 @@ -prepare: - skip: false - namespace: - name: llm-d-project - - operators: - skip: false - list: - - name: "Red Hat Connectivity Link" - catalog: redhat-operators - operator: rhcl-operator - namespace: all - enabled: false - - - name: "OpenShift Cert Manager" - catalog: redhat-operators - operator: openshift-cert-manager-operator - namespace: openshift-cert-manager-operator - enabled: true - - - name: "Leader Worker Set" - catalog: redhat-operators - operator: leader-worker-set - namespace: openshift-lws - deploy_cr: true - enabled: true - - - name: "Node Feature Discovery" - catalog: redhat-operators - operator: nfd - namespace: openshift-nfd - deploy_cr: 1 - enabled: true - - - name: "NVIDIA GPU Operator" - catalog: certified-operators - operator: gpu-operator-certified - namespace: nvidia-gpu-operator - deploy_cr: true - enabled: true - - - name: "Grafana Operator" - catalog: community-operators - operator: grafana-operator - namespace: grafana-operator - enabled: true - extra_args: - all_namespaces: true - - cluster: - skip: false - nodes: - auto_scale: false - auto_scale_down_on_exit: false - instance_type: gx3-16x80x1l4 - count: 2 - - rhoai: - skip: false - image: "quay.io/rhoai/rhoai-fbc-fragment" - tag: "rhoai-3.3@sha256:f6e7db613cd040e53da2d47850477a9b914de18979adaaac47e15dc7c76f8a76" - channel: "stable-3.x" - datasciencecluster: - enable: "[kserve]" - extra_settings: '{"spec.components.kserve.rawDeploymentServiceConfig": "Headless"}' - - gateway: - skip: false - name: openshift-ai-inference # NOTE: Should not be changed for the time being - - grafana: - skip: 
false - namespace: grafana - datasources: - - grafana/datasource.yaml - dashboards_dir: grafana/dashboards - - monitoring: - skip: false - namespaces: - - "@prepare.namespace.name" - - gpu: - wait_for_readiness: false - - preload: - skip: false - extra_images: {} - node_selector_key: nvidia.com/gpu.present - node_selector_value: "true" - - pvc: - enabled: true - size: 2000Gi - name: storage - access_mode: ReadWriteOnce - storage_class: null - - model_downloader: - image: ghcr.io/opendatahub-io/rhaii-on-xks/kserve-storage-initializer:e6b5db0@sha256:b305264fe2211be2c6063500c4c11da79e8357af4b34dd8567b0d8e8dea7e1d4 - - cleanup: - skip: false - -models: - facebook-opt-125m: - name: facebook/opt-125m - source: hf://facebook/opt-125m - resources: - cpu: 2 - memory: 8Gi - - llama3-1-8b: - name: RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic - uri: oci://registry.redhat.io/rhelai1/modelcar-llama-3-1-8b-instruct-fp8-dynamic:1.5 - # source: hf://RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic - resources: {} - - llama3-3-70b: - name: RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic - source: hf://RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic - resources: - cpu: 4 - memory: 64Gi - - gpt-oss-120: - name: openai/gpt-oss-120b - source: hf://openai/gpt-oss-120b - resources: - cpu: 4 - memory: 64Gi - - granite4-tiny: - name: RedHatAI/granite-4.0-h-tiny-FP8-dynamic - source: hf://RedHatAI/granite-4.0-h-tiny-FP8-dynamic - resources: {} - -tests: - llmd: - skip: false - skip_prepare: false - flavors: intelligentrouting - namespace: "@prepare.namespace.name" - - inference_service: - skip_deployment: false - name: llm-d - yaml_file: llama-3-1-8b-instruct-fp8.yaml - timeout: 900 - do_simple_test: true - gateway: - name: gateway-external - model: llama3-1-8b - metrics: - manual_capture: true - scheduler_servicemonitor_name: kserve-llm-isvc-scheduler - vllm_podmonitor_name: kserve-llm-isvc-vllm-engine - - # vLLM arguments (always applied) - vllm_args: - - "--disable-uvicorn-access-log" - - "--enable-prefix-caching" - - "--uvicorn-log-level=debug" - - "--trust-remote-code" - - "--disable-log-requests" - - "--max-model-len=40960" # keep in 5th position or uddate the presets - - "--gpu-memory-utilization=0.92" - - kueue: - enabled: false - prefix: "kueue.x-k8s.io/" - labels: - pod-group-name: llmisvc - managed: "true" - annotations: - queue-name: perf-gpu-queue - - # Extra properties to inject into the LLMISVC YAML using dotted-key notation - extra_properties: {} - - benchmarks: - guidellm: - enabled: true - name: guidellm-benchmark - backend_type: openai_http - rate_type: concurrent - max_seconds: 120 - max_requests: null - timeout: 900 - data: prompt_tokens=256,output_tokens=128 - rate: 1 - sample_requests: 20 - - capture_prom: true - capture_prom_uwm: true - dry_mode: false - visualize: true - -export_artifacts: - enabled: false - -matbench: - enabled: true - preset: null - workload: projects.llm-d.visualizations.llmd_inference - config_file: plots.yaml - # directory to plot - lts: - generate: true - opensearch: - export: - enabled: false - enabled_on_replot: false - fail_test_on_fail: true - instance: smoke - index: forge-llm-d-cpt - index_prefix: "" - build_counter_index: "forge-llm-d-builds" # used to generate a unique ID for each build - regression_analyses: - enabled: false - enabled_on_replot: true - upload_lts_on_regression: true - # if the regression analyses fail, mark the test as failed - fail_test_on_regression: true - notification: - enabled: true - title: "llm-d CPT" - download: - mode: prefer_cache - url: 
null diff --git a/projects/llm_d/orchestration/manifests/datasciencecluster.yaml b/projects/llm_d/orchestration/manifests/datasciencecluster.yaml new file mode 100644 index 00000000..fd45316d --- /dev/null +++ b/projects/llm_d/orchestration/manifests/datasciencecluster.yaml @@ -0,0 +1,22 @@ +apiVersion: datasciencecluster.opendatahub.io/v1 +kind: DataScienceCluster +metadata: + name: default-dsc + namespace: redhat-ods-applications +spec: + components: + codeflare: + managementState: Removed + dashboard: + managementState: Removed + datasciencepipelines: + managementState: Removed + kserve: + managementState: Managed + rawDeploymentServiceConfig: Headless + modelmeshserving: + managementState: Removed + ray: + managementState: Removed + workbenches: + managementState: Removed diff --git a/projects/llm_d/orchestration/manifests/gateway.yaml b/projects/llm_d/orchestration/manifests/gateway.yaml new file mode 100644 index 00000000..dff0c398 --- /dev/null +++ b/projects/llm_d/orchestration/manifests/gateway.yaml @@ -0,0 +1,14 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: openshift-ai-inference + namespace: openshift-ingress +spec: + gatewayClassName: data-science-gateway-class + listeners: + - name: http + port: 80 + protocol: HTTP + allowedRoutes: + namespaces: + from: All diff --git a/projects/llm_d/orchestration/manifests/gpu-clusterpolicy.yaml b/projects/llm_d/orchestration/manifests/gpu-clusterpolicy.yaml new file mode 100644 index 00000000..6a9ad7ee --- /dev/null +++ b/projects/llm_d/orchestration/manifests/gpu-clusterpolicy.yaml @@ -0,0 +1,37 @@ +apiVersion: nvidia.com/v1 +kind: ClusterPolicy +metadata: + name: gpu-cluster-policy +spec: + daemonsets: + tolerations: + - effect: NoSchedule + key: nvidia.com/gpu + operator: Exists + updateStrategy: RollingUpdate + dcgm: + enabled: true + dcgmExporter: + enabled: true + devicePlugin: + enabled: true + driver: + enabled: true + kernelModuleType: auto + gfd: + enabled: true + mig: + strategy: single + nodeStatusExporter: + enabled: true + operator: + defaultRuntime: crio + runtimeClass: nvidia + toolkit: + enabled: true + installDir: /usr/local/nvidia + validator: + plugin: + env: + - name: WITH_WORKLOAD + value: "false" diff --git a/projects/llm_d/orchestration/manifests/llminferenceservice.yaml b/projects/llm_d/orchestration/manifests/llminferenceservice.yaml new file mode 100644 index 00000000..cff616f8 --- /dev/null +++ b/projects/llm_d/orchestration/manifests/llminferenceservice.yaml @@ -0,0 +1,96 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: LLMInferenceService +metadata: + name: llm-d + namespace: llm-d + annotations: + security.opendatahub.io/enable-auth: "false" + prometheus.io/path: /metrics + prometheus.io/port: "8000" +spec: + replicas: 1 + model: + uri: hf://Qwen/Qwen3-0.6B + name: Qwen/Qwen3-0.6B + router: + scheduler: + template: + containers: + - name: main + env: + - name: TOKENIZER_CACHE_DIR + value: /tmp/tokenizer-cache + - name: HF_HOME + value: /tmp/tokenizer-cache + - name: TRANSFORMERS_CACHE + value: /tmp/tokenizer-cache + - name: XDG_CACHE_HOME + value: /tmp + args: + - --cert-path + - /var/run/kserve/tls + - --pool-group + - inference.networking.x-k8s.io + - --pool-name + - "{{ ChildName .ObjectMeta.Name `-inference-pool` }}" + - --pool-namespace + - "{{ .ObjectMeta.Namespace }}" + - --zap-encoder + - json + - --grpc-port + - "9002" + - --grpc-health-port + - "9003" + - --secure-serving + - --model-server-metrics-scheme + - https + - --config-text + volumeMounts: + - name: 
tokenizer-cache + mountPath: /tmp/tokenizer-cache + - name: cachi2-cache + mountPath: /cachi2 + volumes: + - name: tokenizer-cache + emptyDir: {} + - name: cachi2-cache + emptyDir: {} + nodeSelector: + nvidia.com/gpu.present: "true" + route: {} + gateway: {} + template: + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + containers: + - name: main + resources: + requests: + cpu: "4" + memory: 16Gi + nvidia.com/gpu: "1" + limits: + cpu: "4" + memory: 16Gi + nvidia.com/gpu: "1" + livenessProbe: + httpGet: + path: /health + port: 8000 + scheme: HTTPS + initialDelaySeconds: 900 + periodSeconds: 60 + timeoutSeconds: 60 + failureThreshold: 1000 + readinessProbe: + failureThreshold: 10000 + httpGet: + path: /health + port: 8000 + scheme: HTTPS + initialDelaySeconds: 60 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 30 diff --git a/projects/llm_d/orchestration/manifests/nfd-nodefeaturediscovery.yaml b/projects/llm_d/orchestration/manifests/nfd-nodefeaturediscovery.yaml new file mode 100644 index 00000000..df19596f --- /dev/null +++ b/projects/llm_d/orchestration/manifests/nfd-nodefeaturediscovery.yaml @@ -0,0 +1,6 @@ +apiVersion: nfd.openshift.io/v1 +kind: NodeFeatureDiscovery +metadata: + name: nfd-instance + namespace: openshift-nfd +spec: {} diff --git a/projects/llm_d/orchestration/prepare_llmd.py b/projects/llm_d/orchestration/prepare_llmd.py deleted file mode 100644 index c28ad8c7..00000000 --- a/projects/llm_d/orchestration/prepare_llmd.py +++ /dev/null @@ -1,16 +0,0 @@ -import logging - -from projects.core.library import config - -logger = logging.getLogger(__name__) - - -def prepare(): - ns = config.project.get_config("prepare.namespace.name") - logger.warning(f"Hello prepare {ns}") - pass - - -def cleanup(): - logger.warning("Hello cleanup") - pass diff --git a/projects/llm_d/orchestration/presets.d/cks.yaml b/projects/llm_d/orchestration/presets.d/cks.yaml deleted file mode 100644 index b4f842dc..00000000 --- a/projects/llm_d/orchestration/presets.d/cks.yaml +++ /dev/null @@ -1,23 +0,0 @@ -extends: [pvc_rwx, llama-70b] - -tests.capture_prom: false -tests.capture_prom_uwm: false -tests.llmd.skip_prepare: true -prepare.namespace.name: kpouget-dev -prepare.preload.node_selector_key: gpu.nvidia.com/class -prepare.preload.node_selector_value: "H200" -tests.llmd.inference_service.extra_properties: - spec.template.affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/hostname - operator: NotIn - values: - - gf48e48 - - gf4334a -prepare.preload.extra_images: - vllm-cuda-rhel9: registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:094db84a1da5e8a575d0c9eade114fa30f4a2061064a338e3e032f3578f8082a - llm-d-inference-scheduler: ghcr.io/opendatahub-io/rhaii-on-xks/llm-d-inference-scheduler:e6b5db0@sha256:43e8b8edc158f31535c8b23d77629f8cde111cc762a8f4ee5f2f884470566211 - guidellm: ghcr.io/vllm-project/guidellm:v0.5.4 diff --git a/projects/llm_d/orchestration/presets.d/presets.yaml b/projects/llm_d/orchestration/presets.d/presets.yaml index 3bd1e3fb..0b3de3a7 100644 --- a/projects/llm_d/orchestration/presets.d/presets.yaml +++ b/projects/llm_d/orchestration/presets.d/presets.yaml @@ -1,9 +1,31 @@ __multiple: true -pvc_rwx: - prepare.pvc.name: storage-rwx - prepare.pvc.access_mode: ReadWriteMany +smoke: + runtime.selected_preset: smoke + runtime.model_key: qwen3-0-6b + runtime.scheduler_profile_key: approximate + runtime.smoke_request_key: default + runtime.benchmark_key: null 
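The `smoke` preset above only sets dotted `runtime.*` keys, and the derived presets that follow reuse it via `extends`. A small sketch of the effective values this layering is expected to produce, assuming the parent's keys are applied before the child's overrides (the dict merge below only illustrates that assumption, it is not the loader's implementation):

```python
# Hedged illustration of preset layering (not the actual preset loader).
smoke = {
    "runtime.selected_preset": "smoke",
    "runtime.model_key": "qwen3-0-6b",
    "runtime.scheduler_profile_key": "approximate",
    "runtime.smoke_request_key": "default",
    "runtime.benchmark_key": None,
}

# smoke-precise extends smoke and only swaps the scheduler profile.
smoke_precise = {
    **smoke,
    "runtime.selected_preset": "smoke-precise",
    "runtime.scheduler_profile_key": "precise",
}

assert smoke_precise["runtime.model_key"] == "qwen3-0-6b"  # inherited from smoke
```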
+smoke-precise: + extends: + - smoke + runtime.selected_preset: smoke-precise + runtime.scheduler_profile_key: precise -llama-70b: - tests.llmd.inference_service.model: llama3-3-70b +smoke-default-scheduler: + extends: + - smoke + runtime.selected_preset: smoke-default-scheduler + runtime.scheduler_profile_key: default + +benchmark-short: + runtime.selected_preset: benchmark-short + runtime.model_key: llama-3-1-8b-instruct-fp8 + runtime.scheduler_profile_key: approximate + runtime.smoke_request_key: default + runtime.benchmark_key: short + +cks: + extends: + - smoke diff --git a/projects/llm_d/orchestration/scheduler_profiles/approximate.yaml b/projects/llm_d/orchestration/scheduler_profiles/approximate.yaml new file mode 100644 index 00000000..e584dcf2 --- /dev/null +++ b/projects/llm_d/orchestration/scheduler_profiles/approximate.yaml @@ -0,0 +1,15 @@ +apiVersion: inference.networking.x-k8s.io/v1alpha1 +kind: EndpointPickerConfig +plugins: + - type: queue-scorer + - type: kv-cache-utilization-scorer + - type: prefix-cache-scorer +schedulingProfiles: + - name: default + plugins: + - pluginRef: queue-scorer + weight: 2 + - pluginRef: kv-cache-utilization-scorer + weight: 2 + - pluginRef: prefix-cache-scorer + weight: 3 diff --git a/projects/llm_d/orchestration/scheduler_profiles/precise.yaml b/projects/llm_d/orchestration/scheduler_profiles/precise.yaml new file mode 100644 index 00000000..707e5e0c --- /dev/null +++ b/projects/llm_d/orchestration/scheduler_profiles/precise.yaml @@ -0,0 +1,26 @@ +apiVersion: inference.networking.x-k8s.io/v1alpha1 +kind: EndpointPickerConfig +plugins: + - type: single-profile-handler + - type: precise-prefix-cache-scorer + parameters: + indexerConfig: + tokenProcessorConfig: + blockSize: 64 + hashSeed: "42" + tokenizersPoolConfig: + hf: + tokenizersCacheDir: /tmp/tokenizers + - type: kv-cache-utilization-scorer + - type: queue-scorer + - type: max-score-picker +schedulingProfiles: + - name: default + plugins: + - pluginRef: precise-prefix-cache-scorer + weight: 3.0 + - pluginRef: kv-cache-utilization-scorer + weight: 2.0 + - pluginRef: queue-scorer + weight: 2.0 + - pluginRef: max-score-picker diff --git a/projects/llm_d/orchestration/test_llmd.py b/projects/llm_d/orchestration/test_llmd.py deleted file mode 100644 index 8290ee63..00000000 --- a/projects/llm_d/orchestration/test_llmd.py +++ /dev/null @@ -1,29 +0,0 @@ -import logging -import pathlib - -from projects.core.library import config, env, run -from projects.llm_d.toolbox.capture_isvc_state.main import run as capture_isvc_state - -logger = logging.getLogger(__name__) - - -def init(): - env.init() - run.init() - config.init(pathlib.Path(__file__).parent) - - -@config.requires( - ns="prepare.namespace.name", - name="tests.llmd.flavors", -) -def test(_cfg): - logger.warning(f"Hello test {_cfg.ns}/{_cfg.name}") - - # two alternatives to query the configuration: - # @config.requires(dict) or config.project.get_config("") - # and we will define something similar for the secrets - - config.project.get_config("tests.llmd.flavors") - - capture_isvc_state(_cfg.name, namespace=_cfg.ns) diff --git a/projects/llm_d/runtime/llmd_runtime.py b/projects/llm_d/runtime/llmd_runtime.py new file mode 100644 index 00000000..53d4662b --- /dev/null +++ b/projects/llm_d/runtime/llmd_runtime.py @@ -0,0 +1,637 @@ +from __future__ import annotations + +import json +import logging +import re +import shlex +import subprocess +import time +from collections.abc import Iterable +from pathlib import Path +from typing import Any + 
+import yaml + +from projects.llm_d.runtime.runtime_config import ( + CONFIG_DIR, + ORCHESTRATION_DIR, + ModelCacheSpec, + ResolvedConfig, + apply_requested_preset, + derive_namespace, + ensure_artifact_directories, + init, + load_fournos_config, + load_run_configuration, + load_yaml, + normalize_gpu_count, + parse_overrides, + resolve_model_cache, + slugify_identifier, + truncate_k8s_name, + version_tuple, + write_json, + write_text, + write_yaml, +) +from projects.llm_d.runtime.runtime_manifests import ( + load_manifest_template, + render_datasciencecluster, + render_gateway, + render_guidellm_copy_pod, + render_guidellm_job, + render_guidellm_pvc, + render_inference_service, + render_model_cache_pvc, + render_smoke_request_job, +) + +logger = logging.getLogger(__name__) + +__all__ = [ + "CONFIG_DIR", + "ORCHESTRATION_DIR", + "CommandError", + "ModelCacheSpec", + "ResolvedConfig", + "annotate_model_cache_pvc", + "apply_manifest", + "apply_requested_preset", + "condition_status", + "derive_namespace", + "desired_subscription", + "ensure_artifact_directories", + "ensure_namespace", + "ensure_operator_group", + "ensure_subscription", + "init", + "job_pod_names", + "load_fournos_config", + "load_manifest_template", + "load_run_configuration", + "load_yaml", + "model_cache_pvc_ready", + "normalize_gpu_count", + "oc", + "oc_get_json", + "operator_spec_by_package", + "parse_overrides", + "pvc_access_mode_matches", + "render_datasciencecluster", + "render_gateway", + "render_guidellm_copy_pod", + "render_guidellm_job", + "render_guidellm_pvc", + "render_inference_service", + "render_model_cache_job", + "render_model_cache_pvc", + "render_smoke_request_job", + "resource_exists", + "resolve_default_serviceaccount_image_pull_secret", + "resolve_model_cache", + "run_command", + "slugify_identifier", + "subscription_spec_matches", + "truncate_k8s_name", + "version_tuple", + "wait_for_crd", + "wait_for_job_completion", + "wait_for_namespace_deleted", + "wait_for_operator_csv", + "wait_for_pvc_bound", + "wait_until", + "write_json", + "write_text", + "write_yaml", +] + + +class CommandError(RuntimeError): + """Raised when an external command exits unsuccessfully.""" + + +def run_command( + args: Iterable[str], + *, + check: bool = True, + capture_output: bool = True, + input_text: str | None = None, + timeout_seconds: float | None = 300, +) -> subprocess.CompletedProcess[str]: + cmd = [str(arg) for arg in args] + logger.info("run: %s", " ".join(shlex.quote(arg) for arg in cmd)) + try: + result = subprocess.run( + cmd, + check=False, + text=True, + capture_output=capture_output, + input=input_text, + timeout=timeout_seconds, + ) + except subprocess.TimeoutExpired: + logger.error( + "Command timed out after %ss: %s", + timeout_seconds, + " ".join(shlex.quote(arg) for arg in cmd), + ) + raise + + if capture_output: + if result.stdout: + logger.info("stdout:\n%s", result.stdout.rstrip()) + if result.stderr: + logger.info("stderr:\n%s", result.stderr.rstrip()) + + if check and result.returncode != 0: + raise CommandError( + f"Command failed with exit code {result.returncode}: " + f"{' '.join(shlex.quote(arg) for arg in cmd)}" + ) + + return result + + +def oc( + *args: str, + check: bool = True, + capture_output: bool = True, + input_text: str | None = None, + timeout_seconds: float | None = 300, +) -> subprocess.CompletedProcess[str]: + return run_command( + ["oc", *args], + check=check, + capture_output=capture_output, + input_text=input_text, + timeout_seconds=timeout_seconds, + ) + + +def 
apply_manifest(artifact_path: Any, manifest: dict[str, Any]) -> None: + write_yaml(artifact_path, manifest) + oc("apply", "-f", str(artifact_path)) + + +def oc_get_json( + kind: str, + *, + name: str | None = None, + namespace: str | None = None, + selector: str | None = None, + ignore_not_found: bool = False, +) -> dict[str, Any] | None: + args = ["get", kind] + if name: + args.append(name) + if namespace: + args.extend(["-n", namespace]) + if selector: + args.extend(["-l", selector]) + args.extend(["-o", "json"]) + + result = oc(*args, check=not ignore_not_found, capture_output=True) + if result.returncode != 0: + if ignore_not_found and _is_oc_not_found_error(result.stderr): + return None + raise CommandError( + f"oc {' '.join(shlex.quote(arg) for arg in args)} failed with exit code " + f"{result.returncode}: {result.stderr.strip()}" + ) + if not result.stdout: + raise CommandError(f"oc {' '.join(shlex.quote(arg) for arg in args)} returned no output") + return json.loads(result.stdout) + + +def resource_exists(kind: str, name: str, *, namespace: str | None = None) -> bool: + return ( + oc_get_json( + kind, + name=name, + namespace=namespace, + ignore_not_found=True, + ) + is not None + ) + + +def _is_oc_not_found_error(stderr: str | None) -> bool: + if not stderr: + return False + + normalized = stderr.lower() + if "error from server (notfound)" in normalized: + return True + if "no resources found" in normalized: + return True + + return bool(re.search(r"\bnot found\b", normalized)) + + +def wait_until( + description: str, + *, + timeout_seconds: int, + interval_seconds: int, + predicate, +) -> Any: + deadline = time.time() + timeout_seconds + last_error: Exception | None = None + + while time.time() < deadline: + try: + value = predicate() + if value: + return value + last_error = None + except Exception as exc: # pragma: no cover - exercised in integration paths + if isinstance(exc, RuntimeError): + raise + last_error = exc + logger.info("waiting for %s: %s", description, exc) + time.sleep(interval_seconds) + + if last_error: + raise RuntimeError(f"Timed out waiting for {description}: {last_error}") from last_error + raise RuntimeError(f"Timed out waiting for {description}") + + +def wait_for_namespace_deleted(namespace: str, timeout_seconds: int) -> None: + wait_until( + f"namespace/{namespace} deletion", + timeout_seconds=timeout_seconds, + interval_seconds=10, + predicate=lambda: not resource_exists("namespace", namespace), + ) + + +def wait_for_crd(crd_name: str, timeout_seconds: int) -> None: + wait_until( + f"crd/{crd_name}", + timeout_seconds=timeout_seconds, + interval_seconds=10, + predicate=lambda: resource_exists("crd", crd_name), + ) + + +def wait_for_operator_csv(package: str, namespace: str, timeout_seconds: int) -> dict[str, Any]: + selector = f"operators.coreos.com/{package}.{namespace}" + + def _csv_ready() -> dict[str, Any] | None: + data = oc_get_json("csv", namespace=namespace, selector=selector, ignore_not_found=True) + if not data: + return None + items = data.get("items", []) + if not items: + return None + csv = items[0] + if csv.get("status", {}).get("phase") == "Succeeded": + return csv + return None + + return wait_until( + f"{package} CSV in {namespace}", + timeout_seconds=timeout_seconds, + interval_seconds=15, + predicate=_csv_ready, + ) + + +def ensure_namespace(namespace: str, *, labels: dict[str, str] | None = None) -> None: + if not resource_exists("namespace", namespace): + oc("create", "namespace", namespace) + + if labels: + oc("label", 
"namespace", namespace, "--overwrite", *[f"{k}={v}" for k, v in labels.items()]) + + +def ensure_operator_group(namespace: str, package: str) -> None: + data = oc_get_json("operatorgroup", namespace=namespace, ignore_not_found=True) + if data and data.get("items"): + for item in data["items"]: + targets = item.get("spec", {}).get("targetNamespaces") or [namespace] + if namespace in targets: + return + raise RuntimeError( + f"Existing OperatorGroup objects in {namespace} do not target {namespace}" + ) + + operator_group = { + "apiVersion": "operators.coreos.com/v1", + "kind": "OperatorGroup", + "metadata": {"name": package, "namespace": namespace}, + "spec": {"targetNamespaces": [namespace]}, + } + oc("apply", "-f", "-", input_text=yaml.safe_dump(operator_group, sort_keys=False)) + + +def ensure_subscription(operator_spec: dict[str, Any]) -> None: + namespace = operator_spec["namespace"] + package = operator_spec["package"] + + ensure_namespace(namespace) + ensure_operator_group(namespace, package) + + subscription = desired_subscription(operator_spec) + current = oc_get_json( + "subscription.operators.coreos.com", + name=package, + namespace=namespace, + ignore_not_found=True, + ) + if current and not subscription_spec_matches(current.get("spec", {}), subscription["spec"]): + logger.info("Reconciling subscription drift for %s in %s", package, namespace) + + oc("apply", "-f", "-", input_text=yaml.safe_dump(subscription, sort_keys=False)) + + def _subscription_reconciled() -> dict[str, Any] | None: + payload = oc_get_json( + "subscription.operators.coreos.com", + name=package, + namespace=namespace, + ) + if subscription_spec_matches(payload.get("spec", {}), subscription["spec"]): + return payload + return None + + wait_until( + f"subscription/{package} reconciliation in {namespace}", + timeout_seconds=60, + interval_seconds=5, + predicate=_subscription_reconciled, + ) + + +def desired_subscription(operator_spec: dict[str, Any]) -> dict[str, Any]: + namespace = operator_spec["namespace"] + package = operator_spec["package"] + return { + "apiVersion": "operators.coreos.com/v1alpha1", + "kind": "Subscription", + "metadata": {"name": package, "namespace": namespace}, + "spec": { + "channel": operator_spec["channel"], + "installPlanApproval": "Automatic", + "name": package, + "source": operator_spec["source"], + "sourceNamespace": "openshift-marketplace", + }, + } + + +def subscription_spec_matches(actual: dict[str, Any], expected: dict[str, Any]) -> bool: + keys = ("channel", "installPlanApproval", "name", "source", "sourceNamespace") + return all(actual.get(key) == expected.get(key) for key in keys) + + +def operator_spec_by_package(platform: dict[str, Any], package: str) -> dict[str, Any]: + operators = platform["operators"] + if isinstance(operators, dict): + if package in operators: + return {"package": package, **operators[package]} + raise KeyError(f"Unknown operator package in llm_d platform config: {package}") + + for operator_spec in operators: + if operator_spec["package"] == package: + return operator_spec + raise KeyError(f"Unknown operator package in llm_d platform config: {package}") + + +def condition_status(resource: dict[str, Any], condition_type: str) -> str | None: + for condition in resource.get("status", {}).get("conditions", []): + if condition.get("type") == condition_type: + return condition.get("status") + return None + + +def pvc_access_mode_matches(actual_modes: list[str], expected_mode: str) -> bool: + return expected_mode in actual_modes + + +def 
wait_for_pvc_bound(pvc_name: str, namespace: str, *, timeout_seconds: int) -> dict[str, Any]: + def _pvc_bound() -> dict[str, Any] | None: + payload = oc_get_json( + "persistentvolumeclaim", + name=pvc_name, + namespace=namespace, + ignore_not_found=True, + ) + if not payload: + return None + if payload.get("status", {}).get("phase") == "Bound": + return payload + return None + + return wait_until( + f"persistentvolumeclaim/{pvc_name} bound in {namespace}", + timeout_seconds=timeout_seconds, + interval_seconds=5, + predicate=_pvc_bound, + ) + + +def wait_for_job_completion( + job_name: str, namespace: str, *, timeout_seconds: int, interval_seconds: int = 10 +) -> dict[str, Any]: + def _job_completed() -> dict[str, Any] | None: + payload = oc_get_json( + "job", + name=job_name, + namespace=namespace, + ignore_not_found=True, + ) + if not payload: + return None + status = payload.get("status", {}) + if status.get("succeeded", 0): + return payload + failed_count = status.get("failed", 0) + for condition in status.get("conditions", []): + if condition.get("type") == "Failed" and condition.get("status") == "True": + raise RuntimeError( + f"job/{job_name} failed: {condition.get('reason') or 'unknown reason'}" + ) + if failed_count: + raise RuntimeError(f"job/{job_name} failed after {failed_count} attempt(s)") + return None + + return wait_until( + f"job/{job_name} completion in {namespace}", + timeout_seconds=timeout_seconds, + interval_seconds=interval_seconds, + predicate=_job_completed, + ) + + +def job_pod_names(job_name: str, namespace: str) -> list[str]: + payload = oc_get_json( + "pods", + namespace=namespace, + selector=f"job-name={job_name}", + ignore_not_found=True, + ) + if not payload: + return [] + return [item["metadata"]["name"] for item in payload.get("items", [])] + + +def resolve_default_serviceaccount_image_pull_secret(namespace: str) -> str | None: + payload = oc_get_json( + "serviceaccount", name="default", namespace=namespace, ignore_not_found=True + ) + if not payload: + return None + + for item in payload.get("imagePullSecrets", []): + name = item.get("name") + if name: + return name + return None + + +def load_runtime_script(name: str) -> str: + script_path = Path(__file__).resolve().parent / "scripts" / name + return script_path.read_text(encoding="utf-8") + + +def render_model_cache_job(config: ResolvedConfig, spec: ModelCacheSpec) -> dict[str, Any]: + common_env = [ + {"name": "MODEL_SOURCE", "value": spec.source_uri}, + {"name": "MODEL_TARGET_DIR", "value": f"/cache/{spec.model_path}"}, + {"name": "MARKER_FILE", "value": spec.marker_path}, + {"name": "CACHE_KEY", "value": spec.cache_key}, + ] + volumes: list[dict[str, Any]] = [ + {"name": "cache", "persistentVolumeClaim": {"claimName": spec.pvc_name}} + ] + + if spec.source_scheme == "hf": + command = load_runtime_script("download_hf_model.sh") + volume_mounts = [{"name": "cache", "mountPath": "/cache"}] + if spec.hf_token_secret_name: + volumes.append( + {"name": "hf-token", "secret": {"secretName": spec.hf_token_secret_name}} + ) + volume_mounts.append( + { + "name": "hf-token", + "mountPath": "/var/run/forge/hf-token", + "readOnly": True, + } + ) + common_env.append( + { + "name": "HF_TOKEN_FILE", + "value": f"/var/run/forge/hf-token/{spec.hf_token_secret_key}", + } + ) + + container = { + "name": "hf-model-downloader", + "image": config.model_cache["hf"]["downloader_image"], + "imagePullPolicy": config.model_cache["download"]["pod_image_pull_policy"], + "command": ["/bin/bash", "-ceu", command], + "env": 
common_env, + "volumeMounts": volume_mounts, + } + elif spec.source_scheme == "oci": + registry_auth_secret_name = ( + spec.oci_registry_auth_secret_name + or resolve_default_serviceaccount_image_pull_secret(spec.namespace) + ) + command = load_runtime_script("extract_oci_model.sh") + volume_mounts = [{"name": "cache", "mountPath": "/cache"}] + common_env.append({"name": "OCI_IMAGE_PATH", "value": spec.oci_image_path or "/"}) + if registry_auth_secret_name: + volumes.append( + {"name": "registry-auth", "secret": {"secretName": registry_auth_secret_name}} + ) + volume_mounts.append( + { + "name": "registry-auth", + "mountPath": "/var/run/forge/registry-auth", + "readOnly": True, + } + ) + common_env.append( + { + "name": "REGISTRY_AUTH_FILE", + "value": f"/var/run/forge/registry-auth/{spec.oci_registry_auth_secret_key}", + } + ) + + container = { + "name": "oci-model-extractor", + "image": config.model_cache["oci"]["extractor_image"], + "imagePullPolicy": config.model_cache["download"]["pod_image_pull_policy"], + "command": ["/bin/bash", "-ceu", command], + "env": common_env, + "volumeMounts": volume_mounts, + } + else: # pragma: no cover - guarded by resolve_model_cache + raise ValueError(f"Unsupported model cache source scheme: {spec.source_scheme}") + + return { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "name": spec.download_job_name, + "namespace": spec.namespace, + "labels": { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + }, + }, + "spec": { + "backoffLimit": 0, + "activeDeadlineSeconds": config.model_cache["download"]["wait_timeout_seconds"], + "template": { + "metadata": { + "labels": { + "job-name": spec.download_job_name, + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + } + }, + "spec": { + "serviceAccountName": "default", + "restartPolicy": "Never", + "containers": [container], + "volumes": volumes, + }, + }, + }, + } + + +def model_cache_pvc_ready(spec: ModelCacheSpec) -> bool: + payload = oc_get_json( + "persistentvolumeclaim", + name=spec.pvc_name, + namespace=spec.namespace, + ignore_not_found=True, + ) + if not payload: + return False + + annotations = payload.get("metadata", {}).get("annotations", {}) + return ( + annotations.get("forge.openshift.io/model-cache-ready") == "true" + and annotations.get("forge.openshift.io/model-cache-key") == spec.cache_key + and annotations.get("forge.openshift.io/model-source-uri") == spec.source_uri + ) + + +def annotate_model_cache_pvc(spec: ModelCacheSpec) -> None: + oc( + "annotate", + "persistentvolumeclaim", + spec.pvc_name, + "-n", + spec.namespace, + "--overwrite", + "forge.openshift.io/model-cache-ready=true", + f"forge.openshift.io/model-cache-key={spec.cache_key}", + f"forge.openshift.io/model-source-uri={spec.source_uri}", + f"forge.openshift.io/model-uri={spec.model_uri}", + ) diff --git a/projects/llm_d/runtime/phase_inputs.py b/projects/llm_d/runtime/phase_inputs.py new file mode 100644 index 00000000..5b985737 --- /dev/null +++ b/projects/llm_d/runtime/phase_inputs.py @@ -0,0 +1,207 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from projects.llm_d.runtime.runtime_config import ResolvedConfig, load_yaml, write_yaml + + +@dataclass(frozen=True) +class CleanupInputs: + artifact_dir: Path + namespace: str + platform: dict[str, Any] + benchmark: dict[str, Any] | None + + +@dataclass(frozen=True) +class PrepareModelCacheInputs: + artifact_dir: Path + 
preset_name: str + namespace: str + namespace_is_managed: bool + model_key: str + model: dict[str, Any] + model_cache: dict[str, Any] + + +@dataclass(frozen=True) +class PrepareInputs: + artifact_dir: Path + config_dir: Path + preset_name: str + namespace: str + namespace_is_managed: bool + platform: dict[str, Any] + model_key: str + model: dict[str, Any] + model_cache: dict[str, Any] + benchmark: dict[str, Any] | None + + +@dataclass(frozen=True) +class TestInputs: + artifact_dir: Path + config_dir: Path + preset_name: str + namespace: str + platform: dict[str, Any] + model_key: str + model: dict[str, Any] + scheduler_profile_key: str + scheduler_profile: dict[str, Any] | None + model_cache: dict[str, Any] + smoke_request: dict[str, Any] + benchmark: dict[str, Any] | None + + +def write_cleanup_inputs(config: ResolvedConfig) -> Path: + path = config.artifact_dir / "_meta" / "cleanup.inputs.yaml" + write_yaml( + path, + { + "artifact_dir": str(config.artifact_dir), + "namespace": config.namespace, + "platform": config.platform, + "benchmark": config.benchmark, + }, + ) + return path + + +def write_prepare_model_cache_inputs(config: ResolvedConfig) -> Path: + path = config.artifact_dir / "_meta" / "prepare_model_cache.inputs.yaml" + write_yaml( + path, + { + "artifact_dir": str(config.artifact_dir), + "preset_name": config.preset_name, + "namespace": config.namespace, + "namespace_is_managed": config.namespace_is_managed, + "model_key": config.model_key, + "model": config.model, + "model_cache": config.model_cache, + }, + ) + return path + + +def write_prepare_inputs(config: ResolvedConfig) -> Path: + path = config.artifact_dir / "_meta" / "prepare.inputs.yaml" + write_yaml( + path, + { + "artifact_dir": str(config.artifact_dir), + "config_dir": str(config.config_dir), + "preset_name": config.preset_name, + "namespace": config.namespace, + "namespace_is_managed": config.namespace_is_managed, + "platform": config.platform, + "model_key": config.model_key, + "model": config.model, + "model_cache": config.model_cache, + "benchmark": config.benchmark, + }, + ) + return path + + +def write_test_inputs(config: ResolvedConfig) -> Path: + path = config.artifact_dir / "_meta" / "test.inputs.yaml" + write_yaml( + path, + { + "artifact_dir": str(config.artifact_dir), + "config_dir": str(config.config_dir), + "preset_name": config.preset_name, + "namespace": config.namespace, + "platform": config.platform, + "model_key": config.model_key, + "model": config.model, + "scheduler_profile_key": config.scheduler_profile_key, + "scheduler_profile": config.scheduler_profile, + "model_cache": config.model_cache, + "smoke_request": config.smoke_request, + "benchmark": config.benchmark, + }, + ) + return path + + +def load_cleanup_inputs(path: str | Path) -> CleanupInputs: + payload = load_yaml(Path(path)) + return CleanupInputs( + artifact_dir=Path(payload["artifact_dir"]), + namespace=payload["namespace"], + platform=payload["platform"], + benchmark=payload["benchmark"], + ) + + +def load_prepare_model_cache_inputs(path: str | Path) -> PrepareModelCacheInputs: + payload = load_yaml(Path(path)) + return PrepareModelCacheInputs( + artifact_dir=Path(payload["artifact_dir"]), + preset_name=payload["preset_name"], + namespace=payload["namespace"], + namespace_is_managed=payload["namespace_is_managed"], + model_key=payload["model_key"], + model=payload["model"], + model_cache=payload["model_cache"], + ) + + +def load_prepare_inputs(path: str | Path) -> PrepareInputs: + payload = load_yaml(Path(path)) + return 
PrepareInputs( + artifact_dir=Path(payload["artifact_dir"]), + config_dir=Path(payload["config_dir"]), + preset_name=payload["preset_name"], + namespace=payload["namespace"], + namespace_is_managed=payload["namespace_is_managed"], + platform=payload["platform"], + model_key=payload["model_key"], + model=payload["model"], + model_cache=payload["model_cache"], + benchmark=payload["benchmark"], + ) + + +def load_test_inputs(path: str | Path) -> TestInputs: + payload = load_yaml(Path(path)) + return TestInputs( + artifact_dir=Path(payload["artifact_dir"]), + config_dir=Path(payload["config_dir"]), + preset_name=payload["preset_name"], + namespace=payload["namespace"], + platform=payload["platform"], + model_key=payload["model_key"], + model=payload["model"], + scheduler_profile_key=payload["scheduler_profile_key"], + scheduler_profile=payload["scheduler_profile"], + model_cache=payload["model_cache"], + smoke_request=payload["smoke_request"], + benchmark=payload["benchmark"], + ) + + +def cleanup_inputs_from_prepare(inputs: PrepareInputs) -> CleanupInputs: + return CleanupInputs( + artifact_dir=inputs.artifact_dir, + namespace=inputs.namespace, + platform=inputs.platform, + benchmark=inputs.benchmark, + ) + + +def prepare_model_cache_inputs_from_prepare(inputs: PrepareInputs) -> PrepareModelCacheInputs: + return PrepareModelCacheInputs( + artifact_dir=inputs.artifact_dir, + preset_name=inputs.preset_name, + namespace=inputs.namespace, + namespace_is_managed=inputs.namespace_is_managed, + model_key=inputs.model_key, + model=inputs.model, + model_cache=inputs.model_cache, + ) diff --git a/projects/llm_d/runtime/runtime_config.py b/projects/llm_d/runtime/runtime_config.py new file mode 100644 index 00000000..a16e1711 --- /dev/null +++ b/projects/llm_d/runtime/runtime_config.py @@ -0,0 +1,368 @@ +from __future__ import annotations + +import copy +import hashlib +import json +import logging +import os +import re +from collections.abc import Iterable +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import yaml + +from projects.core.library import config, env, run + +logger = logging.getLogger(__name__) +RUNTIME_DIR = Path(__file__).resolve().parent +PROJECT_DIR = RUNTIME_DIR.parent +ORCHESTRATION_DIR = PROJECT_DIR / "orchestration" +CONFIG_DIR = ORCHESTRATION_DIR + + +@dataclass(frozen=True) +class ResolvedConfig: + artifact_dir: Path + project_root: Path + config_dir: Path + preset_name: str + preset_alias: str | None + job_name: str + namespace: str + namespace_is_managed: bool + gpu_count: int | None + platform: dict[str, Any] + model_key: str + model: dict[str, Any] + scheduler_profile_key: str + scheduler_profile: dict[str, Any] | None + model_cache: dict[str, Any] + smoke_request: dict[str, Any] + benchmark: dict[str, Any] | None + fournos_config: dict[str, Any] + overrides: dict[str, Any] + + @property + def manifests_dir(self) -> Path: + return self.config_dir / "manifests" + + +@dataclass(frozen=True) +class ModelCacheSpec: + source_uri: str + source_scheme: str + cache_key: str + namespace: str + pvc_name: str + pvc_size: str + access_mode: str + storage_class_name: str | None + model_path: str + model_uri: str + marker_filename: str + download_job_name: str + hf_token_secret_name: str | None + hf_token_secret_key: str | None + oci_image_path: str | None + oci_registry_auth_secret_name: str | None + oci_registry_auth_secret_key: str | None + + @property + def marker_path(self) -> str: + return f"/cache/{self.model_path}/{self.marker_filename}" + + 
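+# Typical entrypoint flow (illustrative sketch; the concrete preset, overrides and
+# namespace are supplied by the orchestration layer / fournos_config.yaml, so the
+# literal values below are assumptions, not fixed behaviour):
+#
+#     config = load_run_configuration(cwd=Path.cwd(), artifact_dir=Path("artifacts"))
+#     cache_spec = resolve_model_cache(config)  # None when caching is disabled or the
+#                                               # model uri already points at a PVC
+#
+# With the default smoke preset (exercised by the runtime tests in this change), an
+# "hf://Qwen/Qwen3-0.6B" source hashes to a 10-character cache key and resolves to a
+# PVC named like "llm-d-model-qwen3-0-6b-<cache_key>", a served model uri of
+# "pvc://<pvc_name>/model", and a marker_path of "/cache/model/<marker_filename>".
+
+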
+def init() -> Path: + if not logging.getLogger().handlers: + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + + env.init() + run.init() + ensure_artifact_directories(env.ARTIFACT_DIR) + return env.ARTIFACT_DIR + + +def ensure_artifact_directories(artifact_dir: Path) -> None: + for relative in ("src", "artifacts", "artifacts/results"): + (artifact_dir / relative).mkdir(parents=True, exist_ok=True) + + +def load_run_configuration( + *, + cwd: Path | None = None, + artifact_dir: Path | None = None, + requested_preset: str | None = None, + raw_overrides: str | None = None, + job_name: str | None = None, +) -> ResolvedConfig: + cwd = cwd or Path.cwd() + if artifact_dir is not None: + os.environ["ARTIFACT_DIR"] = str(artifact_dir) + artifact_dir = init() + config.reload(ORCHESTRATION_DIR) + + platform_data = normalize_platform_config(copy.deepcopy(config.project.get_config("platform"))) + model_cache = copy.deepcopy(config.project.get_config("model_cache")) + fournos_config = load_fournos_config(cwd) + overrides = parse_overrides( + raw_overrides or "", + allowed_keys=config.project.get_config("runtime.allowed_override_keys", []), + ) + + requested_preset = ( + requested_preset + or fournos_config.get("preset") + or config.project.get_config("runtime.default_preset") + ) + apply_requested_preset(requested_preset) + + preset_name = config.project.get_config("runtime.selected_preset") + preset_alias = requested_preset if requested_preset != preset_name else None + + model_name = config.project.get_config("runtime.model_key") + model = copy.deepcopy(config.project.get_config(f"models.{model_name}")) + + scheduler_profile_key = config.project.get_config("runtime.scheduler_profile_key") + scheduler_profile = None + if scheduler_profile_key != "default": + scheduler_profile = copy.deepcopy( + config.project.get_config(f"scheduler_profiles.{scheduler_profile_key}") + ) + + smoke_request_name = config.project.get_config("runtime.smoke_request_key") + smoke_request = copy.deepcopy( + config.project.get_config(f"workloads.smoke_requests.{smoke_request_name}") + ) + + benchmark_name = config.project.get_config("runtime.benchmark_key", None) + benchmark = None + if benchmark_name: + benchmark = copy.deepcopy( + config.project.get_config(f"workloads.benchmarks.{benchmark_name}") + ) + + job_name = job_name or fournos_config.get("job-name") + if not job_name: + job_name = f"local-{preset_name}" + + namespace_override = overrides.get("namespace") or fournos_config.get("namespace") + namespace_config = platform_data["cluster"]["namespace"] + default_namespace = namespace_config.get("name") + namespace = ( + namespace_override + or default_namespace + or derive_namespace( + job_name, + namespace_config["prefix"], + namespace_config["max_length"], + ) + ) + + gpu_count = normalize_gpu_count(fournos_config.get("gpu-count")) + + return ResolvedConfig( + artifact_dir=Path(artifact_dir), + project_root=env.FORGE_HOME, + config_dir=ORCHESTRATION_DIR, + preset_name=preset_name, + preset_alias=preset_alias, + job_name=job_name, + namespace=namespace, + namespace_is_managed=namespace_override is None and default_namespace is None, + gpu_count=gpu_count, + platform=platform_data, + model_key=model_name, + model=model, + scheduler_profile_key=scheduler_profile_key, + scheduler_profile=scheduler_profile, + model_cache=model_cache, + smoke_request=smoke_request, + benchmark=benchmark, + fournos_config=fournos_config, + overrides=overrides, + ) + + +def normalize_platform_config(platform_data: 
dict[str, Any]) -> dict[str, Any]: + cluster = platform_data["cluster"] + if "namespace" not in cluster: + cluster["namespace"] = { + "name": cluster.pop("namespace_name", None), + "prefix": cluster.pop("namespace_prefix"), + "max_length": cluster.pop("namespace_max_length"), + } + + operators = platform_data["operators"] + if isinstance(operators, list): + platform_data["operators"] = { + operator_spec["package"]: { + key: value for key, value in operator_spec.items() if key != "package" + } + for operator_spec in operators + } + + return platform_data + + +def apply_requested_preset(requested_preset: str) -> None: + if not config.project.get_preset(requested_preset): + raise ValueError(f"Unknown llm_d preset: {requested_preset}") + + config.project.apply_preset(requested_preset) + + +def load_fournos_config(cwd: Path) -> dict[str, Any]: + config_path = cwd / "fournos_config.yaml" + if not config_path.exists(): + return {} + + data = load_yaml(config_path) + if data is None: + return {} + if not isinstance(data, dict): + raise ValueError(f"Unexpected FOURNOS config type in {config_path}: {type(data)}") + return data + + +def parse_overrides(raw: str, *, allowed_keys: Iterable[str]) -> dict[str, Any]: + if not raw or raw.strip() in {"", "null", "{}"}: + return {} + + try: + data = json.loads(raw) + except json.JSONDecodeError as exc: + raise ValueError(f"FORGE_CONFIG_OVERRIDES is not valid JSON: {exc}") from exc + + if not isinstance(data, dict): + raise ValueError("FORGE_CONFIG_OVERRIDES must decode to a JSON object") + + allowed_keys = frozenset(allowed_keys) + unsupported = sorted(set(data) - allowed_keys) + if unsupported: + raise ValueError( + "Unsupported llm_d override keys: " + f"{', '.join(unsupported)}. Allowed keys: {', '.join(sorted(allowed_keys))}" + ) + + return data + + +def normalize_gpu_count(value: Any) -> int | None: + if value in (None, ""): + return None + try: + return int(value) + except (TypeError, ValueError): + logger.warning("Ignoring invalid gpu-count value: %s", value) + return None + + +def derive_namespace(job_name: str, prefix: str, max_length: int) -> str: + slug = re.sub(r"[^a-z0-9-]+", "-", job_name.lower()) + slug = re.sub(r"-{2,}", "-", slug).strip("-") + if not slug: + slug = "run" + + if slug.startswith(f"{prefix}-"): + namespace = slug + else: + namespace = f"{prefix}-{slug}" + + namespace = namespace[:max_length].rstrip("-") + if not namespace: + raise ValueError(f"Could not derive a valid namespace from job name: {job_name}") + return namespace + + +def slugify_identifier(value: str, *, max_length: int = 63) -> str: + slug = re.sub(r"[^a-z0-9-]+", "-", value.lower()) + slug = re.sub(r"-{2,}", "-", slug).strip("-") + return slug[:max_length].rstrip("-") or "item" + + +def truncate_k8s_name(value: str, *, max_length: int = 63) -> str: + return value[:max_length].rstrip("-") + + +def version_tuple(value: str) -> tuple[int, ...]: + numbers = re.findall(r"\d+", value) + return tuple(int(number) for number in numbers[:3]) + + +def resolve_model_cache(config: ResolvedConfig) -> ModelCacheSpec | None: + if not config.model_cache.get("enabled", False): + return None + + source_uri = config.model["uri"] + if source_uri.startswith(("pvc://", "pvc+hf://")): + return None + + if source_uri.startswith("hf://"): + source_scheme = "hf" + elif source_uri.startswith("oci://"): + source_scheme = "oci" + else: + raise ValueError(f"Unsupported model cache source URI for {config.model_key}: {source_uri}") + + model_cache_overrides = config.model.get("cache", {}) + 
pvc_defaults = config.model_cache["pvc"] + pvc_prefix = config.model_cache["pvc"]["name_prefix"] + cache_key = hashlib.sha256(source_uri.encode("utf-8")).hexdigest()[:10] + pvc_name = truncate_k8s_name( + f"{pvc_prefix}-{slugify_identifier(config.model_key, max_length=32)}-{cache_key}" + ) + model_path = pvc_defaults["model_directory_name"] + + return ModelCacheSpec( + source_uri=source_uri, + source_scheme=source_scheme, + cache_key=cache_key, + namespace=config.namespace, + pvc_name=pvc_name, + pvc_size=model_cache_overrides.get("pvc_size", pvc_defaults["size"]), + access_mode=model_cache_overrides.get("access_mode", pvc_defaults["access_mode"]), + storage_class_name=model_cache_overrides.get( + "storage_class_name", pvc_defaults.get("storage_class_name") + ), + model_path=model_path, + model_uri=f"pvc://{pvc_name}/{model_path}", + marker_filename=config.model_cache["marker_filename"], + download_job_name=truncate_k8s_name(f"{pvc_name}-download"), + hf_token_secret_name=model_cache_overrides.get( + "hf_token_secret_name", config.model_cache["hf"].get("token_secret_name") + ), + hf_token_secret_key=config.model_cache["hf"].get("token_secret_key"), + oci_image_path=model_cache_overrides.get( + "oci_image_path", config.model_cache["oci"].get("image_path") + ), + oci_registry_auth_secret_name=model_cache_overrides.get( + "oci_registry_auth_secret_name", + config.model_cache["oci"].get("registry_auth_secret_name"), + ), + oci_registry_auth_secret_key=config.model_cache["oci"].get("registry_auth_secret_key"), + ) + + +def load_yaml(path: Path) -> Any: + with path.open(encoding="utf-8") as handle: + return yaml.safe_load(handle) + + +def write_yaml(path: Path, payload: Any) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as handle: + yaml.safe_dump(payload, handle, sort_keys=False) + + +def write_json(path: Path, payload: Any) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + handle.write("\n") + + +def write_text(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") diff --git a/projects/llm_d/runtime/runtime_manifests.py b/projects/llm_d/runtime/runtime_manifests.py new file mode 100644 index 00000000..bc5fdca8 --- /dev/null +++ b/projects/llm_d/runtime/runtime_manifests.py @@ -0,0 +1,327 @@ +from __future__ import annotations + +import copy +import json +from typing import Any + +from projects.llm_d.runtime.runtime_config import ( + ModelCacheSpec, + ResolvedConfig, + load_yaml, + resolve_model_cache, +) + + +def load_manifest_template(config: ResolvedConfig, relative_path: str) -> dict[str, Any]: + return load_yaml(config.config_dir / relative_path) + + +def render_datasciencecluster(config: ResolvedConfig) -> dict[str, Any]: + template_path = config.config_dir / config.platform["rhoai"]["datasciencecluster_template"] + manifest = load_yaml(template_path) + manifest["metadata"]["name"] = config.platform["rhoai"]["datasciencecluster_name"] + manifest["metadata"]["namespace"] = config.platform["rhoai"]["namespace"] + return manifest + + +def render_gateway(config: ResolvedConfig) -> dict[str, Any]: + template_path = config.config_dir / config.platform["gateway"]["manifest_template"] + manifest = load_yaml(template_path) + manifest["metadata"]["name"] = config.platform["gateway"]["name"] + manifest["metadata"]["namespace"] = 
config.platform["gateway"]["namespace"] + manifest["spec"]["gatewayClassName"] = config.platform["gateway"]["gateway_class_name"] + return manifest + + +def render_model_cache_pvc(spec: ModelCacheSpec) -> dict[str, Any]: + manifest: dict[str, Any] = { + "apiVersion": "v1", + "kind": "PersistentVolumeClaim", + "metadata": { + "name": spec.pvc_name, + "namespace": spec.namespace, + "labels": { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + "forge.openshift.io/model-cache": "true", + "forge.openshift.io/preserve": "true", + }, + "annotations": { + "forge.openshift.io/model-cache-key": spec.cache_key, + "forge.openshift.io/model-source-uri": spec.source_uri, + }, + }, + "spec": { + "accessModes": [spec.access_mode], + "resources": {"requests": {"storage": spec.pvc_size}}, + }, + } + if spec.storage_class_name: + manifest["spec"]["storageClassName"] = spec.storage_class_name + return manifest + + +def render_inference_service(config: ResolvedConfig) -> dict[str, Any]: + template_path = config.config_dir / config.platform["inference_service"]["template"] + manifest = load_yaml(template_path) + + name = config.platform["inference_service"]["name"] + manifest["metadata"]["name"] = name + manifest["metadata"]["namespace"] = config.namespace + manifest["metadata"].setdefault("labels", {}) + manifest["metadata"]["labels"].update( + { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + } + ) + + cache_spec = resolve_model_cache(config) + manifest["spec"]["model"]["uri"] = cache_spec.model_uri if cache_spec else config.model["uri"] + manifest["spec"]["model"]["name"] = config.model["served_model_name"] + manifest["spec"]["template"]["containers"][0]["resources"] = copy.deepcopy( + config.model["resources"] + ) + + if config.scheduler_profile_key == "default": + manifest["spec"]["router"]["scheduler"] = {} + return manifest + + if config.scheduler_profile is None: + raise ValueError(f"Missing scheduler profile config for {config.scheduler_profile_key}") + + scheduler_profile_path = config.config_dir / config.scheduler_profile["config_path"] + scheduler_profile_config = scheduler_profile_path.read_text(encoding="utf-8") + router_args = manifest["spec"]["router"]["scheduler"]["template"]["containers"][0]["args"] + if not router_args or router_args[-1] != "--config-text": + raise ValueError("Expected llm-d router args to end with --config-text") + router_args.append(scheduler_profile_config) + + return manifest + + +def render_smoke_request_job( + config: ResolvedConfig, endpoint_url: str, payload: dict[str, Any] +) -> dict[str, Any]: + smoke = config.platform["smoke"] + command = """ +set -eu +attempt=1 +while [ "${attempt}" -le "${REQUEST_RETRIES}" ]; do + if curl -k -sSf --max-time "${REQUEST_TIMEOUT_SECONDS}" \ + "${ENDPOINT_URL}${ENDPOINT_PATH}" \ + -H "Content-Type: application/json" \ + -d "${REQUEST_PAYLOAD}" \ + -o /tmp/smoke-response.json \ + 2>/tmp/smoke-error.log; then + cat /tmp/smoke-response.json + exit 0 + fi + attempt=$((attempt + 1)) + sleep "${REQUEST_RETRY_DELAY_SECONDS}" +done +cat /tmp/smoke-error.log >&2 || true +exit 1 +""" + + return { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "name": smoke["job_name"], + "namespace": config.namespace, + "labels": { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + "forge.openshift.io/component": "smoke", + }, + }, + "spec": { + "backoffLimit": 0, + "activeDeadlineSeconds": ( + smoke["request_retries"] + * 
(smoke["request_timeout_seconds"] + smoke["request_retry_delay_seconds"]) + ), + "template": { + "metadata": { + "labels": { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + "forge.openshift.io/component": "smoke", + } + }, + "spec": { + "restartPolicy": "Never", + "containers": [ + { + "name": "smoke", + "image": smoke["client_image"], + "command": ["/bin/sh", "-ceu", command], + "env": [ + {"name": "ENDPOINT_URL", "value": endpoint_url}, + {"name": "ENDPOINT_PATH", "value": smoke["endpoint_path"]}, + {"name": "REQUEST_PAYLOAD", "value": json.dumps(payload)}, + {"name": "REQUEST_RETRIES", "value": str(smoke["request_retries"])}, + { + "name": "REQUEST_RETRY_DELAY_SECONDS", + "value": str(smoke["request_retry_delay_seconds"]), + }, + { + "name": "REQUEST_TIMEOUT_SECONDS", + "value": str(smoke["request_timeout_seconds"]), + }, + ], + } + ], + }, + }, + }, + } + + +def render_guidellm_pvc(config: ResolvedConfig) -> dict[str, Any]: + if not config.benchmark: + raise ValueError("Benchmark configuration is not enabled for this preset") + + return { + "apiVersion": "v1", + "kind": "PersistentVolumeClaim", + "metadata": { + "name": config.benchmark["job_name"], + "namespace": config.namespace, + "labels": { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + }, + }, + "spec": { + "accessModes": ["ReadWriteOnce"], + "resources": {"requests": {"storage": config.benchmark["pvc_size"]}}, + }, + } + + +def render_guidellm_job(config: ResolvedConfig, endpoint_url: str) -> dict[str, Any]: + if not config.benchmark: + raise ValueError("Benchmark configuration is not enabled for this preset") + + args = [ + "benchmark", + "run", + f"--target={endpoint_url}", + f"--rate={config.benchmark['rate']}", + ] + for key, value in config.benchmark["args"].items(): + if value is None: + continue + args.append(f"--{key.replace('_', '-')}={value}") + args.append("--outputs=json") + + return { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "name": config.benchmark["job_name"], + "namespace": config.namespace, + "labels": { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + }, + }, + "spec": { + "backoffLimit": 0, + "template": { + "metadata": { + "labels": { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + } + }, + "spec": { + "serviceAccountName": "default", + "restartPolicy": "Never", + "containers": [ + { + "name": "guidellm", + "image": config.benchmark["image"], + "command": ["/opt/app-root/bin/guidellm"], + "args": args, + "env": [{"name": "USER", "value": "guidellm"}], + "volumeMounts": [ + {"name": "home", "mountPath": "/home/guidellm"}, + {"name": "results", "mountPath": "/results"}, + ], + } + ], + "volumes": [ + {"name": "home", "emptyDir": {}}, + { + "name": "results", + "persistentVolumeClaim": {"claimName": config.benchmark["job_name"]}, + }, + ], + }, + }, + }, + } + + +def render_guidellm_copy_pod( + config: ResolvedConfig, node_name: str | None = None +) -> dict[str, Any]: + if not config.benchmark: + raise ValueError("Benchmark configuration is not enabled for this preset") + + pod = { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "name": f"{config.benchmark['job_name']}-copy", + "namespace": config.namespace, + "labels": { + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + }, + }, + "spec": { + "restartPolicy": "Never", + "initContainers": [ + { + "name": "permission-fixer", + "image": 
config.benchmark["image"], + "command": [ + "/bin/sh", + "-c", + "chmod 755 /results && chown -R 1001:1001 /results || true", + ], + "securityContext": { + "runAsUser": 0, + "allowPrivilegeEscalation": True, + }, + "volumeMounts": [{"name": "results", "mountPath": "/results"}], + } + ], + "containers": [ + { + "name": "copy-helper", + "image": config.benchmark["image"], + "command": ["/bin/sleep", "300"], + "securityContext": { + "runAsUser": 1001, + "runAsNonRoot": True, + "allowPrivilegeEscalation": False, + }, + "volumeMounts": [{"name": "results", "mountPath": "/results"}], + } + ], + "volumes": [ + { + "name": "results", + "persistentVolumeClaim": {"claimName": config.benchmark["job_name"]}, + } + ], + }, + } + if node_name: + pod["spec"]["nodeName"] = node_name + return pod diff --git a/projects/llm_d/runtime/scripts/download_hf_model.sh b/projects/llm_d/runtime/scripts/download_hf_model.sh new file mode 100644 index 00000000..9623d2aa --- /dev/null +++ b/projects/llm_d/runtime/scripts/download_hf_model.sh @@ -0,0 +1,28 @@ +set -euo pipefail + +mkdir -p "${MODEL_TARGET_DIR}" +rm -rf "${MODEL_TARGET_DIR}"/* + +python -m pip install --quiet --no-cache-dir 'huggingface_hub[hf_xet]' +python - <<'PY' +import os + +from huggingface_hub import snapshot_download + +token = None +token_file = os.environ.get("HF_TOKEN_FILE") +if token_file and os.path.exists(token_file): + with open(token_file, encoding="utf-8") as handle: + token = handle.read().strip() or None + +snapshot_download( + repo_id=os.environ["MODEL_SOURCE"][5:], + local_dir=os.environ["MODEL_TARGET_DIR"], + local_dir_use_symlinks=False, + token=token, +) +PY + +cat > "${MARKER_FILE}" < "${MARKER_FILE}" < None: + namespace = llmd_runtime.derive_namespace("llm-d-nightly-smoke", "llm-d", 63) + assert namespace == "llm-d-nightly-smoke" + + +def test_parse_overrides_rejects_unknown_keys() -> None: + with pytest.raises(ValueError, match="Unsupported llm_d override keys"): + llmd_runtime.parse_overrides('{"model":"other"}', allowed_keys=("namespace",)) + + +def test_load_run_configuration_resolves_alias( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + fournos_config = tmp_path / "fournos_config.yaml" + fournos_config.write_text( + "preset: cks\njob-name: llm-d-e2e\n", + encoding="utf-8", + ) + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + assert config.preset_name == "smoke" + assert config.preset_alias == "cks" + assert config.model["served_model_name"] == "Qwen/Qwen3-0.6B" + assert config.namespace == "forge-llm-d" + assert config.namespace_is_managed is False + + +def test_load_run_configuration_consolidates_config_d( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + consolidated = llmd_runtime.load_yaml(artifact_dir / "config.yaml") + + assert "platform" in consolidated + assert "model_cache" in consolidated + assert "models" in consolidated + assert "runtime" in consolidated + assert "scheduler_profiles" in consolidated + assert "workloads" in consolidated + assert consolidated["project"]["name"] == "llm_d" + assert consolidated["runtime"]["default_preset"] == "smoke" + assert consolidated["platform"]["cluster"]["namespace"]["name"] == "forge-llm-d" + assert isinstance(consolidated["platform"]["operators"], dict) + + +def 
test_namespace_override_is_not_managed(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration( + cwd=tmp_path, + artifact_dir=artifact_dir, + raw_overrides='{"namespace":"custom-ns"}', + ) + + assert config.namespace == "custom-ns" + assert config.namespace_is_managed is False + + +def test_default_namespace_comes_from_project_config( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + (tmp_path / "fournos_config.yaml").write_text( + "job-name: llm-d-nightly\n", + encoding="utf-8", + ) + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + assert config.namespace == "forge-llm-d" + assert config.namespace_is_managed is False + assert config.platform["cluster"]["namespace"]["prefix"] == "llm-d" + assert "rhods-operator" in config.platform["operators"] + + +def test_load_run_configuration_ignores_runtime_env_vars( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", '{"namespace":"ignored-ns"}') + monkeypatch.setenv("FORGE_PRESET", "benchmark-short") + monkeypatch.setenv("FORGE_JOB_NAME", "ignored-job") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + assert config.preset_name == "smoke" + assert config.namespace == "forge-llm-d" + assert config.job_name == "local-smoke" + + +def test_write_prepare_inputs_round_trip(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + path = phase_inputs.write_prepare_inputs(config) + payload = llmd_runtime.load_yaml(path) + loaded = phase_inputs.load_prepare_inputs(path) + + assert set(payload) == { + "artifact_dir", + "config_dir", + "preset_name", + "namespace", + "namespace_is_managed", + "platform", + "model_key", + "model", + "model_cache", + "benchmark", + } + assert loaded.artifact_dir == config.artifact_dir + assert loaded.config_dir == config.config_dir + assert loaded.namespace == config.namespace + assert loaded.platform == config.platform + assert loaded.model == config.model + assert loaded.model_cache == config.model_cache + assert loaded.benchmark == config.benchmark + + +def test_write_test_inputs_round_trip(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + path = phase_inputs.write_test_inputs(config) + payload = llmd_runtime.load_yaml(path) + loaded = phase_inputs.load_test_inputs(path) + + assert set(payload) == { + "artifact_dir", + "config_dir", + "preset_name", + "namespace", + "platform", + "model_key", + "model", + "scheduler_profile_key", + "scheduler_profile", + "model_cache", + "smoke_request", + "benchmark", + } + assert loaded.namespace == config.namespace + assert loaded.scheduler_profile_key == config.scheduler_profile_key + assert loaded.smoke_request == config.smoke_request + assert loaded.benchmark == config.benchmark + + +@pytest.mark.parametrize("orchestration", [llmd_ci, llmd_cli]) +def 
test_orchestration_prepare_writes_inputs_and_invokes_toolbox( + orchestration, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + captured: dict[str, str] = {} + + monkeypatch.setattr( + orchestration.llmd_runtime, + "load_run_configuration", + lambda **_kwargs: config, + ) + monkeypatch.setattr( + orchestration, + "prepare_toolbox_run", + lambda *, inputs_file: captured.setdefault("inputs_file", inputs_file) or 17, + ) + + result = orchestration.run_prepare_phase() + loaded = phase_inputs.load_prepare_inputs(captured["inputs_file"]) + + assert result == captured["inputs_file"] + assert loaded.namespace == config.namespace + + +@pytest.mark.parametrize("orchestration", [llmd_ci, llmd_cli]) +def test_orchestration_test_writes_inputs_and_invokes_toolbox( + orchestration, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + captured: dict[str, str] = {} + + monkeypatch.setattr( + orchestration.llmd_runtime, + "load_run_configuration", + lambda **_kwargs: config, + ) + monkeypatch.setattr( + orchestration, + "test_toolbox_run", + lambda *, inputs_file: captured.setdefault("inputs_file", inputs_file) or 23, + ) + + result = orchestration.run_test_phase() + loaded = phase_inputs.load_test_inputs(captured["inputs_file"]) + + assert result == captured["inputs_file"] + assert loaded.namespace == config.namespace + assert loaded.model == config.model + + +@pytest.mark.parametrize("orchestration", [llmd_ci, llmd_cli]) +def test_orchestration_cleanup_writes_inputs_and_invokes_toolbox( + orchestration, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + captured: dict[str, str] = {} + + monkeypatch.setattr( + orchestration.llmd_runtime, + "load_run_configuration", + lambda **_kwargs: config, + ) + monkeypatch.setattr( + orchestration, + "cleanup_toolbox_run", + lambda *, inputs_file: captured.setdefault("inputs_file", inputs_file) or 29, + ) + + result = orchestration.run_cleanup_phase() + loaded = phase_inputs.load_cleanup_inputs(captured["inputs_file"]) + + assert result == captured["inputs_file"] + assert loaded.namespace == config.namespace + assert loaded.platform == config.platform + + +@pytest.mark.parametrize("orchestration", [llmd_ci, llmd_cli]) +def test_orchestration_load_runtime_configuration_reads_env( + orchestration, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_PRESET", "smoke-precise") + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", '{"namespace":"custom-ns"}') + monkeypatch.setenv("FORGE_JOB_NAME", "job-from-env") + captured: dict[str, str | None] = {} + sentinel = object() + + def fake_load_run_configuration(**kwargs): + captured.update(kwargs) + return sentinel + + monkeypatch.setattr( + orchestration.llmd_runtime, "load_run_configuration", fake_load_run_configuration + ) + + result = orchestration.load_runtime_configuration() + + assert result is sentinel + assert captured == { + "requested_preset": "smoke-precise", + "raw_overrides": '{"namespace":"custom-ns"}', + "job_name": "job-from-env", + } + + +def test_render_inference_service_injects_model_and_scheduler_profile( + tmp_path: 
Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + manifest = llmd_runtime.render_inference_service(config) + cache_spec = llmd_runtime.resolve_model_cache(config) + + assert manifest["metadata"]["name"] == "llm-d" + assert manifest["metadata"]["namespace"] == config.namespace + assert manifest["spec"]["model"]["name"] == "Qwen/Qwen3-0.6B" + assert manifest["spec"]["model"]["uri"] == cache_spec.model_uri + assert manifest["spec"]["model"]["name"] == config.model["served_model_name"] + assert config.scheduler_profile_key == "approximate" + router_args = manifest["spec"]["router"]["scheduler"]["template"]["containers"][0]["args"] + assert router_args[-2] == "--config-text" + assert "EndpointPickerConfig" in router_args[-1] + assert "prefix-cache-scorer" in router_args[-1] + + +def test_render_inference_service_supports_precise_scheduler_profile( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + (tmp_path / "fournos_config.yaml").write_text( + "preset: smoke-precise\njob-name: llm-d-precise\n", + encoding="utf-8", + ) + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + manifest = llmd_runtime.render_inference_service(config) + + assert config.scheduler_profile_key == "precise" + router_args = manifest["spec"]["router"]["scheduler"]["template"]["containers"][0]["args"] + assert router_args[-2] == "--config-text" + assert "precise-prefix-cache-scorer" in router_args[-1] + assert "tokenizersCacheDir" in router_args[-1] + + +def test_render_inference_service_supports_default_scheduler( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + (tmp_path / "fournos_config.yaml").write_text( + "preset: smoke-default-scheduler\njob-name: llm-d-default\n", + encoding="utf-8", + ) + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + manifest = llmd_runtime.render_inference_service(config) + + assert config.scheduler_profile_key == "default" + assert config.scheduler_profile is None + assert manifest["spec"]["router"]["scheduler"] == {} + + +def test_resolve_model_cache_for_hf_model(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + cache_spec = llmd_runtime.resolve_model_cache(config) + + assert cache_spec is not None + assert cache_spec.source_scheme == "hf" + assert cache_spec.pvc_name.startswith("llm-d-model-qwen3-0-6b-") + assert cache_spec.model_uri == f"pvc://{cache_spec.pvc_name}/model" + assert cache_spec.pvc_size == "10Gi" + assert cache_spec.access_mode == "ReadWriteOnce" + + +def test_render_model_cache_job_for_hf_model( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + cache_spec = llmd_runtime.resolve_model_cache(config) + manifest = 
llmd_runtime.render_model_cache_job(config, cache_spec) + + container = manifest["spec"]["template"]["spec"]["containers"][0] + assert container["name"] == "hf-model-downloader" + assert container["image"] == "registry.access.redhat.com/ubi9/python-311" + assert any( + env["name"] == "MODEL_SOURCE" and env["value"] == "hf://Qwen/Qwen3-0.6B" + for env in container["env"] + ) + assert "huggingface_hub" in container["command"][-1] + + +def test_render_model_cache_job_for_oci_model_uses_registry_auth_secret( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + (tmp_path / "fournos_config.yaml").write_text( + "preset: benchmark-short\njob-name: llm-d-benchmark\n", + encoding="utf-8", + ) + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + monkeypatch.setattr( + llmd_runtime, + "resolve_default_serviceaccount_image_pull_secret", + lambda namespace: "pull-secret", + ) + cache_spec = llmd_runtime.resolve_model_cache(config) + manifest = llmd_runtime.render_model_cache_job(config, cache_spec) + + container = manifest["spec"]["template"]["spec"]["containers"][0] + volume_names = {volume["name"] for volume in manifest["spec"]["template"]["spec"]["volumes"]} + + assert cache_spec.source_scheme == "oci" + assert container["name"] == "oci-model-extractor" + assert container["image"] == "registry.redhat.io/openshift4/ose-cli:v4.19" + assert any(env["name"] == "OCI_IMAGE_PATH" and env["value"] == "/" for env in container["env"]) + assert "registry-auth" in volume_names + assert "oc image extract" in container["command"][-1] + + +def test_render_guidellm_job_uses_target_and_rate( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + (tmp_path / "fournos_config.yaml").write_text( + "preset: benchmark-short\njob-name: llm-d-benchmark\n", + encoding="utf-8", + ) + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + manifest = llmd_runtime.render_guidellm_job(config, "https://example.test") + + container = manifest["spec"]["template"]["spec"]["containers"][0] + assert container["image"] == "ghcr.io/vllm-project/guidellm:v0.5.4" + assert "--target=https://example.test" in container["args"] + assert "--rate=1" in container["args"] + + +def test_render_smoke_request_job_uses_curl_helper( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + payload = {"model": "Qwen/Qwen3-0.6B", "prompt": "test"} + manifest = llmd_runtime.render_smoke_request_job(config, "https://example.test", payload) + + container = manifest["spec"]["template"]["spec"]["containers"][0] + env = {item["name"]: item["value"] for item in container["env"]} + + assert manifest["kind"] == "Job" + assert manifest["metadata"]["name"] == "llm-d-smoke" + assert container["image"] == "curlimages/curl:8.11.1" + assert env["ENDPOINT_URL"] == "https://example.test" + assert env["ENDPOINT_PATH"] == "/v1/completions" + assert env["REQUEST_PAYLOAD"] == '{"model": "Qwen/Qwen3-0.6B", "prompt": "test"}' + + +def test_prepare_model_cache_skips_ready_pvc( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + 
monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + calls: list[str] = [] + + monkeypatch.setattr( + prepare_model_cache_toolbox, + "ensure_model_cache_pvc", + lambda _config, _cache_spec: calls.append("ensure-pvc"), + ) + monkeypatch.setattr(llmd_runtime, "model_cache_pvc_ready", lambda _cache_spec: True) + monkeypatch.setattr( + prepare_model_cache_toolbox, + "capture_model_cache_state", + lambda _config, _cache_spec: calls.append("capture"), + ) + monkeypatch.setattr( + prepare_model_cache_toolbox, + "run_model_cache_download_job", + lambda _config, _cache_spec: calls.append("download"), + ) + + prepare_model_cache_toolbox.run_prepare_model_cache(config) + + assert calls == ["ensure-pvc", "capture"] + + +def test_cleanup_deletes_leftovers_but_not_namespace_or_preserved_pvcs( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + shell_calls: list[str] = [] + + def fake_resource_exists(kind: str, name: str, namespace: str | None = None) -> bool: + if kind == "namespace": + return True + return False + + monkeypatch.setattr(llmd_runtime, "resource_exists", fake_resource_exists) + monkeypatch.setattr( + cleanup_toolbox.shell, + "run", + lambda command, **kwargs: shell_calls.append(command), + ) + monkeypatch.setattr(llmd_runtime, "wait_until", lambda *args, **kwargs: True) + monkeypatch.setattr(cleanup_toolbox, "_llm_d_pods_gone", lambda *_args: True) + + cleanup_toolbox.delete_run_leftovers(config) + + assert f"oc delete namespace {config.namespace} --ignore-not-found=true" not in shell_calls + assert ( + f"oc delete pvc -n {config.namespace} " + '-l "forge.openshift.io/project=llm_d,forge.openshift.io/preserve!=true" ' + "--ignore-not-found=true" + ) in shell_calls + + +def test_prepare_gpu_operator_skips_existing_clusterpolicy( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + calls: list[str] = [] + + monkeypatch.setattr( + prepare_toolbox, + "ensure_operator_subscription", + lambda operator_spec: calls.append(f"subscription:{operator_spec['package']}"), + ) + monkeypatch.setattr( + llmd_runtime, + "wait_for_crd", + lambda crd_name, *, timeout_seconds: calls.append(f"crd:{crd_name}"), + ) + monkeypatch.setattr( + llmd_runtime, + "load_manifest_template", + lambda _config, _path: { + "apiVersion": "nvidia.com/v1", + "kind": "ClusterPolicy", + "metadata": {"name": "gpu-cluster-policy"}, + "spec": {}, + }, + ) + monkeypatch.setattr(llmd_runtime, "resource_exists", lambda kind, name: True) + + def fail_apply(*_: object, **__: object) -> None: + raise AssertionError("existing ClusterPolicy must not be reapplied") + + monkeypatch.setattr(llmd_runtime, "apply_manifest", fail_apply) + monkeypatch.setattr( + llmd_runtime, + "oc_get_json", + lambda kind, name: {"status": {"state": "ready"}}, + ) + + prepare_toolbox.prepare_gpu_operator(config) + + assert calls == [ + "subscription:gpu-operator-certified", + "crd:clusterpolicies.nvidia.com", + ] + + +def test_prepare_gpu_operator_bootstraps_missing_clusterpolicy( + 
tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + applied: list[Path] = [] + manifest = { + "apiVersion": "nvidia.com/v1", + "kind": "ClusterPolicy", + "metadata": {"name": "gpu-cluster-policy"}, + "spec": {}, + } + + monkeypatch.setattr(prepare_toolbox, "ensure_operator_subscription", lambda _: None) + monkeypatch.setattr(llmd_runtime, "wait_for_crd", lambda *_, **__: None) + monkeypatch.setattr(llmd_runtime, "load_manifest_template", lambda _config, _path: manifest) + monkeypatch.setattr(llmd_runtime, "resource_exists", lambda kind, name: False) + monkeypatch.setattr( + llmd_runtime, + "apply_manifest", + lambda artifact_path, _manifest: applied.append(artifact_path), + ) + monkeypatch.setattr( + llmd_runtime, + "oc_get_json", + lambda kind, name: {"status": {"state": "ready"}}, + ) + + prepare_toolbox.prepare_gpu_operator(config) + + assert applied == [artifact_dir / "src" / "gpu-clusterpolicy.yaml"] + + +def test_prepare_nfd_skips_existing_nodefeaturediscovery( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + calls: list[str] = [] + manifest = { + "apiVersion": "nfd.openshift.io/v1", + "kind": "NodeFeatureDiscovery", + "metadata": {"name": "nfd-instance", "namespace": "openshift-nfd"}, + } + + monkeypatch.setattr( + prepare_toolbox, + "ensure_operator_subscription", + lambda operator_spec: calls.append(f"subscription:{operator_spec['package']}"), + ) + monkeypatch.setattr( + llmd_runtime, + "wait_for_crd", + lambda crd_name, *, timeout_seconds: calls.append(f"crd:{crd_name}"), + ) + monkeypatch.setattr(llmd_runtime, "load_manifest_template", lambda _config, _path: manifest) + monkeypatch.setattr(llmd_runtime, "resource_exists", lambda *args, **kwargs: True) + monkeypatch.setattr( + llmd_runtime, + "wait_until", + lambda *args, **kwargs: calls.append("wait-nfd"), + ) + monkeypatch.setattr( + prepare_toolbox, + "wait_for_nfd_gpu_labels", + lambda _config, *, timeout_seconds: calls.append("wait-labels"), + ) + + def fail_apply(*_: object, **__: object) -> None: + raise AssertionError("existing NodeFeatureDiscovery must not be reapplied") + + monkeypatch.setattr(llmd_runtime, "apply_manifest", fail_apply) + + prepare_toolbox.prepare_nfd(config) + + assert calls == [ + "subscription:nfd", + "crd:nodefeaturediscoveries.nfd.openshift.io", + "wait-nfd", + "wait-labels", + ] + + +def test_gpu_clusterpolicy_manifest_has_required_default_sections() -> None: + manifest = llmd_runtime.load_yaml( + llmd_runtime.CONFIG_DIR / "manifests" / "gpu-clusterpolicy.yaml" + ) + + assert manifest["kind"] == "ClusterPolicy" + assert manifest["metadata"]["name"] == "gpu-cluster-policy" + assert { + "daemonsets", + "dcgm", + "dcgmExporter", + "devicePlugin", + "driver", + "gfd", + "nodeStatusExporter", + "operator", + "toolkit", + } <= set(manifest["spec"]) + + +def test_resolve_endpoint_url_requires_gateway_address( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + + 
def fake_oc_get_json(kind: str, **_: object) -> dict[str, object]: + assert kind == "llminferenceservice" + return {"status": {"addresses": [{"name": "other", "url": "https://wrong"}]}} + + monkeypatch.setattr(llmd_runtime, "oc_get_json", fake_oc_get_json) + + with pytest.raises(RuntimeError, match="Gateway address"): + test_toolbox.resolve_endpoint_url(config) + + +def test_run_smoke_request_uses_helper_job(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("FORGE_CONFIG_OVERRIDES", "{}") + artifact_dir = tmp_path / "artifacts" + artifact_dir.mkdir() + config = llmd_runtime.load_run_configuration(cwd=tmp_path, artifact_dir=artifact_dir) + oc_calls: list[tuple[str, ...]] = [] + applied: list[Path] = [] + + def fake_oc(*args, **kwargs): + oc_calls.append(tuple(args)) + if args[:2] == ("logs", "job/llm-d-smoke"): + return subprocess.CompletedProcess( + args, + 0, + stdout='{"choices":[{"text":"ok"}]}\n', + stderr="", + ) + return subprocess.CompletedProcess(args, 0, stdout="", stderr="") + + monkeypatch.setattr(llmd_runtime, "oc", fake_oc) + monkeypatch.setattr(llmd_runtime, "resource_exists", lambda *args, **kwargs: False) + monkeypatch.setattr(llmd_runtime, "wait_until", lambda *args, **kwargs: True) + monkeypatch.setattr(llmd_runtime, "wait_for_job_completion", lambda *args, **kwargs: True) + monkeypatch.setattr( + llmd_runtime, + "apply_manifest", + lambda artifact_path, _manifest: applied.append(artifact_path), + ) + monkeypatch.setattr(test_toolbox, "capture_smoke_state", lambda _config: None) + + response = test_toolbox.run_smoke_request(config, "https://example.test") + + assert response["choices"][0]["text"] == "ok" + assert applied == [artifact_dir / "src" / "smoke-job.yaml"] + assert not any(call and call[0] == "exec" for call in oc_calls) + + +def test_wait_until_reraises_runtime_error() -> None: + with pytest.raises(RuntimeError, match="terminal failure"): + llmd_runtime.wait_until( + "test condition", + timeout_seconds=1, + interval_seconds=0, + predicate=lambda: (_ for _ in ()).throw(RuntimeError("terminal failure")), + ) + + +def test_oc_forwards_timeout_to_run_command(monkeypatch: pytest.MonkeyPatch) -> None: + captured: dict[str, object] = {} + + def fake_run_command(args, **kwargs): + captured["args"] = list(args) + captured["kwargs"] = kwargs + return subprocess.CompletedProcess(args, 0, stdout="", stderr="") + + monkeypatch.setattr(llmd_runtime, "run_command", fake_run_command) + + llmd_runtime.oc("get", "pods", timeout_seconds=42) + + assert captured["args"] == ["oc", "get", "pods"] + assert captured["kwargs"]["timeout_seconds"] == 42 + + +def test_oc_get_json_returns_none_only_for_not_found( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + llmd_runtime, + "oc", + lambda *args, **kwargs: subprocess.CompletedProcess( + args, + 1, + stdout="", + stderr='Error from server (NotFound): llminferenceservices.serving.kserve.io "llm-d" not found', + ), + ) + + payload = llmd_runtime.oc_get_json( + "llminferenceservice", + name="llm-d", + namespace="forge-llm-d", + ignore_not_found=True, + ) + + assert payload is None + + +def test_oc_get_json_raises_for_non_not_found_errors( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + llmd_runtime, + "oc", + lambda *args, **kwargs: subprocess.CompletedProcess( + args, + 1, + stdout="", + stderr='Error from server (Forbidden): pods is forbidden: User "alice" cannot list resource "pods"', + ), + ) + + with pytest.raises(llmd_runtime.CommandError, match="Forbidden"): + 
llmd_runtime.oc_get_json("pods", namespace="forge-llm-d", ignore_not_found=True) + + +def test_resource_exists_propagates_non_not_found_errors( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + llmd_runtime, + "oc_get_json", + lambda *args, **kwargs: (_ for _ in ()).throw(llmd_runtime.CommandError("boom")), + ) + + with pytest.raises(llmd_runtime.CommandError, match="boom"): + llmd_runtime.resource_exists("namespace", "forge-llm-d") diff --git a/projects/llm_d/toolbox/capture_isvc_state/main.py b/projects/llm_d/toolbox/capture_llmisvc_state/main.py old mode 100755 new mode 100644 similarity index 86% rename from projects/llm_d/toolbox/capture_isvc_state/main.py rename to projects/llm_d/toolbox/capture_llmisvc_state/main.py index 78448e15..1e4577c5 --- a/projects/llm_d/toolbox/capture_isvc_state/main.py +++ b/projects/llm_d/toolbox/capture_llmisvc_state/main.py @@ -2,15 +2,10 @@ """ LLMInferenceService state capture using task-based DSL -Replaces llmd_capture_isvc_state Ansible role +Replaces llmd_capture_llmisvc_state Ansible role """ -from projects.core.dsl import ( - execute_tasks, - shell, - task, - toolbox, -) +from projects.core.dsl import execute_tasks, shell, task, toolbox def run(llmisvc_name: str, *, namespace: str = ""): @@ -22,7 +17,6 @@ def run(llmisvc_name: str, *, namespace: str = ""): namespace: Namespace of the LLMInferenceService (empty string auto-detects current namespace) """ - # Execute all registered tasks in order, respecting conditions return execute_tasks(locals()) @@ -157,7 +151,6 @@ def capture_podmonitors(args, context): @task def capture_pod_logs(args, context): """Capture logs from LLMInferenceService pods""" - # Get list of pod names result = shell.run( f'oc get pods -l "app.kubernetes.io/name={args.llmisvc_name}" -n {context.target_namespace} -o jsonpath="{{.items[*].metadata.name}}"', check=False, @@ -170,19 +163,16 @@ def capture_pod_logs(args, context): log_file = args.artifact_dir / "artifacts/llminferenceservice.pods.logs" - # Capture logs for each pod - with open(log_file, "w") as f: # Start with empty file + with open(log_file, "w") as handle: for pod_name in pod_names: - f.write(f"=== Logs for pod: {pod_name} ===\n") - - # Get logs for this pod + handle.write(f"=== Logs for pod: {pod_name} ===\n") log_result = shell.run( f"oc logs {pod_name} -n {context.target_namespace} --all-containers=true", check=False, log_stdout=False, ) - f.write(log_result.stdout) - f.write("\n") + handle.write(log_result.stdout) + handle.write("\n") return f"Pod logs captured for {len(pod_names)} pods" @@ -190,7 +180,6 @@ def capture_pod_logs(args, context): @task def capture_pod_previous_logs(args, context): """Capture previous logs from LLMInferenceService pods if available""" - # Get list of pod names result = shell.run( f'oc get pods -l "app.kubernetes.io/name={args.llmisvc_name}" -n {context.target_namespace} -o jsonpath="{{.items[*].metadata.name}}"', check=False, @@ -202,19 +191,16 @@ def capture_pod_previous_logs(args, context): log_file = args.artifact_dir / "artifacts/llminferenceservice.pods.previous.logs" - # Capture previous logs for each pod - with open(log_file, "w") as f: # Start with empty file + with open(log_file, "w") as handle: for pod_name in pod_names: - f.write(f"=== Previous logs for pod: {pod_name} ===\n") - - # Get previous logs for this pod + handle.write(f"=== Previous logs for pod: {pod_name} ===\n") log_result = shell.run( f"oc logs {pod_name} -n {context.target_namespace} --previous --all-containers=true", check=False, 
log_stdout=False, ) - f.write(log_result.stdout) - f.write("\n") + handle.write(log_result.stdout) + handle.write("\n") return f"Pod previous logs captured for {len(pod_names)} pods" @@ -233,7 +219,6 @@ def capture_llminferenceservice_describe(args, context): @task def capture_pods_describe(args, context): """Capture describe output for related pods""" - # Get list of pod names result = shell.run( f'oc get pods -l "app.kubernetes.io/name={args.llmisvc_name}" -n {context.target_namespace} -o jsonpath="{{.items[*].metadata.name}}"', check=False, @@ -245,24 +230,20 @@ def capture_pods_describe(args, context): describe_file = args.artifact_dir / "artifacts/llminferenceservice.pods.describe.txt" - # Capture describe output for each pod - with open(describe_file, "w") as f: # Start with empty file + with open(describe_file, "w") as handle: for pod_name in pod_names: - f.write(f"=== Describe for pod: {pod_name} ===\n") - - # Get describe output for this pod + handle.write(f"=== Describe for pod: {pod_name} ===\n") describe_result = shell.run( f"oc describe pod {pod_name} -n {context.target_namespace}", log_stdout=False, check=False, ) - f.write(describe_result.stdout) - f.write("\n") + handle.write(describe_result.stdout) + handle.write("\n") return f"Pod describe output captured for {len(pod_names)} pods" -# Create the main function using the toolbox library main = toolbox.create_toolbox_main(run) diff --git a/projects/llm_d/toolbox/cleanup/main.py b/projects/llm_d/toolbox/cleanup/main.py new file mode 100644 index 00000000..a32dbd6d --- /dev/null +++ b/projects/llm_d/toolbox/cleanup/main.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +from projects.core.dsl import execute_tasks, shell, task, toolbox +from projects.llm_d.runtime import llmd_runtime, phase_inputs + + +def run(*, inputs_file: str) -> int: + """Delete llm_d runtime leftovers from a namespace. 
+ + Args: + inputs_file: Path to the cleanup phase input file generated by orchestration + """ + + llmd_runtime.init() + execute_tasks(locals()) + return 0 + + +@task +def load_inputs(args, ctx): + """Load the cleanup phase inputs""" + + ctx.inputs = phase_inputs.load_cleanup_inputs(args.inputs_file) + return f"Loaded cleanup inputs for namespace {ctx.inputs.namespace}" + + +@task +def delete_leftovers(args, ctx): + """Delete llm_d runtime leftovers""" + + inputs = ctx.inputs + if not llmd_runtime.resource_exists("namespace", inputs.namespace): + return f"Namespace {inputs.namespace} does not exist; nothing to clean" + + inference_service_name = inputs.platform["inference_service"]["name"] + namespace = inputs.namespace + cleanup_timeout_seconds = inputs.platform["cluster"]["cleanup_timeout_seconds"] + benchmark_names = {"guidellm-benchmark"} + if inputs.benchmark: + benchmark_names.add(inputs.benchmark["job_name"]) + + shell.run( + f"oc delete llminferenceservice {inference_service_name} " + f"-n {namespace} --ignore-not-found=true", + check=False, + ) + + for benchmark_name in sorted(benchmark_names): + shell.run( + f"oc delete job,pvc {benchmark_name} -n {namespace} --ignore-not-found=true", + check=False, + ) + shell.run( + f"oc delete pod {benchmark_name}-copy -n {namespace} --ignore-not-found=true", + check=False, + ) + + shell.run( + f'oc delete job -n {namespace} -l "forge.openshift.io/project=llm_d" ' + "--ignore-not-found=true", + check=False, + ) + shell.run( + f'oc delete pod -n {namespace} -l "forge.openshift.io/project=llm_d" ' + "--ignore-not-found=true", + check=False, + ) + shell.run( + f"oc delete pvc -n {namespace} " + '-l "forge.openshift.io/project=llm_d,forge.openshift.io/preserve!=true" ' + "--ignore-not-found=true", + check=False, + ) + + llmd_runtime.wait_until( + f"llminferenceservice/{inference_service_name} deletion in {namespace}", + timeout_seconds=cleanup_timeout_seconds, + interval_seconds=10, + predicate=lambda: not llmd_runtime.resource_exists( + "llminferenceservice", inference_service_name, namespace=namespace + ), + ) + + llmd_runtime.wait_until( + f"llm-d workload pods deletion in {namespace}", + timeout_seconds=cleanup_timeout_seconds, + interval_seconds=10, + predicate=lambda: _llm_d_pods_gone(namespace, inference_service_name), + ) + + return f"Cleanup finished for namespace {namespace}" + + +def delete_run_leftovers(inputs: phase_inputs.CleanupInputs) -> None: + if not llmd_runtime.resource_exists("namespace", inputs.namespace): + return + + inference_service_name = inputs.platform["inference_service"]["name"] + namespace = inputs.namespace + cleanup_timeout_seconds = inputs.platform["cluster"]["cleanup_timeout_seconds"] + benchmark_names = {"guidellm-benchmark"} + if inputs.benchmark: + benchmark_names.add(inputs.benchmark["job_name"]) + + shell.run( + f"oc delete llminferenceservice {inference_service_name} " + f"-n {namespace} --ignore-not-found=true", + check=False, + ) + + for benchmark_name in sorted(benchmark_names): + shell.run( + f"oc delete job,pvc {benchmark_name} -n {namespace} --ignore-not-found=true", + check=False, + ) + shell.run( + f"oc delete pod {benchmark_name}-copy -n {namespace} --ignore-not-found=true", + check=False, + ) + + shell.run( + f'oc delete job -n {namespace} -l "forge.openshift.io/project=llm_d" ' + "--ignore-not-found=true", + check=False, + ) + shell.run( + f'oc delete pod -n {namespace} -l "forge.openshift.io/project=llm_d" ' + "--ignore-not-found=true", + check=False, + ) + shell.run( + f"oc delete pvc -n 
{namespace} " + '-l "forge.openshift.io/project=llm_d,forge.openshift.io/preserve!=true" ' + "--ignore-not-found=true", + check=False, + ) + + llmd_runtime.wait_until( + f"llminferenceservice/{inference_service_name} deletion in {namespace}", + timeout_seconds=cleanup_timeout_seconds, + interval_seconds=10, + predicate=lambda: not llmd_runtime.resource_exists( + "llminferenceservice", inference_service_name, namespace=namespace + ), + ) + + llmd_runtime.wait_until( + f"llm-d workload pods deletion in {namespace}", + timeout_seconds=cleanup_timeout_seconds, + interval_seconds=10, + predicate=lambda: _llm_d_pods_gone(namespace, inference_service_name), + ) + + +def _llm_d_pods_gone(namespace: str, inference_service_name: str) -> bool: + payload = llmd_runtime.oc_get_json( + "pods", + namespace=namespace, + selector=f"app.kubernetes.io/name={inference_service_name}", + ignore_not_found=True, + ) + return not payload or not payload.get("items") + + +main = toolbox.create_toolbox_main(run) + + +if __name__ == "__main__": + main() diff --git a/projects/llm_d/toolbox/prepare/main.py b/projects/llm_d/toolbox/prepare/main.py new file mode 100644 index 00000000..34b23478 --- /dev/null +++ b/projects/llm_d/toolbox/prepare/main.py @@ -0,0 +1,761 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import json +import logging +from pathlib import Path + +from projects.core.dsl import execute_tasks, shell, task, toolbox +from projects.llm_d.runtime import llmd_runtime, phase_inputs +from projects.llm_d.toolbox.prepare_model_cache import main as prepare_model_cache + +LOGGER = logging.getLogger(__name__) + + +def run(*, inputs_file: str) -> int: + """Prepare a cluster for llm_d downstream smoke and benchmark runs. + + Args: + inputs_file: Path to the prepare phase input file generated by orchestration + """ + + llmd_runtime.init() + execute_tasks(locals()) + return 0 + + +@task +def load_inputs(args, ctx): + """Load the prepare phase inputs""" + + ctx.config = phase_inputs.load_prepare_inputs(args.inputs_file) + return f"Loaded prepare inputs for preset {ctx.config.preset_name}" + + +@task +def verify_oc_access_task(args, ctx): + """Verify OpenShift CLI access""" + + llmd_runtime.oc("whoami", capture_output=True) + return "OpenShift CLI access verified" + + +@task +def verify_cluster_version_task(args, ctx): + """Validate the cluster version against llm_d requirements""" + + version_info = llmd_runtime.oc("version", "-o", "json", capture_output=True) + payload = json.loads(version_info.stdout) + + openshift_version = ( + payload.get("openshiftVersion") + or payload.get("serverVersion", {}).get("gitVersion") + or payload.get("serverVersion", {}).get("platform") + ) + if not openshift_version: + raise RuntimeError("Could not determine OpenShift version from `oc version -o json`") + + minimum = ctx.config.platform["cluster"]["minimum_openshift_version"] + if llmd_runtime.version_tuple(openshift_version) < llmd_runtime.version_tuple(minimum): + raise RuntimeError( + f"Cluster version {openshift_version} is older than the llm_d minimum {minimum}" + ) + + return f"Cluster version satisfies {minimum}" + + +@task +def prepare_cert_manager_task(args, ctx): + """Ensure the cert-manager operator is installed""" + + operator_spec = llmd_runtime.operator_spec_by_package( + ctx.config.platform, "openshift-cert-manager-operator" + ) + ensure_operator_subscription(operator_spec) + return "cert-manager operator ready" + + +@task +def prepare_leader_worker_set_task(args, ctx): + """Ensure the 
leader-worker-set operator is installed""" + + operator_spec = llmd_runtime.operator_spec_by_package(ctx.config.platform, "leader-worker-set") + ensure_operator_subscription(operator_spec) + return "leader-worker-set operator ready" + + +@task +def prepare_nfd_task(args, ctx): + """Ensure Node Feature Discovery is installed and reporting GPU labels""" + + config = ctx.config + operator_spec = llmd_runtime.operator_spec_by_package(config.platform, "nfd") + ensure_operator_subscription(operator_spec) + llmd_runtime.wait_for_crd( + operator_spec["bootstrap_crd"], + timeout_seconds=operator_spec["wait_timeout_seconds"], + ) + + manifest = llmd_runtime.load_manifest_template(config, operator_spec["bootstrap_manifest"]) + nfd_name = manifest["metadata"]["name"] + nfd_namespace = manifest["metadata"]["namespace"] + if llmd_runtime.resource_exists("nodefeaturediscovery", nfd_name, namespace=nfd_namespace): + LOGGER.info( + "NodeFeatureDiscovery/%s already exists in %s; verifying GPU discovery labels", + nfd_name, + nfd_namespace, + ) + else: + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "nfd-nodefeaturediscovery.yaml", + manifest, + ) + + llmd_runtime.wait_until( + "NodeFeatureDiscovery bootstrap resource", + timeout_seconds=operator_spec["wait_timeout_seconds"], + interval_seconds=10, + predicate=lambda: llmd_runtime.resource_exists( + "nodefeaturediscovery", + nfd_name, + namespace=nfd_namespace, + ), + ) + + wait_for_nfd_gpu_labels(config, timeout_seconds=operator_spec["wait_timeout_seconds"]) + return "Node Feature Discovery ready" + + +@task +def prepare_gpu_operator_task(args, ctx): + """Ensure the GPU operator is installed and ready""" + + config = ctx.config + operator_spec = llmd_runtime.operator_spec_by_package(config.platform, "gpu-operator-certified") + ensure_operator_subscription(operator_spec) + llmd_runtime.wait_for_crd( + operator_spec["bootstrap_crd"], + timeout_seconds=operator_spec["wait_timeout_seconds"], + ) + + manifest = llmd_runtime.load_manifest_template(config, operator_spec["bootstrap_manifest"]) + clusterpolicy_name = manifest["metadata"]["name"] + if llmd_runtime.resource_exists("clusterpolicy", clusterpolicy_name): + LOGGER.info( + "ClusterPolicy/%s already exists; verifying readiness instead of applying bootstrap manifest", + clusterpolicy_name, + ) + else: + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "gpu-clusterpolicy.yaml", + manifest, + ) + + wait_for_gpu_clusterpolicy_ready( + clusterpolicy_name, + timeout_seconds=operator_spec["wait_timeout_seconds"], + ) + return "GPU operator ready" + + +@task +def prepare_rhoai_operator_task(args, ctx): + """Ensure the RHOAI operator is installed""" + + config = ctx.config + operator_spec = llmd_runtime.operator_spec_by_package(config.platform, "rhods-operator") + ensure_operator_subscription(operator_spec) + for crd_name in config.platform["rhoai"]["required_crds_before_dsc"]: + llmd_runtime.wait_for_crd( + crd_name, + timeout_seconds=config.platform["rhoai"]["wait_timeout_seconds"], + ) + return "RHOAI operator ready" + + +@task +def apply_datasciencecluster_task(args, ctx): + """Apply the DataScienceCluster manifest""" + + config = ctx.config + manifest = llmd_runtime.render_datasciencecluster(config) + llmd_runtime.apply_manifest(config.artifact_dir / "src" / "datasciencecluster.yaml", manifest) + llmd_runtime.oc( + "get", + "datasciencecluster", + config.platform["rhoai"]["datasciencecluster_name"], + "-n", + config.platform["rhoai"]["namespace"], + "-o", + "yaml", + 
capture_output=True, + ) + return "DataScienceCluster applied" + + +@task +def wait_for_datasciencecluster_ready_task(args, ctx): + """Wait for the DataScienceCluster to become ready""" + + rhoai = ctx.config.platform["rhoai"] + + def _dsc_ready() -> bool: + payload = llmd_runtime.oc_get_json( + "datasciencecluster", + name=rhoai["datasciencecluster_name"], + namespace=rhoai["namespace"], + ) + phase = payload.get("status", {}).get("phase") + if phase == "Ready": + return True + if phase in {"Failed", "Error"}: + raise RuntimeError(f"DataScienceCluster entered terminal phase {phase}") + return False + + llmd_runtime.wait_until( + f"datasciencecluster/{rhoai['datasciencecluster_name']} ready", + timeout_seconds=rhoai["wait_timeout_seconds"], + interval_seconds=10, + predicate=_dsc_ready, + ) + return "DataScienceCluster ready" + + +@task +def ensure_required_crds_task(args, ctx): + """Wait for the llm_d-required CRDs to exist""" + + for crd_name in ctx.config.platform["rhoai"]["required_crds_after_dsc"]: + llmd_runtime.wait_for_crd( + crd_name, + timeout_seconds=ctx.config.platform["rhoai"]["wait_timeout_seconds"], + ) + return "Required CRDs present" + + +@task +def ensure_gateway_task(args, ctx): + """Ensure the gateway exists and is programmed""" + + config = ctx.config + gateway = config.platform["gateway"] + if not llmd_runtime.resource_exists("gateway", gateway["name"], namespace=gateway["namespace"]): + if not gateway["create_if_missing"]: + raise RuntimeError( + f"Required gateway {gateway['name']} does not exist in {gateway['namespace']}" + ) + manifest = llmd_runtime.render_gateway(config) + llmd_runtime.apply_manifest(config.artifact_dir / "src" / "gateway.yaml", manifest) + + def _gateway_programmed() -> bool: + resource = llmd_runtime.oc_get_json( + "gateway", + name=gateway["name"], + namespace=gateway["namespace"], + ) + return llmd_runtime.condition_status(resource, "Programmed") == "True" + + llmd_runtime.wait_until( + f"gateway/{gateway['name']} programmed", + timeout_seconds=gateway["wait_timeout_seconds"], + interval_seconds=10, + predicate=_gateway_programmed, + ) + return "Gateway ready" + + +@task +def ensure_test_namespace_task(args, ctx): + """Ensure the llm_d namespace exists""" + + llmd_runtime.ensure_namespace( + ctx.config.namespace, + labels={ + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + }, + ) + return f"Namespace {ctx.config.namespace} ready" + + +@task +def cleanup_previous_run_task(args, ctx): + """Delete leftover llm_d resources from the namespace""" + + config = ctx.config + inference_service_name = config.platform["inference_service"]["name"] + namespace = config.namespace + cleanup_timeout_seconds = config.platform["cluster"]["cleanup_timeout_seconds"] + benchmark_names = {"guidellm-benchmark"} + if config.benchmark: + benchmark_names.add(config.benchmark["job_name"]) + + shell.run( + f"oc delete llminferenceservice {inference_service_name} " + f"-n {namespace} --ignore-not-found=true", + check=False, + ) + + for benchmark_name in sorted(benchmark_names): + shell.run( + f"oc delete job,pvc {benchmark_name} -n {namespace} --ignore-not-found=true", + check=False, + ) + shell.run( + f"oc delete pod {benchmark_name}-copy -n {namespace} --ignore-not-found=true", + check=False, + ) + + shell.run( + f'oc delete job -n {namespace} -l "forge.openshift.io/project=llm_d" ' + "--ignore-not-found=true", + check=False, + ) + shell.run( + f'oc delete pod -n {namespace} -l "forge.openshift.io/project=llm_d" ' + 
"--ignore-not-found=true", + check=False, + ) + shell.run( + f"oc delete pvc -n {namespace} " + '-l "forge.openshift.io/project=llm_d,forge.openshift.io/preserve!=true" ' + "--ignore-not-found=true", + check=False, + ) + + llmd_runtime.wait_until( + f"llminferenceservice/{inference_service_name} deletion in {namespace}", + timeout_seconds=cleanup_timeout_seconds, + interval_seconds=10, + predicate=lambda: not llmd_runtime.resource_exists( + "llminferenceservice", inference_service_name, namespace=namespace + ), + ) + + llmd_runtime.wait_until( + f"llm-d workload pods deletion in {namespace}", + timeout_seconds=cleanup_timeout_seconds, + interval_seconds=10, + predicate=lambda: not ( + pods := llmd_runtime.oc_get_json( + "pods", + namespace=namespace, + selector=f"app.kubernetes.io/name={inference_service_name}", + ignore_not_found=True, + ) + ) + or not pods.get("items"), + ) + return f"Previous llm_d leftovers deleted from {ctx.config.namespace}" + + +@task +def prepare_model_cache_task(args, ctx): + """Prepare the shared model cache if enabled""" + + cache_inputs = phase_inputs.prepare_model_cache_inputs_from_prepare(ctx.config) + cache_spec = llmd_runtime.resolve_model_cache(cache_inputs) + if not cache_spec: + LOGGER.info("Model cache disabled for preset=%s", cache_inputs.preset_name) + return "Model cache disabled" + + if cache_inputs.namespace_is_managed: + LOGGER.warning( + "Model cache PVC %s lives in managed namespace %s. Namespace cleanup will remove it; cache reuse requires a stable namespace override.", + cache_spec.pvc_name, + cache_spec.namespace, + ) + + prepare_model_cache.ensure_model_cache_pvc(cache_inputs, cache_spec) + if llmd_runtime.model_cache_pvc_ready(cache_spec): + LOGGER.info( + "Model cache PVC %s already contains %s; skipping download", + cache_spec.pvc_name, + cache_spec.source_uri, + ) + prepare_model_cache.capture_model_cache_state(cache_inputs, cache_spec) + return "Model cache already populated" + + prepare_model_cache.run_model_cache_download_job(cache_inputs, cache_spec) + llmd_runtime.annotate_model_cache_pvc(cache_spec) + prepare_model_cache.capture_model_cache_state(cache_inputs, cache_spec) + return "Model cache prepared" + + +@task +def verify_gpu_nodes_task(args, ctx): + """Verify that GPU nodes are available on the cluster""" + + selector = ctx.config.platform["cluster"]["gpu_node_label_selector"] + data = llmd_runtime.oc_get_json("nodes", selector=selector, ignore_not_found=True) + items = data.get("items", []) if data else [] + if not items: + raise RuntimeError( + f"No GPU nodes found with selector {selector}. The llm_d smoke path requires GPUs." 
+ ) + return "GPU nodes detected" + + +@task +def capture_prepare_state_task(args, ctx): + """Capture cluster state after the prepare phase""" + + config = ctx.config + artifacts_dir = config.artifact_dir / "artifacts" + rhoai = config.platform["rhoai"] + gateway = config.platform["gateway"] + + capture_resource_yaml( + "datasciencecluster", + rhoai["datasciencecluster_name"], + rhoai["namespace"], + artifacts_dir / "datasciencecluster.yaml", + ) + capture_resource_yaml( + "gateway", + gateway["name"], + gateway["namespace"], + artifacts_dir / "gateway.yaml", + ) + gateway_service = llmd_runtime.oc( + "get", + "service", + "-A", + "-l", + f"gateway.networking.k8s.io/gateway-name={gateway['name']}", + "-o", + "yaml", + check=False, + capture_output=True, + ) + if gateway_service.returncode == 0 and gateway_service.stdout: + llmd_runtime.write_text(artifacts_dir / "gateway.service.yaml", gateway_service.stdout) + if config.platform["artifacts"]["capture_namespace_events"]: + capture_namespace_events(config.namespace, artifacts_dir / "namespace.events.txt") + return "Prepare-state artifacts captured" + + +def verify_oc_access() -> None: + llmd_runtime.oc("whoami", capture_output=True) + + +def verify_cluster_version(config: phase_inputs.PrepareInputs) -> None: + version_info = llmd_runtime.oc("version", "-o", "json", capture_output=True) + payload = json.loads(version_info.stdout) + + openshift_version = ( + payload.get("openshiftVersion") + or payload.get("serverVersion", {}).get("gitVersion") + or payload.get("serverVersion", {}).get("platform") + ) + if not openshift_version: + raise RuntimeError("Could not determine OpenShift version from `oc version -o json`") + + minimum = config.platform["cluster"]["minimum_openshift_version"] + if llmd_runtime.version_tuple(openshift_version) < llmd_runtime.version_tuple(minimum): + raise RuntimeError( + f"Cluster version {openshift_version} is older than the llm_d minimum {minimum}" + ) + + +def ensure_operator_subscription(operator_spec: dict[str, str]) -> dict[str, object]: + llmd_runtime.ensure_subscription(operator_spec) + return llmd_runtime.wait_for_operator_csv( + operator_spec["package"], + operator_spec["namespace"], + timeout_seconds=operator_spec["wait_timeout_seconds"], + ) + + +def prepare_cert_manager(config: phase_inputs.PrepareInputs) -> None: + operator_spec = llmd_runtime.operator_spec_by_package( + config.platform, "openshift-cert-manager-operator" + ) + ensure_operator_subscription(operator_spec) + + +def prepare_leader_worker_set(config: phase_inputs.PrepareInputs) -> None: + operator_spec = llmd_runtime.operator_spec_by_package(config.platform, "leader-worker-set") + ensure_operator_subscription(operator_spec) + + +def prepare_nfd(config: phase_inputs.PrepareInputs) -> None: + operator_spec = llmd_runtime.operator_spec_by_package(config.platform, "nfd") + ensure_operator_subscription(operator_spec) + llmd_runtime.wait_for_crd( + operator_spec["bootstrap_crd"], + timeout_seconds=operator_spec["wait_timeout_seconds"], + ) + + manifest = llmd_runtime.load_manifest_template(config, operator_spec["bootstrap_manifest"]) + nfd_name = manifest["metadata"]["name"] + nfd_namespace = manifest["metadata"]["namespace"] + if llmd_runtime.resource_exists("nodefeaturediscovery", nfd_name, namespace=nfd_namespace): + LOGGER.info( + "NodeFeatureDiscovery/%s already exists in %s; verifying GPU discovery labels", + nfd_name, + nfd_namespace, + ) + else: + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "nfd-nodefeaturediscovery.yaml", 
+ manifest, + ) + + llmd_runtime.wait_until( + "NodeFeatureDiscovery bootstrap resource", + timeout_seconds=operator_spec["wait_timeout_seconds"], + interval_seconds=10, + predicate=lambda: llmd_runtime.resource_exists( + "nodefeaturediscovery", + nfd_name, + namespace=nfd_namespace, + ), + ) + + wait_for_nfd_gpu_labels(config, timeout_seconds=operator_spec["wait_timeout_seconds"]) + + +def prepare_gpu_operator(config: phase_inputs.PrepareInputs) -> None: + operator_spec = llmd_runtime.operator_spec_by_package(config.platform, "gpu-operator-certified") + ensure_operator_subscription(operator_spec) + llmd_runtime.wait_for_crd( + operator_spec["bootstrap_crd"], + timeout_seconds=operator_spec["wait_timeout_seconds"], + ) + + manifest = llmd_runtime.load_manifest_template(config, operator_spec["bootstrap_manifest"]) + clusterpolicy_name = manifest["metadata"]["name"] + if llmd_runtime.resource_exists("clusterpolicy", clusterpolicy_name): + LOGGER.info( + "ClusterPolicy/%s already exists; verifying readiness instead of applying bootstrap manifest", + clusterpolicy_name, + ) + wait_for_gpu_clusterpolicy_ready( + clusterpolicy_name, + timeout_seconds=operator_spec["wait_timeout_seconds"], + ) + return + + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "gpu-clusterpolicy.yaml", + manifest, + ) + + wait_for_gpu_clusterpolicy_ready( + clusterpolicy_name, + timeout_seconds=operator_spec["wait_timeout_seconds"], + ) + + +def wait_for_gpu_clusterpolicy_ready(clusterpolicy_name: str, *, timeout_seconds: int) -> None: + def _clusterpolicy_ready() -> bool: + payload = llmd_runtime.oc_get_json( + "clusterpolicy", + name=clusterpolicy_name, + ) + state = payload.get("status", {}).get("state", "") + return state.lower() == "ready" + + llmd_runtime.wait_until( + f"clusterpolicy/{clusterpolicy_name} ready", + timeout_seconds=timeout_seconds, + interval_seconds=15, + predicate=_clusterpolicy_ready, + ) + + +def prepare_rhoai_operator(config: phase_inputs.PrepareInputs) -> None: + operator_spec = llmd_runtime.operator_spec_by_package(config.platform, "rhods-operator") + ensure_operator_subscription(operator_spec) + ensure_required_crds(config.platform["rhoai"]["required_crds_before_dsc"], config) + + +def ensure_required_crds(crd_names: list[str], config: phase_inputs.PrepareInputs) -> None: + for crd_name in crd_names: + llmd_runtime.wait_for_crd( + crd_name, + timeout_seconds=config.platform["rhoai"]["wait_timeout_seconds"], + ) + + +def apply_datasciencecluster(config: phase_inputs.PrepareInputs) -> None: + manifest = llmd_runtime.render_datasciencecluster(config) + llmd_runtime.apply_manifest(config.artifact_dir / "src" / "datasciencecluster.yaml", manifest) + llmd_runtime.oc( + "get", + "datasciencecluster", + config.platform["rhoai"]["datasciencecluster_name"], + "-n", + config.platform["rhoai"]["namespace"], + "-o", + "yaml", + capture_output=True, + ) + + +def wait_for_datasciencecluster_ready(config: phase_inputs.PrepareInputs) -> None: + rhoai = config.platform["rhoai"] + + def _dsc_ready() -> bool: + payload = llmd_runtime.oc_get_json( + "datasciencecluster", + name=rhoai["datasciencecluster_name"], + namespace=rhoai["namespace"], + ) + phase = payload.get("status", {}).get("phase") + if phase == "Ready": + return True + if phase in {"Failed", "Error"}: + raise RuntimeError(f"DataScienceCluster entered terminal phase {phase}") + return False + + llmd_runtime.wait_until( + f"datasciencecluster/{rhoai['datasciencecluster_name']} ready", + timeout_seconds=rhoai["wait_timeout_seconds"], + 
interval_seconds=10, + predicate=_dsc_ready, + ) + + +def ensure_gateway(config: phase_inputs.PrepareInputs) -> None: + gateway = config.platform["gateway"] + if not llmd_runtime.resource_exists("gateway", gateway["name"], namespace=gateway["namespace"]): + if not gateway["create_if_missing"]: + raise RuntimeError( + f"Required gateway {gateway['name']} does not exist in {gateway['namespace']}" + ) + manifest = llmd_runtime.render_gateway(config) + llmd_runtime.apply_manifest(config.artifact_dir / "src" / "gateway.yaml", manifest) + + def _gateway_programmed() -> bool: + resource = llmd_runtime.oc_get_json( + "gateway", + name=gateway["name"], + namespace=gateway["namespace"], + ) + return llmd_runtime.condition_status(resource, "Programmed") == "True" + + llmd_runtime.wait_until( + f"gateway/{gateway['name']} programmed", + timeout_seconds=gateway["wait_timeout_seconds"], + interval_seconds=10, + predicate=_gateway_programmed, + ) + + +def ensure_test_namespace(config: phase_inputs.PrepareInputs) -> None: + llmd_runtime.ensure_namespace( + config.namespace, + labels={ + "app.kubernetes.io/managed-by": "forge", + "forge.openshift.io/project": "llm_d", + }, + ) + + +def verify_gpu_nodes(config: phase_inputs.PrepareInputs) -> None: + selector = config.platform["cluster"]["gpu_node_label_selector"] + data = llmd_runtime.oc_get_json("nodes", selector=selector, ignore_not_found=True) + items = data.get("items", []) if data else [] + if not items: + raise RuntimeError( + f"No GPU nodes found with selector {selector}. The llm_d smoke path requires GPUs." + ) + + +def wait_for_nfd_gpu_labels(config: phase_inputs.PrepareInputs, *, timeout_seconds: int) -> None: + selectors = config.platform["cluster"]["nfd_gpu_detection_labels"] + + def _labels_present() -> bool: + for selector in selectors: + data = llmd_runtime.oc_get_json("nodes", selector=selector, ignore_not_found=True) + if data and data.get("items"): + return True + return False + + llmd_runtime.wait_until( + "NFD GPU discovery labels on cluster nodes", + timeout_seconds=timeout_seconds, + interval_seconds=15, + predicate=_labels_present, + ) + + +def capture_prepare_state(config: phase_inputs.PrepareInputs) -> None: + artifacts_dir = config.artifact_dir / "artifacts" + rhoai = config.platform["rhoai"] + gateway = config.platform["gateway"] + + capture_resource_yaml( + "datasciencecluster", + rhoai["datasciencecluster_name"], + rhoai["namespace"], + artifacts_dir / "datasciencecluster.yaml", + ) + capture_resource_yaml( + "gateway", + gateway["name"], + gateway["namespace"], + artifacts_dir / "gateway.yaml", + ) + gateway_service = llmd_runtime.oc( + "get", + "service", + "-A", + "-l", + f"gateway.networking.k8s.io/gateway-name={gateway['name']}", + "-o", + "yaml", + check=False, + capture_output=True, + ) + if gateway_service.returncode == 0 and gateway_service.stdout: + llmd_runtime.write_text(artifacts_dir / "gateway.service.yaml", gateway_service.stdout) + if config.platform["artifacts"]["capture_namespace_events"]: + capture_namespace_events(config.namespace, artifacts_dir / "namespace.events.txt") + + +def capture_resource_yaml( + kind: str, + name: str, + namespace: str, + destination: Path, + *, + check: bool = True, +) -> None: + result = llmd_runtime.oc( + "get", + kind, + name, + "-n", + namespace, + "-o", + "yaml", + check=check, + capture_output=True, + ) + if result.returncode == 0 and result.stdout: + llmd_runtime.write_text(destination, result.stdout) + + +def capture_namespace_events(namespace: str, destination: Path) -> 
None: + result = llmd_runtime.oc( + "get", + "events", + "-n", + namespace, + "--sort-by=.metadata.creationTimestamp", + check=False, + capture_output=True, + ) + if result.returncode == 0 and result.stdout: + llmd_runtime.write_text(destination, result.stdout) + + +main = toolbox.create_toolbox_main(run) + + +if __name__ == "__main__": + main() diff --git a/projects/llm_d/toolbox/prepare_model_cache/main.py b/projects/llm_d/toolbox/prepare_model_cache/main.py new file mode 100644 index 00000000..73cfc24e --- /dev/null +++ b/projects/llm_d/toolbox/prepare_model_cache/main.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import logging + +from projects.core.dsl import execute_tasks, task, toolbox +from projects.llm_d.runtime import llmd_runtime, phase_inputs + +LOGGER = logging.getLogger(__name__) + + +def run(*, inputs_file: str) -> int: + """Prepare the shared model cache PVC and populate it when needed. + + Args: + inputs_file: Path to the prepare_model_cache phase input file generated by orchestration + """ + + llmd_runtime.init() + execute_tasks(locals()) + return 0 + + +@task +def load_inputs(args, ctx): + """Load the model cache phase inputs""" + + ctx.inputs = phase_inputs.load_prepare_model_cache_inputs(args.inputs_file) + return f"Loaded model cache inputs for preset {ctx.inputs.preset_name}" + + +@task +def prepare_model_cache(args, ctx): + """Ensure the model cache PVC exists and is populated""" + + config = ctx.inputs + cache_spec = llmd_runtime.resolve_model_cache(config) + if not cache_spec: + LOGGER.info("Model cache disabled for preset=%s", config.preset_name) + return "Model cache disabled" + + if config.namespace_is_managed: + LOGGER.warning( + "Model cache PVC %s lives in managed namespace %s. Namespace cleanup will remove it; cache reuse requires a stable namespace override.", + cache_spec.pvc_name, + cache_spec.namespace, + ) + + ensure_model_cache_pvc(config, cache_spec) + if llmd_runtime.model_cache_pvc_ready(cache_spec): + LOGGER.info( + "Model cache PVC %s already contains %s; skipping download", + cache_spec.pvc_name, + cache_spec.source_uri, + ) + capture_model_cache_state(config, cache_spec) + return f"Model cache already populated in {cache_spec.pvc_name}" + + run_model_cache_download_job(config, cache_spec) + llmd_runtime.annotate_model_cache_pvc(cache_spec) + capture_model_cache_state(config, cache_spec) + return f"Model cache step finished for namespace {config.namespace}" + + +def run_prepare_model_cache(config: phase_inputs.PrepareModelCacheInputs) -> int: + cache_spec = llmd_runtime.resolve_model_cache(config) + if not cache_spec: + LOGGER.info("Model cache disabled for preset=%s", config.preset_name) + return 0 + + if config.namespace_is_managed: + LOGGER.warning( + "Model cache PVC %s lives in managed namespace %s. 
Namespace cleanup will remove it; cache reuse requires a stable namespace override.", + cache_spec.pvc_name, + cache_spec.namespace, + ) + + ensure_model_cache_pvc(config, cache_spec) + if llmd_runtime.model_cache_pvc_ready(cache_spec): + LOGGER.info( + "Model cache PVC %s already contains %s; skipping download", + cache_spec.pvc_name, + cache_spec.source_uri, + ) + capture_model_cache_state(config, cache_spec) + return 0 + + run_model_cache_download_job(config, cache_spec) + llmd_runtime.annotate_model_cache_pvc(cache_spec) + capture_model_cache_state(config, cache_spec) + return 0 + + +def ensure_model_cache_pvc( + config: phase_inputs.PrepareModelCacheInputs, cache_spec: llmd_runtime.ModelCacheSpec +) -> None: + existing = llmd_runtime.oc_get_json( + "persistentvolumeclaim", + name=cache_spec.pvc_name, + namespace=cache_spec.namespace, + ignore_not_found=True, + ) + if existing: + actual_modes = existing.get("spec", {}).get("accessModes", []) + if not llmd_runtime.pvc_access_mode_matches(actual_modes, cache_spec.access_mode): + raise RuntimeError( + f"PVC {cache_spec.pvc_name} exists with access modes {actual_modes}, expected {cache_spec.access_mode}" + ) + + actual_storage_class = existing.get("spec", {}).get("storageClassName") + if cache_spec.storage_class_name and actual_storage_class != cache_spec.storage_class_name: + raise RuntimeError( + f"PVC {cache_spec.pvc_name} exists with storageClassName={actual_storage_class}, expected {cache_spec.storage_class_name}" + ) + + llmd_runtime.wait_for_pvc_bound( + cache_spec.pvc_name, + cache_spec.namespace, + timeout_seconds=config.model_cache["download"]["wait_timeout_seconds"], + ) + return + + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "model-cache-pvc.yaml", + llmd_runtime.render_model_cache_pvc(cache_spec), + ) + llmd_runtime.wait_for_pvc_bound( + cache_spec.pvc_name, + cache_spec.namespace, + timeout_seconds=config.model_cache["download"]["wait_timeout_seconds"], + ) + + +def run_model_cache_download_job( + config: phase_inputs.PrepareModelCacheInputs, cache_spec: llmd_runtime.ModelCacheSpec +) -> None: + llmd_runtime.oc( + "delete", + "job", + cache_spec.download_job_name, + "-n", + cache_spec.namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.wait_until( + f"job/{cache_spec.download_job_name} deletion in {cache_spec.namespace}", + timeout_seconds=120, + interval_seconds=5, + predicate=lambda: not llmd_runtime.resource_exists( + "job", cache_spec.download_job_name, namespace=cache_spec.namespace + ), + ) + + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "model-cache-job.yaml", + llmd_runtime.render_model_cache_job(config, cache_spec), + ) + + try: + llmd_runtime.wait_for_job_completion( + cache_spec.download_job_name, + cache_spec.namespace, + timeout_seconds=config.model_cache["download"]["wait_timeout_seconds"], + interval_seconds=config.model_cache["download"]["poll_interval_seconds"], + ) + finally: + capture_model_cache_state(config, cache_spec) + + +def capture_model_cache_state( + config: phase_inputs.PrepareModelCacheInputs, cache_spec: llmd_runtime.ModelCacheSpec +) -> None: + artifact_dir = config.artifact_dir / "artifacts" / "model-cache" + llmd_runtime.write_json( + artifact_dir / "spec.json", + { + "pvc_name": cache_spec.pvc_name, + "model_uri": cache_spec.model_uri, + "source_uri": cache_spec.source_uri, + "source_scheme": cache_spec.source_scheme, + }, + ) + + capture_resource_yaml( + "persistentvolumeclaim", + cache_spec.pvc_name, + cache_spec.namespace, + 
artifact_dir / "pvc.yaml", + ) + capture_resource_yaml( + "job", + cache_spec.download_job_name, + cache_spec.namespace, + artifact_dir / "job.yaml", + check=False, + ) + + for pod_name in llmd_runtime.job_pod_names(cache_spec.download_job_name, cache_spec.namespace): + capture_resource_yaml( + "pod", + pod_name, + cache_spec.namespace, + artifact_dir / f"{pod_name}.yaml", + check=False, + ) + log_result = llmd_runtime.oc( + "logs", + pod_name, + "-n", + cache_spec.namespace, + check=False, + capture_output=True, + ) + if log_result.returncode == 0 and log_result.stdout: + llmd_runtime.write_text(artifact_dir / f"{pod_name}.log", log_result.stdout) + + +def capture_resource_yaml( + kind: str, + name: str, + namespace: str, + destination, + *, + check: bool = True, +) -> None: + result = llmd_runtime.oc( + "get", + kind, + name, + "-n", + namespace, + "-o", + "yaml", + check=check, + capture_output=True, + ) + if result.returncode == 0 and result.stdout: + llmd_runtime.write_text(destination, result.stdout) + + +main = toolbox.create_toolbox_main(run) + + +if __name__ == "__main__": + main() diff --git a/projects/llm_d/toolbox/test/main.py b/projects/llm_d/toolbox/test/main.py new file mode 100644 index 00000000..609c9e46 --- /dev/null +++ b/projects/llm_d/toolbox/test/main.py @@ -0,0 +1,910 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import json +import logging +from pathlib import Path + +from projects.core.dsl import always, execute_tasks, task, toolbox +from projects.llm_d.runtime import llmd_runtime, phase_inputs + +LOGGER = logging.getLogger(__name__) + + +def run(*, inputs_file: str) -> int: + """Deploy llm_d, run the smoke request, and optionally execute GuideLLM. + + Args: + inputs_file: Path to the test phase input file generated by orchestration + """ + + llmd_runtime.init() + execute_tasks(locals()) + return 0 + + +@task +def load_inputs(args, ctx): + """Load the test phase inputs""" + + ctx.config = phase_inputs.load_test_inputs(args.inputs_file) + return f"Loaded test inputs for preset {ctx.config.preset_name}" + + +@task +def deploy_inference_service_task(args, ctx): + """Deploy the LLMInferenceService and resolve its endpoint""" + + config = ctx.config + name = config.platform["inference_service"]["name"] + namespace = config.namespace + selector = f"app.kubernetes.io/name={name}" + + llmd_runtime.oc( + "delete", + "llminferenceservice", + name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + + def _old_pods_gone() -> bool: + pods = llmd_runtime.oc_get_json( + "pods", namespace=namespace, selector=selector, ignore_not_found=True + ) + return not pods or not pods.get("items") + + llmd_runtime.wait_until( + f"old llm-d pods to disappear in {namespace}", + timeout_seconds=config.platform["inference_service"]["delete_timeout_seconds"], + interval_seconds=10, + predicate=_old_pods_gone, + ) + + manifest = llmd_runtime.render_inference_service(config) + llmd_runtime.apply_manifest(config.artifact_dir / "src" / "llminferenceservice.yaml", manifest) + + def _pods_present() -> bool: + pods = llmd_runtime.oc_get_json( + "pods", namespace=namespace, selector=selector, ignore_not_found=True + ) + return bool(pods and pods.get("items")) + + llmd_runtime.wait_until( + f"llm-d pods to appear in {namespace}", + timeout_seconds=config.platform["inference_service"]["pod_appearance_timeout_seconds"], + interval_seconds=5, + predicate=_pods_present, + ) + + def _service_ready() -> bool: + payload = llmd_runtime.oc_get_json("llminferenceservice", 
name=name, namespace=namespace) + return llmd_runtime.condition_status(payload, "Ready") == "True" + + llmd_runtime.wait_until( + f"llminferenceservice/{name} ready", + timeout_seconds=config.platform["inference_service"]["ready_timeout_seconds"], + interval_seconds=10, + predicate=_service_ready, + ) + + ctx.endpoint_url = llmd_runtime.wait_until( + f"gateway address for llminferenceservice/{name}", + timeout_seconds=config.platform["inference_service"]["ready_timeout_seconds"], + interval_seconds=10, + predicate=lambda: try_resolve_endpoint_url(config), + ) + return f"Endpoint resolved: {ctx.endpoint_url}" + + +@task +def run_smoke_request_task(args, ctx): + """Run the smoke request against the deployed service""" + + config = ctx.config + namespace = config.namespace + job_name = config.platform["smoke"]["job_name"] + payload = { + "model": config.model["served_model_name"], + "prompt": config.smoke_request["prompt"], + "max_tokens": config.smoke_request["max_tokens"], + "temperature": config.smoke_request["temperature"], + } + llmd_runtime.write_json(config.artifact_dir / "artifacts" / "smoke.request.json", payload) + + llmd_runtime.oc( + "delete", + "job", + job_name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.wait_until( + f"job/{job_name} deletion in {namespace}", + timeout_seconds=120, + interval_seconds=5, + predicate=lambda: not llmd_runtime.resource_exists("job", job_name, namespace=namespace), + ) + + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "smoke-job.yaml", + llmd_runtime.render_smoke_request_job(config, ctx.endpoint_url, payload), + ) + + try: + llmd_runtime.wait_for_job_completion( + job_name, + namespace, + timeout_seconds=( + config.platform["smoke"]["request_retries"] + * ( + config.platform["smoke"]["request_timeout_seconds"] + + config.platform["smoke"]["request_retry_delay_seconds"] + ) + ), + interval_seconds=5, + ) + finally: + capture_smoke_state(config) + + result = llmd_runtime.oc( + "logs", + f"job/{job_name}", + "-n", + namespace, + check=False, + capture_output=True, + ) + + if result.returncode != 0 or not result.stdout: + raise RuntimeError( + f"Smoke request job {job_name} completed but response logs could not be read: {result.stderr}" + ) + + response = json.loads(result.stdout) + if not response.get("choices"): + raise RuntimeError(f"Invalid smoke response payload: {result.stdout}") + + llmd_runtime.write_json(config.artifact_dir / "artifacts" / "smoke.response.json", response) + ctx.smoke_response = response + return "Smoke request completed" + + +@task +def run_guidellm_benchmark_task(args, ctx): + """Run the GuideLLM benchmark when enabled for the preset""" + + if not ctx.config.benchmark: + return "GuideLLM benchmark disabled" + + config = ctx.config + benchmark_name = config.benchmark["job_name"] + namespace = config.namespace + + llmd_runtime.oc( + "delete", + "job,pvc", + benchmark_name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.oc( + "delete", + "pod", + f"{benchmark_name}-copy", + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "guidellm-pvc.yaml", + llmd_runtime.render_guidellm_pvc(config), + ) + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "guidellm-job.yaml", + llmd_runtime.render_guidellm_job(config, ctx.endpoint_url), + ) + + def _job_terminal() -> dict[str, object] | None: + payload = llmd_runtime.oc_get_json("job", name=benchmark_name, 
namespace=namespace) + status = payload.get("status", {}) + if status.get("succeeded"): + return payload + if status.get("failed"): + raise RuntimeError(f"GuideLLM job {benchmark_name} failed") + return None + + llmd_runtime.wait_until( + f"GuideLLM job/{benchmark_name}", + timeout_seconds=config.benchmark["timeout_seconds"], + interval_seconds=10, + predicate=_job_terminal, + ) + + capture_guidellm_state(config) + copy_guidellm_results(config) + return f"GuideLLM benchmark {ctx.config.benchmark['job_name']} completed" + + +@always +@task +def capture_inference_service_state_task(args, ctx): + """Capture the LLMInferenceService state and related resources""" + + config = getattr(ctx, "config", None) + if not config: + return "Test inputs unavailable; skipping state capture" + + name = config.platform["inference_service"]["name"] + namespace = config.namespace + artifacts_dir = config.artifact_dir / "artifacts" + selector = f"app.kubernetes.io/name={name}" + + capture_get( + "llminferenceservice", + name, + namespace, + "yaml", + artifacts_dir / "llminferenceservice.yaml", + ) + capture_get( + "llminferenceservice", + name, + namespace, + "json", + artifacts_dir / "llminferenceservice.json", + ) + capture_get( + "pods", + None, + namespace, + "yaml", + artifacts_dir / "llminferenceservice.pods.yaml", + selector=selector, + ) + capture_get( + "deployments", + None, + namespace, + "yaml", + artifacts_dir / "llminferenceservice.deployments.yaml", + selector=selector, + ) + capture_get( + "replicasets", + None, + namespace, + "yaml", + artifacts_dir / "llminferenceservice.replicasets.yaml", + selector=selector, + ) + capture_get("pods", None, namespace, "wide", artifacts_dir / "namespace.pods.status") + capture_get("services", None, namespace, "wide", artifacts_dir / "namespace.services.status") + + pod_list = llmd_runtime.oc_get_json( + "pods", namespace=namespace, selector=selector, ignore_not_found=True + ) + if pod_list: + lines = [] + previous_lines = [] + for pod in pod_list.get("items", []): + pod_name = pod["metadata"]["name"] + lines.append(f"=== {pod_name} ===") + log_result = llmd_runtime.oc( + "logs", + pod_name, + "-n", + namespace, + "--all-containers=true", + check=False, + capture_output=True, + ) + if log_result.stdout: + lines.append(log_result.stdout.rstrip()) + + previous_lines.append(f"=== {pod_name} ===") + previous_result = llmd_runtime.oc( + "logs", + pod_name, + "-n", + namespace, + "--previous", + "--all-containers=true", + check=False, + capture_output=True, + ) + if previous_result.stdout: + previous_lines.append(previous_result.stdout.rstrip()) + + llmd_runtime.write_text( + artifacts_dir / "llminferenceservice.pods.logs", "\n".join(lines) + "\n" + ) + llmd_runtime.write_text( + artifacts_dir / "llminferenceservice.pods.previous.logs", + "\n".join(previous_lines) + "\n", + ) + return "Inference-service artifacts captured" + + +@always +@task +def write_endpoint_url_task(args, ctx): + """Persist the resolved endpoint URL when available""" + + config = getattr(ctx, "config", None) + if not config: + return "Test inputs unavailable; skipping endpoint capture" + + endpoint_url = getattr(ctx, "endpoint_url", None) + if not endpoint_url: + return "Endpoint URL not available" + + llmd_runtime.write_text(config.artifact_dir / "artifacts" / "endpoint.url", f"{endpoint_url}\n") + return "Endpoint URL captured" + + +@always +@task +def cleanup_runtime_resources_task(args, ctx): + """Delete smoke and benchmark helper resources""" + + config = getattr(ctx, "config", None) + if 
not config: + return "Test inputs unavailable; skipping cleanup" + + benchmark_name = config.benchmark["job_name"] if config.benchmark else "guidellm-benchmark" + smoke_job_name = config.platform["smoke"]["job_name"] + namespace = config.namespace + + llmd_runtime.oc( + "delete", + "job", + smoke_job_name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.oc( + "delete", + "job,pvc", + benchmark_name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.oc( + "delete", + "pod", + f"{benchmark_name}-copy", + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + return "Test helper resources deleted" + + +@always +@task +def capture_namespace_events_task(args, ctx): + """Capture namespace events after the test run""" + + config = getattr(ctx, "config", None) + if not config: + return "Test inputs unavailable; skipping namespace events capture" + + events = llmd_runtime.oc( + "get", + "events", + "-n", + config.namespace, + "--sort-by=.metadata.creationTimestamp", + check=False, + capture_output=True, + ) + if events.returncode == 0 and events.stdout: + llmd_runtime.write_text( + config.artifact_dir / "artifacts" / "namespace.events.txt", events.stdout + ) + return "Namespace events captured" + + +def cleanup_runtime_resources(config: phase_inputs.TestInputs) -> None: + benchmark_name = config.benchmark["job_name"] if config.benchmark else "guidellm-benchmark" + smoke_job_name = config.platform["smoke"]["job_name"] + namespace = config.namespace + + llmd_runtime.oc( + "delete", + "job", + smoke_job_name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.oc( + "delete", + "job,pvc", + benchmark_name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.oc( + "delete", + "pod", + f"{benchmark_name}-copy", + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + + +def capture_namespace_events(config: phase_inputs.TestInputs) -> None: + events = llmd_runtime.oc( + "get", + "events", + "-n", + config.namespace, + "--sort-by=.metadata.creationTimestamp", + check=False, + capture_output=True, + ) + if events.returncode == 0 and events.stdout: + llmd_runtime.write_text( + config.artifact_dir / "artifacts" / "namespace.events.txt", events.stdout + ) + + +def deploy_inference_service(config: phase_inputs.TestInputs) -> str: + name = config.platform["inference_service"]["name"] + namespace = config.namespace + selector = f"app.kubernetes.io/name={name}" + + llmd_runtime.oc( + "delete", + "llminferenceservice", + name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + + def _old_pods_gone() -> bool: + pods = llmd_runtime.oc_get_json( + "pods", namespace=namespace, selector=selector, ignore_not_found=True + ) + return not pods or not pods.get("items") + + llmd_runtime.wait_until( + f"old llm-d pods to disappear in {namespace}", + timeout_seconds=config.platform["inference_service"]["delete_timeout_seconds"], + interval_seconds=10, + predicate=_old_pods_gone, + ) + + manifest = llmd_runtime.render_inference_service(config) + llmd_runtime.apply_manifest(config.artifact_dir / "src" / "llminferenceservice.yaml", manifest) + + def _pods_present() -> bool: + pods = llmd_runtime.oc_get_json( + "pods", namespace=namespace, selector=selector, ignore_not_found=True + ) + return bool(pods and pods.get("items")) + + llmd_runtime.wait_until( + f"llm-d pods to appear in {namespace}", + 
timeout_seconds=config.platform["inference_service"]["pod_appearance_timeout_seconds"], + interval_seconds=5, + predicate=_pods_present, + ) + + def _service_ready() -> bool: + payload = llmd_runtime.oc_get_json("llminferenceservice", name=name, namespace=namespace) + return llmd_runtime.condition_status(payload, "Ready") == "True" + + llmd_runtime.wait_until( + f"llminferenceservice/{name} ready", + timeout_seconds=config.platform["inference_service"]["ready_timeout_seconds"], + interval_seconds=10, + predicate=_service_ready, + ) + + return llmd_runtime.wait_until( + f"gateway address for llminferenceservice/{name}", + timeout_seconds=config.platform["inference_service"]["ready_timeout_seconds"], + interval_seconds=10, + predicate=lambda: try_resolve_endpoint_url(config), + ) + + +def resolve_endpoint_url(config: phase_inputs.TestInputs) -> str: + endpoint_url = try_resolve_endpoint_url(config) + if endpoint_url: + return endpoint_url + + name = config.platform["inference_service"]["name"] + gateway_name = config.platform["gateway"]["status_address_name"] + raise RuntimeError( + f"Gateway address {gateway_name} is missing from llminferenceservice/{name} status.addresses" + ) + + +def try_resolve_endpoint_url(config: phase_inputs.TestInputs) -> str | None: + name = config.platform["inference_service"]["name"] + namespace = config.namespace + gateway_name = config.platform["gateway"]["status_address_name"] + payload = llmd_runtime.oc_get_json("llminferenceservice", name=name, namespace=namespace) + + for address in payload.get("status", {}).get("addresses", []): + if address.get("name") == gateway_name and address.get("url"): + return address["url"] + return None + + +def run_smoke_request(config: phase_inputs.TestInputs, endpoint_url: str) -> dict[str, object]: + namespace = config.namespace + job_name = config.platform["smoke"]["job_name"] + + payload = { + "model": config.model["served_model_name"], + "prompt": config.smoke_request["prompt"], + "max_tokens": config.smoke_request["max_tokens"], + "temperature": config.smoke_request["temperature"], + } + llmd_runtime.write_json(config.artifact_dir / "artifacts" / "smoke.request.json", payload) + + llmd_runtime.oc( + "delete", + "job", + job_name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.wait_until( + f"job/{job_name} deletion in {namespace}", + timeout_seconds=120, + interval_seconds=5, + predicate=lambda: not llmd_runtime.resource_exists("job", job_name, namespace=namespace), + ) + + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "smoke-job.yaml", + llmd_runtime.render_smoke_request_job(config, endpoint_url, payload), + ) + + try: + llmd_runtime.wait_for_job_completion( + job_name, + namespace, + timeout_seconds=( + config.platform["smoke"]["request_retries"] + * ( + config.platform["smoke"]["request_timeout_seconds"] + + config.platform["smoke"]["request_retry_delay_seconds"] + ) + ), + interval_seconds=5, + ) + finally: + capture_smoke_state(config) + + result = llmd_runtime.oc( + "logs", + f"job/{job_name}", + "-n", + namespace, + check=False, + capture_output=True, + ) + + if result.returncode != 0 or not result.stdout: + raise RuntimeError( + f"Smoke request job {job_name} completed but response logs could not be read: {result.stderr}" + ) + + response = json.loads(result.stdout) + if not response.get("choices"): + raise RuntimeError(f"Invalid smoke response payload: {result.stdout}") + return response + + +def capture_smoke_state(config: phase_inputs.TestInputs) -> None: + 
job_name = config.platform["smoke"]["job_name"] + namespace = config.namespace + artifacts_dir = config.artifact_dir / "artifacts" + + capture_get("job", job_name, namespace, "yaml", artifacts_dir / "smoke_job.yaml") + capture_get( + "pods", + None, + namespace, + "yaml", + artifacts_dir / "smoke_job.pods.yaml", + selector=f"job-name={job_name}", + ) + result = llmd_runtime.oc( + "logs", + f"job/{job_name}", + "-n", + namespace, + check=False, + capture_output=True, + ) + if result.returncode == 0 and result.stdout: + llmd_runtime.write_text(artifacts_dir / "smoke_job.logs", result.stdout) + + +def run_guidellm_benchmark(config: phase_inputs.TestInputs, endpoint_url: str) -> None: + benchmark_name = config.benchmark["job_name"] + namespace = config.namespace + + llmd_runtime.oc( + "delete", + "job,pvc", + benchmark_name, + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + llmd_runtime.oc( + "delete", + "pod", + f"{benchmark_name}-copy", + "-n", + namespace, + "--ignore-not-found=true", + check=False, + ) + + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "guidellm-pvc.yaml", + llmd_runtime.render_guidellm_pvc(config), + ) + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "guidellm-job.yaml", + llmd_runtime.render_guidellm_job(config, endpoint_url), + ) + + def _job_terminal() -> dict[str, object] | None: + payload = llmd_runtime.oc_get_json("job", name=benchmark_name, namespace=namespace) + status = payload.get("status", {}) + if status.get("succeeded"): + return payload + if status.get("failed"): + raise RuntimeError(f"GuideLLM job {benchmark_name} failed") + return None + + llmd_runtime.wait_until( + f"GuideLLM job/{benchmark_name}", + timeout_seconds=config.benchmark["timeout_seconds"], + interval_seconds=10, + predicate=_job_terminal, + ) + + capture_guidellm_state(config) + copy_guidellm_results(config) + + +def copy_guidellm_results(config: phase_inputs.TestInputs) -> None: + benchmark_name = config.benchmark["job_name"] + namespace = config.namespace + pod_data = llmd_runtime.oc_get_json( + "pods", + namespace=namespace, + selector=f"job-name={benchmark_name}", + ignore_not_found=True, + ) + node_name = None + if pod_data and pod_data.get("items"): + node_name = pod_data["items"][0].get("spec", {}).get("nodeName") + + llmd_runtime.apply_manifest( + config.artifact_dir / "src" / "guidellm-copy-pod.yaml", + llmd_runtime.render_guidellm_copy_pod(config, node_name=node_name), + ) + + def _helper_ready() -> bool: + payload = llmd_runtime.oc_get_json( + "pod", + name=f"{benchmark_name}-copy", + namespace=namespace, + ) + conditions = payload.get("status", {}).get("conditions", []) + return any( + condition.get("type") == "Ready" and condition.get("status") == "True" + for condition in conditions + ) + + llmd_runtime.wait_until( + f"GuideLLM copy helper pod/{benchmark_name}-copy", + timeout_seconds=120, + interval_seconds=5, + predicate=_helper_ready, + ) + + result = llmd_runtime.oc( + "exec", + "-n", + namespace, + f"{benchmark_name}-copy", + "--", + "cat", + "/results/benchmarks.json", + check=False, + capture_output=True, + ) + if result.returncode == 0 and result.stdout: + llmd_runtime.write_text( + config.artifact_dir / "artifacts" / "results" / "benchmarks.json", + result.stdout, + ) + + +def capture_inference_service_state(config: phase_inputs.TestInputs) -> None: + name = config.platform["inference_service"]["name"] + namespace = config.namespace + artifacts_dir = config.artifact_dir / "artifacts" + selector = 
f"app.kubernetes.io/name={name}" + + capture_get( + "llminferenceservice", + name, + namespace, + "yaml", + artifacts_dir / "llminferenceservice.yaml", + ) + capture_get( + "llminferenceservice", + name, + namespace, + "json", + artifacts_dir / "llminferenceservice.json", + ) + capture_get( + "pods", + None, + namespace, + "yaml", + artifacts_dir / "llminferenceservice.pods.yaml", + selector=selector, + ) + capture_get( + "deployments", + None, + namespace, + "yaml", + artifacts_dir / "llminferenceservice.deployments.yaml", + selector=selector, + ) + capture_get( + "replicasets", + None, + namespace, + "yaml", + artifacts_dir / "llminferenceservice.replicasets.yaml", + selector=selector, + ) + capture_get("pods", None, namespace, "wide", artifacts_dir / "namespace.pods.status") + capture_get("services", None, namespace, "wide", artifacts_dir / "namespace.services.status") + + pod_list = llmd_runtime.oc_get_json( + "pods", namespace=namespace, selector=selector, ignore_not_found=True + ) + if pod_list: + lines = [] + previous_lines = [] + for pod in pod_list.get("items", []): + pod_name = pod["metadata"]["name"] + lines.append(f"=== {pod_name} ===") + log_result = llmd_runtime.oc( + "logs", + pod_name, + "-n", + namespace, + "--all-containers=true", + check=False, + capture_output=True, + ) + if log_result.stdout: + lines.append(log_result.stdout.rstrip()) + + previous_lines.append(f"=== {pod_name} ===") + previous_result = llmd_runtime.oc( + "logs", + pod_name, + "-n", + namespace, + "--previous", + "--all-containers=true", + check=False, + capture_output=True, + ) + if previous_result.stdout: + previous_lines.append(previous_result.stdout.rstrip()) + + llmd_runtime.write_text( + artifacts_dir / "llminferenceservice.pods.logs", "\n".join(lines) + "\n" + ) + llmd_runtime.write_text( + artifacts_dir / "llminferenceservice.pods.previous.logs", + "\n".join(previous_lines) + "\n", + ) + + +def capture_guidellm_state(config: phase_inputs.TestInputs) -> None: + benchmark_name = config.benchmark["job_name"] + namespace = config.namespace + artifacts_dir = config.artifact_dir / "artifacts" + + capture_get( + "job", + benchmark_name, + namespace, + "yaml", + artifacts_dir / "guidellm_benchmark_job.yaml", + ) + capture_get( + "pods", + None, + namespace, + "yaml", + artifacts_dir / "guidellm_benchmark_job.pods.yaml", + selector=f"job-name={benchmark_name}", + ) + result = llmd_runtime.oc( + "logs", + f"job/{benchmark_name}", + "-n", + namespace, + check=False, + capture_output=True, + ) + if result.returncode == 0 and result.stdout: + llmd_runtime.write_text(artifacts_dir / "guidellm_benchmark_job.logs", result.stdout) + + +def capture_get( + kind: str, + name: str | None, + namespace: str, + output: str, + destination: Path, + *, + selector: str | None = None, +) -> None: + args = ["get", kind] + if name: + args.append(name) + args.extend(["-n", namespace]) + if selector: + args.extend(["-l", selector]) + args.extend(["-o", output]) + result = llmd_runtime.oc(*args, check=False, capture_output=True) + if result.returncode == 0 and result.stdout: + llmd_runtime.write_text(destination, result.stdout) + + +main = toolbox.create_toolbox_main(run) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index c6632bf9..139c1bc7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "plotly>=5.17.0", "dash>=2.14.0", "dash-bootstrap-components>=1.5.0", + "jinja2", "pyyaml>=6.0", "jsonschema>=4.19.0", "structlog>=23.1.0", @@ -125,7 +126,7 @@ ignore = 
 [tool.pytest.ini_options]
 minversion = "7.0"
 addopts = "-ra -q --strict-markers --strict-config"
-testpaths = ["projects/core/tests"]
+testpaths = ["projects/core/tests", "projects/llm_d/tests"]
 python_files = ["test_*.py", "*_test.py"]
 python_classes = ["Test*"]
 python_functions = ["test_*"]
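
Note (illustrative, not part of the patch): the `capture_get` helper added above only shells out to `oc get` and persists stdout when the call succeeds (`check=False`, write on `returncode == 0`). A minimal sketch of the command it assembles for the label-selector case; the namespace and selector values below are made-up placeholders, not taken from the patch:

# Sketch of capture_get's argument assembly, run standalone to show the
# resulting `oc` invocation. All concrete values here are hypothetical.
kind, name, namespace, output = "pods", None, "example-namespace", "yaml"
selector = "job-name=smoke-request"  # hypothetical job label

args = ["get", kind]
if name:  # no resource name given -> list the whole kind
    args.append(name)
args.extend(["-n", namespace])
if selector:
    args.extend(["-l", selector])
args.extend(["-o", output])

print("oc", " ".join(args))
# -> oc get pods -n example-namespace -l job-name=smoke-request -o yaml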