EntityProcess · christso · Mar 28, 2026 · Mar 28, 2026
diff --git a/.env.example b/.env.example
@@ -1,8 +1,5 @@
 # Copy this file to .env and fill in your credentials
 
-# Eval run mode (used by agentv-bench skill)
-AGENT_EVAL_MODE=agent # agent | cli
-
 # Azure OpenAI Configuration
 AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
 AZURE_OPENAI_API_KEY=your-openai-api-key-here

diff --git a/apps/cli/src/templates/.env.example b/apps/cli/src/templates/.env.example
@@ -1,8 +1,5 @@
 # Copy this file to .env and fill in your credentials
 
-# Eval run mode (used by agentv-bench skill)
-AGENT_EVAL_MODE=agent # agent | cli
-
 # Azure OpenAI Configuration
 AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
 AZURE_OPENAI_API_KEY=your-openai-api-key-here

diff --git a/plugins/agentv-dev/skills/agentv-bench/SKILL.md b/plugins/agentv-dev/skills/agentv-bench/SKILL.md
@@ -172,43 +172,22 @@ Put results in a workspace directory organized by iteration (`iteration-1/`, `it
 
 ### Choosing a run mode
 
-**User instruction takes priority.** If the user says "run in agent mode", "use agent mode", or "use CLI mode", use that mode directly — do not check `.env`.
+Default run mode is `agent` unless the user specifies otherwise.
 
-Only read `.env` when the user has not specified a mode:
+| Mode | How |
+|------|-----|
+| **`agent`** (default) | Subagent-driven eval — parses eval.yaml, spawns executor + grader subagents. Zero CLI dependency. See "Agent mode: Running eval.yaml without CLI" below. |
+| **`cli`** | `agentv eval <path>` — end-to-end, multi-provider. Works with all providers. Use when you need multi-provider benchmarking or CLI-specific features. |
 
-```bash
-grep AGENTV_CLI .env 2>/dev/null || echo "AGENTV_CLI=(not set, using global agentv)"
-grep AGENT_EVAL_MODE .env 2>/dev/null || echo "AGENT_EVAL_MODE=agent"
-```
-
-**`AGENTV_CLI` override:** If `AGENTV_CLI` is set in `.env`, use that value as the command prefix in place of `agentv` for every pipeline command. This lets you run from a local source checkout instead of the globally installed binary.
-
-```bash
-# Example .env:
-# AGENTV_CLI=bun D:\GitHub\christso\agentv\apps\cli\src\cli.ts
-
-# With AGENTV_CLI set, replace 'agentv' with its value:
-# PowerShell:
-$cli = (Get-Content .env | Select-String "^AGENTV_CLI=" | ForEach-Object { $_ -replace "^AGENTV_CLI=","" })
-if (-not $cli) { $cli = "agentv" }
-# Then: Invoke-Expression "$cli pipeline run ..."
-
-# Bash/zsh:
-cli=$(grep '^AGENTV_CLI=' .env 2>/dev/null | sed 's/^AGENTV_CLI=//' || echo "agentv")
-```
-
-The Python wrapper scripts (`scripts/run_tests.py`, etc.) pick up `AGENTV_CLI` automatically from `.env` — no extra steps needed when calling them.
-
-| `AGENT_EVAL_MODE` | Mode | How |
-|-------------------|------|-----|
-| `agent` (default) | **Agent mode** | Subagent-driven eval — parses eval.yaml, spawns executor + grader subagents. Zero CLI dependency. |
-| `cli` | **AgentV CLI** | `agentv eval <path>` — end-to-end, multi-provider |
+### CLI resolution
 
-Set `AGENT_EVAL_MODE` in `.env` at the project root as the default when no mode is specified. If absent, default to `agent`. **User instruction always overrides this.**
+The Python wrapper `scripts/agentv_cli.py` resolves the `agentv` command deterministically:
 
-**`agent`** — Parses eval.yaml directly, spawns executor subagents to run each test case in the current workspace, then spawns grader subagents to evaluate all assertion types natively. No CLI or external API calls required. See "Agent mode: Running eval.yaml without CLI" below.
+1. `AGENTV_CLI` environment variable (supports multi-word, e.g. `bun /path/to/cli.ts`)
+2. `AGENTV_CLI` in the nearest `.env` file that defines it (searching upward from cwd)
+3. `agentv` on PATH
 
-**`cli`** — AgentV CLI handles execution, grading, and artifact generation end-to-end. Works with all providers. Use when you need multi-provider benchmarking or CLI-specific features.
+All pipeline scripts (`run_tests.py`, `run_code_graders.py`, `bench.py`) import from `agentv_cli.py` — no manual CLI resolution needed.
 
 ### Running evaluations
 

diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/agentv_cli.py b/plugins/agentv-dev/skills/agentv-bench/scripts/agentv_cli.py
@@ -0,0 +1,66 @@
+"""Resolve and invoke the agentv CLI.
+
+Centralises CLI resolution so individual scripts don't duplicate
+the lookup logic. Also usable as a standalone wrapper:
+
+    uv run agentv_cli.py eval evals/my.eval.yaml --artifacts out/
+
+Resolution order:
+1. AGENTV_CLI environment variable
+2. AGENTV_CLI in the nearest .env file that defines it (searching upward from cwd)
+3. `agentv` on PATH
+"""
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+
+def _find_env_key(key: str) -> str | None:
+    """Return the value of *key* from the closest ``.env`` file that defines it.
+
+    Walks from the current working directory up to the filesystem root and
+    returns the first ``KEY=value`` match. A ``.env`` file that does not
+    define *key* does not stop the search.
+
+    Parsing is intentionally minimal (no dotenv dependency): a quoted value
+    has its quotes removed, and an unquoted value has a trailing
+    `` # comment`` dropped. Files that cannot be read are skipped.
+
+    Returns ``None`` when no ``.env`` file on the way up defines *key*.
+    """
+    prefix = f"{key}="
+    start = Path.cwd()
+    for directory in (start, *start.parents):
+        env_file = directory / ".env"
+        # is_file() instead of exists(): a *directory* named ".env" would
+        # otherwise make read_text() raise.
+        if not env_file.is_file():
+            continue
+        try:
+            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM, which
+            # would otherwise hide a key on the first line. Undecodable bytes
+            # are replaced so one odd line cannot hide the rest of the file.
+            text = env_file.read_text(encoding="utf-8-sig", errors="replace")
+        except OSError:
+            continue
+        for raw_line in text.splitlines():
+            line = raw_line.strip()
+            if not line.startswith(prefix):
+                continue
+            value = line[len(prefix) :].strip()
+            if value[:1] in ("'", '"'):
+                closing = value.find(value[0], 1)
+                if closing != -1:
+                    # Quoted value: keep the content verbatim, ignore the rest.
+                    return value[1:closing]
+            if value.startswith("#"):
+                # "KEY= # note" means the key is set to an empty value.
+                return ""
+            comment_at = value.find(" #")
+            return value if comment_at == -1 else value[:comment_at].rstrip()
+    return None
+
+
+def find_agentv() -> list[str]:
+    """Resolve the agentv CLI command as an argv list.
+
+    Checks the AGENTV_CLI environment variable first, then the value that
+    ``_find_env_key("AGENTV_CLI")`` reads from a .env file, and finally falls
+    back to looking up ``agentv`` on PATH.
+
+    AGENTV_CLI may hold a multi-word command such as 'bun /path/to/cli.ts'
+    for running from source. It is split with shell-like quoting rules, so a
+    path containing spaces can be written as 'bun "/my path/cli.ts"'. On
+    Windows, backslashes are kept literally.
+
+    Returns the command as a list of arguments. If nothing can be resolved,
+    prints a message to stderr and exits the process with status 1.
+    """
+    import shlex  # local import: only this function needs it
+
+    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
+    if cli:
+        windows = os.name == "nt"
+        try:
+            # POSIX mode would treat the backslashes in Windows paths as
+            # escapes, so it is switched off there.
+            parts = shlex.split(cli, posix=not windows)
+        except ValueError:
+            # Unbalanced quote: fall back to plain whitespace splitting.
+            parts = cli.split()
+        if windows:
+            # Non-POSIX shlex leaves the surrounding quotes on each token.
+            parts = [
+                p[1:-1] if len(p) >= 2 and p[0] == p[-1] and p[0] in "\"'" else p
+                for p in parts
+            ]
+        if parts:
+            return parts
+    path = shutil.which("agentv")
+    if not path:
+        print(
+            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    return [path]
+
+
+def main() -> None:
+    """Pass-through wrapper: resolve agentv and forward all arguments.
+
+    Runs the resolved command with this script's own arguments appended and
+    exits with the child's return code. If the command cannot be started at
+    all, prints a message to stderr and exits with 127 when the executable
+    is missing, or 126 for any other OS error, instead of a traceback.
+    """
+    cmd = find_agentv() + sys.argv[1:]
+    try:
+        code = subprocess.call(cmd)
+    except OSError as exc:
+        print(f"agentv CLI could not be started ({cmd[0]}): {exc}", file=sys.stderr)
+        # Shell convention: 127 = command not found, 126 = found but not runnable.
+        sys.exit(127 if isinstance(exc, FileNotFoundError) else 126)
+    sys.exit(code)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py b/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py
@@ -29,50 +29,11 @@
     <export-dir>/<test-id>/grading.json <- merged grading per test
 """
 import argparse
-import os
-import shutil
 import subprocess
 import sys
-from pathlib import Path
 
 
-def _find_env_key(key: str) -> str | None:
-    """Search up from cwd for .env and return a specific key value."""
-    current = Path(os.getcwd())
-    while True:
-        env_file = current / ".env"
-        if env_file.exists():
-            for line in env_file.read_text().splitlines():
-                line = line.strip()
-                if line.startswith(f"{key}="):
-                    return line[len(key) + 1:]
-        parent = current.parent
-        if parent == current:
-            break
-        current = parent
-    return None
-
-
-def _find_agentv() -> list[str]:
-    """Resolve the agentv CLI command.
-
-    Checks AGENTV_CLI env var first (supports multi-word commands like
-    'bun /path/to/cli.ts' for running from source). If not in environment,
-    also searches the nearest .env file. Falls back to PATH lookup.
-    """
-    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
-    if cli:
-        parts = cli.split()
-        if parts:
-            return parts
-    path = shutil.which("agentv")
-    if not path:
-        print(
-            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    return [path]
+from agentv_cli import find_agentv as _find_agentv
 
 
 def main():

diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py b/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py
@@ -19,50 +19,11 @@
     <export-dir>/<test-id>/code_grader_results/<name>.json
 """
 import argparse
-import os
-import shutil
 import subprocess
 import sys
-from pathlib import Path
 
 
-def _find_env_key(key: str) -> str | None:
-    """Search up from cwd for .env and return a specific key value."""
-    current = Path(os.getcwd())
-    while True:
-        env_file = current / ".env"
-        if env_file.exists():
-            for line in env_file.read_text().splitlines():
-                line = line.strip()
-                if line.startswith(f"{key}="):
-                    return line[len(key) + 1:]
-        parent = current.parent
-        if parent == current:
-            break
-        current = parent
-    return None
-
-
-def _find_agentv() -> list[str]:
-    """Resolve the agentv CLI command.
-
-    Checks AGENTV_CLI env var first (supports multi-word commands like
-    'bun /path/to/cli.ts' for running from source). If not in environment,
-    also searches the nearest .env file. Falls back to PATH lookup.
-    """
-    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
-    if cli:
-        parts = cli.split()
-        if parts:
-            return parts
-    path = shutil.which("agentv")
-    if not path:
-        print(
-            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    return [path]
+from agentv_cli import find_agentv as _find_agentv
 
 
 def main():

diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py b/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py
@@ -26,7 +26,6 @@
 import argparse
 import json
 import os
-import shutil
 import subprocess
 import sys
 import tempfile
@@ -36,43 +35,7 @@
 from pathlib import Path
 
 
-def _find_env_key(key: str) -> str | None:
-    """Search up from cwd for .env and return a specific key value."""
-    current = Path(os.getcwd())
-    while True:
-        env_file = current / ".env"
-        if env_file.exists():
-            for line in env_file.read_text().splitlines():
-                line = line.strip()
-                if line.startswith(f"{key}="):
-                    return line[len(key) + 1:]
-        parent = current.parent
-        if parent == current:
-            break
-        current = parent
-    return None
-
-
-def _find_agentv() -> list[str]:
-    """Resolve the agentv CLI command.
-
-    Checks AGENTV_CLI env var first (supports multi-word commands like
-    'bun /path/to/cli.ts' for running from source). If not in environment,
-    also searches the nearest .env file. Falls back to PATH lookup.
-    """
-    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
-    if cli:
-        parts = cli.split()
-        if parts:
-            return parts
-    path = shutil.which("agentv")
-    if not path:
-        print(
-            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    return [path]
+from agentv_cli import find_agentv as _find_agentv
 
 
 def _load_env(env_file: Path) -> dict: