From a2e2e8c912bda7bf5971baf2b721149a5ef92a66 Mon Sep 17 00:00:00 2001
From: Christopher <christso@gmail.com>
Date: Sat, 28 Mar 2026 03:00:52 +0000
Subject: [PATCH] fix: extract CLI resolution into agentv_cli.py wrapper,
 remove AGENT_EVAL_MODE from .env
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

AGENT_EVAL_MODE was a workflow preference stored alongside API keys in
.env — wrong place for non-secret config. Eval mode is now determined
by user instruction, defaulting to agent mode.

Also deduplicated _find_agentv() / _find_env_key() from three scripts
into a shared agentv_cli.py module that handles CLI resolution
deterministically.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .env.example                                  |  3 -
 apps/cli/src/templates/.env.example           |  3 -
 .../agentv-dev/skills/agentv-bench/SKILL.md   | 43 ++++--------
 .../skills/agentv-bench/scripts/agentv_cli.py | 66 +++++++++++++++++++
 .../skills/agentv-bench/scripts/bench.py      | 41 +-----------
 .../agentv-bench/scripts/run_code_graders.py  | 41 +-----------
 .../skills/agentv-bench/scripts/run_tests.py  | 39 +----------
 7 files changed, 80 insertions(+), 156 deletions(-)
 create mode 100644 plugins/agentv-dev/skills/agentv-bench/scripts/agentv_cli.py
diff --git a/.env.example b/.env.example
index ad782714b..91e448936 100644
--- a/.env.example
+++ b/.env.example
@@ -1,8 +1,5 @@
 # Copy this file to .env and fill in your credentials
 
-# Eval run mode (used by agentv-bench skill)
-AGENT_EVAL_MODE=agent # agent | cli
-
 # Azure OpenAI Configuration
 AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
 AZURE_OPENAI_API_KEY=your-openai-api-key-here
diff --git a/apps/cli/src/templates/.env.example b/apps/cli/src/templates/.env.example
index 3a6a1f566..1f8e22057 100644
--- a/apps/cli/src/templates/.env.example
+++ b/apps/cli/src/templates/.env.example
@@ -1,8 +1,5 @@
 # Copy this file to .env and fill in your credentials
 
-# Eval run mode (used by agentv-bench skill)
-AGENT_EVAL_MODE=agent # agent | cli
-
 # Azure OpenAI Configuration
 AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
 AZURE_OPENAI_API_KEY=your-openai-api-key-here
diff --git a/plugins/agentv-dev/skills/agentv-bench/SKILL.md b/plugins/agentv-dev/skills/agentv-bench/SKILL.md
index 29cf83171..89b4cf659 100644
--- a/plugins/agentv-dev/skills/agentv-bench/SKILL.md
+++ b/plugins/agentv-dev/skills/agentv-bench/SKILL.md
@@ -172,43 +172,22 @@ Put results in a workspace directory organized by iteration (`iteration-1/`, `it
 
 ### Choosing a run mode
 
-**User instruction takes priority.** If the user says "run in agent mode", "use agent mode", or "use CLI mode", use that mode directly — do not check `.env`.
+Default run mode is `agent` unless the user specifies otherwise.
 
-Only read `.env` when the user has not specified a mode:
+| Mode | How |
+|------|-----|
+| **`agent`** (default) | Subagent-driven eval — parses eval.yaml, spawns executor + grader subagents. Zero CLI dependency. See "Agent mode: Running eval.yaml without CLI" below. |
+| **`cli`** | `agentv eval <path>` — end-to-end, multi-provider. Works with all providers. Use when you need multi-provider benchmarking or CLI-specific features. |
 
-```bash
-grep AGENTV_CLI .env 2>/dev/null || echo "AGENTV_CLI=(not set, using global agentv)"
-grep AGENT_EVAL_MODE .env 2>/dev/null || echo "AGENT_EVAL_MODE=agent"
-```
-
-**`AGENTV_CLI` override:** If `AGENTV_CLI` is set in `.env`, use that value as the command prefix in place of `agentv` for every pipeline command. This lets you run from a local source checkout instead of the globally installed binary.
-
-```bash
-# Example .env:
-# AGENTV_CLI=bun D:\GitHub\christso\agentv\apps\cli\src\cli.ts
-
-# With AGENTV_CLI set, replace 'agentv' with its value:
-# PowerShell:
-$cli = (Get-Content .env | Select-String "^AGENTV_CLI=" | ForEach-Object { $_ -replace "^AGENTV_CLI=","" })
-if (-not $cli) { $cli = "agentv" }
-# Then: Invoke-Expression "$cli pipeline run ..."
-
-# Bash/zsh:
-cli=$(grep '^AGENTV_CLI=' .env 2>/dev/null | sed 's/^AGENTV_CLI=//' || echo "agentv")
-```
-
-The Python wrapper scripts (`scripts/run_tests.py`, etc.) pick up `AGENTV_CLI` automatically from `.env` — no extra steps needed when calling them.
-
-| `AGENT_EVAL_MODE` | Mode | How |
-|-------------------|------|-----|
-| `agent` (default) | **Agent mode** | Subagent-driven eval — parses eval.yaml, spawns executor + grader subagents. Zero CLI dependency. |
-| `cli` | **AgentV CLI** | `agentv eval <path>` — end-to-end, multi-provider |
+### CLI resolution
 
-Set `AGENT_EVAL_MODE` in `.env` at the project root as the default when no mode is specified. If absent, default to `agent`. **User instruction always overrides this.**
+The Python wrapper `scripts/agentv_cli.py` resolves the `agentv` command deterministically:
 
-**`agent`** — Parses eval.yaml directly, spawns executor subagents to run each test case in the current workspace, then spawns grader subagents to evaluate all assertion types natively. No CLI or external API calls required. See "Agent mode: Running eval.yaml without CLI" below.
+1. `AGENTV_CLI` environment variable (supports multi-word, e.g. `bun /path/to/cli.ts`)
+2. `AGENTV_CLI` in the nearest `.env` file that defines it (searching upward from cwd)
+3. `agentv` on PATH
 
-**`cli`** — AgentV CLI handles execution, grading, and artifact generation end-to-end. Works with all providers. Use when you need multi-provider benchmarking or CLI-specific features.
+All pipeline scripts (`run_tests.py`, `run_code_graders.py`, `bench.py`) import from `agentv_cli.py` — no manual CLI resolution needed.
 
 ### Running evaluations
 
diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/agentv_cli.py b/plugins/agentv-dev/skills/agentv-bench/scripts/agentv_cli.py
new file mode 100644
index 000000000..3e8fe40af
--- /dev/null
+++ b/plugins/agentv-dev/skills/agentv-bench/scripts/agentv_cli.py
@@ -0,0 +1,66 @@
+"""Resolve and invoke the agentv CLI.
+
+Centralises CLI resolution so individual scripts don't duplicate
+the lookup logic. Also usable as a standalone wrapper:
+
+    uv run agentv_cli.py eval evals/my.eval.yaml --artifacts out/
+
+Resolution order:
+1. AGENTV_CLI environment variable
+2. AGENTV_CLI in the nearest .env file that defines it (searching upward from cwd)
+3. `agentv` on PATH
+"""
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+
+def _find_env_key(key: str) -> str | None:
+    """Walk up from cwd and return the first `key=` value found in a .env file, or None."""
+    current = Path(os.getcwd())
+    while True:
+        env_file = current / ".env"
+        if env_file.exists():
+            for line in env_file.read_text().splitlines():
+                line = line.strip()
+                if line.startswith(f"{key}="):
+                    return line[len(key) + 1 :]  # rest of the line as-is (quotes are kept)
+        parent = current.parent
+        if parent == current:  # reached the filesystem root
+            break
+        current = parent
+    return None
+
+
+def find_agentv() -> list[str]:
+    """Resolve the agentv CLI command as an argv list.
+
+    AGENTV_CLI (environment first, then a .env found by searching upward
+    from cwd) is split on whitespace, so 'bun /path/to/cli.ts' works but a
+    path containing spaces does not. Otherwise `agentv` on PATH; else exit 1.
+    """
+    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
+    if cli:
+        parts = cli.split()
+        if parts:  # a whitespace-only value falls through to the PATH lookup
+            return parts
+    path = shutil.which("agentv")
+    if not path:
+        print(
+            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    return [path]
+
+
+def main() -> None:
+    """Forward sys.argv[1:] to the resolved agentv command and exit with its return code."""
+    cmd = find_agentv() + sys.argv[1:]
+    sys.exit(subprocess.call(cmd))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py b/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py
index 51c3d581a..d7616bb13 100644
--- a/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py
+++ b/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py
@@ -29,50 +29,11 @@
     <export-dir>/<test-id>/grading.json <- merged grading per test
 """
 import argparse
-import os
-import shutil
 import subprocess
 import sys
-from pathlib import Path
 
 
-def _find_env_key(key: str) -> str | None:
-    """Search up from cwd for .env and return a specific key value."""
-    current = Path(os.getcwd())
-    while True:
-        env_file = current / ".env"
-        if env_file.exists():
-            for line in env_file.read_text().splitlines():
-                line = line.strip()
-                if line.startswith(f"{key}="):
-                    return line[len(key) + 1:]
-        parent = current.parent
-        if parent == current:
-            break
-        current = parent
-    return None
-
-
-def _find_agentv() -> list[str]:
-    """Resolve the agentv CLI command.
-
-    Checks AGENTV_CLI env var first (supports multi-word commands like
-    'bun /path/to/cli.ts' for running from source). If not in environment,
-    also searches the nearest .env file. Falls back to PATH lookup.
-    """
-    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
-    if cli:
-        parts = cli.split()
-        if parts:
-            return parts
-    path = shutil.which("agentv")
-    if not path:
-        print(
-            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    return [path]
+from agentv_cli import find_agentv as _find_agentv
 
 
 def main():
diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py b/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py
index 5bc8e3972..45280494c 100644
--- a/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py
+++ b/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py
@@ -19,50 +19,11 @@
     <export-dir>/<test-id>/code_grader_results/<name>.json
 """
 import argparse
-import os
-import shutil
 import subprocess
 import sys
-from pathlib import Path
 
 
-def _find_env_key(key: str) -> str | None:
-    """Search up from cwd for .env and return a specific key value."""
-    current = Path(os.getcwd())
-    while True:
-        env_file = current / ".env"
-        if env_file.exists():
-            for line in env_file.read_text().splitlines():
-                line = line.strip()
-                if line.startswith(f"{key}="):
-                    return line[len(key) + 1:]
-        parent = current.parent
-        if parent == current:
-            break
-        current = parent
-    return None
-
-
-def _find_agentv() -> list[str]:
-    """Resolve the agentv CLI command.
-
-    Checks AGENTV_CLI env var first (supports multi-word commands like
-    'bun /path/to/cli.ts' for running from source). If not in environment,
-    also searches the nearest .env file. Falls back to PATH lookup.
-    """
-    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
-    if cli:
-        parts = cli.split()
-        if parts:
-            return parts
-    path = shutil.which("agentv")
-    if not path:
-        print(
-            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    return [path]
+from agentv_cli import find_agentv as _find_agentv
 
 
 def main():
diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py b/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py
index 62313812c..871cd86b1 100644
--- a/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py
+++ b/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py
@@ -26,7 +26,6 @@
 import argparse
 import json
 import os
-import shutil
 import subprocess
 import sys
 import tempfile
@@ -36,43 +35,7 @@
 from pathlib import Path
 
 
-def _find_env_key(key: str) -> str | None:
-    """Search up from cwd for .env and return a specific key value."""
-    current = Path(os.getcwd())
-    while True:
-        env_file = current / ".env"
-        if env_file.exists():
-            for line in env_file.read_text().splitlines():
-                line = line.strip()
-                if line.startswith(f"{key}="):
-                    return line[len(key) + 1:]
-        parent = current.parent
-        if parent == current:
-            break
-        current = parent
-    return None
-
-
-def _find_agentv() -> list[str]:
-    """Resolve the agentv CLI command.
-
-    Checks AGENTV_CLI env var first (supports multi-word commands like
-    'bun /path/to/cli.ts' for running from source). If not in environment,
-    also searches the nearest .env file. Falls back to PATH lookup.
-    """
-    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
-    if cli:
-        parts = cli.split()
-        if parts:
-            return parts
-    path = shutil.which("agentv")
-    if not path:
-        print(
-            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    return [path]
+from agentv_cli import find_agentv as _find_agentv
 
 
 def _load_env(env_file: Path) -> dict: