Merged
6 changes: 3 additions & 3 deletions apps/web/src/content/docs/docs/evaluation/running-evals.mdx
@@ -107,13 +107,13 @@ result-oriented workflows. For full-fidelity span inspection, export OTLP JSON e

```bash
# Summary-level inspection from the run manifest
-agentv trace stats .agentv/results/runs/<timestamp>/index.jsonl
+agentv inspect stats .agentv/results/runs/<timestamp>/index.jsonl

# Full-fidelity OTLP JSON trace (importable by OTel backends like Jaeger, Grafana)
agentv eval evals/my-eval.yaml --otel-file traces/eval.otlp.json

-# Inspect the OTLP trace export
-agentv trace show traces/eval.otlp.json --tree
+# Inspect the OTLP export
+agentv inspect show traces/eval.otlp.json --tree
```

`index.jsonl` contains aggregate metrics such as score, latency, cost, token usage, and summary
2 changes: 1 addition & 1 deletion apps/web/src/content/docs/docs/guides/human-review.mdx
@@ -38,7 +38,7 @@ For workspace evaluations (EVAL.yaml), use the trace viewer:

```bash
# View traces from a specific run
-agentv trace show results/2026-03-14T10-32-00_claude/index.jsonl
+agentv inspect show results/2026-03-14T10-32-00_claude/index.jsonl

# View the HTML report (if generated via #562)
open results/2026-03-14T10-32-00_claude/report.html
@@ -156,7 +156,7 @@ Look for:
Use trace inspection to understand why specific cases failed:

```bash
-agentv trace show <trace-id>
+agentv inspect show <trace-id>
```

When reviewing failures, categorize them:
@@ -1,11 +1,11 @@
---
-title: Trace
-description: Inspect and analyze evaluation traces from the CLI
+title: Inspect
+description: Inspect and analyze evaluation results from the CLI
sidebar:
order: 5
---

-The `trace` command provides headless trace inspection and analysis — no server or dashboard needed.
+The `inspect` command provides headless trace inspection and analysis — no server or dashboard needed.

Supported sources:

@@ -17,22 +17,22 @@ For full tool-call inspection, prefer OTLP JSON exports over eval manifests.

## Subcommands

-### `trace list`
+### `inspect list`

Enumerate canonical evaluation run workspaces from `.agentv/results/runs/`.

```bash
-agentv trace list [--limit N] [--format json|table]
+agentv inspect list [--limit N] [--format json|table]
```

Shows filename, test count, pass rate, average score, file size, and timestamp for each run workspace.
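
Since the command supports `--format json`, run workspaces can also be filtered with `jq`. A minimal offline sketch — the records below are made-up stand-ins for `agentv inspect list --format json` output, and the `pass_rate`/`file` field names are assumptions for illustration, not confirmed by this page:

```shell
# Hypothetical records standing in for `agentv inspect list --format json` output;
# field names (file, test_count, pass_rate) are assumed for illustration.
runs='[{"file":"run-a/index.jsonl","test_count":12,"pass_rate":1.0},
       {"file":"run-b/index.jsonl","test_count":12,"pass_rate":0.75}]'

# Keep only run workspaces that contain failing tests
echo "$runs" | jq '[.[] | select(.pass_rate < 1) | .file]'
```

With the sample data above, only `run-b/index.jsonl` survives the filter.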

-### `trace show`
+### `inspect show`

Display evaluation results with trace details.

```bash
-agentv trace show <trace-source> [--test-id <id>] [--tree] [--format json|table]
+agentv inspect show <trace-source> [--test-id <id>] [--tree] [--format json|table]
```

| Option | Description |
@@ -58,12 +58,12 @@ Scores: response_quality 75% | routing_accuracy 100%

Falls back to a flat summary when output messages are not present in the run workspace.

-### `trace stats`
+### `inspect stats`

Compute summary statistics (percentiles) across evaluation results.

```bash
-agentv trace stats <trace-source> [--group-by target|suite|test-id] [--format json|table]
+agentv inspect stats <trace-source> [--group-by target|suite|test-id] [--format json|table]
```

| Option | Description |
@@ -90,11 +90,11 @@ All commands support `--format json` for piping to `jq`:

```bash
# Find tests costing more than $0.10
-agentv trace show trace.otlp.json --format json \
+agentv inspect show trace.otlp.json --format json \
| jq '[.[] | select(.cost_usd > 0.10) | {test_id, score, cost: .cost_usd}]'

# Compare providers
-agentv trace stats .agentv/results/runs/<timestamp>/index.jsonl --group-by target --format json \
+agentv inspect stats .agentv/results/runs/<timestamp>/index.jsonl --group-by target --format json \
| jq '.groups[] | {label, score_mean: .metrics.score.mean}'
```
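
The same `--format json` output feeds any ad-hoc `jq` filter. A minimal offline sketch — the records below are made-up stand-ins rather than real `agentv` output; the `test_id` and `score` fields mirror the recipes above:

```shell
# Hypothetical per-test records standing in for
# `agentv inspect show <source> --format json` output.
results='[{"test_id":"t1","score":0.95},{"test_id":"t2","score":0.55},{"test_id":"t3","score":0.80}]'

# Count tests scoring at least 0.8
echo "$results" | jq 'map(select(.score >= 0.8)) | length'
```

With the sample data above, the filter counts two passing tests.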

2 changes: 1 addition & 1 deletion plugins/agentv-dev/skills/agentv-bench/SKILL.md
@@ -263,7 +263,7 @@ Read the JSONL results and look for:

Use CLI tools for deeper investigation:
```bash
-agentv trace <results-file>          # Detailed execution trace inspection
+agentv inspect <results-file>        # Detailed execution trace inspection
agentv compare <file-a> <file-b> # Structured diff between runs
```

2 changes: 1 addition & 1 deletion plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md
@@ -681,7 +681,7 @@ After running evals, perform a human review before iterating. Create `feedback.j

Use `evaluator_overrides` for workspace evaluations to annotate specific grader results (e.g., "code-grader was too strict"). Use `workspace_notes` for observations about workspace state.

-Review workflow: run evals → inspect results (`agentv trace show`) → write feedback → tune prompts/graders → re-run.
+Review workflow: run evals → inspect results (`agentv inspect show`) → write feedback → tune prompts/graders → re-run.

Full guide: https://agentv.dev/guides/human-review/

32 changes: 16 additions & 16 deletions plugins/agentv-dev/skills/agentv-trace-analyst/SKILL.md
@@ -1,7 +1,7 @@
---
name: agentv-trace-analyst
description: >-
-  Analyze AgentV evaluation traces and result JSONL files using `agentv trace` and `agentv compare` CLI commands.
+  Analyze AgentV evaluation traces and result JSONL files using `agentv inspect` and `agentv compare` CLI commands.
Use when asked to inspect AgentV eval results, find regressions between AgentV evaluation runs,
identify failure patterns in AgentV trace data, analyze tool trajectories, or compute cost/latency/score statistics
from AgentV result files.
@@ -11,19 +11,19 @@ description: >-

# AgentV Trace Analyst

-Analyze evaluation traces headlessly using `agentv trace` primitives and `jq`.
+Analyze evaluation traces headlessly using `agentv inspect` primitives and `jq`.

## Primitives

```bash
# List result files (most recent first)
-agentv trace list [--limit N] [--format json|table]
+agentv inspect list [--limit N] [--format json|table]

# Show results with trace details
-agentv trace show <result-file> [--test-id <id>] [--tree] [--format json|table]
+agentv inspect show <result-file> [--test-id <id>] [--tree] [--format json|table]

# Percentile statistics
-agentv trace stats <result-file> [--group-by target|suite|test-id] [--format json|table]
+agentv inspect stats <result-file> [--group-by target|suite|test-id] [--format json|table]

# A/B comparison between runs
agentv compare <baseline.jsonl> <candidate.jsonl> [--threshold 0.1] [--format json|table]
@@ -34,15 +34,15 @@ agentv compare <baseline.jsonl> <candidate.jsonl> [--threshold 0.1] [--format js
### 1. Discover results

```bash
-agentv trace list
+agentv inspect list
```

Pick the result file to analyze. Most recent is first.

### 2. Get overview

```bash
-agentv trace stats <result-file>
+agentv inspect stats <result-file>
```

Read the percentile table. Key signals:
@@ -54,7 +54,7 @@ Read the percentile table. Key signals:
### 3. Investigate failures

```bash
-agentv trace show <result-file> --format json | jq '[.[] | select(.score < 0.8) | {test_id, score, assertions: [.assertions[] | select(.passed | not)], trace: {tools: (.trace.tool_calls | keys)}, duration_ms, cost_usd}]'
+agentv inspect show <result-file> --format json | jq '[.[] | select(.score < 0.8) | {test_id, score, assertions: [.assertions[] | select(.passed | not)], trace: {tools: (.trace.tool_calls | keys)}, duration_ms, cost_usd}]'
```

For each failing test, examine:
@@ -67,10 +67,10 @@ For each failing test, examine:

```bash
# Flat view with trace summary
-agentv trace show <result-file> --test-id <id>
+agentv inspect show <result-file> --test-id <id>

# Tree view (if output messages available)
-agentv trace show <result-file> --test-id <id> --tree
+agentv inspect show <result-file> --test-id <id> --tree
```

The tree view shows the agent's execution path — LLM calls interspersed with tool invocations. Look for:
@@ -93,10 +93,10 @@ Look for:

```bash
# By target provider
-agentv trace stats <result-file> --group-by target
+agentv inspect stats <result-file> --group-by target

# By suite
-agentv trace stats <result-file> --group-by suite
+agentv inspect stats <result-file> --group-by suite
```

Compare providers side-by-side: which is cheaper, faster, more accurate?
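
That side-by-side comparison can be sketched by piping the grouped stats through `jq`. The `.groups[].metrics.score.mean` path follows the shape used in the jq recipes elsewhere in this PR; the payload below is a hypothetical stand-in for `agentv inspect stats <result-file> --group-by target --format json` output, not real CLI output:

```shell
# Hypothetical grouped-stats payload; labels and values are made up.
stats='{"groups":[
  {"label":"provider-a","metrics":{"score":{"mean":0.91}}},
  {"label":"provider-b","metrics":{"score":{"mean":0.84}}}]}'

# Rank targets by mean score, best first
echo "$stats" | jq '.groups | sort_by(-.metrics.score.mean)
                    | .[] | {label, score_mean: .metrics.score.mean}'
```

With the sample data above, `provider-a` ranks first on mean score.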
@@ -107,19 +107,19 @@ All commands support `--format json` for piping to `jq`:

```bash
# Top 3 most expensive tests
-agentv trace show <result-file> --format json \
+agentv inspect show <result-file> --format json \
| jq 'sort_by(-.cost_usd) | .[0:3] | .[] | {test_id, cost: .cost_usd, score}'

# Tests where token usage exceeds 10k
-agentv trace show <result-file> --format json \
+agentv inspect show <result-file> --format json \
| jq '[.[] | select(.token_usage.input + .token_usage.output > 10000) | {test_id, tokens: (.token_usage.input + .token_usage.output)}]'

# Score distribution by suite
-agentv trace show <result-file> --format json \
+agentv inspect show <result-file> --format json \
| jq 'group_by(.suite) | .[] | {suite: .[0].suite, count: length, avg_score: ([.[].score] | add / length)}'

# Tool usage frequency across all tests
-agentv trace show <result-file> --format json \
+agentv inspect show <result-file> --format json \
| jq '[.[].trace.tool_calls // {} | to_entries[]] | group_by(.key) | .[] | {tool: .[0].key, total_calls: ([.[].value] | add)}'

# Find regressions > 0.1 between two runs