QuantaAlpha · hj1650782738 · Apr 8, 2026
diff --git a/src/epochx/cli.py b/src/epochx/cli.py
@@ -652,11 +652,17 @@ def bench_submit_run(
         task_results_list = []
         for task_id, result in task_results_data.items():
             external_id = task_id.split("/", 1)[1] if "/" in task_id else task_id
-            task_results_list.append({
+            item = {
                 "task_id": external_id,
                 "passed": result.get("passed", False),
                 "score": result.get("score", 0.0),
-            })
+            }
+            # Attach trajectory and output if available
+            if result.get("trajectory"):
+                item["trajectory"] = result["trajectory"]
+            if result.get("output"):
+                item["output"] = result["output"][:5000]
+            task_results_list.append(item)
 
         payload = {
             "benchmark_name": stats.benchmark,

diff --git a/src/epochx/core/prompt_generator.py b/src/epochx/core/prompt_generator.py
@@ -91,6 +91,19 @@ def generate_prompt(task: Task, ws_info: WorkspaceInfo) -> str:
         )
     sections.append("")
 
+    # Trajectory
+    sections.append("## Trajectory (optional)\n")
+    sections.append(
+        "Your SSH commands are logged automatically. "
+        "For richer trajectory data (reasoning, tool choices), "
+        "append JSONL to `/.epochx/trajectory.jsonl` inside the container:\n"
+    )
+    sections.append("```json")
+    sections.append('{"step":1,"type":"thought","content":"analyzing the issue..."}')
+    sections.append('{"step":2,"type":"tool_call","tool_name":"grep","tool_input":"grep -r pattern .","tool_output":"..."}')
+    sections.append("```")
+    sections.append("")
+
     # When Done
     sections.append("## When Done\n")
     sections.append(

diff --git a/src/epochx/core/runtime.py b/src/epochx/core/runtime.py
@@ -175,6 +175,30 @@ def setup(self, task_id: str, workspace_spec: WorkspaceSpec) -> WorkspaceInfo:
 echo "{pubkey}" >> /root/.ssh/authorized_keys
 chmod 600 /root/.ssh/authorized_keys
 /usr/sbin/sshd 2>/dev/null || true
+
+# ── Trajectory: auto-log ALL bash commands (interactive + non-interactive) ──
+# Uses DEBUG trap which fires for every command in every bash session,
+# including non-interactive 'ssh host "cmd"' invocations by agents.
+cat > /etc/bash.epochx_log << 'LOGEOF'
+_epochx_trap() {{
+    local cmd="$BASH_COMMAND"
+    case "$cmd" in _epochx_trap*|true|false|"") return;; esac
+    [ -d "/.epochx" ] && printf '{{"ts":"%s","cmd":"%s"}}\\n' \
+        "$(date -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo unknown)" \
+        "$(echo "$cmd" | head -c 2000 | sed 's/\\\\/\\\\\\\\/g; s/"/\\\\"/g' | tr '\\n' ' ')" \
+        >> /.epochx/ssh_log.jsonl 2>/dev/null
+}}
+trap '_epochx_trap' DEBUG
+LOGEOF
+# Inject into ALL bash startup paths so non-interactive SSH also picks it up
+for f in /etc/bash.bashrc /root/.bashrc; do
+    grep -q 'epochx_log' "$f" 2>/dev/null || echo '. /etc/bash.epochx_log' >> "$f" 2>/dev/null
+done
+# Set BASH_ENV in sshd so non-interactive 'ssh host "cmd"' also sources it
+# This is the critical line — without it, non-interactive SSH won't log commands.
+echo 'SetEnv BASH_ENV=/etc/bash.epochx_log' >> /etc/ssh/sshd_config 2>/dev/null || true
+# Restart sshd to pick up the new config
+pkill sshd 2>/dev/null; /usr/sbin/sshd 2>/dev/null || true
 """
         container.exec_run(["bash", "-c", setup_script])
 

diff --git a/src/epochx/exporter.py b/src/epochx/exporter.py
@@ -96,13 +96,18 @@ def get_task_results(self, benchmark: str | None = None) -> list[dict]:
         results = self.state.get_results(benchmark=benchmark)
         out = []
         for task_id, r in sorted(results.items()):
-            out.append({
+            item = {
                 "task_id": task_id,
                 "benchmark": r.get("benchmark", ""),
                 "passed": r.get("passed", False),
                 "score": r.get("score", 0.0),
                 "details": r.get("details", {}),
-            })
+            }
+            if r.get("trajectory"):
+                item["trajectory"] = r["trajectory"]
+            if r.get("output"):
+                item["output"] = r["output"]
+            out.append(item)
         return out
 
 

diff --git a/src/epochx/runner.py b/src/epochx/runner.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import json
+import subprocess
 from dataclasses import asdict
 from datetime import datetime, timezone
 from pathlib import Path
@@ -154,22 +155,33 @@ def collect_task(self, task_id: str) -> dict:
         output = adapter.collect_output(env.workspace, task, env=env)
 
         # Write output to .epochx/output.txt
-        output_path = Path(env.workspace) / ".epochx" / "output.txt"
-        output_path.parent.mkdir(parents=True, exist_ok=True)
+        epochx_dir = Path(env.workspace) / ".epochx"
+        epochx_dir.mkdir(parents=True, exist_ok=True)
+        output_path = epochx_dir / "output.txt"
         output_path.write_text(output)
 
+        # Collect trajectory
+        trajectory = self._collect_trajectory(env)
+        if trajectory:
+            (epochx_dir / "trajectory_collected.json").write_text(
+                json.dumps(trajectory, ensure_ascii=False, indent=2)
+            )
+
         # Update status
         self.state.update_status(task_id, TaskStatus.COLLECTING.value)
 
         truncated = output[:500] + ("..." if len(output) > 500 else "")
-        return {
+        result = {
             "status": "collected",
             "task_id": task_id,
             "output_type": task.output_spec.type.value,
             "content": truncated,
             "saved_to": str(output_path),
             "next_command": f"epochx-bench grade {task_id}",
         }
+        if trajectory:
+            result["trajectory_steps"] = len(trajectory)
+        return result
 
     # ------------------------------------------------------------------
     # grade_task
@@ -202,6 +214,20 @@ def grade_task(self, task_id: str) -> dict:
         # Save result to task's own .epochx/result.json
         result_dict = asdict(result)
         result_dict["benchmark"] = env.benchmark
+
+        # Attach trajectory if collected
+        traj_path = Path(env.workspace) / ".epochx" / "trajectory_collected.json"
+        if traj_path.exists():
+            try:
+                result_dict["trajectory"] = json.loads(traj_path.read_text())
+            except Exception:
+                pass
+
+        # Attach output
+        output_path_for_result = Path(env.workspace) / ".epochx" / "output.txt"
+        if output_path_for_result.exists():
+            result_dict["output"] = output_path_for_result.read_text()
+
         result_path = Path(env.workspace) / ".epochx" / "result.json"
         result_path.write_text(json.dumps(result_dict, indent=2))
 
@@ -290,6 +316,93 @@ def get_next_task(self, benchmark_name: str) -> dict:
             "start_command": f"epochx-bench run {benchmark_name} --task {next_task.external_id}",
         }
 
+    def _collect_trajectory(self, env: EnvironmentState) -> list[dict]:
+        """Collect trajectory steps from the workspace, best-effort.
+
+        Sources, in order:
+        1. ``.epochx/trajectory.jsonl`` -- agent-written steps, kept verbatim
+        2. ``.epochx/ssh_log.jsonl``    -- auto-logged commands, appended after 1
+        3. ``git log`` over SSH         -- fallback, only when 1 and 2 are empty
+
+        Unreadable files, malformed or non-object JSONL lines, and SSH/git
+        failures are skipped rather than raised.
+        """
+        import shlex  # local import: only needed to quote the remote workdir
+
+        def read_entries(path: Path) -> list[dict]:
+            # Parse a JSONL file, keeping only lines that decode to JSON objects.
+            try:
+                text = path.read_text(encoding="utf-8", errors="replace")
+            except OSError:
+                return []
+            entries = []
+            for raw in text.splitlines():
+                try:
+                    entry = json.loads(raw)
+                except json.JSONDecodeError:
+                    continue  # blank or malformed line
+                if isinstance(entry, dict):
+                    entries.append(entry)
+            return entries
+
+        epochx_dir = Path(env.workspace) / ".epochx"
+        trajectory = read_entries(epochx_dir / "trajectory.jsonl")  # source 1
+
+        # Source 2: shell commands recorded in ssh_log.jsonl.
+        for entry in read_entries(epochx_dir / "ssh_log.jsonl"):
+            trajectory.append({
+                "step": len(trajectory) + 1,
+                "type": "tool_call",
+                "tool_name": "shell",
+                "tool_input": entry.get("cmd", ""),
+                # "output" may be absent, null or non-string; never crash on it.
+                "tool_output": str(entry.get("output") or "")[:500],
+                "duration_ms": entry.get("ms"),
+                "content": f"[auto-logged] {entry.get('ts', '')}",
+                "source": "ssh_log",
+            })
+
+        # Source 3: git history fallback, only when nothing else was recorded.
+        if trajectory or not env.ssh_host:
+            return trajectory
+        workdir = shlex.quote(env.container_workdir or "/testbed")
+
+        def remote_git(args: str) -> str:
+            # Run one git command in the container workdir; return its stdout.
+            proc = subprocess.run(
+                ["ssh", env.ssh_host, f"cd {workdir} && git {args} 2>/dev/null"],
+                capture_output=True, text=True, errors="replace", timeout=15,
+            )
+            return proc.stdout.strip()
+
+        try:
+            # Subject goes last so a '|' inside it cannot break the parsing.
+            log = remote_git("log -n 20 --reverse --format='%H|%ai|%s'")
+            for line in log.splitlines():
+                commit, _, rest = line.partition("|")
+                _date, sep, subject = rest.partition("|")
+                if not sep:
+                    continue
+                trajectory.append({
+                    "step": len(trajectory) + 1,
+                    "type": "action",
+                    "tool_name": "git_commit",
+                    "content": subject,
+                    "tool_input": commit[:12],
+                    "source": "git_log",
+                })
+            stat = remote_git("diff --stat HEAD~1 HEAD") if trajectory else ""
+            if stat:
+                trajectory.append({
+                    "step": len(trajectory) + 1,
+                    "type": "observation",
+                    "content": f"Changes: {stat}",
+                    "source": "git_log",
+                })
+        except (OSError, ValueError, subprocess.SubprocessError):
+            pass  # best-effort: ssh missing, host unreachable, or timed out
+        return trajectory
+
     @staticmethod
     def _to_external_id(task_id: str, benchmark: str) -> str:
         """Strip benchmark prefix from full task ID to get external_id.