Skip to content

Commit 3319e47

Browse files
committed
add interrupt termination reason
1 parent e26d4b7 commit 3319e47

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

eval_protocol/mcp/execution/manager.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,11 @@ async def _execute_with_semaphore(idx):
178178
TerminationReason.USER_STOP,
179179
}:
180180
evaluation_rows[idx].rollout_status.status = "finished"
181-
elif trajectory.termination_reason == TerminationReason.MAX_STEPS:
181+
elif trajectory.termination_reason in {TerminationReason.MAX_STEPS, TerminationReason.INTERRUPTED}:
182182
evaluation_rows[idx].rollout_status.status = "stopped"
183+
evaluation_rows[idx].rollout_status.error_message = trajectory.control_plane_summary.get(
184+
"termination_reason", trajectory.termination_reason
185+
)
183186
else:
184187
evaluation_rows[idx].rollout_status.status = "error"
185188
evaluation_rows[idx].rollout_status.error_message = trajectory.control_plane_summary.get(
@@ -315,8 +318,7 @@ async def _execute_rollout(
315318
# If there's no user simulator, no tool call means policy failed and we should terminate the rollout
316319
elif tool_calls[0].tool_name in ["_playback_terminate", "_no_tool_call"]:
317320
trajectory.terminated = True
318-
trajectory.termination_reason = TerminationReason.ERROR
319-
trajectory.control_plane_summary.update({"error_message": "No expected tool call"})
321+
trajectory.termination_reason = TerminationReason.INTERRUPTED
320322
break
321323

322324
# Execute each tool call sequentially

eval_protocol/types/types.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@ class TerminationReason(str, Enum):
1111
MAX_STEPS: Trajectory ends because we hit the step limit
1212
CONTROL_PLANE_SIGNAL: Trajectory ends because the control plane signals termination (e.g. env goal reached or failure condition)
1313
USER_STOP: Trajectory ends because the simulated user signals to stop
14+
INTERRUPTED: Trajectory ends unexpectedly, for example when a tool call was expected but none was made
15+
ERROR: Trajectory ends because of an error
1416
"""
1517

1618
MAX_STEPS = "max_steps"
1719
CONTROL_PLANE_SIGNAL = "control_plane_signal"
1820
USER_STOP = "user_stop"
21+
INTERRUPTED = "interrupted"
1922
ERROR = "error"
2023

2124

0 commit comments

Comments
 (0)