acailic · acailic · Jun 5, 2026 · Jun 5, 2026 · acailic · Jun 5, 2026
diff --git a/agent_debugger_sdk/core/context/trace_context.py b/agent_debugger_sdk/core/context/trace_context.py
@@ -188,6 +188,8 @@ async def restore(
         ctx._restored_state = restored_state
         ctx.replayed_events: list[dict[str, Any]] = []
         ctx._drift_detector = None
+        ctx._drift_events: list[Any] = []
+        ctx._drift_compare_index = 0
         ctx._hook_errors: list[Exception] = []
         ctx._restored_target: Any = None
 

diff --git a/agent_debugger_sdk/core/recorders.py b/agent_debugger_sdk/core/recorders.py
@@ -98,8 +98,9 @@ async def record_decision(
         self,
         reasoning: str,
         confidence: float,
-        evidence: list[dict[str, Any]],
         chosen_action: str,
+        *,
+        evidence: list[dict[str, Any]] | None = None,
         evidence_event_ids: list[str] | None = None,
         upstream_event_ids: list[str] | None = None,
         alternatives: list[dict[str, Any]] | None = None,
@@ -114,14 +115,39 @@ async def record_decision(
             name=name,
             reasoning=reasoning,
             confidence=max(0.0, min(1.0, confidence)),
-            evidence=evidence,
+            evidence=evidence or [],
             evidence_event_ids=evidence_event_ids or [],
             alternatives=alternatives or [],
             chosen_action=chosen_action,
             importance=0.7,
             upstream_event_ids=upstream_event_ids or [],
         )
         await self._emit_event(event)
+
+        # Detect drift against the original execution if a detector is active
+        drift_detector = getattr(self, "_drift_detector", None)
+        if drift_detector is not None:
+            drift_index = getattr(self, "_drift_compare_index", 0)
+            event_dict = {
+                "event_type": "decision",
+                "data": {
+                    "chosen_action": chosen_action,
+                    "action": chosen_action,
+                    "confidence": event.confidence,
+                },
+            }
+            drift = drift_detector.compare(event_dict, drift_index)
+            # Advance to the next decision event in the baseline, skipping non-decision events
+            next_index = drift_index + 1
+            original_events = getattr(drift_detector, "original_events", [])
+            while next_index < len(original_events) and original_events[next_index].get("event_type") != "decision":
+                next_index += 1
+            self._drift_compare_index = next_index
+            if drift is not None:
+                drift_events_list = getattr(self, "_drift_events", None)
+                if drift_events_list is not None:
+                    drift_events_list.append(drift)
+
         return event.id
 
     async def record_tool_call(

diff --git a/tests/test_replay_depth_l3.py b/tests/test_replay_depth_l3.py
@@ -758,11 +758,6 @@ async def test_drift_detected_during_replay_emits_event(self):
         try:
             from agent_debugger_sdk import TraceContext
 
-            emitted_events = []
-
-            async def capture_event(event):
-                emitted_events.append(event)
-
             mock_checkpoint_data = {
                 "id": "cp-drift-emit",
                 "session_id": "sess-original",
@@ -774,9 +769,17 @@ async def capture_event(event):
                 "importance": 0.5,
             }
 
-            # Original events show different action than what will be replayed
+            # Original events show different action than what will be replayed.
+            # Timestamp must be after the checkpoint timestamp so the event passes
+            # the post-checkpoint filter in TraceContext.restore.
             mock_events = [
-                {"id": "evt-2", "sequence": 2, "event_type": "decision", "data": {"chosen_action": "tool_a"}},
+                {
+                    "id": "evt-2",
+                    "sequence": 2,
+                    "event_type": "decision",
+                    "timestamp": "2026-03-24T13:00:00Z",
+                    "data": {"chosen_action": "tool_a"},
+                },
             ]
 
             with patch("httpx.AsyncClient.get", new_callable=AsyncMock) as mock_get:
@@ -785,8 +788,8 @@ def side_effect(url, *args, **kwargs):
                     mock_response = MagicMock()
                     if "checkpoints" in url:
                         mock_response.json.return_value = mock_checkpoint_data
-                    elif "events" in url:
-                        mock_response.json.return_value = {"events": mock_events}
+                    elif "traces" in url:
+                        mock_response.json.return_value = {"traces": mock_events}
                     mock_response.raise_for_status = MagicMock()
                     return mock_response
 
@@ -805,9 +808,8 @@ def side_effect(url, *args, **kwargs):
                         chosen_action="tool_b",  # Different from original "tool_a"
                     )
 
-                    # Drift event should have been emitted
-                    drift_events = [e for e in emitted_events if getattr(e, "event_type", None) == "drift"]
-                    assert len(drift_events) > 0
+                    # Drift events are collected in ctx._drift_events by record_decision
+                    assert len(ctx._drift_events) > 0
         except (TypeError, ImportError, AttributeError) as e:
             pytest.skip(f"Drift event emission not yet implemented: {e}")