Fix tests: default InputMetadata.completion_params to an empty dict and populate it in logs-server tests

Dylan Huang · Dylan Huang · commit 4f8c8b8af62c · 2025-08-13T13:13:22.000-07:00
diff --git a/eval_protocol/models.py b/eval_protocol/models.py
@@ -198,7 +198,9 @@ class InputMetadata(BaseModel):
     model_config = ConfigDict(extra="allow")
 
     row_id: Optional[str] = Field(default_factory=generate_id, description="Unique string to ID the row")
-    completion_params: CompletionParams = Field(..., description="Completion endpoint parameters used")
+    completion_params: CompletionParams = Field(
+        default_factory=dict, description="Completion endpoint parameters used"
+    )
     dataset_info: Optional[Dict[str, Any]] = Field(
         None, description="Dataset row details: seed, system_prompt, environment_context, etc"
     )
diff --git a/tests/test_logs_server.py b/tests/test_logs_server.py
@@ -29,6 +29,8 @@
 class TestWebSocketManager:
     """Test WebSocketManager class."""
 
+    input_metadata = InputMetadata(row_id="test-123", completion_params={"model": "gpt-4o"})
+
     def test_initialization(self):
         """Test WebSocketManager initialization."""
         manager = WebSocketManager()
@@ -64,7 +66,7 @@ async def test_connect_sends_initial_logs(self):
         mock_logs = [
             EvaluationRow(
                 messages=[Message(role="user", content="test")],
-                input_metadata=InputMetadata(row_id="test-123"),
+                input_metadata=self.input_metadata,
             )
         ]
 
@@ -82,7 +84,7 @@ def test_broadcast_row_upserted(self):
         manager = WebSocketManager()
         test_row = EvaluationRow(
             messages=[Message(role="user", content="test")],
-            input_metadata=InputMetadata(row_id="test-123"),
+            input_metadata=self.input_metadata,
         )
 
         # Test that broadcast doesn't fail when no connections
@@ -96,6 +98,7 @@ def test_broadcast_row_upserted(self):
         assert "row" in data
         assert data["row"]["messages"][0]["content"] == "test"
         assert data["row"]["input_metadata"]["row_id"] == "test-123"
+        assert data["row"]["input_metadata"]["completion_params"]["model"] == "gpt-4o"
 
     @pytest.mark.asyncio
     async def test_broadcast_loop(self):
@@ -221,7 +224,7 @@ def test_should_update_status_stopped_process(self, mock_process):
 
         test_row = EvaluationRow(
             messages=[Message(role="user", content="test")],
-            input_metadata=InputMetadata(row_id="test-123"),
+            input_metadata=self.input_metadata,
             eval_metadata=EvalMetadata(name="test_eval", num_runs=1, aggregation_method="mean", status="running"),
             pid=12345,
         )
@@ -240,7 +243,7 @@ def test_should_update_status_no_such_process(self, mock_process):
 
         test_row = EvaluationRow(
             messages=[Message(role="user", content="test")],
-            input_metadata=InputMetadata(row_id="test-123"),
+            input_metadata=self.input_metadata,
             eval_metadata=EvalMetadata(name="test_eval", num_runs=1, aggregation_method="mean", status="running"),
             pid=999,
         )
@@ -255,7 +258,7 @@ def test_should_update_status_not_running(self):
 
         test_row = EvaluationRow(
             messages=[Message(role="user", content="test")],
-            input_metadata=InputMetadata(row_id="test-123"),
+            input_metadata=self.input_metadata,
             eval_metadata=EvalMetadata(name="test_eval", num_runs=1, aggregation_method="mean", status="finished"),
             pid=12345,
         )
@@ -270,7 +273,7 @@ def test_should_update_status_no_pid(self):
 
         test_row = EvaluationRow(
             messages=[Message(role="user", content="test")],
-            input_metadata=InputMetadata(row_id="test-123"),
+            input_metadata=self.input_metadata,
             eval_metadata=EvalMetadata(name="test_eval", num_runs=1, aggregation_method="mean", status="running"),
             pid=None,
         )
@@ -326,7 +329,7 @@ async def test_handle_event(self, temp_build_dir):
         # Test handling a log event
         test_row = {
             "messages": [{"role": "user", "content": "test"}],
-            "input_metadata": {"row_id": "test-123"},
+            "input_metadata": self.input_metadata.model_dump(),
         }
 
         server._handle_event(LOG_EVENT_TYPE, test_row)
@@ -543,7 +546,7 @@ async def test_websocket_connection_lifecycle(self):
         # Test broadcasting without starting the loop
         test_row = EvaluationRow(
             messages=[Message(role="user", content="test")],
-            input_metadata=InputMetadata(row_id="test-123"),
+            input_metadata=self.input_metadata,
         )
         manager.broadcast_row_upserted(test_row)
 
@@ -573,7 +576,7 @@ async def test_multiple_websocket_connections(self):
         # Test broadcasting to all without starting the loop
         test_row = EvaluationRow(
             messages=[Message(role="user", content="test")],
-            input_metadata=InputMetadata(row_id="test-123"),
+            input_metadata=self.input_metadata,
         )
         manager.broadcast_row_upserted(test_row)