11import os
22from unittest .mock import Mock , patch
33
4- import eval_protocol .dataset_logger as dataset_logger
5- from eval_protocol .dataset_logger .dataset_logger import DatasetLogger
6- from eval_protocol .dataset_logger .sqlite_evaluation_row_store import SqliteEvaluationRowStore
7- from eval_protocol .models import EvaluationRow
8- from eval_protocol .pytest .default_no_op_rollout_processor import NoOpRolloutProcessor
9- from tests .pytest .test_markdown_highlighting import markdown_dataset_to_evaluation_row
10-
114
125async def test_ensure_logging (monkeypatch ):
136 """
@@ -25,41 +18,42 @@ async def test_ensure_logging(monkeypatch):
2518 "eval_protocol.dataset_logger.sqlite_dataset_logger_adapter.SqliteEvaluationRowStore" , return_value = mock_store
2619 ):
2720 from eval_protocol .models import EvaluationRow
21+ from eval_protocol .pytest .default_no_op_rollout_processor import NoOpRolloutProcessor
2822 from eval_protocol .pytest .evaluation_test import evaluation_test
2923 from tests .pytest .test_markdown_highlighting import markdown_dataset_to_evaluation_row
3024
31- @evaluation_test (
32- input_dataset = [
33- "tests/pytest/data/markdown_dataset.jsonl" ,
34- ],
35- completion_params = [{"temperature" : 0.0 , "model" : "dummy/local-model" }],
36- dataset_adapter = markdown_dataset_to_evaluation_row ,
37- rollout_processor = NoOpRolloutProcessor (),
38- mode = "pointwise" ,
39- combine_datasets = False ,
40- num_runs = 2 ,
41- # Don't pass logger parameter - let it use the default_logger (which we've replaced)
42- )
43- def eval_fn (row : EvaluationRow ) -> EvaluationRow :
44- return row
45-
46- await eval_fn (
47- dataset_path = ["tests/pytest/data/markdown_dataset.jsonl" ],
48- completion_params = {"temperature" : 0.0 , "model" : "dummy/local-model" },
49- )
50-
51- # Verify that the store's upsert_row method was called
52- assert mock_store .upsert_row .called , "SqliteEvaluationRowStore.upsert_row should have been called"
53-
54- # Check that it was called multiple times (once for each row)
55- call_count = mock_store .upsert_row .call_count
56- assert call_count > 0 , f"Expected upsert_row to be called at least once, but it was called { call_count } times"
57-
58- # Verify the calls were made with proper data structure
59- for call in mock_store .upsert_row .call_args_list :
60- args , kwargs = call
61- data = args [0 ] if args else kwargs .get ("data" )
62- assert data is not None , "upsert_row should be called with data parameter"
63- assert isinstance (data , dict ), "data should be a dictionary"
64- assert "execution_metadata" in data , "data should contain execution_metadata"
65- assert "rollout_id" in data ["execution_metadata" ], "data should contain rollout_id in execution_metadata"
25+ @evaluation_test (
26+ input_dataset = [
27+ "tests/pytest/data/markdown_dataset.jsonl" ,
28+ ],
29+ completion_params = [{"temperature" : 0.0 , "model" : "dummy/local-model" }],
30+ dataset_adapter = markdown_dataset_to_evaluation_row ,
31+ rollout_processor = NoOpRolloutProcessor (),
32+ mode = "pointwise" ,
33+ combine_datasets = False ,
34+ num_runs = 2 ,
35+ # Don't pass logger parameter - let it use the default_logger (which we've replaced)
36+ )
37+ def eval_fn (row : EvaluationRow ) -> EvaluationRow :
38+ return row
39+
40+ await eval_fn (
41+ dataset_path = ["tests/pytest/data/markdown_dataset.jsonl" ],
42+ completion_params = {"temperature" : 0.0 , "model" : "dummy/local-model" },
43+ )
44+
45+ # Verify that the store's upsert_row method was called
46+ assert mock_store .upsert_row .called , "SqliteEvaluationRowStore.upsert_row should have been called"
47+
48+ # Check that it was called at least once (this does not verify one call per row)
49+ call_count = mock_store .upsert_row .call_count
50+ assert call_count > 0 , f"Expected upsert_row to be called at least once, but it was called { call_count } times"
51+
52+ # Verify the calls were made with proper data structure
53+ for call in mock_store .upsert_row .call_args_list :
54+ args , kwargs = call
55+ data = args [0 ] if args else kwargs .get ("data" )
56+ assert data is not None , "upsert_row should be called with data parameter"
57+ assert isinstance (data , dict ), "data should be a dictionary"
58+ assert "execution_metadata" in data , "data should contain execution_metadata"
59+ assert "rollout_id" in data ["execution_metadata" ], "data should contain rollout_id in execution_metadata"
0 commit comments