Skip to content

Commit 5829873

Browse files
committed
bug
1 parent b06c608 commit 5829873

File tree

2 files changed

+7
-0
lines changed

2 files changed

+7
-0
lines changed

eval_protocol/pytest/evaluation_test.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -483,6 +483,7 @@ async def _execute_with_semaphore(row):
483483
for r in result:
484484
if r.eval_metadata is not None:
485485
r.eval_metadata.passed = passed
486+
if r.evaluation_result is not None:
486487
r.evaluation_result.agg_score = agg_score
487488
r.evaluation_result.standard_error = standard_error
488489
active_logger.log(r)

tests/test_models.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,8 @@ def test_evaluate_result_dict_access():
210210
"step_outputs",
211211
"trajectory_info",
212212
"final_control_plane_info",
213+
"agg_score",
214+
"standard_error",
213215
}
214216

215217
# values() - check presence due to potential order variation of model_fields
@@ -232,6 +234,8 @@ def test_evaluate_result_dict_access():
232234
("step_outputs", None),
233235
("trajectory_info", None),
234236
("final_control_plane_info", None),
237+
("agg_score", None),
238+
("standard_error", None),
235239
]
236240
)
237241
# result.items() returns a list of tuples, so convert to list then sort.
@@ -250,6 +254,8 @@ def test_evaluate_result_dict_access():
250254
"step_outputs",
251255
"trajectory_info",
252256
"final_control_plane_info",
257+
"agg_score",
258+
"standard_error",
253259
}
254260

255261

0 commit comments

Comments
 (0)