@@ -191,9 +191,9 @@ async def process_single_row(row: EvaluationRow) -> EvaluationRow:
191191def test_fail_fast_exceptions (row : EvaluationRow ) -> EvaluationRow :
192192 """Test that fail-fast exceptions like ValueError are not retried."""
193193 print (
194- f"π EVALUATED: { row .execution_metadata .rollout_id } ({ 'SUCCESS' if row .rollout_status .status == 'finished' else 'FAILURE' } )"
194+ f"π EVALUATED: { row .execution_metadata .rollout_id } ({ 'SUCCESS' if row .rollout_status .is_finished () else 'FAILURE' } )"
195195 )
196- score = 1.0 if row .rollout_status .status == "finished" else 0.0
196+ score = 1.0 if row .rollout_status .is_finished () else 0.0
197197 row .evaluation_result = EvaluateResult (score = score )
198198 return row
199199
@@ -283,8 +283,8 @@ def custom_http_giveup(e):
283283def test_custom_giveup_function (row : EvaluationRow ) -> EvaluationRow :
284284 """Test custom giveup function behavior."""
285285 task_content = row .messages [0 ].content if row .messages else ""
286- print (f"π EVALUATED: { task_content } ({ 'SUCCESS' if row .rollout_status .status == 'finished' else 'FAILURE' } )" )
287- score = 1.0 if row .rollout_status .status == "finished" else 0.0
286+ print (f"π EVALUATED: { task_content } ({ 'SUCCESS' if row .rollout_status .is_finished () else 'FAILURE' } )" )
287+ score = 1.0 if row .rollout_status .is_finished () else 0.0
288288 row .evaluation_result = EvaluateResult (score = score )
289289 return row
290290
@@ -368,9 +368,9 @@ def simple_4xx_giveup(e):
368368def test_simple_giveup_function (row : EvaluationRow ) -> EvaluationRow :
369369 """Test that giveup function prevents retries immediately."""
370370 print (
371- f"π EVALUATED: { row .execution_metadata .rollout_id } ({ 'SUCCESS' if row .rollout_status .status == 'finished' else 'FAILURE' } )"
371+ f"π EVALUATED: { row .execution_metadata .rollout_id } ({ 'SUCCESS' if row .rollout_status .is_finished () else 'FAILURE' } )"
372372 )
373- score = 1.0 if row .rollout_status .status == "finished" else 0.0
373+ score = 1.0 if row .rollout_status .is_finished () else 0.0
374374 row .evaluation_result = EvaluateResult (score = score )
375375 return row
376376
0 commit comments