5050 generate_parameter_combinations ,
5151 log_eval_status_and_rows ,
5252 parse_ep_max_rows ,
53+ parse_ep_num_runs ,
5354 rollout_processor_with_retry ,
5455 sanitize_filename ,
5556)
@@ -456,7 +457,10 @@ def create_wrapper_with_signature() -> Callable:
456457
457458 async def wrapper_body (** kwargs ):
458459 eval_metadata = None
459- all_results : List [List [EvaluationRow ]] = [[] for _ in range (num_runs )]
460+
461+ # Apply environment override for num_runs if present
462+ effective_num_runs = parse_ep_num_runs (num_runs )
463+ all_results : List [List [EvaluationRow ]] = [[] for _ in range (effective_num_runs )]
460464
461465 experiment_id = generate_id ()
462466
@@ -530,7 +534,7 @@ def _log_eval_error(
530534 name = test_func .__name__ ,
531535 description = test_func .__doc__ ,
532536 status = "running" ,
533- num_runs = num_runs ,
537+ num_runs = effective_num_runs ,
534538 aggregation_method = aggregation_method ,
535539 passed_threshold = threshold ,
536540 passed = None ,
@@ -564,7 +568,7 @@ def _log_eval_error(
564568 exception_handler_config = exception_handler_config ,
565569 )
566570
567- for i in range (num_runs ):
571+ for i in range (effective_num_runs ):
568572 # Regenerate outputs each run by deep-copying the pristine dataset
569573 # so model responses are not reused across runs.
570574 run_id = generate_id ()
@@ -693,7 +697,8 @@ async def _collect_result(config, lst):
693697 # rollout_id is used to differentiate the result from different completion_params
694698 if mode == "groupwise" :
695699 results_by_group = [
696- [[] for _ in range (num_runs )] for _ in range (len (original_completion_params_list ))
700+ [[] for _ in range (effective_num_runs )]
701+ for _ in range (len (original_completion_params_list ))
697702 ]
698703 for i_run , result in enumerate (all_results ):
699704 for r in result :
@@ -708,7 +713,7 @@ async def _collect_result(config, lst):
708713 mode ,
709714 original_completion_params_list [rollout_id ],
710715 test_func .__name__ ,
711- num_runs ,
716+ effective_num_runs ,
712717 )
713718 else :
714719 postprocess (
@@ -719,7 +724,7 @@ async def _collect_result(config, lst):
719724 mode ,
720725 completion_params ,
721726 test_func .__name__ ,
722- num_runs ,
727+ effective_num_runs ,
723728 )
724729
725730 except AssertionError :
0 commit comments