We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4873b1e commit 3ce6c7f
Copy full SHA for 3ce6c7f
eval_protocol/pytest/evaluation_test.py
@@ -766,7 +766,7 @@ def dual_mode_wrapper(*args, **kwargs):
766
"rollout_processor": rollout_processor,
767
"evaluation_test_kwargs": evaluation_test_kwargs,
768
"aggregation_method": aggregation_method,
769
- "threshold_of_success": threshold_of_success,
+ "passed_threshold": passed_threshold,
770
"num_runs": num_runs,
771
"max_dataset_rows": max_dataset_rows,
772
"mcp_config_path": mcp_config_path,
@@ -802,7 +802,7 @@ def __ep_run_direct(
802
rollout_input_params=rip,
803
rollout_processor=cfg.get("rollout_processor"),
804
aggregation_method=cfg.get("aggregation_method"),
805
- threshold_of_success=cfg.get("threshold_of_success"),
+ threshold_of_success=cfg.get("passed_threshold"),
806
num_runs=(num_runs_override if num_runs_override is not None else cfg.get("num_runs")),
807
max_dataset_rows=cfg.get("max_dataset_rows"),
808
mcp_config_path=cfg.get("mcp_config_path"),
0 commit comments