88
99from eval_protocol .cli_commands import create_rft as cr
1010from eval_protocol .cli_commands import upload as upload_mod
11+ from eval_protocol .cli_commands import local_test as local_test_mod
1112import eval_protocol .fireworks_rft as fr
1213from eval_protocol .cli import parse_args
1314import eval_protocol .cli_commands .utils as cli_utils
@@ -103,7 +104,7 @@ def rft_test_harness(tmp_path, monkeypatch, stub_fireworks):
103104 # Account id is derived from API key; mock the verify call to keep tests offline.
104105 monkeypatch .setattr (cli_utils , "verify_api_key_and_get_account_id" , lambda * a , ** k : "acct123" )
105106
106- monkeypatch .setattr (upload_mod , "_prompt_select" , lambda tests , non_interactive = False : tests [:1 ])
107+ monkeypatch .setattr (cli_utils , "_prompt_select" , lambda tests , non_interactive = False : tests [:1 ])
107108 monkeypatch .setattr (upload_mod , "upload_command" , lambda args : 0 )
108109 monkeypatch .setattr (cr , "_poll_evaluator_version_status" , lambda ** kwargs : True )
109110 monkeypatch .setattr (cr , "upload_and_ensure_evaluator" , lambda * a , ** k : True )
@@ -225,7 +226,7 @@ def test_create_rft_evaluator_validation_fails(rft_test_harness, monkeypatch):
225226 test_file .parent .mkdir (parents = True , exist_ok = True )
226227 test_file .write_text ("# dummy eval test" , encoding = "utf-8" )
227228 single_disc = SimpleNamespace (qualname = "metric.test_eval_validation" , file_path = str (test_file ))
228- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
229+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
229230
230231 # Force local evaluator validation to fail
231232 calls = {"count" : 0 , "pytest_target" : None }
@@ -235,7 +236,7 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
235236 calls ["pytest_target" ] = pytest_target
236237 return 1 # non-zero exit code => validation failure
237238
238- monkeypatch .setattr (cr , "run_evaluator_test" , _fake_run_evaluator_test )
239+ monkeypatch .setattr (local_test_mod , "run_evaluator_test" , _fake_run_evaluator_test )
239240
240241 args = argparse .Namespace (
241242 evaluator = None ,
@@ -284,7 +285,7 @@ def test_create_rft_evaluator_validation_passes(rft_test_harness, monkeypatch):
284285 test_file .parent .mkdir (parents = True , exist_ok = True )
285286 test_file .write_text ("# dummy ok eval test" , encoding = "utf-8" )
286287 single_disc = SimpleNamespace (qualname = "metric.test_eval_ok" , file_path = str (test_file ))
287- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
288+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
288289
289290 # Force local evaluator validation to succeed
290291 calls = {"count" : 0 , "pytest_target" : None }
@@ -294,7 +295,7 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
294295 calls ["pytest_target" ] = pytest_target
295296 return 0 # success
296297
297- monkeypatch .setattr (cr , "run_evaluator_test" , _fake_run_evaluator_test )
298+ monkeypatch .setattr (local_test_mod , "run_evaluator_test" , _fake_run_evaluator_test )
298299
299300 args = argparse .Namespace (
300301 evaluator = None ,
@@ -442,8 +443,8 @@ def test_create_rft_picks_most_recent_evaluator_and_dataset_id_follows(rft_test_
442443 one_file .write_text ("# single" , encoding = "utf-8" )
443444 single_disc = SimpleNamespace (qualname = "metric.test_single" , file_path = str (one_file ))
444445 # New flow uses _discover_and_select_tests; patch it to return our single test.
445- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
446- monkeypatch .setattr (upload_mod , "_prompt_select" , lambda tests , non_interactive = False : tests [:1 ])
446+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
447+ monkeypatch .setattr (cli_utils , "_prompt_select" , lambda tests , non_interactive = False : tests [:1 ])
447448 monkeypatch .setattr (upload_mod , "upload_command" , lambda args : 0 )
448449 monkeypatch .setattr (cr , "_poll_evaluator_version_status" , lambda ** kwargs : True )
449450
@@ -505,7 +506,7 @@ def test_create_rft_passes_matching_evaluator_id_and_entry_with_multiple_tests(r
505506 # Fake discovered tests: foo and bar
506507 cal_disc = SimpleNamespace (qualname = "foo_eval.test_bar_evaluation" , file_path = str (cal_file ))
507508 svg_disc = SimpleNamespace (qualname = "bar_eval.test_baz_evaluation" , file_path = str (svg_file ))
508- monkeypatch .setattr (cr , "_discover_tests" , lambda cwd : [cal_disc , svg_disc ])
509+ monkeypatch .setattr (cli_utils , "_discover_tests" , lambda cwd : [cal_disc , svg_disc ])
509510
510511 # Capture dataset id used during dataset creation
511512 captured = {"dataset_id" : None }
@@ -572,7 +573,7 @@ def test_create_rft_interactive_selector_single_test(rft_test_harness, monkeypat
572573 test_file .write_text ("# one" , encoding = "utf-8" )
573574 single_disc = SimpleNamespace (qualname = "metric.test_one" , file_path = str (test_file ))
574575 # New flow uses _discover_and_select_tests; patch it to return our single test.
575- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
576+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
576577
577578 # Capture dataset id used during dataset creation
578579 captured = {"dataset_id" : None }
@@ -703,7 +704,7 @@ def test_create_rft_quiet_new_evaluator_ambiguous_without_entry_errors(tmp_path,
703704 f2 .write_text ("# b" , encoding = "utf-8" )
704705 d1 = SimpleNamespace (qualname = "a.test_one" , file_path = str (f1 ))
705706 d2 = SimpleNamespace (qualname = "b.test_two" , file_path = str (f2 ))
706- monkeypatch .setattr (cr , "_discover_tests" , lambda cwd : [d1 , d2 ])
707+ monkeypatch .setattr (cli_utils , "_discover_tests" , lambda cwd : [d1 , d2 ])
707708
708709 args = argparse .Namespace (
709710 evaluator = "some-eval" ,
@@ -742,9 +743,9 @@ def test_create_rft_fallback_to_dataset_builder(rft_test_harness, monkeypatch):
742743 test_file .write_text ("# builder case" , encoding = "utf-8" )
743744 single_disc = SimpleNamespace (qualname = "metric.test_builder" , file_path = str (test_file ))
744745 # New flow uses _discover_and_select_tests for evaluator resolution; patch it to return our single test.
745- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
746+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
746747 # Also patch _discover_tests for any direct calls during dataset inference.
747- monkeypatch .setattr (cr , "_discover_tests" , lambda cwd : [single_disc ])
748+ monkeypatch .setattr (cli_utils , "_discover_tests" , lambda cwd : [single_disc ])
748749
749750 # Dataset builder fallback
750751 out_jsonl = project / "metric" / "builder_out.jsonl"
@@ -807,7 +808,7 @@ def test_create_rft_rejects_dataloader_jsonl(rft_test_harness, monkeypatch):
807808 test_file .write_text ("# loader case" , encoding = "utf-8" )
808809 single_disc = SimpleNamespace (qualname = "metric.test_loader" , file_path = str (test_file ))
809810 # New flow uses _discover_and_select_tests; patch it to return our single test.
810- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
811+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
811812
812813 # Provide JSONL via dataloader extractor
813814 dl_jsonl = project / "metric" / "loader_out.jsonl"
@@ -868,7 +869,7 @@ def test_create_rft_uses_input_dataset_jsonl_when_available(rft_test_harness, mo
868869 test_file .write_text ("# input_dataset case" , encoding = "utf-8" )
869870 single_disc = SimpleNamespace (qualname = "metric.test_input_ds" , file_path = str (test_file ))
870871 # New flow uses _discover_and_select_tests; patch it to return our single test.
871- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
872+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
872873
873874 # Provide JSONL via input_dataset extractor
874875 id_jsonl = project / "metric" / "input_ds_out.jsonl"
@@ -933,7 +934,7 @@ def test_create_rft_quiet_existing_evaluator_infers_dataset_from_matching_test(r
933934 f2 .write_text ("# beta" , encoding = "utf-8" )
934935 d1 = SimpleNamespace (qualname = "alpha.test_one" , file_path = str (f1 ))
935936 d2 = SimpleNamespace (qualname = "beta.test_two" , file_path = str (f2 ))
936- monkeypatch .setattr (cr , "_discover_tests" , lambda cwd : [d1 , d2 ])
937+ monkeypatch .setattr (cli_utils , "_discover_tests" , lambda cwd : [d1 , d2 ])
937938
938939 # Evaluator upload succeeds and version becomes ACTIVE
939940 monkeypatch .setattr (cr , "upload_and_ensure_evaluator" , lambda * a , ** k : True )
@@ -1097,9 +1098,9 @@ def test_create_rft_prefers_explicit_dataset_jsonl_over_input_dataset(rft_test_h
10971098 test_file .write_text ("# prefer explicit dataset_jsonl" , encoding = "utf-8" )
10981099 single_disc = SimpleNamespace (qualname = "metric.test_pref" , file_path = str (test_file ))
10991100 # New flow uses _discover_and_select_tests; patch it to return our single test.
1100- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
1101+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
11011102
1102- monkeypatch .setattr (upload_mod , "_prompt_select" , lambda tests , non_interactive = False : tests [:1 ])
1103+ monkeypatch .setattr (cli_utils , "_prompt_select" , lambda tests , non_interactive = False : tests [:1 ])
11031104 monkeypatch .setattr (upload_mod , "upload_command" , lambda args : 0 )
11041105 monkeypatch .setattr (cr , "_poll_evaluator_version_status" , lambda ** kwargs : True )
11051106
@@ -1203,7 +1204,7 @@ def test_adapt(row: EvaluationRow) -> EvaluationRow:
12031204
12041205 # Discovery: exactly one test, and resolve_selected_test points to our module/function
12051206 single_disc = SimpleNamespace (qualname = "metric.test_adapt.test_adapt" , file_path = str (test_file ))
1206- monkeypatch .setattr (cr , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
1207+ monkeypatch .setattr (cli_utils , "_discover_and_select_tests" , lambda cwd , non_interactive = False : [single_disc ])
12071208 monkeypatch .setattr (
12081209 cr ,
12091210 "_resolve_selected_test" ,
0 commit comments