Skip to content

Commit 37f4856

Browse files
Author: Dylan Huang (committed)
Commit message: "passes"
1 parent: b3adfee — commit: 37f4856

File tree

2 files changed: +30 additions, −26 deletions

tests/test_cli_create_rft.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from eval_protocol.cli_commands import create_rft as cr
1010
from eval_protocol.cli_commands import upload as upload_mod
11+
from eval_protocol.cli_commands import local_test as local_test_mod
1112
import eval_protocol.fireworks_rft as fr
1213
from eval_protocol.cli import parse_args
1314
import eval_protocol.cli_commands.utils as cli_utils
@@ -103,7 +104,7 @@ def rft_test_harness(tmp_path, monkeypatch, stub_fireworks):
103104
# Account id is derived from API key; mock the verify call to keep tests offline.
104105
monkeypatch.setattr(cli_utils, "verify_api_key_and_get_account_id", lambda *a, **k: "acct123")
105106

106-
monkeypatch.setattr(upload_mod, "_prompt_select", lambda tests, non_interactive=False: tests[:1])
107+
monkeypatch.setattr(cli_utils, "_prompt_select", lambda tests, non_interactive=False: tests[:1])
107108
monkeypatch.setattr(upload_mod, "upload_command", lambda args: 0)
108109
monkeypatch.setattr(cr, "_poll_evaluator_version_status", lambda **kwargs: True)
109110
monkeypatch.setattr(cr, "upload_and_ensure_evaluator", lambda *a, **k: True)
@@ -225,7 +226,7 @@ def test_create_rft_evaluator_validation_fails(rft_test_harness, monkeypatch):
225226
test_file.parent.mkdir(parents=True, exist_ok=True)
226227
test_file.write_text("# dummy eval test", encoding="utf-8")
227228
single_disc = SimpleNamespace(qualname="metric.test_eval_validation", file_path=str(test_file))
228-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
229+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
229230

230231
# Force local evaluator validation to fail
231232
calls = {"count": 0, "pytest_target": None}
@@ -235,7 +236,7 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
235236
calls["pytest_target"] = pytest_target
236237
return 1 # non-zero exit code => validation failure
237238

238-
monkeypatch.setattr(cr, "run_evaluator_test", _fake_run_evaluator_test)
239+
monkeypatch.setattr(local_test_mod, "run_evaluator_test", _fake_run_evaluator_test)
239240

240241
args = argparse.Namespace(
241242
evaluator=None,
@@ -284,7 +285,7 @@ def test_create_rft_evaluator_validation_passes(rft_test_harness, monkeypatch):
284285
test_file.parent.mkdir(parents=True, exist_ok=True)
285286
test_file.write_text("# dummy ok eval test", encoding="utf-8")
286287
single_disc = SimpleNamespace(qualname="metric.test_eval_ok", file_path=str(test_file))
287-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
288+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
288289

289290
# Force local evaluator validation to succeed
290291
calls = {"count": 0, "pytest_target": None}
@@ -294,7 +295,7 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
294295
calls["pytest_target"] = pytest_target
295296
return 0 # success
296297

297-
monkeypatch.setattr(cr, "run_evaluator_test", _fake_run_evaluator_test)
298+
monkeypatch.setattr(local_test_mod, "run_evaluator_test", _fake_run_evaluator_test)
298299

299300
args = argparse.Namespace(
300301
evaluator=None,
@@ -442,8 +443,8 @@ def test_create_rft_picks_most_recent_evaluator_and_dataset_id_follows(rft_test_
442443
one_file.write_text("# single", encoding="utf-8")
443444
single_disc = SimpleNamespace(qualname="metric.test_single", file_path=str(one_file))
444445
# New flow uses _discover_and_select_tests; patch it to return our single test.
445-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
446-
monkeypatch.setattr(upload_mod, "_prompt_select", lambda tests, non_interactive=False: tests[:1])
446+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
447+
monkeypatch.setattr(cli_utils, "_prompt_select", lambda tests, non_interactive=False: tests[:1])
447448
monkeypatch.setattr(upload_mod, "upload_command", lambda args: 0)
448449
monkeypatch.setattr(cr, "_poll_evaluator_version_status", lambda **kwargs: True)
449450

@@ -505,7 +506,7 @@ def test_create_rft_passes_matching_evaluator_id_and_entry_with_multiple_tests(r
505506
# Fake discovered tests: foo and bar
506507
cal_disc = SimpleNamespace(qualname="foo_eval.test_bar_evaluation", file_path=str(cal_file))
507508
svg_disc = SimpleNamespace(qualname="bar_eval.test_baz_evaluation", file_path=str(svg_file))
508-
monkeypatch.setattr(cr, "_discover_tests", lambda cwd: [cal_disc, svg_disc])
509+
monkeypatch.setattr(cli_utils, "_discover_tests", lambda cwd: [cal_disc, svg_disc])
509510

510511
# Capture dataset id used during dataset creation
511512
captured = {"dataset_id": None}
@@ -572,7 +573,7 @@ def test_create_rft_interactive_selector_single_test(rft_test_harness, monkeypat
572573
test_file.write_text("# one", encoding="utf-8")
573574
single_disc = SimpleNamespace(qualname="metric.test_one", file_path=str(test_file))
574575
# New flow uses _discover_and_select_tests; patch it to return our single test.
575-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
576+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
576577

577578
# Capture dataset id used during dataset creation
578579
captured = {"dataset_id": None}
@@ -703,7 +704,7 @@ def test_create_rft_quiet_new_evaluator_ambiguous_without_entry_errors(tmp_path,
703704
f2.write_text("# b", encoding="utf-8")
704705
d1 = SimpleNamespace(qualname="a.test_one", file_path=str(f1))
705706
d2 = SimpleNamespace(qualname="b.test_two", file_path=str(f2))
706-
monkeypatch.setattr(cr, "_discover_tests", lambda cwd: [d1, d2])
707+
monkeypatch.setattr(cli_utils, "_discover_tests", lambda cwd: [d1, d2])
707708

708709
args = argparse.Namespace(
709710
evaluator="some-eval",
@@ -742,9 +743,9 @@ def test_create_rft_fallback_to_dataset_builder(rft_test_harness, monkeypatch):
742743
test_file.write_text("# builder case", encoding="utf-8")
743744
single_disc = SimpleNamespace(qualname="metric.test_builder", file_path=str(test_file))
744745
# New flow uses _discover_and_select_tests for evaluator resolution; patch it to return our single test.
745-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
746+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
746747
# Also patch _discover_tests for any direct calls during dataset inference.
747-
monkeypatch.setattr(cr, "_discover_tests", lambda cwd: [single_disc])
748+
monkeypatch.setattr(cli_utils, "_discover_tests", lambda cwd: [single_disc])
748749

749750
# Dataset builder fallback
750751
out_jsonl = project / "metric" / "builder_out.jsonl"
@@ -807,7 +808,7 @@ def test_create_rft_rejects_dataloader_jsonl(rft_test_harness, monkeypatch):
807808
test_file.write_text("# loader case", encoding="utf-8")
808809
single_disc = SimpleNamespace(qualname="metric.test_loader", file_path=str(test_file))
809810
# New flow uses _discover_and_select_tests; patch it to return our single test.
810-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
811+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
811812

812813
# Provide JSONL via dataloader extractor
813814
dl_jsonl = project / "metric" / "loader_out.jsonl"
@@ -868,7 +869,7 @@ def test_create_rft_uses_input_dataset_jsonl_when_available(rft_test_harness, mo
868869
test_file.write_text("# input_dataset case", encoding="utf-8")
869870
single_disc = SimpleNamespace(qualname="metric.test_input_ds", file_path=str(test_file))
870871
# New flow uses _discover_and_select_tests; patch it to return our single test.
871-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
872+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
872873

873874
# Provide JSONL via input_dataset extractor
874875
id_jsonl = project / "metric" / "input_ds_out.jsonl"
@@ -933,7 +934,7 @@ def test_create_rft_quiet_existing_evaluator_infers_dataset_from_matching_test(r
933934
f2.write_text("# beta", encoding="utf-8")
934935
d1 = SimpleNamespace(qualname="alpha.test_one", file_path=str(f1))
935936
d2 = SimpleNamespace(qualname="beta.test_two", file_path=str(f2))
936-
monkeypatch.setattr(cr, "_discover_tests", lambda cwd: [d1, d2])
937+
monkeypatch.setattr(cli_utils, "_discover_tests", lambda cwd: [d1, d2])
937938

938939
# Evaluator upload succeeds and version becomes ACTIVE
939940
monkeypatch.setattr(cr, "upload_and_ensure_evaluator", lambda *a, **k: True)
@@ -1097,9 +1098,9 @@ def test_create_rft_prefers_explicit_dataset_jsonl_over_input_dataset(rft_test_h
10971098
test_file.write_text("# prefer explicit dataset_jsonl", encoding="utf-8")
10981099
single_disc = SimpleNamespace(qualname="metric.test_pref", file_path=str(test_file))
10991100
# New flow uses _discover_and_select_tests; patch it to return our single test.
1100-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
1101+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
11011102

1102-
monkeypatch.setattr(upload_mod, "_prompt_select", lambda tests, non_interactive=False: tests[:1])
1103+
monkeypatch.setattr(cli_utils, "_prompt_select", lambda tests, non_interactive=False: tests[:1])
11031104
monkeypatch.setattr(upload_mod, "upload_command", lambda args: 0)
11041105
monkeypatch.setattr(cr, "_poll_evaluator_version_status", lambda **kwargs: True)
11051106

@@ -1203,7 +1204,7 @@ def test_adapt(row: EvaluationRow) -> EvaluationRow:
12031204

12041205
# Discovery: exactly one test, and resolve_selected_test points to our module/function
12051206
single_disc = SimpleNamespace(qualname="metric.test_adapt.test_adapt", file_path=str(test_file))
1206-
monkeypatch.setattr(cr, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
1207+
monkeypatch.setattr(cli_utils, "_discover_and_select_tests", lambda cwd, non_interactive=False: [single_disc])
12071208
monkeypatch.setattr(
12081209
cr,
12091210
"_resolve_selected_test",

tests/test_ep_upload_e2e.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ def test_ep_upload_discovers_and_uploads_evaluation_test(
151151
- Upload via upload_command
152152
- Verify all API calls
153153
"""
154-
from eval_protocol.cli_commands.upload import upload_command, _discover_tests
154+
from eval_protocol.cli_commands.upload import upload_command
155+
from eval_protocol.cli_commands.utils import _discover_tests
155156

156157
# 1. CREATE TEST PROJECT STRUCTURE
157158
test_content = """
@@ -211,7 +212,7 @@ async def test_simple_evaluation(row: EvaluationRow) -> EvaluationRow:
211212
)
212213

213214
# Mock the selection (auto-select the discovered test)
214-
with patch("eval_protocol.cli_commands.upload._prompt_select") as mock_select:
215+
with patch("eval_protocol.cli_commands.utils._prompt_select") as mock_select:
215216
mock_select.return_value = discovered_tests
216217

217218
# Execute upload command
@@ -280,7 +281,8 @@ def test_ep_upload_with_parametrized_test(
280281
Test ep upload with a parametrized @evaluation_test
281282
Verifies that parametrized tests are discovered and uploaded as single evaluator
282283
"""
283-
from eval_protocol.cli_commands.upload import upload_command, _discover_tests
284+
from eval_protocol.cli_commands.upload import upload_command
285+
from eval_protocol.cli_commands.utils import _discover_tests
284286

285287
test_content = """
286288
import pytest
@@ -327,7 +329,7 @@ async def test_multi_model_eval(row: EvaluationRow) -> EvaluationRow:
327329
yes=True,
328330
)
329331

330-
with patch("eval_protocol.cli_commands.upload._prompt_select") as mock_select:
332+
with patch("eval_protocol.cli_commands.utils._prompt_select") as mock_select:
331333
mock_select.return_value = discovered_tests
332334
exit_code = upload_command(args)
333335

@@ -352,7 +354,7 @@ def test_ep_upload_discovery_skips_problematic_files(mock_env_variables):
352354
Test that discovery properly skips files like setup.py, versioneer.py
353355
that would cause issues during pytest collection
354356
"""
355-
from eval_protocol.cli_commands.upload import _discover_tests
357+
from eval_protocol.cli_commands.utils import _discover_tests
356358

357359
test_content = """
358360
from eval_protocol.pytest import evaluation_test
@@ -400,7 +402,7 @@ def test_ep_upload_discovers_non_test_prefixed_files(mock_env_variables):
400402
Test that discovery finds @evaluation_test in files like quickstart.py
401403
(files that don't start with 'test_')
402404
"""
403-
from eval_protocol.cli_commands.upload import _discover_tests
405+
from eval_protocol.cli_commands.utils import _discover_tests
404406

405407
test_content = """
406408
from eval_protocol.pytest import evaluation_test
@@ -450,7 +452,8 @@ def test_ep_upload_complete_workflow_with_entry_point_validation(
450452
- Full 5-step upload flow
451453
- Payload structure
452454
"""
453-
from eval_protocol.cli_commands.upload import upload_command, _discover_tests
455+
from eval_protocol.cli_commands.upload import upload_command
456+
from eval_protocol.cli_commands.utils import _discover_tests
454457

455458
test_content = """
456459
from typing import List
@@ -506,7 +509,7 @@ async def test_math_correctness(row: EvaluationRow) -> EvaluationRow:
506509
yes=True,
507510
)
508511

509-
with patch("eval_protocol.cli_commands.upload._prompt_select") as mock_select:
512+
with patch("eval_protocol.cli_commands.utils._prompt_select") as mock_select:
510513
mock_select.return_value = discovered_tests
511514
exit_code = upload_command(args)
512515

Comments (0)