Skip to content

Commit ce6277f

Browse files
committed
cli reduce
1 parent 6efbf13 commit ce6277f

File tree

3 files changed

+133
-1
lines changed

3 files changed

+133
-1
lines changed

eval_protocol/__init__.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,25 @@
99
"""
1010

1111
import importlib
12+
import sys
1213
import warnings
1314
from typing import TYPE_CHECKING
1415

1516
warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
1617

18+
# Eager imports for symbols that conflict with module names - ONLY when pytest is running.
19+
# The reward_function.py module exports RewardFunction class, and we also export the
20+
# reward_function decorator from typed_interface. When pytest's AssertionRewritingHook
21+
# imports eval_protocol.reward_function as a module, Python would set
22+
# eval_protocol.reward_function to the module, shadowing our function export.
23+
#
24+
# We detect pytest by checking if _pytest or pytest is already loaded. This avoids
25+
# the ~500ms import overhead for non-test scenarios like the CLI.
26+
_running_under_pytest = "_pytest" in sys.modules or "pytest" in sys.modules
27+
if _running_under_pytest:
28+
from .reward_function import RewardFunction # noqa: E402
29+
from .typed_interface import reward_function # noqa: E402
30+
1731
# Lazy import mappings: name -> (module_path, attribute_name or None for module import)
1832
_LAZY_IMPORTS = {
1933
# From .auth
@@ -36,9 +50,12 @@
3650
# From .data_loader
3751
"DynamicDataLoader": (".data_loader", "DynamicDataLoader"),
3852
"InlineDataLoader": (".data_loader", "InlineDataLoader"),
39-
# Submodules
53+
# Submodules (accessible as eval_protocol.submodule)
4054
"mcp": (".mcp", None),
4155
"rewards": (".rewards", None),
56+
"models": (".models", None),
57+
"auth": (".auth", None),
58+
"config": (".config", None),
4259
# From .models
4360
"EvaluateResult": (".models", "EvaluateResult"),
4461
"Message": (".models", "Message"),

pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[pytest]
22
markers =
33
asyncio
4+
benchmark: marks tests as benchmark tests (for CLI startup time checks)
45
asyncio_mode = auto
56
asyncio_default_fixture_loop_scope = function
67
testpaths = tests ./eval_protocol/quickstart
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
"""
2+
Benchmark test for CLI startup time.
3+
4+
This test ensures the CLI startup time stays under the target threshold.
5+
Run with: pytest tests/test_cli_startup_benchmark.py -v
6+
"""
7+
8+
import subprocess
9+
import sys
10+
import time
11+
12+
import pytest
13+
14+
# Target: CLI should start in under 0.4 seconds
15+
CLI_STARTUP_TARGET_SECONDS = 0.4
16+
17+
# Number of runs to average (first run may be slower due to cold cache)
18+
NUM_RUNS = 3
19+
20+
21+
def measure_cli_startup_time() -> float:
    """Measure the wall-clock startup time of ``eval_protocol.cli --help``.

    Runs the CLI in a fresh interpreter via ``python -m`` so the measurement
    includes interpreter startup plus all import work done by the package.

    Returns:
        Elapsed time in seconds for a single ``--help`` invocation.

    Raises:
        AssertionError: If the CLI subprocess exits with a non-zero code.
    """
    # Proper import instead of the inline ``__import__("os")`` hack; a
    # function-scope import keeps this block self-contained.
    import os

    start = time.perf_counter()
    result = subprocess.run(
        [sys.executable, "-m", "eval_protocol.cli", "--help"],
        capture_output=True,
        text=True,
        # ``os.environ`` already unpacks as a mapping — no ``dict()`` copy
        # needed. Dummy key is presumably required so the CLI does not abort
        # on missing credentials — confirm against the CLI's auth handling.
        env={**os.environ, "FIREWORKS_API_KEY": "benchmark-test-key"},
    )
    elapsed = time.perf_counter() - start

    # Only trust the timing if the command actually succeeded.
    assert result.returncode == 0, f"CLI failed: {result.stderr}"

    return elapsed
37+
38+
@pytest.mark.benchmark
def test_cli_startup_time():
    """Assert the fastest observed CLI startup beats the target threshold."""
    samples = []

    for run_index in range(NUM_RUNS):
        duration = measure_cli_startup_time()
        samples.append(duration)
        print(f" Run {run_index + 1}: {duration:.3f}s")

    avg_time = sum(samples) / len(samples)
    min_time = min(samples)
    max_time = max(samples)

    print(f"\n Average: {avg_time:.3f}s")
    print(f" Min: {min_time:.3f}s")
    print(f" Max: {max_time:.3f}s")
    print(f" Target: {CLI_STARTUP_TARGET_SECONDS}s")

    # Judge by the best run, since CI machines add variable per-run overhead.
    assert min_time < CLI_STARTUP_TARGET_SECONDS, (
        f"CLI startup time ({min_time:.3f}s) exceeds target ({CLI_STARTUP_TARGET_SECONDS}s). "
        f"Check for import-time side effects or eager module loading."
    )
62+
63+
64+
@pytest.mark.benchmark
def test_package_import_time():
    """Assert that ``import eval_protocol`` stays fast (lazy-loading guard)."""
    # A subprocess gives a clean, uncached measurement of the import.
    snippet = """
import time
start = time.perf_counter()
import eval_protocol
elapsed = time.perf_counter() - start
print(f"{elapsed:.6f}")
"""
    completed = subprocess.run(
        [sys.executable, "-c", snippet],
        capture_output=True,
        text=True,
    )

    assert completed.returncode == 0, f"Import failed: {completed.stderr}"

    # The child process prints the elapsed seconds as its only stdout.
    import_time = float(completed.stdout.strip())
    print(f"\n Package import time: {import_time * 1000:.1f}ms")

    # With lazy loading in place the import should come in well under 50ms.
    assert import_time < 0.05, (
        f"Package import time ({import_time * 1000:.1f}ms) is too slow. "
        f"Check that __init__.py uses lazy loading correctly."
    )
91+
92+
93+
if __name__ == "__main__":
    # Standalone mode: run the benchmark directly without pytest.
    print("=== CLI Startup Benchmark ===\n")

    print("Testing CLI startup time...")
    samples = []
    for run_index in range(NUM_RUNS):
        duration = measure_cli_startup_time()
        samples.append(duration)
        print(f" Run {run_index + 1}: {duration:.3f}s")

    avg_time = sum(samples) / len(samples)
    min_time = min(samples)

    print(f"\n Average: {avg_time:.3f}s")
    print(f" Best: {min_time:.3f}s")
    print(f" Target: {CLI_STARTUP_TARGET_SECONDS}s")

    # Mirror the pytest check: pass/fail on the best run vs. the target.
    if min_time < CLI_STARTUP_TARGET_SECONDS:
        print(f"\n✓ PASS: CLI startup ({min_time:.3f}s) is under target ({CLI_STARTUP_TARGET_SECONDS}s)")
    else:
        print(f"\n✗ FAIL: CLI startup ({min_time:.3f}s) exceeds target ({CLI_STARTUP_TARGET_SECONDS}s)")
        sys.exit(1)

0 commit comments

Comments
 (0)