1515import logging
1616import os
1717from typing import Optional
18+ import json
19+ import pathlib
1820
1921
2022def pytest_addoption (parser ) -> None :
@@ -87,6 +89,21 @@ def pytest_addoption(parser) -> None:
8789 "Default: true (fail on permanent failures). Set to 'false' to continue with remaining rollouts."
8890 ),
8991 )
92+ group .addoption (
93+ "--ep-success-threshold" ,
94+ action = "store" ,
95+ default = None ,
96+ help = ("Override the success threshold for evaluation_test. Pass a float between 0.0 and 1.0 (e.g., 0.8)." ),
97+ )
98+ group .addoption (
99+ "--ep-se-threshold" ,
100+ action = "store" ,
101+ default = None ,
102+ help = (
103+ "Override the standard error threshold for evaluation_test. "
104+ "Pass a float >= 0.0 (e.g., 0.05). If only this is set, success threshold defaults to 0.0."
105+ ),
106+ )
90107
91108
92109def _normalize_max_rows (val : Optional [str ]) -> Optional [str ]:
@@ -117,6 +134,49 @@ def _normalize_number(val: Optional[str]) -> Optional[str]:
117134 return None
118135
119136
137+ def _normalize_success_threshold (val : Optional [str ]) -> Optional [float ]:
138+ """Normalize success threshold value as float between 0.0 and 1.0."""
139+ if val is None :
140+ return None
141+
142+ try :
143+ threshold_float = float (val .strip ())
144+ if 0.0 <= threshold_float <= 1.0 :
145+ return threshold_float
146+ else :
147+ return None # threshold must be between 0 and 1
148+ except ValueError :
149+ return None
150+
151+
152+ def _normalize_se_threshold (val : Optional [str ]) -> Optional [float ]:
153+ """Normalize standard error threshold value as float >= 0.0."""
154+ if val is None :
155+ return None
156+
157+ try :
158+ threshold_float = float (val .strip ())
159+ if threshold_float >= 0.0 :
160+ return threshold_float
161+ else :
162+ return None # standard error must be >= 0
163+ except ValueError :
164+ return None
165+
166+
167+ def _build_passed_threshold_env (success : Optional [float ], se : Optional [float ]) -> Optional [str ]:
168+ """Build the EP_PASSED_THRESHOLD environment variable value from the two separate thresholds."""
169+ if success is None and se is None :
170+ return None
171+
172+ if se is None :
173+ return str (success )
174+ else :
175+ success_val = success if success is not None else 0.0
176+ threshold_dict = {"success" : success_val , "standard_error" : se }
177+ return json .dumps (threshold_dict )
178+
179+
120180def pytest_configure (config ) -> None :
121181 # Quiet LiteLLM INFO spam early in pytest session unless user set a level
122182 try :
@@ -161,11 +221,16 @@ def pytest_configure(config) -> None:
161221 if fail_on_max_retry is not None :
162222 os .environ ["EP_FAIL_ON_MAX_RETRY" ] = fail_on_max_retry
163223
224+ success_threshold_val = config .getoption ("--ep-success-threshold" )
225+ se_threshold_val = config .getoption ("--ep-se-threshold" )
226+ norm_success = _normalize_success_threshold (success_threshold_val )
227+ norm_se = _normalize_se_threshold (se_threshold_val )
228+ threshold_env = _build_passed_threshold_env (norm_success , norm_se )
229+ if threshold_env is not None :
230+ os .environ ["EP_PASSED_THRESHOLD" ] = threshold_env
231+
164232 # Allow ad-hoc overrides of input params via CLI flags
165233 try :
166- import json as _json
167- import pathlib as _pathlib
168-
169234 merged : dict = {}
170235 input_params_opts = config .getoption ("--ep-input-param" )
171236 if input_params_opts :
@@ -174,17 +239,17 @@ def pytest_configure(config) -> None:
174239 continue
175240 opt = str (opt )
176241 if opt .startswith ("@" ): # load JSON file
177- p = _pathlib .Path (opt [1 :])
242+ p = pathlib .Path (opt [1 :])
178243 if p .is_file ():
179244 with open (p , "r" , encoding = "utf-8" ) as f :
180- obj = _json .load (f )
245+ obj = json .load (f )
181246 if isinstance (obj , dict ):
182247 merged .update (obj )
183248 elif "=" in opt :
184249 k , v = opt .split ("=" , 1 )
185250 # Try parse JSON values, fallback to string
186251 try :
187- merged [k ] = _json .loads (v )
252+ merged [k ] = json .loads (v )
188253 except Exception :
189254 merged [k ] = v
190255 reasoning_effort = config .getoption ("--ep-reasoning-effort" )
@@ -194,7 +259,7 @@ def pytest_configure(config) -> None:
194259 # Convert "none" string to None value for API compatibility
195260 eb ["reasoning_effort" ] = None if reasoning_effort .lower () == "none" else str (reasoning_effort )
196261 if merged :
197- os .environ ["EP_INPUT_PARAMS_JSON" ] = _json .dumps (merged )
262+ os .environ ["EP_INPUT_PARAMS_JSON" ] = json .dumps (merged )
198263 except Exception :
199264 # best effort, do not crash pytest session
200265 pass
0 commit comments