Skip to content

Commit 7386347

Browse files
committed
Fix bugs in retry processor
1 parent 3f6545f commit 7386347

File tree

3 files changed

+14
-11
lines changed

3 files changed

+14
-11
lines changed

eval_protocol/pytest/plugin.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,13 @@ def pytest_addoption(parser) -> None:
6363
"--ep-max-retry",
6464
action="store",
6565
type=int,
66-
default=None,
66+
default=0,
6767
help=("Failed rollouts (with rollout_status.status == 'error') will be retried up to this many times."),
6868
)
6969
group.addoption(
70-
"--ep-fail-on-permanent-failure",
70+
"--ep-fail-on-max-retry",
7171
action="store",
72-
default=None,
72+
default="true",
7373
choices=["true", "false"],
7474
help=(
7575
"Whether to fail the entire rollout when permanent failures occur after max retries. "
@@ -118,12 +118,10 @@ def pytest_configure(config) -> None:
118118
os.environ["EP_SUMMARY_JSON"] = summary_json_path
119119

120120
max_retry = config.getoption("--ep-max-retry")
121-
if max_retry is not None:
122-
os.environ["EP_MAX_RETRY"] = str(max_retry)
121+
os.environ["EP_MAX_RETRY"] = str(max_retry)
123122

124-
fail_on_permanent_failure = config.getoption("--ep-fail-on-permanent-failure")
125-
if fail_on_permanent_failure is not None:
126-
os.environ["EP_FAIL_ON_PERMANENT_FAILURE"] = fail_on_permanent_failure
123+
fail_on_max_retry = config.getoption("--ep-fail-on-max-retry")
124+
os.environ["EP_FAIL_ON_MAX_RETRY"] = fail_on_max_retry
127125

128126
# Allow ad-hoc overrides of input params via CLI flags
129127
try:

eval_protocol/pytest/utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,13 @@ async def retry_handler(failed_row: EvaluationRow):
280280

281281
async def initial_processor():
282282
"""Process initial batch and spawn retries for failures"""
283-
base_tasks = rollout_processor(fresh_dataset, config)
283+
# catch any task creation errors and raise them immediately, e.g. a port already in use
284+
try:
285+
base_tasks = rollout_processor(fresh_dataset, config)
286+
except Exception as e:
287+
print(f"❌ Rollout processor failed to initialize: {e}")
288+
raise e
289+
284290
pending = set(base_tasks)
285291

286292
while pending:
@@ -310,7 +316,7 @@ async def initial_processor():
310316

311317
# only permanent failure rows are put on the queue, so we can check for them here
312318
if finished_row.rollout_status and finished_row.rollout_status.status == "error":
313-
if os.getenv("EP_FAIL_ON_PERMANENT_FAILURE", "true") != "false":
319+
if max_retry > 0 and os.getenv("EP_FAIL_ON_MAX_RETRY", "true") != "false":
314320
raise RuntimeError(
315321
f"Rollout {finished_row.execution_metadata.rollout_id} failed after {max_retry} retries. Errors: {finished_row.rollout_status.termination_reason}"
316322
)

tests/pytest/test_livesvgbench.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,6 @@ def test_svg_combined_evaluation(row: EvaluationRow) -> EvaluationRow:
427427
428428
Combines results to catch issues like Google logos that are just colored circles.
429429
"""
430-
logger.info(f"Evaluating row {row.input_metadata.row_id} at {time.time()}")
431430
# Extract dataset info
432431
requirements = row.input_metadata.dataset_info["requirements"]
433432
total_requirements = row.input_metadata.dataset_info["total_requirements"]

0 commit comments

Comments
 (0)