Skip to content

Commit d000f19

Browse files
committed
rename listwise to all
1 parent d587101 commit d000f19

File tree

6 files changed

+15
-17
lines changed

6 files changed

+15
-17
lines changed

eval_protocol/pytest/default_single_turn_rollout_process.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,9 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
7373

7474
_litellm = importlib.import_module("litellm")
7575
acompletion = getattr(_litellm, "acompletion")
76-
logger.debug(f"********** request_params: {request_params} **********")
7776
response = await acompletion(**request_params)
7877

7978
assistant_content = response.choices[0].message.content or ""
80-
logger.debug(f"********** assistant_content: {assistant_content} **********")
8179
tool_calls = response.choices[0].message.tool_calls if response.choices[0].message.tool_calls else None
8280

8381
converted_tool_calls = None

eval_protocol/pytest/evaluation_test.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def evaluation_test( # noqa: C901
310310
steps: Number of rollout steps to execute (default: 30).
311311
mode: Evaluation mode. "pointwise" (default) applies test function to each row (rollout result).
312312
"groupwise" applies test function to a group of rollout results from the same original row (for use cases such as dpo/grpo).
313-
"listwise" applies test function to the whole dataset.
313+
"all" applies test function to the whole dataset.
314314
logger: DatasetLogger to use for logging. If not provided, a default logger will be used.
315315
"""
316316

@@ -349,29 +349,29 @@ def decorator(
349349
# additional check for groupwise evaluation
350350
elif mode == "groupwise":
351351
if "rows" not in sig.parameters:
352-
raise ValueError("In listwise mode, your eval function must have a parameter named 'rows'")
352+
raise ValueError("In groupwise mode, your eval function must have a parameter named 'rows'")
353353

354354
# validate that "Rows" is of type List[EvaluationRow]
355355
if sig.parameters["rows"].annotation is not List[EvaluationRow]:
356-
raise ValueError("In listwise mode, the 'rows' parameter must be of type List[EvaluationRow")
356+
raise ValueError("In groupwise mode, the 'rows' parameter must be of type List[EvaluationRow]")
357357

358358
# validate that the function has a return type of List[EvaluationRow]
359359
if sig.return_annotation is not List[EvaluationRow]:
360-
raise ValueError("In listwise mode, your eval function must return a list of EvaluationRow instances")
360+
raise ValueError("In groupwise mode, your eval function must return a list of EvaluationRow instances")
361361
if len(completion_params) < 2:
362362
raise ValueError("In groupwise mode, you must provide at least 2 completion parameters")
363363
else:
364-
# listwise mode: function should accept input_dataset and model
364+
# all mode: function should accept input_dataset and model
365365
if "rows" not in sig.parameters:
366-
raise ValueError("In batch mode, your eval function must have a parameter named 'rows'")
366+
raise ValueError("In all mode, your eval function must have a parameter named 'rows'")
367367

368368
# validate that "Rows" is of type List[EvaluationRow]
369369
if sig.parameters["rows"].annotation is not List[EvaluationRow]:
370-
raise ValueError("In batch mode, the 'rows' parameter must be of type List[EvaluationRow")
370+
raise ValueError("In all mode, the 'rows' parameter must be of type List[EvaluationRow]")
371371

372372
# validate that the function has a return type of List[EvaluationRow]
373373
if sig.return_annotation is not List[EvaluationRow]:
374-
raise ValueError("In listwise mode, your eval function must return a list of EvaluationRow instances")
374+
raise ValueError("In all mode, your eval function must return a list of EvaluationRow instances")
375375

376376
async def execute_with_params(
377377
test_func: TestFunction,
@@ -434,7 +434,7 @@ async def execute_with_params(
434434
param_tuple.append(etk)
435435
param_tuples.append(tuple(param_tuple))
436436

437-
# For listwise mode, preserve the original parameter names
437+
# For all mode, preserve the original parameter names
438438
test_param_names = []
439439
if input_dataset is not None:
440440
test_param_names.append("dataset_path")

eval_protocol/pytest/types.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919

2020
Dataset = List[EvaluationRow]
2121

22-
EvaluationTestMode = Literal["pointwise", "groupwise", "listwise"]
22+
EvaluationTestMode = Literal["pointwise", "groupwise", "all"]
2323
"""
2424
"pointwise": (default) applies test function to each row (rollout result).
2525
"groupwise": applies test function to a group of rollout results from the same original row (for use cases such as dpo/grpo).
26-
"listwise": applies test function to the whole dataset.
26+
"all": applies test function to the whole dataset.
2727
"""
2828

2929
"""

tests/pytest/test_pytest_async.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
],
1919
],
2020
completion_params=[{"model": "accounts/fireworks/models/kimi-k2-instruct"}],
21-
mode="listwise",
21+
mode="all",
2222
)
2323
async def test_pytest_async(rows: List[EvaluationRow]) -> List[EvaluationRow]:
2424
"""Run math evaluation on sample dataset using pytest interface."""
@@ -32,7 +32,7 @@ async def test_pytest_async(rows: List[EvaluationRow]) -> List[EvaluationRow]:
3232
],
3333
],
3434
completion_params=[{"model": "accounts/fireworks/models/kimi-k2-instruct"}],
35-
mode="pointwise",
35+
mode="all",
3636
)
3737
async def test_pytest_async_pointwise(row: EvaluationRow) -> EvaluationRow:
3838
"""Run pointwise evaluation on sample dataset using pytest interface."""

tests/pytest/test_pytest_default_agent_rollout_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
],
1919
rollout_processor=AgentRolloutProcessor(),
2020
completion_params=[{"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct"}],
21-
mode="listwise",
21+
mode="all",
2222
)
2323
def test_pytest_default_agent_rollout_processor(rows: List[EvaluationRow]) -> List[EvaluationRow]:
2424
"""Run math evaluation on sample dataset using pytest interface."""

tests/pytest/test_pytest_input_messages.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
],
1313
completion_params=[{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b"}],
1414
rollout_processor=SingleTurnRolloutProcessor(),
15-
mode="listwise",
15+
mode="all",
1616
)
1717
def test_input_messages_in_decorator(rows: List[EvaluationRow]) -> List[EvaluationRow]:
1818
"""Run math evaluation on sample dataset using pytest interface."""

0 commit comments

Comments
 (0)