|
18 | 18 | Either a single completion params object or None. |
19 | 19 | """ |
20 | 20 |
|
21 | | -InputMessagesKwarg = InputMessagesParam | None |
| 21 | +InputMessagesKwarg = list[InputMessagesParam] | None |
22 | 22 | InputRowsKwarg = Dataset | None |
23 | 23 | EvaluationTestKwargs = EvaluationInputParam | None |
24 | 24 |
|
@@ -47,7 +47,7 @@ class ParameterizedTestKwargs(TypedDict): |
47 | 47 | def generate_parameter_combinations( |
48 | 48 | input_dataset: Sequence[DatasetPathParam] | None, |
49 | 49 | completion_params: Sequence[CompletionParams | None], |
50 | | - input_messages: Sequence[InputMessagesParam | None] | None, |
| 50 | + input_messages: Sequence[list[InputMessagesParam] | None] | None, |
51 | 51 | input_rows: Sequence[list[EvaluationRow] | None] | None, |
52 | 52 | evaluation_test_kwargs: Sequence[EvaluationInputParam | None] | None, |
53 | 53 | max_dataset_rows: int | None, |
@@ -83,11 +83,15 @@ def generate_parameter_combinations( |
83 | 83 | # Apply EP_MAX_DATASET_ROWS to input_messages, but do NOT parameterize over |
84 | 84 | # each row. Instead, pass the entire sliced list through in a single test run |
85 | 85 | # so summaries aggregate all rows together (AIME-style behavior). |
86 | | - messages: Sequence[InputMessagesParam | None] = [None] |
| 86 | + messages: Sequence[list[InputMessagesParam] | None] = [None] |
87 | 87 | if input_messages is not None: |
88 | 88 | effective_max_rows = parse_ep_max_rows(max_dataset_rows) |
89 | 89 | if effective_max_rows is not None: |
90 | | - sliced_messages: Sequence[InputMessagesParam | None] = input_messages[:effective_max_rows] |
| 90 | + sliced_messages: Sequence[list[InputMessagesParam] | None] = [ |
| 91 | + dataset_messages[:effective_max_rows] |
| 92 | + for dataset_messages in input_messages |
| 93 | + if dataset_messages is not None |
| 94 | + ] |
91 | 95 | else: |
92 | 96 | sliced_messages = input_messages |
93 | 97 | # Wrap as a single parameter payload |
|
0 commit comments