|
5 | 5 | from tests.pytest.test_markdown_highlighting import markdown_dataset_to_evaluation_row |
6 | 6 |
|
7 | 7 |
|
8 | | -async def test_evaluation_test_decorator_ids_single(): |
| 8 | +def test_evaluation_test_decorator_ids_single(): |
9 | 9 | from eval_protocol.pytest.evaluation_test import evaluation_test |
10 | 10 |
|
11 | 11 | row_ids = set() |
@@ -35,18 +35,18 @@ def eval_fn(row: EvaluationRow) -> EvaluationRow: |
35 | 35 | # Manually invoke all parameter combinations within a single test |
36 | 36 | for ds_path in input_dataset: |
37 | 37 | for params in completion_params_list: |
38 | | - await eval_fn(dataset_path=[ds_path], completion_params=params) |
| 38 | + eval_fn(dataset_path=[ds_path], completion_params=params) |
39 | 39 |
|
40 | 40 | # Second invocation to ensure that IDs are stable across multiple invocations |
41 | 41 | for ds_path in input_dataset: |
42 | 42 | for params in completion_params_list: |
43 | | - await eval_fn(dataset_path=[ds_path], completion_params=params) |
| 43 | + eval_fn(dataset_path=[ds_path], completion_params=params) |
44 | 44 |
|
45 | 45 | # Assertions on IDs generated by the decorator logic |
46 | 46 | assert len(row_ids) == 19 # from the markdown dataset |
47 | 47 |
|
48 | 48 |
|
49 | | -async def test_evaluation_test_generated_row_ids_without_dataset_keys(): |
| 49 | +def test_evaluation_test_generated_row_ids_without_dataset_keys(): |
50 | 50 | from eval_protocol.pytest.evaluation_test import evaluation_test |
51 | 51 |
|
52 | 52 | # Adapter that does NOT set row_id; lets evaluation_test generate IDs |
@@ -86,12 +86,12 @@ def eval_fn(row: EvaluationRow) -> EvaluationRow: |
86 | 86 | # Single invocation (one dataset, one param set) with multiple runs |
87 | 87 | for ds_path in input_dataset: |
88 | 88 | for params in completion_params: |
89 | | - await eval_fn(dataset_path=[ds_path], completion_params=params) |
| 89 | + eval_fn(dataset_path=[ds_path], completion_params=params) |
90 | 90 |
|
91 | 91 | # Second invocation to ensure that IDs are stable across multiple invocations |
92 | 92 | for ds_path in input_dataset: |
93 | 93 | for params in completion_params: |
94 | | - await eval_fn(dataset_path=[ds_path], completion_params=params) |
| 94 | + eval_fn(dataset_path=[ds_path], completion_params=params) |
95 | 95 |
|
96 | 96 | # Even with multiple runs, generated row_ids should be stable within the invocation |
97 | 97 | assert len(row_ids) == 19 # equals dataset size when IDs are generated once and preserved across runs |
0 commit comments