test_import_logs works

Dylan Huang · Dylan Huang · commit 94ae1b3ddef8 · 2025-09-18T16:01:55.000-07:00
diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
@@ -205,6 +205,7 @@ def decorator(
         # Create parameter tuples for pytest.mark.parametrize
         pytest_parametrize_args = pytest_parametrize(
             combinations,
+            test_func,
             input_dataset,
             completion_params,
             completion_params_provided,
@@ -268,7 +269,7 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
                             index = abs(index) % (max_index + 1)
                             row.input_metadata.row_id = generate_id(seed=0, index=index)
 
-                    completion_params = kwargs["completion_params"]
+                    completion_params = kwargs["completion_params"] if "completion_params" in kwargs else None
                     # Create eval metadata with test function info and current commit hash
                     eval_metadata = EvalMetadata(
                         name=test_func.__name__,
diff --git a/eval_protocol/pytest/generate_parameter_combinations.py b/eval_protocol/pytest/generate_parameter_combinations.py
@@ -31,7 +31,7 @@
 ]
 
 
-class ParameterizedTestKwargs(TypedDict):
+class ParameterizedTestKwargs(TypedDict, total=False):
     """
     These are the type of parameters that can be passed to the generated pytest
     function. Every experiment is a unique combination of these parameters.
diff --git a/eval_protocol/pytest/parameterize.py b/eval_protocol/pytest/parameterize.py
@@ -1,3 +1,4 @@
+import ast
 import inspect
 from typing import TypedDict, Protocol
 from collections.abc import Callable, Sequence, Iterable, Awaitable
@@ -9,6 +10,111 @@
 from eval_protocol.pytest.types import DatasetPathParam, EvaluationInputParam, InputMessagesParam, TestFunction
 
 
+def _has_pytest_parametrize_with_completion_params(test_func: TestFunction) -> bool:
+    """
+    Check if a test function has a pytest.mark.parametrize decorator with argnames="completion_params".
+
+    This function uses inspect.getsource and ast to parse the function's source code and look for
+    pytest.mark.parametrize decorators that include "completion_params" in their argnames.
+
+    Args:
+        test_func: The test function to analyze
+
+    Returns:
+        True if the function has a pytest.mark.parametrize decorator with "completion_params" in argnames,
+        False otherwise
+
+    Note:
+        This function does not propagate source-retrieval or parse failures: if inspect.getsource raises
+        OSError (e.g., function is defined in interactive mode) or ast.parse raises SyntaxError, it returns False.
+    """
+    try:
+        source = inspect.getsource(test_func)
+    except OSError:
+        # Function source cannot be retrieved (e.g., defined in interactive mode)
+        return False
+
+    try:
+        tree = ast.parse(source)
+    except SyntaxError:
+        # Source code cannot be parsed
+        return False
+
+    # Walk through the AST to find pytest.mark.parametrize decorators
+    for node in ast.walk(tree):
+        if isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef):
+            # Check decorators on this function
+            for decorator in node.decorator_list:
+                if _is_pytest_parametrize_with_completion_params(decorator):
+                    return True
+
+    return False
+
+
+def _is_pytest_parametrize_with_completion_params(decorator: ast.expr) -> bool:
+    """
+    Check if a decorator is pytest.mark.parametrize with "completion_params" in argnames.
+
+    Args:
+        decorator: AST node representing a decorator
+
+    Returns:
+        True if this is a pytest.mark.parametrize decorator with "completion_params" in argnames
+    """
+    # Look for pytest.mark.parametrize pattern
+    if isinstance(decorator, ast.Call):
+        # Check if it's pytest.mark.parametrize
+        if isinstance(decorator.func, ast.Attribute):
+            if (
+                isinstance(decorator.func.value, ast.Attribute)
+                and isinstance(decorator.func.value.value, ast.Name)
+                and decorator.func.value.value.id == "pytest"
+                and decorator.func.value.attr == "mark"
+                and decorator.func.attr == "parametrize"
+            ):
+                # Check positional arguments first (argnames is typically the first positional arg)
+                if len(decorator.args) > 0:
+                    argnames_arg = decorator.args[0]
+                    if _check_argnames_for_completion_params(argnames_arg):
+                        return True
+
+                # Check keyword arguments for argnames
+                for keyword in decorator.keywords:
+                    if keyword.arg == "argnames":
+                        if _check_argnames_for_completion_params(keyword.value):
+                            return True
+
+    return False
+
+
+def _check_argnames_for_completion_params(argnames_node: ast.expr) -> bool:
+    """
+    Check if an argnames AST node contains "completion_params".
+
+    Args:
+        argnames_node: AST node representing the argnames value
+
+    Returns:
+        True if argnames is exactly the string "completion_params", or a list/tuple literal containing it
+    """
+    if isinstance(argnames_node, ast.Constant):
+        # Single string case: argnames="completion_params"
+        if argnames_node.value == "completion_params":
+            return True
+    elif isinstance(argnames_node, ast.List):
+        # List case: argnames=["completion_params", ...]
+        for elt in argnames_node.elts:
+            if isinstance(elt, ast.Constant) and elt.value == "completion_params":
+                return True
+    elif isinstance(argnames_node, ast.Tuple):
+        # Tuple case: argnames=("completion_params", ...)
+        for elt in argnames_node.elts:
+            if isinstance(elt, ast.Constant) and elt.value == "completion_params":
+                return True
+
+    return False
+
+
 class PytestMarkParametrizeKwargs(TypedDict):
     argnames: Sequence[str]
     argvalues: Iterable[ParameterSet | Sequence[object] | object]
@@ -96,6 +202,7 @@ def generate_id_from_dict(d: dict[str, object], max_length: int = 200) -> str |
 
 def pytest_parametrize(
     combinations: list[CombinationTuple],
+    test_func: TestFunction | None,
     input_dataset: Sequence[DatasetPathParam] | None,
     completion_params: Sequence[CompletionParams | None] | None,
     completion_params_provided: bool,
@@ -112,16 +219,22 @@ def pytest_parametrize(
     API.
     """
 
+    if test_func is not None:
+        has_pytest_parametrize = _has_pytest_parametrize_with_completion_params(test_func)
+    else:
+        has_pytest_parametrize = False
+
     # Create parameter tuples for pytest.mark.parametrize
     argnames: list[str] = []
     sig_parameters: list[str] = []
     if input_dataset is not None:
         argnames.append("dataset_path")
         sig_parameters.append("dataset_path")
     if completion_params is not None:
-        if completion_params_provided:
+        if completion_params_provided and not has_pytest_parametrize:
             argnames.append("completion_params")
-        sig_parameters.append("completion_params")
+        if has_pytest_parametrize or completion_params_provided:
+            sig_parameters.append("completion_params")
     if input_messages is not None:
         argnames.append("input_messages")
         sig_parameters.append("input_messages")
diff --git a/eval_protocol/quickstart/llm_judge_openai_responses.py b/eval_protocol/quickstart/llm_judge_openai_responses.py
@@ -52,7 +52,6 @@
             "model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905",
         },
     ],
-    ids=DefaultParameterIdGenerator.generate_id_from_dict,
 )
 @evaluation_test(
     input_rows=[input_rows],
diff --git a/tests/pytest/test_parameterized_ids.py b/tests/pytest/test_parameterized_ids.py
@@ -149,6 +149,7 @@ def test_pytest_parametrize_with_custom_id_generator():
     # Test with default generator
     result = pytest_parametrize(
         combinations=combinations,
+        test_func=None,
         input_dataset=None,
         completion_params=[{"model": "gpt-4"}, {"model": "claude-3"}, {"temperature": 0.5}],
         completion_params_provided=True,

Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@`
`31`	`31`	`]`
`32`	`32`
`33`	`33`
`34`		`-class ParameterizedTestKwargs(TypedDict):`
	`34`	`+class ParameterizedTestKwargs(TypedDict, total=False):`
`35`	`35`	`"""`
`36`	`36`	`These are the type of parameters that can be passed to the generated pytest`
`37`	`37`	`function. Every experiment is a unique combination of these parameters.`
Original file line number	Diff line number	Diff line change
`@@ -52,7 +52,6 @@`
`52`	`52`	`"model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905",`
`53`	`53`	`},`
`54`	`54`	`],`
`55`		`- ids=DefaultParameterIdGenerator.generate_id_from_dict,`
`56`	`55`	`)`
`57`	`56`	`@evaluation_test(`
`58`	`57`	`input_rows=[input_rows],`