From c96dbd6722c9b1904957de27162afcf6c2136a4c Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 19:39:35 +0000
Subject: [PATCH 01/10] chore: remove dead Python 3.9 skipif guard in
 test_context

The SDK requires Python >=3.10, so the `sys.version_info < (3, 9)`
guard on `test_to_thread_preserves_context` is always False and can
be removed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/test_context.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py
index 17b3c6d1..cb3b78a9 100644
--- a/py/src/braintrust/test_context.py
+++ b/py/src/braintrust/test_context.py
@@ -277,7 +277,6 @@ async def async_worker():
     assert parent_log["span_id"] in worker_log.get("span_parents", []), "Worker should have parent as parent"
 
 
-@pytest.mark.skipif(sys.version_info < (3, 9), reason="to_thread requires Python 3.9+")
 @pytest.mark.asyncio
 async def test_to_thread_preserves_context(test_logger, with_memory_logger):
     """

From f0a5a5baf4a365c4c2bae14d8fbaa5e37a26e77d Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 19:40:32 +0000
Subject: [PATCH 02/10] chore: add proper type annotation to prettify_xact

Now that min Python is 3.10, use `int | str` union syntax directly
in the type annotation instead of leaving it untyped with a comment.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/xact_ids.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/py/src/braintrust/xact_ids.py b/py/src/braintrust/xact_ids.py
index 0327066e..1231e3bd 100644
--- a/py/src/braintrust/xact_ids.py
+++ b/py/src/braintrust/xact_ids.py
@@ -9,9 +9,7 @@ def modular_multiply(value: int, prime: int):
     return (value * prime) % MOD
 
 
-# value : int | str
-# Cannot use a | because of python 3.8
-def prettify_xact(value) -> str:
+def prettify_xact(value: int | str) -> str:
     encoded = modular_multiply(int(value), COPRIME)
     return hex(encoded)[2:].rjust(16, "0")
 

From 9ddc09ccc6f0aa48203e7ecb282da68bad36dfc6 Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 19:41:30 +0000
Subject: [PATCH 03/10] chore: use proper Future[Any] return type for
 TracedThreadPoolExecutor.submit

Future[T] generic typing has been stable since Python 3.9; with min
Python 3.10 we no longer need to return Any as a workaround.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/logger.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index 32cd554b..9ef35c50 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -5618,9 +5618,7 @@ def __str__(self):
 
 
 class TracedThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
-    # Returns Any because Future[T] generic typing was stabilized in Python 3.9,
-    # but we maintain compatibility with older type checkers.
-    def submit(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
+    def submit(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> concurrent.futures.Future[Any]:
         # Capture all current context variables
         context = contextvars.copy_context()
 

From f6268820b35689a99a8ceeaa8808819985797ae8 Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 19:43:11 +0000
Subject: [PATCH 04/10] chore: move Protocol and TypedDict from
 typing_extensions to typing

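Both have been in stdlib typing since Python 3.8. With min Python
3.10, no need for the typing_extensions backport.

Caveat: on Python 3.10, typing.TypedDict does not support generic
TypedDicts (class X(TypedDict, Generic[T])) and does not account for
typing_extensions.NotRequired when computing __required_keys__ /
__optional_keys__; both arrived in Python 3.11. Confirm that none of
the migrated TypedDicts (notably in types/_eval.py, which still
imports Generic and NotRequired) rely on either feature at runtime.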

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/devserver/schemas.py | 4 +---
 py/src/braintrust/framework.py         | 3 ++-
 py/src/braintrust/types/_eval.py       | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/py/src/braintrust/devserver/schemas.py b/py/src/braintrust/devserver/schemas.py
index cd8f49da..a359a93d 100644
--- a/py/src/braintrust/devserver/schemas.py
+++ b/py/src/braintrust/devserver/schemas.py
@@ -1,8 +1,6 @@
 import json
 from collections.abc import Sequence
-from typing import Any, Union, get_args, get_origin, get_type_hints
-
-from typing_extensions import TypedDict
+from typing import Any, TypedDict, Union, get_args, get_origin, get_type_hints
 
 
 # This is not beautiful code, but it saves us from introducing Pydantic as a dependency, and it is fairly
diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
index 5ad20a25..4dd58a33 100644
--- a/py/src/braintrust/framework.py
+++ b/py/src/braintrust/framework.py
@@ -17,13 +17,14 @@
     Any,
     Generic,
     Literal,
+    Protocol,
+    TypedDict,
     TypeVar,
     Union,
 )
 
 from tqdm.asyncio import tqdm as async_tqdm
 from tqdm.auto import tqdm as std_tqdm
-from typing_extensions import Protocol, TypedDict
 
 from .generated_types import FunctionFormat, FunctionOutputType, ObjectReference
 from .git_fields import GitMetadataSettings, RepoInfo
diff --git a/py/src/braintrust/types/_eval.py b/py/src/braintrust/types/_eval.py
index 0f5be193..a199d8a2 100644
--- a/py/src/braintrust/types/_eval.py
+++ b/py/src/braintrust/types/_eval.py
@@ -5,9 +5,9 @@
 underscore-prefixed so pyright strict mode doesn't flag them as private.
 """
 
-from typing import Any, Generic, Sequence, TypeVar
+from typing import Any, Generic, Sequence, TypedDict, TypeVar
 
-from typing_extensions import NotRequired, TypedDict
+from typing_extensions import NotRequired
 
 
 Input = TypeVar("Input")

From 3eb39056faf05e7711fa575dbaf9a0e0c64232d3 Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 19:46:56 +0000
Subject: [PATCH 05/10] chore: replace Union[X, Y] with X | Y syntax (PEP 604)

With min Python 3.10, the union pipe syntax is available at runtime.
Migrates type aliases and annotations in framework.py, logger.py,
util.py, prompt.py, functions/stream.py, and langchain/callbacks.py.

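Keeps Union imports where used as a runtime sentinel for
get_origin() checks (serializable_data_class.py, devserver/schemas.py).
Note that on Python 3.10-3.13 get_origin() returns types.UnionType,
not typing.Union, for unions built from plain classes with `X | Y`,
so those checks should be confirmed to handle both origins.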

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/framework.py                | 74 +++++++++----------
 py/src/braintrust/functions/stream.py         | 16 ++--
 .../integrations/langchain/callbacks.py       |  3 +-
 py/src/braintrust/logger.py                   |  9 +--
 py/src/braintrust/prompt.py                   |  4 +-
 py/src/braintrust/util.py                     |  4 +-
 6 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
index 4dd58a33..a40c5471 100644
--- a/py/src/braintrust/framework.py
+++ b/py/src/braintrust/framework.py
@@ -20,7 +20,6 @@
     Protocol,
     TypedDict,
     TypeVar,
-    Union,
 )
 
 from tqdm.asyncio import tqdm as async_tqdm
@@ -217,13 +216,13 @@ class EvalScorerArgs(SerializableDataClass, Generic[Input, Output, Expected]):
     metadata: Metadata | None = None
 
 
-OneOrMoreScores = Union[float, int, bool, None, Score, list[Score]]
-OneOrMoreClassifications = Union[
-    None,
-    Classification,
-    Mapping[str, Any],
-    list[Classification | Mapping[str, Any]],
-]
+OneOrMoreScores = float | int | bool | None | Score | list[Score]
+OneOrMoreClassifications = (
+    None
+    | Classification
+    | Mapping[str, Any]
+    | list[Classification | Mapping[str, Any]]
+)
 
 
 # Synchronous scorer interface - implements callable
@@ -248,20 +247,19 @@ class AsyncScorerLike(Protocol, Generic[Input, Output, Expected]):
     async def eval_async(self, output: Output, expected: Expected | None = None, **kwargs: Any) -> OneOrMoreScores: ...
 
 
-# Union type for any kind of scorer (for typing)
-ScorerLike = Union[SyncScorerLike[Input, Output, Expected], AsyncScorerLike[Input, Output, Expected]]
+ScorerLike = SyncScorerLike[Input, Output, Expected] | AsyncScorerLike[Input, Output, Expected]
 
-EvalScorer = Union[
-    ScorerLike[Input, Output, Expected],
-    type[ScorerLike[Input, Output, Expected]],
-    Callable[[Input, Output, Expected], OneOrMoreScores],
-    Callable[[Input, Output, Expected], Awaitable[OneOrMoreScores]],
-]
+EvalScorer = (
+    ScorerLike[Input, Output, Expected]
+    | type[ScorerLike[Input, Output, Expected]]
+    | Callable[[Input, Output, Expected], OneOrMoreScores]
+    | Callable[[Input, Output, Expected], Awaitable[OneOrMoreScores]]
+)
 
-EvalClassifier = Union[
-    Callable[[Input, Output, Expected], OneOrMoreClassifications],
-    Callable[[Input, Output, Expected], Awaitable[OneOrMoreClassifications]],
-]
+EvalClassifier = (
+    Callable[[Input, Output, Expected], OneOrMoreClassifications]
+    | Callable[[Input, Output, Expected], Awaitable[OneOrMoreClassifications]]
+)
 
 
 @dataclasses.dataclass
@@ -279,27 +277,27 @@ class BaseExperiment:
     """
 
 
-_AnyEvalCase = Union[
-    EvalCase[Input, Expected],
-    EvalCaseDict[Input, Expected],
-    EvalCaseDictNoOutput[Input],
-    ExperimentDatasetEvent,
-]
+_AnyEvalCase = (
+    EvalCase[Input, Expected]
+    | EvalCaseDict[Input, Expected]
+    | EvalCaseDictNoOutput[Input]
+    | ExperimentDatasetEvent
+)
 
-_EvalDataObject = Union[
-    Iterable[_AnyEvalCase[Input, Expected]],
-    Iterator[_AnyEvalCase[Input, Expected]],
-    Awaitable[Iterator[_AnyEvalCase[Input, Expected]]],
-    Callable[[], Union[Iterator[_AnyEvalCase[Input, Expected]], Awaitable[Iterator[_AnyEvalCase[Input, Expected]]]]],
-    BaseExperiment,
-]
+_EvalDataObject = (
+    Iterable[_AnyEvalCase[Input, Expected]]
+    | Iterator[_AnyEvalCase[Input, Expected]]
+    | Awaitable[Iterator[_AnyEvalCase[Input, Expected]]]
+    | Callable[[], Iterator[_AnyEvalCase[Input, Expected]] | Awaitable[Iterator[_AnyEvalCase[Input, Expected]]]]
+    | BaseExperiment
+)
 
-EvalData = Union[_EvalDataObject[Input, Expected], type[_EvalDataObject[Input, Expected]], Dataset]
+EvalData = _EvalDataObject[Input, Expected] | type[_EvalDataObject[Input, Expected]] | Dataset
 
-EvalTask = Union[
-    Callable[[Input], Union[Output, Awaitable[Output]]],
-    Callable[[Input, EvalHooks[Expected]], Union[Output, Awaitable[Output]]],
-]
+EvalTask = (
+    Callable[[Input], Output | Awaitable[Output]]
+    | Callable[[Input, EvalHooks[Expected]], Output | Awaitable[Output]]
+)
 
 ErrorScoreHandler = Callable[[Span, EvalCase[Input, Expected], Sequence[str]], dict[str, float] | None]
 
diff --git a/py/src/braintrust/functions/stream.py b/py/src/braintrust/functions/stream.py
index ba651315..18ebfb6e 100644
--- a/py/src/braintrust/functions/stream.py
+++ b/py/src/braintrust/functions/stream.py
@@ -9,7 +9,7 @@
 import json
 from collections.abc import Generator, Iterable
 from itertools import tee
-from typing import Literal, Union
+from typing import Literal
 
 from sseclient import SSEClient
 
@@ -79,13 +79,13 @@ class BraintrustInvokeError(ValueError):
     pass
 
 
-BraintrustStreamChunk = Union[
-    BraintrustTextChunk,
-    BraintrustJsonChunk,
-    BraintrustErrorChunk,
-    BraintrustConsoleChunk,
-    BraintrustProgressChunk,
-]
+BraintrustStreamChunk = (
+    BraintrustTextChunk
+    | BraintrustJsonChunk
+    | BraintrustErrorChunk
+    | BraintrustConsoleChunk
+    | BraintrustProgressChunk
+)
 
 
 class BraintrustStream:
diff --git a/py/src/braintrust/integrations/langchain/callbacks.py b/py/src/braintrust/integrations/langchain/callbacks.py
index 50650da0..a80a5625 100644
--- a/py/src/braintrust/integrations/langchain/callbacks.py
+++ b/py/src/braintrust/integrations/langchain/callbacks.py
@@ -7,7 +7,6 @@
 from typing import (
     Any,
     TypedDict,
-    Union,
 )
 from uuid import UUID
 
@@ -528,7 +527,7 @@ def on_llm_new_token(
         self,
         token: str,
         *,
-        chunk: Union["GenerationChunk", "ChatGenerationChunk"] | None = None,  # type: ignore
+        chunk: "GenerationChunk | ChatGenerationChunk | None" = None,  # type: ignore
         run_id: UUID,
         parent_run_id: UUID | None = None,
         **kwargs: Any,
diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index 9ef35c50..6ed497a1 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -28,7 +28,6 @@
     Literal,
     TypedDict,
     TypeVar,
-    Union,
     cast,
     overload,
 )
@@ -1600,7 +1599,7 @@ def init(
     base_experiment_id: str | None = None,
     repo_info: RepoInfo | None = None,
     state: BraintrustState | None = None,
-) -> Union["Experiment", "ReadonlyExperiment"]:
+) -> "Experiment | ReadonlyExperiment":
     """
     Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
 
@@ -1767,7 +1766,7 @@ def compute_metadata():
     return ret
 
 
-def init_experiment(*args, **kwargs) -> Union["Experiment", "ReadonlyExperiment"]:
+def init_experiment(*args, **kwargs) -> "Experiment | ReadonlyExperiment":
     """Alias for `init`"""
 
     return init(*args, **kwargs)
@@ -2392,7 +2391,7 @@ def parent_context(parent: str | None, state: BraintrustState | None = None):
 
 def get_span_parent_object(
     parent: str | None = None, state: BraintrustState | None = None
-) -> Union[SpanComponentsV4, "Logger", "Experiment", Span]:
+) -> "SpanComponentsV4 | Logger | Experiment | Span":
     """Mainly for internal use. Return the parent object for starting a span in a global context.
     Applies precedence: current span > propagated parent string > experiment > logger."""
 
@@ -4857,7 +4856,7 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:
 def render_message(render: Callable[[str], str], message: PromptMessage):
     base = {k: v for (k, v) in message.as_dict().items() if v is not None}
     # TODO: shouldn't load_prompt guarantee content is a PromptMessage?
-    content = cast(Union[str, list[Union[TextPart, ImagePart]], dict[str, Any]], message.content)
+    content = cast(str | list[TextPart | ImagePart] | dict[str, Any], message.content)
     if content is not None:
         if isinstance(content, str):
             base["content"] = render(content)
diff --git a/py/src/braintrust/prompt.py b/py/src/braintrust/prompt.py
index d4b7fa19..e9f413b7 100644
--- a/py/src/braintrust/prompt.py
+++ b/py/src/braintrust/prompt.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Literal, Union
+from typing import Literal
 
 from .generated_types import PromptOptions
 from .serializable_data_class import SerializableDataClass
@@ -61,7 +61,7 @@ class PromptChatBlock(SerializableDataClass):
     type: Literal["chat"] = "chat"
 
 
-PromptBlockData = Union[PromptCompletionBlock, PromptChatBlock]
+PromptBlockData = PromptCompletionBlock | PromptChatBlock
 
 
 @dataclass
diff --git a/py/src/braintrust/util.py b/py/src/braintrust/util.py
index 3541cb5f..7fdb8abb 100644
--- a/py/src/braintrust/util.py
+++ b/py/src/braintrust/util.py
@@ -7,7 +7,7 @@
 import urllib.parse
 from collections.abc import Callable, Mapping
 from dataclasses import dataclass
-from typing import Any, Generic, Literal, TypedDict, TypeVar, Union
+from typing import Any, Generic, Literal, TypedDict, TypeVar
 
 from requests import HTTPError, Response
 
@@ -179,7 +179,7 @@ class _LazyValuePendingState:
     has_succeeded: Literal[False] = False
 
 
-_LazyValueState = Union[_LazyValueResolvedState[T], _LazyValuePendingState]
+_LazyValueState = _LazyValueResolvedState[T] | _LazyValuePendingState
 
 
 class LazyValue(Generic[T]):

From f9ebd13f40795bfda65e3653efd1bb045111aa7c Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 19:50:07 +0000
Subject: [PATCH 06/10] chore: replace Optional[X] with X | None syntax

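Modernizes type annotations across merge_row_batch.py, trace.py,
span_cache.py, otel/context.py, langsmith_wrapper.py, and
test_serializable_data_class.py. Removes unused Optional imports.
In the touched files this also swaps typing.List/Dict for builtin
list/dict and imports Iterable/Iterator from collections.abc
(langsmith_wrapper.py, test_serializable_data_class.py).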

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/merge_row_batch.py          |  4 +-
 py/src/braintrust/otel/context.py             |  4 +-
 py/src/braintrust/span_cache.py               | 14 +++---
 .../test_serializable_data_class.py           |  9 ++--
 py/src/braintrust/trace.py                    | 44 +++++++++----------
 .../braintrust/wrappers/langsmith_wrapper.py  | 37 ++++++++--------
 6 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/py/src/braintrust/merge_row_batch.py b/py/src/braintrust/merge_row_batch.py
index c9047775..066744ce 100644
--- a/py/src/braintrust/merge_row_batch.py
+++ b/py/src/braintrust/merge_row_batch.py
@@ -1,5 +1,5 @@
 from collections.abc import Callable, Sequence
-from typing import Any, Optional, TypeVar
+from typing import Any, TypeVar
 
 from .db_fields import IS_MERGE_FIELD
 
@@ -8,7 +8,7 @@
 from .util import merge_dicts
 
 
-_MergedRowKey = tuple[Optional[Any], ...]
+_MergedRowKey = tuple[Any | None, ...]
 
 
 def _generate_merged_row_key(row: dict[str, Any]) -> _MergedRowKey:
diff --git a/py/src/braintrust/otel/context.py b/py/src/braintrust/otel/context.py
index bb65be77..ea9703bc 100644
--- a/py/src/braintrust/otel/context.py
+++ b/py/src/braintrust/otel/context.py
@@ -1,7 +1,7 @@
 """Unified context management using OTEL's built-in context."""
 
 import logging
-from typing import Any, Optional
+from typing import Any
 
 from braintrust.context import ParentSpanIds, SpanInfo
 from braintrust.logger import Span
@@ -18,7 +18,7 @@ class ContextManager:
     def __init__(self):
         pass
 
-    def get_current_span_info(self) -> Optional["SpanInfo"]:
+    def get_current_span_info(self) -> "SpanInfo | None":
         """Get information about the currently active span from OTEL context."""
 
         # Get the current span from OTEL context
diff --git a/py/src/braintrust/span_cache.py b/py/src/braintrust/span_cache.py
index f3248d0c..ee926614 100644
--- a/py/src/braintrust/span_cache.py
+++ b/py/src/braintrust/span_cache.py
@@ -11,7 +11,7 @@
 import os
 import tempfile
 import uuid
-from typing import Any, Optional
+from typing import Any
 
 from braintrust.types import Metadata
 from braintrust.util import merge_dicts
@@ -28,11 +28,11 @@ class CachedSpan:
     def __init__(
         self,
         span_id: str,
-        input: Optional[Any] = None,
-        output: Optional[Any] = None,
+        input: Any | None = None,
+        output: Any | None = None,
         metadata: Metadata | None = None,
-        span_parents: Optional[list[str]] = None,
-        span_attributes: Optional[dict[str, Any]] = None,
+        span_parents: list[str] | None = None,
+        span_attributes: dict[str, Any] | None = None,
     ):
         self.span_id = span_id
         self.input = input
@@ -104,7 +104,7 @@ class SpanCache:
     """
 
     def __init__(self, disabled: bool = False):
-        self._cache_file_path: Optional[str] = None
+        self._cache_file_path: str | None = None
         self._initialized = False
         # Tracks whether the cache was explicitly disabled (via constructor or disable())
         self._explicitly_disabled = disabled
@@ -226,7 +226,7 @@ def _flush_write_buffer(self) -> None:
             # This can happen if disk is full or file permissions changed
             pass
 
-    def get_by_root_span_id(self, root_span_id: str) -> Optional[list[CachedSpan]]:
+    def get_by_root_span_id(self, root_span_id: str) -> list[CachedSpan] | None:
         """
         Get all cached spans for a given rootSpanId.
 
diff --git a/py/src/braintrust/test_serializable_data_class.py b/py/src/braintrust/test_serializable_data_class.py
index 0cade6ae..e31e2078 100644
--- a/py/src/braintrust/test_serializable_data_class.py
+++ b/py/src/braintrust/test_serializable_data_class.py
@@ -1,14 +1,13 @@
 import unittest
 from dataclasses import dataclass
-from typing import List, Optional
 
 from .serializable_data_class import SerializableDataClass
 
 
 @dataclass
 class PromptData(SerializableDataClass):
-    prompt: Optional[str] = None
-    options: Optional[dict] = None
+    prompt: str | None = None
+    options: dict | None = None
 
 
 @dataclass
@@ -18,9 +17,9 @@ class PromptSchema(SerializableDataClass):
     _xact_id: str
     name: str
     slug: str
-    description: Optional[str]
+    description: str | None
     prompt_data: PromptData
-    tags: Optional[List[str]]
+    tags: list[str] | None
 
 
 class TestSerializableDataClass(unittest.TestCase):
diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py
index d3426ac4..00084495 100644
--- a/py/src/braintrust/trace.py
+++ b/py/src/braintrust/trace.py
@@ -6,7 +6,7 @@
 """
 
 import asyncio
-from typing import Any, Awaitable, Callable, Optional, Protocol, TypedDict
+from typing import Any, Awaitable, Callable, Protocol, TypedDict
 
 from braintrust.functions.invoke import invoke
 from braintrust.logger import BraintrustState, ObjectFetcher
@@ -18,12 +18,12 @@ class SpanData:
 
     def __init__(
         self,
-        input: Optional[Any] = None,
-        output: Optional[Any] = None,
+        input: Any | None = None,
+        output: Any | None = None,
         metadata: Metadata | None = None,
-        span_id: Optional[str] = None,
-        span_parents: Optional[list[str]] = None,
-        span_attributes: Optional[dict[str, Any]] = None,
+        span_id: str | None = None,
+        span_parents: list[str] | None = None,
+        span_attributes: dict[str, Any] | None = None,
         **kwargs: Any,
     ):
         self.input = input
@@ -62,7 +62,7 @@ def __init__(
         object_id: str,
         root_span_id: str,
         state: BraintrustState,
-        span_type_filter: Optional[list[str]] = None,
+        span_type_filter: list[str] | None = None,
     ):
         # Build the filter expression for root_span_id and optionally span_attributes.type
         filter_expr = self._build_filter(root_span_id, span_type_filter)
@@ -75,7 +75,7 @@ def __init__(
         self._state = state
 
     @staticmethod
-    def _build_filter(root_span_id: str, span_type_filter: Optional[list[str]] = None) -> dict[str, Any]:
+    def _build_filter(root_span_id: str, span_type_filter: list[str] | None = None) -> dict[str, Any]:
         """Build BTQL filter expression."""
         children = [
             # Base filter: root_span_id = 'value'
@@ -121,7 +121,7 @@ def _get_state(self) -> BraintrustState:
         return self._state
 
 
-SpanFetchFn = Callable[[Optional[list[str]]], Awaitable[list[SpanData]]]
+SpanFetchFn = Callable[[list[str] | None], Awaitable[list[SpanData]]]
 
 
 class GetThreadOptions(TypedDict, total=False):
@@ -140,11 +140,11 @@ class CachedSpanFetcher:
 
     def __init__(
         self,
-        object_type: Optional[str] = None,  # Literal["experiment", "project_logs", "playground_logs"]
-        object_id: Optional[str] = None,
-        root_span_id: Optional[str] = None,
-        get_state: Optional[Callable[[], Awaitable[BraintrustState]]] = None,
-        fetch_fn: Optional[SpanFetchFn] = None,
+        object_type: str | None = None,  # Literal["experiment", "project_logs", "playground_logs"]
+        object_id: str | None = None,
+        root_span_id: str | None = None,
+        get_state: Callable[[], Awaitable[BraintrustState]] | None = None,
+        fetch_fn: SpanFetchFn | None = None,
     ):
         self._span_cache: dict[str, list[SpanData]] = {}
         self._all_fetched = False
@@ -159,7 +159,7 @@ def __init__(
                     "Must provide either fetch_fn or all of object_type, object_id, root_span_id, get_state"
                 )
 
-            async def _fetch_fn(span_type: Optional[list[str]]) -> list[SpanData]:
+            async def _fetch_fn(span_type: list[str] | None) -> list[SpanData]:
                 state = await get_state()
                 fetcher = SpanFetcher(
                     object_type=object_type,
@@ -196,7 +196,7 @@ async def _fetch_fn(span_type: Optional[list[str]]) -> list[SpanData]:
 
             self._fetch_fn = _fetch_fn
 
-    async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]:
+    async def get_spans(self, span_type: list[str] | None = None) -> list[SpanData]:
         """
         Get spans, using cache when possible.
 
@@ -228,7 +228,7 @@ async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanDat
         await self._fetch_spans(missing_types)
         return self._get_from_cache(span_type)
 
-    async def _fetch_spans(self, span_type: Optional[list[str]]) -> None:
+    async def _fetch_spans(self, span_type: list[str] | None) -> None:
         """Fetch spans from the server."""
         spans = await self._fetch_fn(span_type)
 
@@ -239,7 +239,7 @@ async def _fetch_spans(self, span_type: Optional[list[str]]) -> None:
                 self._span_cache[span_type_str] = []
             self._span_cache[span_type_str].append(span)
 
-    def _get_from_cache(self, span_type: Optional[list[str]]) -> list[SpanData]:
+    def _get_from_cache(self, span_type: list[str] | None) -> list[SpanData]:
         """Get spans from cache, optionally filtering by type."""
         if not span_type or len(span_type) == 0:
             # Return all spans
@@ -266,7 +266,7 @@ def get_configuration(self) -> dict[str, str]:
         """Get the trace configuration (object_type, object_id, root_span_id)."""
         ...
 
-    async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]:
+    async def get_spans(self, span_type: list[str] | None = None) -> list[SpanData]:
         """
         Fetch all spans for this root span.
 
@@ -307,7 +307,7 @@ def __init__(
         object_type: str,  # Literal["experiment", "project_logs", "playground_logs"]
         object_id: str,
         root_span_id: str,
-        ensure_spans_flushed: Optional[Callable[[], Awaitable[None]]],
+        ensure_spans_flushed: Callable[[], Awaitable[None]] | None,
         state: BraintrustState,
     ):
         # Initialize dict with trace_ref for JSON serialization
@@ -327,7 +327,7 @@ def __init__(
         self._ensure_spans_flushed = ensure_spans_flushed
         self._state = state
         self._spans_flushed = False
-        self._spans_flush_promise: Optional[asyncio.Task[None]] = None
+        self._spans_flush_promise: asyncio.Task[None] | None = None
         self._thread_cache: dict[str, asyncio.Task[list[Any]]] = {}
 
         async def get_state() -> BraintrustState:
@@ -351,7 +351,7 @@ def get_configuration(self) -> dict[str, str]:
             "root_span_id": self._root_span_id,
         }
 
-    async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]:
+    async def get_spans(self, span_type: list[str] | None = None) -> list[SpanData]:
         """
         Fetch all rows for this root span from its parent object (experiment or project logs).
         First checks the local span cache for recently logged spans, then falls
diff --git a/py/src/braintrust/wrappers/langsmith_wrapper.py b/py/src/braintrust/wrappers/langsmith_wrapper.py
index b22117df..f6c48e78 100644
--- a/py/src/braintrust/wrappers/langsmith_wrapper.py
+++ b/py/src/braintrust/wrappers/langsmith_wrapper.py
@@ -40,7 +40,8 @@ def my_function(inputs: dict) -> dict:
 import inspect
 import logging
 import os
-from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ParamSpec, TypeVar
+from collections.abc import Iterable, Iterator
+from typing import Any, Callable, ParamSpec, TypeVar
 
 from braintrust.framework import EvalCase
 from braintrust.logger import NOOP_SPAN, current_span, init_logger, traced
@@ -50,7 +51,7 @@ def my_function(inputs: dict) -> dict:
 logger = logging.getLogger(__name__)
 
 # Global list to store Braintrust eval results when running in tandem mode
-_braintrust_eval_results: List[Any] = []
+_braintrust_eval_results: list[Any] = []
 
 # TODO: langsmith.test/unit/expect, langsmith.AsyncClient, trace
 __all__ = [
@@ -68,7 +69,7 @@ def my_function(inputs: dict) -> dict:
 R = TypeVar("R")
 
 
-def get_braintrust_results() -> List[Any]:
+def get_braintrust_results() -> list[Any]:
     """Get all Braintrust eval results collected during tandem mode."""
     return _braintrust_eval_results.copy()
 
@@ -79,9 +80,9 @@ def clear_braintrust_results() -> None:
 
 
 def setup_langsmith(
-    api_key: Optional[str] = None,
-    project_id: Optional[str] = None,
-    project_name: Optional[str] = None,
+    api_key: str | None = None,
+    project_id: str | None = None,
+    project_name: str | None = None,
     standalone: bool = False,
 ) -> bool:
     """
@@ -169,7 +170,7 @@ def decorator(fn: Callable[P, R]) -> Callable[P, R]:
 
 
 def wrap_client(
-    Client: Any, project_name: Optional[str] = None, project_id: Optional[str] = None, standalone: bool = False
+    Client: Any, project_name: str | None = None, project_id: str | None = None, standalone: bool = False
 ) -> Any:
     """
     Wrap langsmith.Client to redirect evaluate() and aevaluate() to Braintrust's Eval.
@@ -204,7 +205,7 @@ def wrap_client(
 
 
 def make_evaluate_wrapper(
-    *, project_name: Optional[str] = None, project_id: Optional[str] = None, standalone: bool = False
+    *, project_name: str | None = None, project_id: str | None = None, standalone: bool = False
 ):
     def evaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
         result = None
@@ -231,7 +232,7 @@ def evaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any
 
 
 def make_aevaluate_wrapper(
-    *, project_name: Optional[str] = None, project_id: Optional[str] = None, standalone: bool = False
+    *, project_name: str | None = None, project_id: str | None = None, standalone: bool = False
 ):
     async def aevaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
         result = None
@@ -258,7 +259,7 @@ async def aevaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any)
 
 
 def wrap_evaluate(
-    evaluate: F, project_name: Optional[str] = None, project_id: Optional[str] = None, standalone: bool = False
+    evaluate: F, project_name: str | None = None, project_id: str | None = None, standalone: bool = False
 ) -> F:
     """
     Wrap module-level langsmith.evaluate to redirect to Braintrust's Eval.
@@ -282,8 +283,8 @@ def wrap_evaluate(
 
 def wrap_aevaluate(
     aevaluate: F,
-    project_name: Optional[str] = None,
-    project_id: Optional[str] = None,
+    project_name: str | None = None,
+    project_id: str | None = None,
     standalone: bool = False,
 ) -> F:
     """
@@ -318,8 +319,8 @@ def _is_patched(obj: Any) -> bool:
 def _run_braintrust_eval(
     args: Any,
     kwargs: Any,
-    project_name: Optional[str] = None,
-    project_id: Optional[str] = None,
+    project_name: str | None = None,
+    project_id: str | None = None,
 ) -> Any:
     """Run Braintrust Eval with LangSmith-style arguments."""
     from braintrust.framework import Eval
@@ -356,8 +357,8 @@ def _run_braintrust_eval(
 async def _run_braintrust_eval_async(
     args: Any,
     kwargs: Any,
-    project_name: Optional[str] = None,
-    project_id: Optional[str] = None,
+    project_name: str | None = None,
+    project_id: str | None = None,
 ) -> Any:
     """Run Braintrust EvalAsync with LangSmith-style arguments."""
     from braintrust.framework import EvalAsync
@@ -396,7 +397,7 @@ async def _run_braintrust_eval_async(
 # =============================================================================
 
 
-def _wrap_output(output: Any) -> Dict[str, Any]:
+def _wrap_output(output: Any) -> dict[str, Any]:
     """Wrap non-dict outputs the same way LangSmith does."""
     if not isinstance(output, dict):
         return {"output": output}
@@ -413,7 +414,7 @@ def _make_braintrust_scorer(
     """
     evaluator_name = getattr(evaluator, "__name__", "score")
 
-    def braintrust_scorer(input: Any, output: Any, expected: Optional[Any] = None, **kwargs: Any) -> Any:
+    def braintrust_scorer(input: Any, output: Any, expected: Any | None = None, **kwargs: Any) -> Any:
         from braintrust.score import Score
 
         # Run the evaluator with LangSmith's signature

From 53cfefc4eacf172a6876e6d9d806873802a918da Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 19:54:14 +0000
Subject: [PATCH 07/10] chore: replace typing.List/Dict/etc with builtins and
 collections.abc

Migrates legacy typing container imports (List, Dict, AsyncGenerator,
Generator, Callable, Mapping, Sequence, Iterable, Iterator, Awaitable)
to their modern equivalents: lowercase builtins (list, dict) and
collections.abc for abstract types.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/bt_json.py                  |  3 ++-
 .../contrib/temporal/test_temporal.py         | 10 ++++-----
 .../claude_agent_sdk/_constants.py            |  3 ++-
 .../integrations/langchain/helpers.py         |  3 ++-
 .../integrations/langchain/test_callbacks.py  | 22 +++++++++----------
 .../integrations/langchain/test_context.py    |  3 +--
 .../test_pydantic_ai_wrap_openai.py           |  4 ++--
 py/src/braintrust/test_context.py             |  3 ++-
 py/src/braintrust/test_framework.py           | 11 +++++-----
 py/src/braintrust/test_logger.py              |  8 +++----
 py/src/braintrust/test_util.py                |  7 +++---
 py/src/braintrust/trace.py                    |  3 ++-
 py/src/braintrust/types/_eval.py              |  3 ++-
 13 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/py/src/braintrust/bt_json.py b/py/src/braintrust/bt_json.py
index 76dadb01..e6035d6b 100644
--- a/py/src/braintrust/bt_json.py
+++ b/py/src/braintrust/bt_json.py
@@ -2,7 +2,8 @@
 import json
 import math
 import warnings
-from typing import Any, Callable, Mapping, NamedTuple, cast, overload
+from collections.abc import Callable, Mapping
+from typing import Any, NamedTuple, cast, overload
 
 
 # Try to import orjson for better performance
diff --git a/py/src/braintrust/contrib/temporal/test_temporal.py b/py/src/braintrust/contrib/temporal/test_temporal.py
index 8ed87264..03999310 100644
--- a/py/src/braintrust/contrib/temporal/test_temporal.py
+++ b/py/src/braintrust/contrib/temporal/test_temporal.py
@@ -4,7 +4,7 @@
 import uuid
 from dataclasses import dataclass
 from datetime import timedelta
-from typing import Any, Dict
+from typing import Any
 
 import pytest
 import pytest_asyncio
@@ -31,7 +31,7 @@ class TestHeaderSerialization:
     def test_span_context_to_headers_with_valid_context(self):
         interceptor = BraintrustInterceptor()
         span_context = {"trace_id": "test-trace-id", "span_id": "test-span-id"}
-        headers: Dict[str, temporalio.api.common.v1.Payload] = {}
+        headers: dict[str, temporalio.api.common.v1.Payload] = {}
 
         result_headers = interceptor._span_context_to_headers(span_context, headers)
 
@@ -40,8 +40,8 @@ def test_span_context_to_headers_with_valid_context(self):
 
     def test_span_context_to_headers_with_empty_context(self):
         interceptor = BraintrustInterceptor()
-        span_context: Dict[str, Any] = {}
-        headers: Dict[str, temporalio.api.common.v1.Payload] = {}
+        span_context: dict[str, Any] = {}
+        headers: dict[str, temporalio.api.common.v1.Payload] = {}
 
         result_headers = interceptor._span_context_to_headers(span_context, headers)
 
@@ -78,7 +78,7 @@ def test_span_context_from_headers_with_valid_header(self):
 
     def test_span_context_from_headers_with_missing_header(self):
         interceptor = BraintrustInterceptor()
-        headers: Dict[str, temporalio.api.common.v1.Payload] = {}
+        headers: dict[str, temporalio.api.common.v1.Payload] = {}
 
         result = interceptor._span_context_from_headers(headers)
 
diff --git a/py/src/braintrust/integrations/claude_agent_sdk/_constants.py b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py
index fa228a34..8be12600 100644
--- a/py/src/braintrust/integrations/claude_agent_sdk/_constants.py
+++ b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py
@@ -1,7 +1,8 @@
 from dataclasses import dataclass
 from enum import Enum
 from types import MappingProxyType
-from typing import Final, Mapping
+from collections.abc import Mapping
+from typing import Final
 
 
 class MessageClassName(str, Enum):
diff --git a/py/src/braintrust/integrations/langchain/helpers.py b/py/src/braintrust/integrations/langchain/helpers.py
index f75b96db..7271ef37 100644
--- a/py/src/braintrust/integrations/langchain/helpers.py
+++ b/py/src/braintrust/integrations/langchain/helpers.py
@@ -1,4 +1,5 @@
-from typing import Any, Sequence
+from collections.abc import Sequence
+from typing import Any
 from unittest.mock import ANY
 
 
diff --git a/py/src/braintrust/integrations/langchain/test_callbacks.py b/py/src/braintrust/integrations/langchain/test_callbacks.py
index da7181d3..375332c9 100644
--- a/py/src/braintrust/integrations/langchain/test_callbacks.py
+++ b/py/src/braintrust/integrations/langchain/test_callbacks.py
@@ -3,7 +3,7 @@
 import time
 import uuid
 from pathlib import Path
-from typing import Dict, List, Union, cast
+from typing import cast
 
 import pytest
 from braintrust import logger
@@ -56,7 +56,7 @@ def test_llm_calls(logger_memory_logger):
         presence_penalty=0,
         n=1,
     )
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
     chain.invoke({"number": "2"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]})
 
     spans = memory_logger.pop()
@@ -159,7 +159,7 @@ def test_chain_with_memory(logger_memory_logger):
     handler = BraintrustCallbackHandler(logger=test_logger)
     prompt = ChatPromptTemplate.from_template("{history} User: {input}")
     model = ChatOpenAI(model="gpt-4o-mini")
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
 
     memory = {"history": "Assistant: Hello! How can I assist you today?"}
     chain.invoke(
@@ -480,16 +480,16 @@ def test_langgraph_state_management(logger_memory_logger):
         n=1,
     )
 
-    def say_hello(state: Dict[str, str]):
+    def say_hello(state: dict[str, str]):
         response = model.invoke("Say hello")
-        return cast(Union[str, List[str], Dict[str, str]], response.content)
+        return cast(str | list[str] | dict[str, str], response.content)
 
-    def say_bye(state: Dict[str, str]):
+    def say_bye(state: dict[str, str]):
         print("From the 'sayBye' node: Bye world!")
         return "Bye"
 
     workflow = (
-        StateGraph(state_schema=Dict[str, str])
+        StateGraph(state_schema=dict[str, str])
         .add_node("sayHello", say_hello)
         .add_node("sayBye", say_bye)
         .add_edge(START, "sayHello")
@@ -837,10 +837,10 @@ def test_streaming_ttft(logger_memory_logger):
         max_completion_tokens=50,
         streaming=True,
     )
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
 
     # Collect chunks to verify streaming works
-    chunks: List[str] = []
+    chunks: list[str] = []
     for chunk in chain.stream({}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}):
         if chunk.content:
             chunks.append(str(chunk.content))
@@ -1272,9 +1272,9 @@ async def test_async_streaming(logger_memory_logger):
     handler = BraintrustCallbackHandler(logger=test_logger)
     prompt = ChatPromptTemplate.from_template("Count from 1 to 3.")
     model = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=20, streaming=True)
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
 
-    chunks: List[str] = []
+    chunks: list[str] = []
     async for chunk in chain.astream({}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}):
         if chunk.content:
             chunks.append(str(chunk.content))
diff --git a/py/src/braintrust/integrations/langchain/test_context.py b/py/src/braintrust/integrations/langchain/test_context.py
index 2076c4b3..37cf552a 100644
--- a/py/src/braintrust/integrations/langchain/test_context.py
+++ b/py/src/braintrust/integrations/langchain/test_context.py
@@ -1,5 +1,4 @@
 # pyright: reportTypedDictNotRequiredAccess=none
-from typing import Dict
 from unittest.mock import ANY
 
 import pytest
@@ -57,7 +56,7 @@ def test_global_handler(logger_memory_logger):
         presence_penalty=0,
         n=1,
     )
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
 
     message = chain.invoke({"number": "2"})
 
diff --git a/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py
index c1dfceb3..dc112b6c 100644
--- a/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py
+++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py
@@ -1,5 +1,5 @@
 import time
-from typing import Any, Dict
+from typing import Any
 
 import pytest
 from openai import AsyncOpenAI
@@ -61,7 +61,7 @@ def memory_logger():
         yield bgl
 
 
-def _assert_metrics_are_valid(metrics: Dict[str, Any]):
+def _assert_metrics_are_valid(metrics: dict[str, Any]):
     assert metrics["tokens"] > 0
     assert metrics["prompt_tokens"] > 0
     assert metrics["completion_tokens"] > 0
diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py
index cb3b78a9..80e47048 100644
--- a/py/src/braintrust/test_context.py
+++ b/py/src/braintrust/test_context.py
@@ -24,7 +24,8 @@ def _threadpool_scenario(test_logger, with_memory_logger):
 import subprocess
 import sys
 import threading
-from typing import AsyncGenerator, Callable, Generator, TypeVar
+from collections.abc import AsyncGenerator, Callable, Generator
+from typing import TypeVar
 
 import braintrust
 import pytest
diff --git a/py/src/braintrust/test_framework.py b/py/src/braintrust/test_framework.py
index 6368cee3..608b7585 100644
--- a/py/src/braintrust/test_framework.py
+++ b/py/src/braintrust/test_framework.py
@@ -1,6 +1,5 @@
 import importlib.util
 import re
-from typing import List
 from unittest.mock import MagicMock
 
 import pytest
@@ -213,7 +212,7 @@ def _run_eval_sync(self, *args, **kwargs):
 @pytest.mark.asyncio
 async def test_hooks_trial_index():
     """Test that trial_index is correctly passed to task via hooks."""
-    trial_indices: List[int] = []
+    trial_indices: list[int] = []
 
     # Task that captures trial indices
     def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
@@ -253,7 +252,7 @@ def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
 @pytest.mark.asyncio
 async def test_hooks_trial_index_multiple_inputs():
     """Test trial_index with multiple inputs to ensure proper indexing."""
-    trial_data: List[tuple] = []  # (input, trial_index)
+    trial_data: list[tuple] = []  # (input, trial_index)
 
     def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
         trial_data.append((input_value, hooks.trial_index))
@@ -293,7 +292,7 @@ def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
 @pytest.mark.asyncio
 async def test_per_input_trial_count_overrides_global():
     """Test that per-input trial_count overrides the global trial_count."""
-    trial_data: List[tuple] = []  # (input, trial_index)
+    trial_data: list[tuple] = []  # (input, trial_index)
 
     def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
         trial_data.append((input_value, hooks.trial_index))
@@ -332,7 +331,7 @@ def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
 @pytest.mark.asyncio
 async def test_per_input_trial_count_without_global():
     """Test that per-input trial_count works when no global trial_count is set."""
-    trial_data: List[tuple] = []  # (input, trial_index)
+    trial_data: list[tuple] = []  # (input, trial_index)
 
     def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
         trial_data.append((input_value, hooks.trial_index))
@@ -367,7 +366,7 @@ def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
 @pytest.mark.asyncio
 async def test_per_input_trial_count_with_dict_data():
     """Test that per-input trial_count works when data items are plain dicts."""
-    trial_data: List[tuple] = []  # (input, trial_index)
+    trial_data: list[tuple] = []  # (input, trial_index)
 
     def task_with_hooks(input_value: int, hooks: EvalHooks) -> int:
         trial_data.append((input_value, hooks.trial_index))
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index 7662ad77..e8c22bdc 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -5,7 +5,7 @@
 import logging
 import os
 import time
-from typing import AsyncGenerator, List
+from collections.abc import AsyncGenerator
 from unittest import TestCase
 from unittest.mock import MagicMock, patch
 
@@ -384,7 +384,7 @@ def test_load_parameters_prefers_version_over_environment_for_id(self):
         assert "environment" not in mock_api_conn.get_json.call_args.args[1]
 
     def test_extract_attachments_no_op(self):
-        attachments: List[BaseAttachment] = []
+        attachments: list[BaseAttachment] = []
 
         _extract_attachments({}, attachments)
         self.assertEqual(len(attachments), 0)
@@ -441,7 +441,7 @@ def test_extract_attachments_with_attachments(self):
         }
         saved_nested = event["nested"]
 
-        attachments: List[BaseAttachment] = []
+        attachments: list[BaseAttachment] = []
         _extract_attachments(event, attachments)
 
         self.assertEqual(
@@ -3135,7 +3135,7 @@ def test_extract_attachments_with_json_attachment(self):
             },
         }
 
-        attachments: List[BaseAttachment] = []
+        attachments: list[BaseAttachment] = []
         _extract_attachments(event, attachments)
 
         self.assertEqual(len(attachments), 1)
diff --git a/py/src/braintrust/test_util.py b/py/src/braintrust/test_util.py
index 90f18602..0dd27568 100644
--- a/py/src/braintrust/test_util.py
+++ b/py/src/braintrust/test_util.py
@@ -1,6 +1,5 @@
 import os
 import unittest
-from typing import List
 
 import pytest
 
@@ -129,9 +128,9 @@ def compute_value():
         lazy = LazyValue(compute_value, use_mutex=True)
 
         # Launch multiple threads that all try to get() simultaneously
-        threads: List[threading.Thread] = []
-        results: List[str] = []
-        errors: List[Exception] = []
+        threads: list[threading.Thread] = []
+        results: list[str] = []
+        errors: list[Exception] = []
 
         def worker():
             try:
diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py
index 00084495..118fe5eb 100644
--- a/py/src/braintrust/trace.py
+++ b/py/src/braintrust/trace.py
@@ -6,7 +6,8 @@
 """
 
 import asyncio
-from typing import Any, Awaitable, Callable, Protocol, TypedDict
+from collections.abc import Awaitable, Callable
+from typing import Any, Protocol, TypedDict
 
 from braintrust.functions.invoke import invoke
 from braintrust.logger import BraintrustState, ObjectFetcher
diff --git a/py/src/braintrust/types/_eval.py b/py/src/braintrust/types/_eval.py
index a199d8a2..94be4cae 100644
--- a/py/src/braintrust/types/_eval.py
+++ b/py/src/braintrust/types/_eval.py
@@ -5,7 +5,8 @@
 underscore-prefixed so pyright strict mode doesn't flag them as private.
 """
 
-from typing import Any, Generic, Sequence, TypedDict, TypeVar
+from collections.abc import Sequence
+from typing import Any, Generic, TypedDict, TypeVar
 
 from typing_extensions import NotRequired
 

From 8b95b16b1e2321fa6d0fc1d8d88480e828d97510 Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 20:03:39 +0000
Subject: [PATCH 08/10] fix: correct Optional[Callable[...]] conversion in
 trace.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The regex-based Optional→|None replacement incorrectly placed
| None inside Callable's return type instead of outside the whole
Callable, changing the type semantics:

- `Optional[Callable[[], Awaitable[X]]]` (callable is nullable)
  was wrongly converted to
  `Callable[[], Awaitable[X] | None]` (return type is nullable)

Fixed to: `Callable[[], Awaitable[X]] | None`

Also moves Callable from typing to collections.abc in
langsmith_wrapper.py for consistency with other files.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/trace.py                      | 4 ++--
 py/src/braintrust/wrappers/langsmith_wrapper.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py
index 118fe5eb..24bcefa2 100644
--- a/py/src/braintrust/trace.py
+++ b/py/src/braintrust/trace.py
@@ -144,7 +144,7 @@ def __init__(
         object_type: str | None = None,  # Literal["experiment", "project_logs", "playground_logs"]
         object_id: str | None = None,
         root_span_id: str | None = None,
-        get_state: Callable[[], Awaitable[BraintrustState] | None] = None,
+        get_state: Callable[[], Awaitable[BraintrustState]] | None = None,
         fetch_fn: SpanFetchFn | None = None,
     ):
         self._span_cache: dict[str, list[SpanData]] = {}
@@ -308,7 +308,7 @@ def __init__(
         object_type: str,  # Literal["experiment", "project_logs", "playground_logs"]
         object_id: str,
         root_span_id: str,
-        ensure_spans_flushed: Callable[[], Awaitable[None] | None],
+        ensure_spans_flushed: Callable[[], Awaitable[None]] | None,
         state: BraintrustState,
     ):
         # Initialize dict with trace_ref for JSON serialization
diff --git a/py/src/braintrust/wrappers/langsmith_wrapper.py b/py/src/braintrust/wrappers/langsmith_wrapper.py
index f6c48e78..6c69dddb 100644
--- a/py/src/braintrust/wrappers/langsmith_wrapper.py
+++ b/py/src/braintrust/wrappers/langsmith_wrapper.py
@@ -40,8 +40,8 @@ def my_function(inputs: dict) -> dict:
 import inspect
 import logging
 import os
-from collections.abc import Iterable, Iterator
-from typing import Any, Callable, ParamSpec, TypeVar
+from collections.abc import Callable, Iterable, Iterator
+from typing import Any, ParamSpec, TypeVar
 
 from braintrust.framework import EvalCase
 from braintrust.logger import NOOP_SPAN, current_span, init_logger, traced

From 010af87923c8467a3712aefc1b9dd7d46e742b0a Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 20:06:23 +0000
Subject: [PATCH 09/10] fix: revert TypedDict to typing_extensions in _eval.py,
 run ruff

On Python 3.10, stdlib typing.TypedDict does not support inheriting
from both Generic and TypedDict simultaneously (generic TypedDicts
were only added to the stdlib in Python 3.11). The typing_extensions
backport handles this correctly, so TypedDict must stay in
typing_extensions for types/_eval.py.

Also applies ruff format/check fixes (import sorting, line
collapsing).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/framework.py                    | 15 +++------------
 py/src/braintrust/functions/stream.py             |  6 +-----
 .../integrations/claude_agent_sdk/_constants.py   |  2 +-
 py/src/braintrust/types/_eval.py                  |  4 ++--
 py/src/braintrust/wrappers/langsmith_wrapper.py   |  4 +---
 5 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
index a40c5471..554e4007 100644
--- a/py/src/braintrust/framework.py
+++ b/py/src/braintrust/framework.py
@@ -217,12 +217,7 @@ class EvalScorerArgs(SerializableDataClass, Generic[Input, Output, Expected]):
 
 
 OneOrMoreScores = float | int | bool | None | Score | list[Score]
-OneOrMoreClassifications = (
-    None
-    | Classification
-    | Mapping[str, Any]
-    | list[Classification | Mapping[str, Any]]
-)
+OneOrMoreClassifications = None | Classification | Mapping[str, Any] | list[Classification | Mapping[str, Any]]
 
 
 # Synchronous scorer interface - implements callable
@@ -278,10 +273,7 @@ class BaseExperiment:
 
 
 _AnyEvalCase = (
-    EvalCase[Input, Expected]
-    | EvalCaseDict[Input, Expected]
-    | EvalCaseDictNoOutput[Input]
-    | ExperimentDatasetEvent
+    EvalCase[Input, Expected] | EvalCaseDict[Input, Expected] | EvalCaseDictNoOutput[Input] | ExperimentDatasetEvent
 )
 
 _EvalDataObject = (
@@ -295,8 +287,7 @@ class BaseExperiment:
 EvalData = _EvalDataObject[Input, Expected] | type[_EvalDataObject[Input, Expected]] | Dataset
 
 EvalTask = (
-    Callable[[Input], Output | Awaitable[Output]]
-    | Callable[[Input, EvalHooks[Expected]], Output | Awaitable[Output]]
+    Callable[[Input], Output | Awaitable[Output]] | Callable[[Input, EvalHooks[Expected]], Output | Awaitable[Output]]
 )
 
 ErrorScoreHandler = Callable[[Span, EvalCase[Input, Expected], Sequence[str]], dict[str, float] | None]
diff --git a/py/src/braintrust/functions/stream.py b/py/src/braintrust/functions/stream.py
index 18ebfb6e..d3a6b84d 100644
--- a/py/src/braintrust/functions/stream.py
+++ b/py/src/braintrust/functions/stream.py
@@ -80,11 +80,7 @@ class BraintrustInvokeError(ValueError):
 
 
 BraintrustStreamChunk = (
-    BraintrustTextChunk
-    | BraintrustJsonChunk
-    | BraintrustErrorChunk
-    | BraintrustConsoleChunk
-    | BraintrustProgressChunk
+    BraintrustTextChunk | BraintrustJsonChunk | BraintrustErrorChunk | BraintrustConsoleChunk | BraintrustProgressChunk
 )
 
 
diff --git a/py/src/braintrust/integrations/claude_agent_sdk/_constants.py b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py
index 8be12600..f8757deb 100644
--- a/py/src/braintrust/integrations/claude_agent_sdk/_constants.py
+++ b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py
@@ -1,7 +1,7 @@
+from collections.abc import Mapping
 from dataclasses import dataclass
 from enum import Enum
 from types import MappingProxyType
-from collections.abc import Mapping
 from typing import Final
 
 
diff --git a/py/src/braintrust/types/_eval.py b/py/src/braintrust/types/_eval.py
index 94be4cae..528df11e 100644
--- a/py/src/braintrust/types/_eval.py
+++ b/py/src/braintrust/types/_eval.py
@@ -6,9 +6,9 @@
 """
 
 from collections.abc import Sequence
-from typing import Any, Generic, TypedDict, TypeVar
+from typing import Any, Generic, TypeVar
 
-from typing_extensions import NotRequired
+from typing_extensions import NotRequired, TypedDict
 
 
 Input = TypeVar("Input")
diff --git a/py/src/braintrust/wrappers/langsmith_wrapper.py b/py/src/braintrust/wrappers/langsmith_wrapper.py
index 6c69dddb..83e1fe01 100644
--- a/py/src/braintrust/wrappers/langsmith_wrapper.py
+++ b/py/src/braintrust/wrappers/langsmith_wrapper.py
@@ -204,9 +204,7 @@ def wrap_client(
     return Client
 
 
-def make_evaluate_wrapper(
-    *, project_name: str | None = None, project_id: str | None = None, standalone: bool = False
-):
+def make_evaluate_wrapper(*, project_name: str | None = None, project_id: str | None = None, standalone: bool = False):
     def evaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
         result = None
         if not standalone:

From ac790785edd29bb0b3532cee50d9ae64106e08b3 Mon Sep 17 00:00:00 2001
From: "Nova (SFK)" <nova@starfolk.ai>
Date: Mon, 27 Apr 2026 20:13:59 +0000
Subject: [PATCH 10/10] fix: handle types.UnionType in from_dict_deep, fix
 missed List reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Python 3.10-3.13, the `X | Y` syntax creates a `types.UnionType`
which is distinct from `typing.Union` (the two were only unified in
Python 3.14). The `from_dict_deep` method in `SerializableDataClass`
only checked for `typing.Union` via `get_origin`, so it failed to
deserialize union-typed fields when the annotation used pipe syntax
(e.g. `PromptBlockData`).

Also fixes a missed `cast(List, ...)` → `cast(list, ...)` in
langchain test_callbacks.py that caused a pylint E0602.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 py/src/braintrust/integrations/langchain/test_callbacks.py | 2 +-
 py/src/braintrust/serializable_data_class.py               | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/py/src/braintrust/integrations/langchain/test_callbacks.py b/py/src/braintrust/integrations/langchain/test_callbacks.py
index 375332c9..adeaa37d 100644
--- a/py/src/braintrust/integrations/langchain/test_callbacks.py
+++ b/py/src/braintrust/integrations/langchain/test_callbacks.py
@@ -399,7 +399,7 @@ def test_parallel_execution(logger_memory_logger):
 
     map_chain.invoke({"topic": "bear"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]})
 
-    spans = cast(List, memory_logger.pop())
+    spans = cast(list, memory_logger.pop())
 
     # Find the LLM spans
     llm_spans = find_spans_by_attributes(spans, name="ChatOpenAI")
diff --git a/py/src/braintrust/serializable_data_class.py b/py/src/braintrust/serializable_data_class.py
index 8f9eeefc..ba32ecca 100644
--- a/py/src/braintrust/serializable_data_class.py
+++ b/py/src/braintrust/serializable_data_class.py
@@ -1,5 +1,6 @@
 import dataclasses
 import json
+import types
 from typing import Union, get_origin
 
 
@@ -39,7 +40,7 @@ def from_dict_deep(cls, d: dict):
                 and issubclass(fields[k].type, SerializableDataClass)
             ):
                 filtered[k] = fields[k].type.from_dict_deep(v)
-            elif get_origin(fields[k].type) == Union:
+            elif get_origin(fields[k].type) is Union or isinstance(fields[k].type, types.UnionType):
                 for t in fields[k].type.__args__:
                     if t == type(None) and v is None:
                         filtered[k] = None