From c96dbd6722c9b1904957de27162afcf6c2136a4c Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 19:39:35 +0000 Subject: [PATCH 01/10] chore: remove dead Python 3.9 skipif guard in test_context The SDK requires Python >=3.10, so the `sys.version_info < (3, 9)` guard on `test_to_thread_preserves_context` is always False and can be removed. Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/test_context.py | 1 - 1 file changed, 1 deletion(-) diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py index 17b3c6d1..cb3b78a9 100644 --- a/py/src/braintrust/test_context.py +++ b/py/src/braintrust/test_context.py @@ -277,7 +277,6 @@ async def async_worker(): assert parent_log["span_id"] in worker_log.get("span_parents", []), "Worker should have parent as parent" -@pytest.mark.skipif(sys.version_info < (3, 9), reason="to_thread requires Python 3.9+") @pytest.mark.asyncio async def test_to_thread_preserves_context(test_logger, with_memory_logger): """ From f0a5a5baf4a365c4c2bae14d8fbaa5e37a26e77d Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 19:40:32 +0000 Subject: [PATCH 02/10] chore: add proper type annotation to prettify_xact Now that min Python is 3.10, use `int | str` union syntax directly in the type annotation instead of leaving it untyped with a comment. Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/xact_ids.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/py/src/braintrust/xact_ids.py b/py/src/braintrust/xact_ids.py index 0327066e..1231e3bd 100644 --- a/py/src/braintrust/xact_ids.py +++ b/py/src/braintrust/xact_ids.py @@ -9,9 +9,7 @@ def modular_multiply(value: int, prime: int): return (value * prime) % MOD -# value : int | str -# Cannot use a | because of python 3.8 -def prettify_xact(value) -> str: +def prettify_xact(value: int | str) -> str: encoded = modular_multiply(int(value), COPRIME) return hex(encoded)[2:].rjust(16, "0") From 9ddc09ccc6f0aa48203e7ecb282da68bad36dfc6 Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 19:41:30 +0000 Subject: [PATCH 03/10] chore: use proper Future[Any] return type for TracedThreadPoolExecutor.submit Future[T] generic typing has been stable since Python 3.9; with min Python 3.10 we no longer need to return Any as a workaround. Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/logger.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index 32cd554b..9ef35c50 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -5618,9 +5618,7 @@ def __str__(self): class TracedThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor): - # Returns Any because Future[T] generic typing was stabilized in Python 3.9, - # but we maintain compatibility with older type checkers. - def submit(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Any: + def submit(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> concurrent.futures.Future[Any]: # Capture all current context variables context = contextvars.copy_context() From f6268820b35689a99a8ceeaa8808819985797ae8 Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 19:43:11 +0000 Subject: [PATCH 04/10] chore: move Protocol and TypedDict from typing_extensions to typing Both have been in stdlib typing since Python 3.8. With min Python 3.10, no need for the typing_extensions backport. Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/devserver/schemas.py | 4 +--- py/src/braintrust/framework.py | 3 ++- py/src/braintrust/types/_eval.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/py/src/braintrust/devserver/schemas.py b/py/src/braintrust/devserver/schemas.py index cd8f49da..a359a93d 100644 --- a/py/src/braintrust/devserver/schemas.py +++ b/py/src/braintrust/devserver/schemas.py @@ -1,8 +1,6 @@ import json from collections.abc import Sequence -from typing import Any, Union, get_args, get_origin, get_type_hints - -from typing_extensions import TypedDict +from typing import Any, TypedDict, Union, get_args, get_origin, get_type_hints # This is not beautiful code, but it saves us from introducing Pydantic as a dependency, and it is fairly diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index 5ad20a25..4dd58a33 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -17,13 +17,14 @@ Any, Generic, Literal, + Protocol, + TypedDict, TypeVar, Union, ) from tqdm.asyncio import tqdm as async_tqdm from tqdm.auto import tqdm as std_tqdm -from typing_extensions import Protocol, TypedDict from .generated_types import FunctionFormat, FunctionOutputType, ObjectReference from .git_fields import GitMetadataSettings, RepoInfo diff --git a/py/src/braintrust/types/_eval.py b/py/src/braintrust/types/_eval.py index 0f5be193..a199d8a2 100644 --- a/py/src/braintrust/types/_eval.py +++ b/py/src/braintrust/types/_eval.py @@ -5,9 +5,9 @@ underscore-prefixed so pyright strict mode doesn't flag them as private. """ -from typing import Any, Generic, Sequence, TypeVar +from typing import Any, Generic, Sequence, TypedDict, TypeVar -from typing_extensions import NotRequired, TypedDict +from typing_extensions import NotRequired Input = TypeVar("Input") From 3eb39056faf05e7711fa575dbaf9a0e0c64232d3 Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 19:46:56 +0000 Subject: [PATCH 05/10] chore: replace Union[X, Y] with X | Y syntax (PEP 604) With min Python 3.10, the union pipe syntax is available at runtime. Migrates type aliases and annotations in framework.py, logger.py, util.py, prompt.py, functions/stream.py, and langchain/callbacks.py. Keeps Union imports where used as a runtime sentinel for get_origin() checks (serializable_data_class.py, devserver/schemas.py). Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/framework.py | 74 +++++++++---------- py/src/braintrust/functions/stream.py | 16 ++-- .../integrations/langchain/callbacks.py | 3 +- py/src/braintrust/logger.py | 9 +-- py/src/braintrust/prompt.py | 4 +- py/src/braintrust/util.py | 4 +- 6 files changed, 53 insertions(+), 57 deletions(-) diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index 4dd58a33..a40c5471 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -20,7 +20,6 @@ Protocol, TypedDict, TypeVar, - Union, ) from tqdm.asyncio import tqdm as async_tqdm @@ -217,13 +216,13 @@ class EvalScorerArgs(SerializableDataClass, Generic[Input, Output, Expected]): metadata: Metadata | None = None -OneOrMoreScores = Union[float, int, bool, None, Score, list[Score]] -OneOrMoreClassifications = Union[ - None, - Classification, - Mapping[str, Any], - list[Classification | Mapping[str, Any]], -] +OneOrMoreScores = float | int | bool | None | Score | list[Score] +OneOrMoreClassifications = ( + None + | Classification + | Mapping[str, Any] + | list[Classification | Mapping[str, Any]] +) # Synchronous scorer interface - implements callable @@ -248,20 +247,19 @@ class AsyncScorerLike(Protocol, Generic[Input, Output, Expected]): async def eval_async(self, output: Output, expected: Expected | None = None, **kwargs: Any) -> OneOrMoreScores: ... -# Union type for any kind of scorer (for typing) -ScorerLike = Union[SyncScorerLike[Input, Output, Expected], AsyncScorerLike[Input, Output, Expected]] +ScorerLike = SyncScorerLike[Input, Output, Expected] | AsyncScorerLike[Input, Output, Expected] -EvalScorer = Union[ - ScorerLike[Input, Output, Expected], - type[ScorerLike[Input, Output, Expected]], - Callable[[Input, Output, Expected], OneOrMoreScores], - Callable[[Input, Output, Expected], Awaitable[OneOrMoreScores]], -] +EvalScorer = ( + ScorerLike[Input, Output, Expected] + | type[ScorerLike[Input, Output, Expected]] + | Callable[[Input, Output, Expected], OneOrMoreScores] + | Callable[[Input, Output, Expected], Awaitable[OneOrMoreScores]] +) -EvalClassifier = Union[ - Callable[[Input, Output, Expected], OneOrMoreClassifications], - Callable[[Input, Output, Expected], Awaitable[OneOrMoreClassifications]], -] +EvalClassifier = ( + Callable[[Input, Output, Expected], OneOrMoreClassifications] + | Callable[[Input, Output, Expected], Awaitable[OneOrMoreClassifications]] +) @dataclasses.dataclass @@ -279,27 +277,27 @@ class BaseExperiment: """ -_AnyEvalCase = Union[ - EvalCase[Input, Expected], - EvalCaseDict[Input, Expected], - EvalCaseDictNoOutput[Input], - ExperimentDatasetEvent, -] +_AnyEvalCase = ( + EvalCase[Input, Expected] + | EvalCaseDict[Input, Expected] + | EvalCaseDictNoOutput[Input] + | ExperimentDatasetEvent +) -_EvalDataObject = Union[ - Iterable[_AnyEvalCase[Input, Expected]], - Iterator[_AnyEvalCase[Input, Expected]], - Awaitable[Iterator[_AnyEvalCase[Input, Expected]]], - Callable[[], Union[Iterator[_AnyEvalCase[Input, Expected]], Awaitable[Iterator[_AnyEvalCase[Input, Expected]]]]], - BaseExperiment, -] +_EvalDataObject = ( + Iterable[_AnyEvalCase[Input, Expected]] + | Iterator[_AnyEvalCase[Input, Expected]] + | Awaitable[Iterator[_AnyEvalCase[Input, Expected]]] + | Callable[[], Iterator[_AnyEvalCase[Input, Expected]] | Awaitable[Iterator[_AnyEvalCase[Input, Expected]]]] + | BaseExperiment +) -EvalData = Union[_EvalDataObject[Input, Expected], type[_EvalDataObject[Input, Expected]], Dataset] +EvalData = _EvalDataObject[Input, Expected] | type[_EvalDataObject[Input, Expected]] | Dataset -EvalTask = Union[ - Callable[[Input], Union[Output, Awaitable[Output]]], - Callable[[Input, EvalHooks[Expected]], Union[Output, Awaitable[Output]]], -] +EvalTask = ( + Callable[[Input], Output | Awaitable[Output]] + | Callable[[Input, EvalHooks[Expected]], Output | Awaitable[Output]] +) ErrorScoreHandler = Callable[[Span, EvalCase[Input, Expected], Sequence[str]], dict[str, float] | None] diff --git a/py/src/braintrust/functions/stream.py b/py/src/braintrust/functions/stream.py index ba651315..18ebfb6e 100644 --- a/py/src/braintrust/functions/stream.py +++ b/py/src/braintrust/functions/stream.py @@ -9,7 +9,7 @@ import json from collections.abc import Generator, Iterable from itertools import tee -from typing import Literal, Union +from typing import Literal from sseclient import SSEClient @@ -79,13 +79,13 @@ class BraintrustInvokeError(ValueError): pass -BraintrustStreamChunk = Union[ - BraintrustTextChunk, - BraintrustJsonChunk, - BraintrustErrorChunk, - BraintrustConsoleChunk, - BraintrustProgressChunk, -] +BraintrustStreamChunk = ( + BraintrustTextChunk + | BraintrustJsonChunk + | BraintrustErrorChunk + | BraintrustConsoleChunk + | BraintrustProgressChunk +) class BraintrustStream: diff --git a/py/src/braintrust/integrations/langchain/callbacks.py b/py/src/braintrust/integrations/langchain/callbacks.py index 50650da0..a80a5625 100644 --- a/py/src/braintrust/integrations/langchain/callbacks.py +++ b/py/src/braintrust/integrations/langchain/callbacks.py @@ -7,7 +7,6 @@ from typing import ( Any, TypedDict, - Union, ) from uuid import UUID @@ -528,7 +527,7 @@ def on_llm_new_token( self, token: str, *, - chunk: Union["GenerationChunk", "ChatGenerationChunk"] | None = None, # type: ignore + chunk: "GenerationChunk | ChatGenerationChunk | None" = None, # type: ignore run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any, diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index 9ef35c50..6ed497a1 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -28,7 +28,6 @@ Literal, TypedDict, TypeVar, - Union, cast, overload, ) @@ -1600,7 +1599,7 @@ def init( base_experiment_id: str | None = None, repo_info: RepoInfo | None = None, state: BraintrustState | None = None, -) -> Union["Experiment", "ReadonlyExperiment"]: +) -> "Experiment | ReadonlyExperiment": """ Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created. @@ -1767,7 +1766,7 @@ def compute_metadata(): return ret -def init_experiment(*args, **kwargs) -> Union["Experiment", "ReadonlyExperiment"]: +def init_experiment(*args, **kwargs) -> "Experiment | ReadonlyExperiment": """Alias for `init`""" return init(*args, **kwargs) @@ -2392,7 +2391,7 @@ def parent_context(parent: str | None, state: BraintrustState | None = None): def get_span_parent_object( parent: str | None = None, state: BraintrustState | None = None -) -> Union[SpanComponentsV4, "Logger", "Experiment", Span]: +) -> "SpanComponentsV4 | Logger | Experiment | Span": """Mainly for internal use. Return the parent object for starting a span in a global context. Applies precedence: current span > propagated parent string > experiment > logger.""" @@ -4857,7 +4856,7 @@ def __exit__(self, exc_type, exc_value, traceback) -> None: def render_message(render: Callable[[str], str], message: PromptMessage): base = {k: v for (k, v) in message.as_dict().items() if v is not None} # TODO: shouldn't load_prompt guarantee content is a PromptMessage? - content = cast(Union[str, list[Union[TextPart, ImagePart]], dict[str, Any]], message.content) + content = cast(str | list[TextPart | ImagePart] | dict[str, Any], message.content) if content is not None: if isinstance(content, str): base["content"] = render(content) diff --git a/py/src/braintrust/prompt.py b/py/src/braintrust/prompt.py index d4b7fa19..e9f413b7 100644 --- a/py/src/braintrust/prompt.py +++ b/py/src/braintrust/prompt.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Literal, Union +from typing import Literal from .generated_types import PromptOptions from .serializable_data_class import SerializableDataClass @@ -61,7 +61,7 @@ class PromptChatBlock(SerializableDataClass): type: Literal["chat"] = "chat" -PromptBlockData = Union[PromptCompletionBlock, PromptChatBlock] +PromptBlockData = PromptCompletionBlock | PromptChatBlock @dataclass diff --git a/py/src/braintrust/util.py b/py/src/braintrust/util.py index 3541cb5f..7fdb8abb 100644 --- a/py/src/braintrust/util.py +++ b/py/src/braintrust/util.py @@ -7,7 +7,7 @@ import urllib.parse from collections.abc import Callable, Mapping from dataclasses import dataclass -from typing import Any, Generic, Literal, TypedDict, TypeVar, Union +from typing import Any, Generic, Literal, TypedDict, TypeVar from requests import HTTPError, Response @@ -179,7 +179,7 @@ class _LazyValuePendingState: has_succeeded: Literal[False] = False -_LazyValueState = Union[_LazyValueResolvedState[T], _LazyValuePendingState] +_LazyValueState = _LazyValueResolvedState[T] | _LazyValuePendingState class LazyValue(Generic[T]): From f9ebd13f40795bfda65e3653efd1bb045111aa7c Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 19:50:07 +0000 Subject: [PATCH 06/10] chore: replace Optional[X] with X | None syntax Modernizes type annotations across merge_row_batch.py, trace.py, span_cache.py, otel/context.py, langsmith_wrapper.py, and test_serializable_data_class.py. Removes unused Optional imports. Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/merge_row_batch.py | 4 +- py/src/braintrust/otel/context.py | 4 +- py/src/braintrust/span_cache.py | 14 +++--- .../test_serializable_data_class.py | 9 ++-- py/src/braintrust/trace.py | 44 +++++++++---------- .../braintrust/wrappers/langsmith_wrapper.py | 37 ++++++++-------- 6 files changed, 56 insertions(+), 56 deletions(-) diff --git a/py/src/braintrust/merge_row_batch.py b/py/src/braintrust/merge_row_batch.py index c9047775..066744ce 100644 --- a/py/src/braintrust/merge_row_batch.py +++ b/py/src/braintrust/merge_row_batch.py @@ -1,5 +1,5 @@ from collections.abc import Callable, Sequence -from typing import Any, Optional, TypeVar +from typing import Any, TypeVar from .db_fields import IS_MERGE_FIELD @@ -8,7 +8,7 @@ from .util import merge_dicts -_MergedRowKey = tuple[Optional[Any], ...] +_MergedRowKey = tuple[Any | None, ...] def _generate_merged_row_key(row: dict[str, Any]) -> _MergedRowKey: diff --git a/py/src/braintrust/otel/context.py b/py/src/braintrust/otel/context.py index bb65be77..ea9703bc 100644 --- a/py/src/braintrust/otel/context.py +++ b/py/src/braintrust/otel/context.py @@ -1,7 +1,7 @@ """Unified context management using OTEL's built-in context.""" import logging -from typing import Any, Optional +from typing import Any from braintrust.context import ParentSpanIds, SpanInfo from braintrust.logger import Span @@ -18,7 +18,7 @@ class ContextManager: def __init__(self): pass - def get_current_span_info(self) -> Optional["SpanInfo"]: + def get_current_span_info(self) -> "SpanInfo | None": """Get information about the currently active span from OTEL context.""" # Get the current span from OTEL context diff --git a/py/src/braintrust/span_cache.py b/py/src/braintrust/span_cache.py index f3248d0c..ee926614 100644 --- a/py/src/braintrust/span_cache.py +++ b/py/src/braintrust/span_cache.py @@ -11,7 +11,7 @@ import os import tempfile import uuid -from typing import Any, Optional +from typing import Any from braintrust.types import Metadata from braintrust.util import merge_dicts @@ -28,11 +28,11 @@ class CachedSpan: def __init__( self, span_id: str, - input: Optional[Any] = None, - output: Optional[Any] = None, + input: Any | None = None, + output: Any | None = None, metadata: Metadata | None = None, - span_parents: Optional[list[str]] = None, - span_attributes: Optional[dict[str, Any]] = None, + span_parents: list[str] | None = None, + span_attributes: dict[str, Any] | None = None, ): self.span_id = span_id self.input = input @@ -104,7 +104,7 @@ class SpanCache: """ def __init__(self, disabled: bool = False): - self._cache_file_path: Optional[str] = None + self._cache_file_path: str | None = None self._initialized = False # Tracks whether the cache was explicitly disabled (via constructor or disable()) self._explicitly_disabled = disabled @@ -226,7 +226,7 @@ def _flush_write_buffer(self) -> None: # This can happen if disk is full or file permissions changed pass - def get_by_root_span_id(self, root_span_id: str) -> Optional[list[CachedSpan]]: + def get_by_root_span_id(self, root_span_id: str) -> list[CachedSpan] | None: """ Get all cached spans for a given rootSpanId. diff --git a/py/src/braintrust/test_serializable_data_class.py b/py/src/braintrust/test_serializable_data_class.py index 0cade6ae..e31e2078 100644 --- a/py/src/braintrust/test_serializable_data_class.py +++ b/py/src/braintrust/test_serializable_data_class.py @@ -1,14 +1,13 @@ import unittest from dataclasses import dataclass -from typing import List, Optional from .serializable_data_class import SerializableDataClass @dataclass class PromptData(SerializableDataClass): - prompt: Optional[str] = None - options: Optional[dict] = None + prompt: str | None = None + options: dict | None = None @dataclass @@ -18,9 +17,9 @@ class PromptSchema(SerializableDataClass): _xact_id: str name: str slug: str - description: Optional[str] + description: str | None prompt_data: PromptData - tags: Optional[List[str]] + tags: list[str] | None class TestSerializableDataClass(unittest.TestCase): diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py index d3426ac4..00084495 100644 --- a/py/src/braintrust/trace.py +++ b/py/src/braintrust/trace.py @@ -6,7 +6,7 @@ """ import asyncio -from typing import Any, Awaitable, Callable, Optional, Protocol, TypedDict +from typing import Any, Awaitable, Callable, Protocol, TypedDict from braintrust.functions.invoke import invoke from braintrust.logger import BraintrustState, ObjectFetcher @@ -18,12 +18,12 @@ class SpanData: def __init__( self, - input: Optional[Any] = None, - output: Optional[Any] = None, + input: Any | None = None, + output: Any | None = None, metadata: Metadata | None = None, - span_id: Optional[str] = None, - span_parents: Optional[list[str]] = None, - span_attributes: Optional[dict[str, Any]] = None, + span_id: str | None = None, + span_parents: list[str] | None = None, + span_attributes: dict[str, Any] | None = None, **kwargs: Any, ): self.input = input @@ -62,7 +62,7 @@ def __init__( object_id: str, root_span_id: str, state: BraintrustState, - span_type_filter: Optional[list[str]] = None, + span_type_filter: list[str] | None = None, ): # Build the filter expression for root_span_id and optionally span_attributes.type filter_expr = self._build_filter(root_span_id, span_type_filter) @@ -75,7 +75,7 @@ def __init__( self._state = state @staticmethod - def _build_filter(root_span_id: str, span_type_filter: Optional[list[str]] = None) -> dict[str, Any]: + def _build_filter(root_span_id: str, span_type_filter: list[str] | None = None) -> dict[str, Any]: """Build BTQL filter expression.""" children = [ # Base filter: root_span_id = 'value' @@ -121,7 +121,7 @@ def _get_state(self) -> BraintrustState: return self._state -SpanFetchFn = Callable[[Optional[list[str]]], Awaitable[list[SpanData]]] +SpanFetchFn = Callable[[list[str] | None], Awaitable[list[SpanData]]] class GetThreadOptions(TypedDict, total=False): @@ -140,11 +140,11 @@ class CachedSpanFetcher: def __init__( self, - object_type: Optional[str] = None, # Literal["experiment", "project_logs", "playground_logs"] - object_id: Optional[str] = None, - root_span_id: Optional[str] = None, - get_state: Optional[Callable[[], Awaitable[BraintrustState]]] = None, - fetch_fn: Optional[SpanFetchFn] = None, + object_type: str | None = None, # Literal["experiment", "project_logs", "playground_logs"] + object_id: str | None = None, + root_span_id: str | None = None, + get_state: Callable[[], Awaitable[BraintrustState] | None] = None, + fetch_fn: SpanFetchFn | None = None, ): self._span_cache: dict[str, list[SpanData]] = {} self._all_fetched = False @@ -159,7 +159,7 @@ def __init__( "Must provide either fetch_fn or all of object_type, object_id, root_span_id, get_state" ) - async def _fetch_fn(span_type: Optional[list[str]]) -> list[SpanData]: + async def _fetch_fn(span_type: list[str] | None) -> list[SpanData]: state = await get_state() fetcher = SpanFetcher( object_type=object_type, @@ -196,7 +196,7 @@ async def _fetch_fn(span_type: Optional[list[str]]) -> list[SpanData]: self._fetch_fn = _fetch_fn - async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]: + async def get_spans(self, span_type: list[str] | None = None) -> list[SpanData]: """ Get spans, using cache when possible. @@ -228,7 +228,7 @@ async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanDat await self._fetch_spans(missing_types) return self._get_from_cache(span_type) - async def _fetch_spans(self, span_type: Optional[list[str]]) -> None: + async def _fetch_spans(self, span_type: list[str] | None) -> None: """Fetch spans from the server.""" spans = await self._fetch_fn(span_type) @@ -239,7 +239,7 @@ async def _fetch_spans(self, span_type: Optional[list[str]]) -> None: self._span_cache[span_type_str] = [] self._span_cache[span_type_str].append(span) - def _get_from_cache(self, span_type: Optional[list[str]]) -> list[SpanData]: + def _get_from_cache(self, span_type: list[str] | None) -> list[SpanData]: """Get spans from cache, optionally filtering by type.""" if not span_type or len(span_type) == 0: # Return all spans @@ -266,7 +266,7 @@ def get_configuration(self) -> dict[str, str]: """Get the trace configuration (object_type, object_id, root_span_id).""" ... - async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]: + async def get_spans(self, span_type: list[str] | None = None) -> list[SpanData]: """ Fetch all spans for this root span. @@ -307,7 +307,7 @@ def __init__( object_type: str, # Literal["experiment", "project_logs", "playground_logs"] object_id: str, root_span_id: str, - ensure_spans_flushed: Optional[Callable[[], Awaitable[None]]], + ensure_spans_flushed: Callable[[], Awaitable[None] | None], state: BraintrustState, ): # Initialize dict with trace_ref for JSON serialization @@ -327,7 +327,7 @@ def __init__( self._ensure_spans_flushed = ensure_spans_flushed self._state = state self._spans_flushed = False - self._spans_flush_promise: Optional[asyncio.Task[None]] = None + self._spans_flush_promise: asyncio.Task[None] | None = None self._thread_cache: dict[str, asyncio.Task[list[Any]]] = {} async def get_state() -> BraintrustState: @@ -351,7 +351,7 @@ def get_configuration(self) -> dict[str, str]: "root_span_id": self._root_span_id, } - async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]: + async def get_spans(self, span_type: list[str] | None = None) -> list[SpanData]: """ Fetch all rows for this root span from its parent object (experiment or project logs). First checks the local span cache for recently logged spans, then falls diff --git a/py/src/braintrust/wrappers/langsmith_wrapper.py b/py/src/braintrust/wrappers/langsmith_wrapper.py index b22117df..f6c48e78 100644 --- a/py/src/braintrust/wrappers/langsmith_wrapper.py +++ b/py/src/braintrust/wrappers/langsmith_wrapper.py @@ -40,7 +40,8 @@ def my_function(inputs: dict) -> dict: import inspect import logging import os -from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, ParamSpec, TypeVar +from collections.abc import Iterable, Iterator +from typing import Any, Callable, ParamSpec, TypeVar from braintrust.framework import EvalCase from braintrust.logger import NOOP_SPAN, current_span, init_logger, traced @@ -50,7 +51,7 @@ def my_function(inputs: dict) -> dict: logger = logging.getLogger(__name__) # Global list to store Braintrust eval results when running in tandem mode -_braintrust_eval_results: List[Any] = [] +_braintrust_eval_results: list[Any] = [] # TODO: langsmith.test/unit/expect, langsmith.AsyncClient, trace __all__ = [ @@ -68,7 +69,7 @@ def my_function(inputs: dict) -> dict: R = TypeVar("R") -def get_braintrust_results() -> List[Any]: +def get_braintrust_results() -> list[Any]: """Get all Braintrust eval results collected during tandem mode.""" return _braintrust_eval_results.copy() @@ -79,9 +80,9 @@ def clear_braintrust_results() -> None: def setup_langsmith( - api_key: Optional[str] = None, - project_id: Optional[str] = None, - project_name: Optional[str] = None, + api_key: str | None = None, + project_id: str | None = None, + project_name: str | None = None, standalone: bool = False, ) -> bool: """ @@ -169,7 +170,7 @@ def decorator(fn: Callable[P, R]) -> Callable[P, R]: def wrap_client( - Client: Any, project_name: Optional[str] = None, project_id: Optional[str] = None, standalone: bool = False + Client: Any, project_name: str | None = None, project_id: str | None = None, standalone: bool = False ) -> Any: """ Wrap langsmith.Client to redirect evaluate() and aevaluate() to Braintrust's Eval. @@ -204,7 +205,7 @@ def wrap_client( def make_evaluate_wrapper( - *, project_name: Optional[str] = None, project_id: Optional[str] = None, standalone: bool = False + *, project_name: str | None = None, project_id: str | None = None, standalone: bool = False ): def evaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any: result = None @@ -231,7 +232,7 @@ def evaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any def make_aevaluate_wrapper( - *, project_name: Optional[str] = None, project_id: Optional[str] = None, standalone: bool = False + *, project_name: str | None = None, project_id: str | None = None, standalone: bool = False ): async def aevaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any: result = None @@ -258,7 +259,7 @@ async def aevaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) def wrap_evaluate( - evaluate: F, project_name: Optional[str] = None, project_id: Optional[str] = None, standalone: bool = False + evaluate: F, project_name: str | None = None, project_id: str | None = None, standalone: bool = False ) -> F: """ Wrap module-level langsmith.evaluate to redirect to Braintrust's Eval. @@ -282,8 +283,8 @@ def wrap_evaluate( def wrap_aevaluate( aevaluate: F, - project_name: Optional[str] = None, - project_id: Optional[str] = None, + project_name: str | None = None, + project_id: str | None = None, standalone: bool = False, ) -> F: """ @@ -318,8 +319,8 @@ def _is_patched(obj: Any) -> bool: def _run_braintrust_eval( args: Any, kwargs: Any, - project_name: Optional[str] = None, - project_id: Optional[str] = None, + project_name: str | None = None, + project_id: str | None = None, ) -> Any: """Run Braintrust Eval with LangSmith-style arguments.""" from braintrust.framework import Eval @@ -356,8 +357,8 @@ def _run_braintrust_eval( async def _run_braintrust_eval_async( args: Any, kwargs: Any, - project_name: Optional[str] = None, - project_id: Optional[str] = None, + project_name: str | None = None, + project_id: str | None = None, ) -> Any: """Run Braintrust EvalAsync with LangSmith-style arguments.""" from braintrust.framework import EvalAsync @@ -396,7 +397,7 @@ async def _run_braintrust_eval_async( # ============================================================================= -def _wrap_output(output: Any) -> Dict[str, Any]: +def _wrap_output(output: Any) -> dict[str, Any]: """Wrap non-dict outputs the same way LangSmith does.""" if not isinstance(output, dict): return {"output": output} @@ -413,7 +414,7 @@ def _make_braintrust_scorer( """ evaluator_name = getattr(evaluator, "__name__", "score") - def braintrust_scorer(input: Any, output: Any, expected: Optional[Any] = None, **kwargs: Any) -> Any: + def braintrust_scorer(input: Any, output: Any, expected: Any | None = None, **kwargs: Any) -> Any: from braintrust.score import Score # Run the evaluator with LangSmith's signature From 53cfefc4eacf172a6876e6d9d806873802a918da Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 19:54:14 +0000 Subject: [PATCH 07/10] chore: replace typing.List/Dict/etc with builtins and collections.abc Migrates legacy typing container imports (List, Dict, AsyncGenerator, Generator, Callable, Mapping, Sequence, Iterable, Iterator, Awaitable) to their modern equivalents: lowercase builtins (list, dict) and collections.abc for abstract types. Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/bt_json.py | 3 ++- .../contrib/temporal/test_temporal.py | 10 ++++----- .../claude_agent_sdk/_constants.py | 3 ++- .../integrations/langchain/helpers.py | 3 ++- .../integrations/langchain/test_callbacks.py | 22 +++++++++---------- .../integrations/langchain/test_context.py | 3 +-- .../test_pydantic_ai_wrap_openai.py | 4 ++-- py/src/braintrust/test_context.py | 3 ++- py/src/braintrust/test_framework.py | 11 +++++----- py/src/braintrust/test_logger.py | 8 +++---- py/src/braintrust/test_util.py | 7 +++--- py/src/braintrust/trace.py | 3 ++- py/src/braintrust/types/_eval.py | 3 ++- 13 files changed, 43 insertions(+), 40 deletions(-) diff --git a/py/src/braintrust/bt_json.py b/py/src/braintrust/bt_json.py index 76dadb01..e6035d6b 100644 --- a/py/src/braintrust/bt_json.py +++ b/py/src/braintrust/bt_json.py @@ -2,7 +2,8 @@ import json import math import warnings -from typing import Any, Callable, Mapping, NamedTuple, cast, overload +from collections.abc import Callable, Mapping +from typing import Any, NamedTuple, cast, overload # Try to import orjson for better performance diff --git a/py/src/braintrust/contrib/temporal/test_temporal.py b/py/src/braintrust/contrib/temporal/test_temporal.py index 8ed87264..03999310 100644 --- a/py/src/braintrust/contrib/temporal/test_temporal.py +++ b/py/src/braintrust/contrib/temporal/test_temporal.py @@ -4,7 +4,7 @@ import uuid from dataclasses import dataclass from datetime import timedelta -from typing import Any, Dict +from typing import Any import pytest import pytest_asyncio @@ -31,7 +31,7 @@ class TestHeaderSerialization: def test_span_context_to_headers_with_valid_context(self): interceptor = BraintrustInterceptor() span_context = {"trace_id": "test-trace-id", "span_id": "test-span-id"} - headers: Dict[str, temporalio.api.common.v1.Payload] = {} + headers: dict[str, temporalio.api.common.v1.Payload] = {} result_headers = interceptor._span_context_to_headers(span_context, headers) @@ -40,8 +40,8 @@ def test_span_context_to_headers_with_valid_context(self): def test_span_context_to_headers_with_empty_context(self): interceptor = BraintrustInterceptor() - span_context: Dict[str, Any] = {} - headers: Dict[str, temporalio.api.common.v1.Payload] = {} + span_context: dict[str, Any] = {} + headers: dict[str, temporalio.api.common.v1.Payload] = {} result_headers = interceptor._span_context_to_headers(span_context, headers) @@ -78,7 +78,7 @@ def test_span_context_from_headers_with_valid_header(self): def test_span_context_from_headers_with_missing_header(self): interceptor = BraintrustInterceptor() - headers: Dict[str, temporalio.api.common.v1.Payload] = {} + headers: dict[str, temporalio.api.common.v1.Payload] = {} result = interceptor._span_context_from_headers(headers) diff --git a/py/src/braintrust/integrations/claude_agent_sdk/_constants.py b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py index fa228a34..8be12600 100644 --- a/py/src/braintrust/integrations/claude_agent_sdk/_constants.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py @@ -1,7 +1,8 @@ from dataclasses import dataclass from enum import Enum from types import MappingProxyType -from typing import Final, Mapping +from collections.abc import Mapping +from typing import Final class MessageClassName(str, Enum): diff --git a/py/src/braintrust/integrations/langchain/helpers.py b/py/src/braintrust/integrations/langchain/helpers.py index f75b96db..7271ef37 100644 --- a/py/src/braintrust/integrations/langchain/helpers.py +++ b/py/src/braintrust/integrations/langchain/helpers.py @@ -1,4 +1,5 @@ -from typing import Any, Sequence +from collections.abc import Sequence +from typing import Any from unittest.mock import ANY diff --git a/py/src/braintrust/integrations/langchain/test_callbacks.py b/py/src/braintrust/integrations/langchain/test_callbacks.py index da7181d3..375332c9 100644 --- a/py/src/braintrust/integrations/langchain/test_callbacks.py +++ b/py/src/braintrust/integrations/langchain/test_callbacks.py @@ -3,7 +3,7 @@ import time import uuid from pathlib import Path -from typing import Dict, List, Union, cast +from typing import cast import pytest from braintrust import logger @@ -56,7 +56,7 @@ def test_llm_calls(logger_memory_logger): presence_penalty=0, n=1, ) - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) chain.invoke({"number": "2"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}) spans = memory_logger.pop() @@ -159,7 +159,7 @@ def test_chain_with_memory(logger_memory_logger): handler = BraintrustCallbackHandler(logger=test_logger) prompt = ChatPromptTemplate.from_template("{history} User: {input}") model = ChatOpenAI(model="gpt-4o-mini") - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) memory = {"history": "Assistant: Hello! How can I assist you today?"} chain.invoke( @@ -480,16 +480,16 @@ def test_langgraph_state_management(logger_memory_logger): n=1, ) - def say_hello(state: Dict[str, str]): + def say_hello(state: dict[str, str]): response = model.invoke("Say hello") - return cast(Union[str, List[str], Dict[str, str]], response.content) + return cast(str | list[str] | dict[str, str], response.content) - def say_bye(state: Dict[str, str]): + def say_bye(state: dict[str, str]): print("From the 'sayBye' node: Bye world!") return "Bye" workflow = ( - StateGraph(state_schema=Dict[str, str]) + StateGraph(state_schema=dict[str, str]) .add_node("sayHello", say_hello) .add_node("sayBye", say_bye) .add_edge(START, "sayHello") @@ -837,10 +837,10 @@ def test_streaming_ttft(logger_memory_logger): max_completion_tokens=50, streaming=True, ) - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) # Collect chunks to verify streaming works - chunks: List[str] = [] + chunks: list[str] = [] for chunk in chain.stream({}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}): if chunk.content: chunks.append(str(chunk.content)) @@ -1272,9 +1272,9 @@ async def test_async_streaming(logger_memory_logger): handler = BraintrustCallbackHandler(logger=test_logger) prompt = ChatPromptTemplate.from_template("Count from 1 to 3.") model = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=20, streaming=True) - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) - chunks: List[str] = [] + chunks: list[str] = [] async for chunk in chain.astream({}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}): if chunk.content: chunks.append(str(chunk.content)) diff --git a/py/src/braintrust/integrations/langchain/test_context.py b/py/src/braintrust/integrations/langchain/test_context.py index 2076c4b3..37cf552a 100644 --- a/py/src/braintrust/integrations/langchain/test_context.py +++ b/py/src/braintrust/integrations/langchain/test_context.py @@ -1,5 +1,4 @@ # pyright: reportTypedDictNotRequiredAccess=none -from typing import Dict from unittest.mock import ANY import pytest @@ -57,7 +56,7 @@ def test_global_handler(logger_memory_logger): presence_penalty=0, n=1, ) - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) message = chain.invoke({"number": "2"}) diff --git a/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py index c1dfceb3..dc112b6c 100644 --- a/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py +++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py @@ -1,5 +1,5 @@ import time -from typing import Any, Dict +from typing import Any import pytest from openai import AsyncOpenAI @@ -61,7 +61,7 @@ def memory_logger(): yield bgl -def _assert_metrics_are_valid(metrics: Dict[str, Any]): +def _assert_metrics_are_valid(metrics: dict[str, Any]): assert metrics["tokens"] > 0 assert metrics["prompt_tokens"] > 0 assert metrics["completion_tokens"] > 0 diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py index cb3b78a9..80e47048 100644 --- a/py/src/braintrust/test_context.py +++ b/py/src/braintrust/test_context.py @@ -24,7 +24,8 @@ def _threadpool_scenario(test_logger, with_memory_logger): import subprocess import sys import threading -from typing import AsyncGenerator, Callable, Generator, TypeVar +from collections.abc import AsyncGenerator, Callable, Generator +from typing import TypeVar import braintrust import pytest diff --git a/py/src/braintrust/test_framework.py b/py/src/braintrust/test_framework.py index 6368cee3..608b7585 100644 --- a/py/src/braintrust/test_framework.py +++ b/py/src/braintrust/test_framework.py @@ -1,6 +1,5 @@ import importlib.util import re -from typing import List from unittest.mock import MagicMock import pytest @@ -213,7 +212,7 @@ def _run_eval_sync(self, *args, **kwargs): @pytest.mark.asyncio async def test_hooks_trial_index(): """Test that trial_index is correctly passed to task via hooks.""" - trial_indices: List[int] = [] + trial_indices: list[int] = [] # Task that captures trial indices def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: @@ -253,7 +252,7 @@ def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: @pytest.mark.asyncio async def test_hooks_trial_index_multiple_inputs(): """Test trial_index with multiple inputs to ensure proper indexing.""" - trial_data: List[tuple] = [] # (input, trial_index) + trial_data: list[tuple] = [] # (input, trial_index) def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: trial_data.append((input_value, hooks.trial_index)) @@ -293,7 +292,7 @@ def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: @pytest.mark.asyncio async def test_per_input_trial_count_overrides_global(): """Test that per-input trial_count overrides the global trial_count.""" - trial_data: List[tuple] = [] # (input, trial_index) + trial_data: list[tuple] = [] # (input, trial_index) def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: trial_data.append((input_value, hooks.trial_index)) @@ -332,7 +331,7 @@ def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: @pytest.mark.asyncio async def test_per_input_trial_count_without_global(): """Test that per-input trial_count works when no global trial_count is set.""" - trial_data: List[tuple] = [] # (input, trial_index) + trial_data: list[tuple] = [] # (input, trial_index) def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: trial_data.append((input_value, hooks.trial_index)) @@ -367,7 +366,7 @@ def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: @pytest.mark.asyncio async def test_per_input_trial_count_with_dict_data(): """Test that per-input trial_count works when data items are plain dicts.""" - trial_data: List[tuple] = [] # (input, trial_index) + trial_data: list[tuple] = [] # (input, trial_index) def task_with_hooks(input_value: int, hooks: EvalHooks) -> int: trial_data.append((input_value, hooks.trial_index)) diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py index 7662ad77..e8c22bdc 100644 --- a/py/src/braintrust/test_logger.py +++ b/py/src/braintrust/test_logger.py @@ -5,7 +5,7 @@ import logging import os import time -from typing import AsyncGenerator, List +from collections.abc import AsyncGenerator from unittest import TestCase from unittest.mock import MagicMock, patch @@ -384,7 +384,7 @@ def test_load_parameters_prefers_version_over_environment_for_id(self): assert "environment" not in mock_api_conn.get_json.call_args.args[1] def test_extract_attachments_no_op(self): - attachments: List[BaseAttachment] = [] + attachments: list[BaseAttachment] = [] _extract_attachments({}, attachments) self.assertEqual(len(attachments), 0) @@ -441,7 +441,7 @@ def test_extract_attachments_with_attachments(self): } saved_nested = event["nested"] - attachments: List[BaseAttachment] = [] + attachments: list[BaseAttachment] = [] _extract_attachments(event, attachments) self.assertEqual( @@ -3135,7 +3135,7 @@ def test_extract_attachments_with_json_attachment(self): }, } - attachments: List[BaseAttachment] = [] + attachments: list[BaseAttachment] = [] _extract_attachments(event, attachments) self.assertEqual(len(attachments), 1) diff --git a/py/src/braintrust/test_util.py b/py/src/braintrust/test_util.py index 90f18602..0dd27568 100644 --- a/py/src/braintrust/test_util.py +++ b/py/src/braintrust/test_util.py @@ -1,6 +1,5 @@ import os import unittest -from typing import List import pytest @@ -129,9 +128,9 @@ def compute_value(): lazy = LazyValue(compute_value, use_mutex=True) # Launch multiple threads that all try to get() simultaneously - threads: List[threading.Thread] = [] - results: List[str] = [] - errors: List[Exception] = [] + threads: list[threading.Thread] = [] + results: list[str] = [] + errors: list[Exception] = [] def worker(): try: diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py index 00084495..118fe5eb 100644 --- a/py/src/braintrust/trace.py +++ b/py/src/braintrust/trace.py @@ -6,7 +6,8 @@ """ import asyncio -from typing import Any, Awaitable, Callable, Protocol, TypedDict +from collections.abc import Awaitable, Callable +from typing import Any, Protocol, TypedDict from braintrust.functions.invoke import invoke from braintrust.logger import BraintrustState, ObjectFetcher diff --git a/py/src/braintrust/types/_eval.py b/py/src/braintrust/types/_eval.py index a199d8a2..94be4cae 100644 --- a/py/src/braintrust/types/_eval.py +++ b/py/src/braintrust/types/_eval.py @@ -5,7 +5,8 @@ underscore-prefixed so pyright strict mode doesn't flag them as private. """ -from typing import Any, Generic, Sequence, TypedDict, TypeVar +from collections.abc import Sequence +from typing import Any, Generic, TypedDict, TypeVar from typing_extensions import NotRequired From 8b95b16b1e2321fa6d0fc1d8d88480e828d97510 Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 20:03:39 +0000 Subject: [PATCH 08/10] fix: correct Optional[Callable[...]] conversion in trace.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The regex-based Optional→|None replacement incorrectly placed | None inside Callable's return type instead of outside the whole Callable, changing the type semantics: - `Optional[Callable[[], Awaitable[X]]]` (callable is nullable) was wrongly converted to `Callable[[], Awaitable[X] | None]` (return type is nullable) Fixed to: `Callable[[], Awaitable[X]] | None` Also moves Callable from typing to collections.abc in langsmith_wrapper.py for consistency with other files. Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/trace.py | 4 ++-- py/src/braintrust/wrappers/langsmith_wrapper.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py index 118fe5eb..24bcefa2 100644 --- a/py/src/braintrust/trace.py +++ b/py/src/braintrust/trace.py @@ -144,7 +144,7 @@ def __init__( object_type: str | None = None, # Literal["experiment", "project_logs", "playground_logs"] object_id: str | None = None, root_span_id: str | None = None, - get_state: Callable[[], Awaitable[BraintrustState] | None] = None, + get_state: Callable[[], Awaitable[BraintrustState]] | None = None, fetch_fn: SpanFetchFn | None = None, ): self._span_cache: dict[str, list[SpanData]] = {} @@ -308,7 +308,7 @@ def __init__( object_type: str, # Literal["experiment", "project_logs", "playground_logs"] object_id: str, root_span_id: str, - ensure_spans_flushed: Callable[[], Awaitable[None] | None], + ensure_spans_flushed: Callable[[], Awaitable[None]] | None, state: BraintrustState, ): # Initialize dict with trace_ref for JSON serialization diff --git a/py/src/braintrust/wrappers/langsmith_wrapper.py b/py/src/braintrust/wrappers/langsmith_wrapper.py index f6c48e78..6c69dddb 100644 --- a/py/src/braintrust/wrappers/langsmith_wrapper.py +++ b/py/src/braintrust/wrappers/langsmith_wrapper.py @@ -40,8 +40,8 @@ def my_function(inputs: dict) -> dict: import inspect import logging import os -from collections.abc import Iterable, Iterator -from typing import Any, Callable, ParamSpec, TypeVar +from collections.abc import Callable, Iterable, Iterator +from typing import Any, ParamSpec, TypeVar from braintrust.framework import EvalCase from braintrust.logger import NOOP_SPAN, current_span, init_logger, traced From 010af87923c8467a3712aefc1b9dd7d46e742b0a Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 20:06:23 +0000 Subject: [PATCH 09/10] fix: revert TypedDict to typing_extensions in _eval.py, run ruff On Python 3.10, stdlib typing.TypedDict does not support inheriting from both Generic and TypedDict simultaneously. The typing_extensions backport handles this correctly, so TypedDict must stay in typing_extensions for types/_eval.py. Also applies ruff format/check fixes (import sorting, line collapsing). Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/framework.py | 15 +++------------ py/src/braintrust/functions/stream.py | 6 +----- .../integrations/claude_agent_sdk/_constants.py | 2 +- py/src/braintrust/types/_eval.py | 4 ++-- py/src/braintrust/wrappers/langsmith_wrapper.py | 4 +--- 5 files changed, 8 insertions(+), 23 deletions(-) diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index a40c5471..554e4007 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -217,12 +217,7 @@ class EvalScorerArgs(SerializableDataClass, Generic[Input, Output, Expected]): OneOrMoreScores = float | int | bool | None | Score | list[Score] -OneOrMoreClassifications = ( - None - | Classification - | Mapping[str, Any] - | list[Classification | Mapping[str, Any]] -) +OneOrMoreClassifications = None | Classification | Mapping[str, Any] | list[Classification | Mapping[str, Any]] # Synchronous scorer interface - implements callable @@ -278,10 +273,7 @@ class BaseExperiment: _AnyEvalCase = ( - EvalCase[Input, Expected] - | EvalCaseDict[Input, Expected] - | EvalCaseDictNoOutput[Input] - | ExperimentDatasetEvent + EvalCase[Input, Expected] | EvalCaseDict[Input, Expected] | EvalCaseDictNoOutput[Input] | ExperimentDatasetEvent ) _EvalDataObject = ( @@ -295,8 +287,7 @@ class BaseExperiment: EvalData = _EvalDataObject[Input, Expected] | type[_EvalDataObject[Input, Expected]] | Dataset EvalTask = ( - Callable[[Input], Output | Awaitable[Output]] - | Callable[[Input, EvalHooks[Expected]], Output | Awaitable[Output]] + Callable[[Input], Output | Awaitable[Output]] | Callable[[Input, EvalHooks[Expected]], Output | Awaitable[Output]] ) ErrorScoreHandler = Callable[[Span, EvalCase[Input, Expected], Sequence[str]], dict[str, float] | None] diff --git a/py/src/braintrust/functions/stream.py b/py/src/braintrust/functions/stream.py index 18ebfb6e..d3a6b84d 100644 --- a/py/src/braintrust/functions/stream.py +++ b/py/src/braintrust/functions/stream.py @@ -80,11 +80,7 @@ class BraintrustInvokeError(ValueError): BraintrustStreamChunk = ( - BraintrustTextChunk - | BraintrustJsonChunk - | BraintrustErrorChunk - | BraintrustConsoleChunk - | BraintrustProgressChunk + BraintrustTextChunk | BraintrustJsonChunk | BraintrustErrorChunk | BraintrustConsoleChunk | BraintrustProgressChunk ) diff --git a/py/src/braintrust/integrations/claude_agent_sdk/_constants.py b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py index 8be12600..f8757deb 100644 --- a/py/src/braintrust/integrations/claude_agent_sdk/_constants.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py @@ -1,7 +1,7 @@ +from collections.abc import Mapping from dataclasses import dataclass from enum import Enum from types import MappingProxyType -from collections.abc import Mapping from typing import Final diff --git a/py/src/braintrust/types/_eval.py b/py/src/braintrust/types/_eval.py index 94be4cae..528df11e 100644 --- a/py/src/braintrust/types/_eval.py +++ b/py/src/braintrust/types/_eval.py @@ -6,9 +6,9 @@ """ from collections.abc import Sequence -from typing import Any, Generic, TypedDict, TypeVar +from typing import Any, Generic, TypeVar -from typing_extensions import NotRequired +from typing_extensions import NotRequired, TypedDict Input = TypeVar("Input") diff --git a/py/src/braintrust/wrappers/langsmith_wrapper.py b/py/src/braintrust/wrappers/langsmith_wrapper.py index 6c69dddb..83e1fe01 100644 --- a/py/src/braintrust/wrappers/langsmith_wrapper.py +++ b/py/src/braintrust/wrappers/langsmith_wrapper.py @@ -204,9 +204,7 @@ def wrap_client( return Client -def make_evaluate_wrapper( - *, project_name: str | None = None, project_id: str | None = None, standalone: bool = False -): +def make_evaluate_wrapper(*, project_name: str | None = None, project_id: str | None = None, standalone: bool = False): def evaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any: result = None if not standalone: From ac790785edd29bb0b3532cee50d9ae64106e08b3 Mon Sep 17 00:00:00 2001 From: "Nova (SFK)" Date: Mon, 27 Apr 2026 20:13:59 +0000 Subject: [PATCH 10/10] fix: handle types.UnionType in from_dict_deep, fix missed List reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Python 3.10-3.12, the `X | Y` syntax creates a `types.UnionType` which is distinct from `typing.Union`. The `from_dict_deep` method in `SerializableDataClass` only checked for `typing.Union` via `get_origin`, so it failed to deserialize union-typed fields when the annotation used pipe syntax (e.g. `PromptBlockData`). Also fixes a missed `cast(List, ...)` → `cast(list, ...)` in langchain test_callbacks.py that caused a pylint E0602. Co-Authored-By: Claude Opus 4.6 --- py/src/braintrust/integrations/langchain/test_callbacks.py | 2 +- py/src/braintrust/serializable_data_class.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/py/src/braintrust/integrations/langchain/test_callbacks.py b/py/src/braintrust/integrations/langchain/test_callbacks.py index 375332c9..adeaa37d 100644 --- a/py/src/braintrust/integrations/langchain/test_callbacks.py +++ b/py/src/braintrust/integrations/langchain/test_callbacks.py @@ -399,7 +399,7 @@ def test_parallel_execution(logger_memory_logger): map_chain.invoke({"topic": "bear"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}) - spans = cast(List, memory_logger.pop()) + spans = cast(list, memory_logger.pop()) # Find the LLM spans llm_spans = find_spans_by_attributes(spans, name="ChatOpenAI") diff --git a/py/src/braintrust/serializable_data_class.py b/py/src/braintrust/serializable_data_class.py index 8f9eeefc..ba32ecca 100644 --- a/py/src/braintrust/serializable_data_class.py +++ b/py/src/braintrust/serializable_data_class.py @@ -1,5 +1,6 @@ import dataclasses import json +import types from typing import Union, get_origin @@ -39,7 +40,7 @@ def from_dict_deep(cls, d: dict): and issubclass(fields[k].type, SerializableDataClass) ): filtered[k] = fields[k].type.from_dict_deep(v) - elif get_origin(fields[k].type) == Union: + elif get_origin(fields[k].type) is Union or isinstance(fields[k].type, types.UnionType): for t in fields[k].type.__args__: if t == type(None) and v is None: filtered[k] = None