diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 71ccaac7..00000000
--- a/.flake8
+++ /dev/null
@@ -1,3 +0,0 @@
-[flake8]
-max-line-length = 119
-ignore = E402, E203, E501, W503
diff --git a/.isort.cfg b/.isort.cfg
deleted file mode 100644
index 2a4365a4..00000000
--- a/.isort.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-[settings]
-line_length=119
-multi_line_output=3
-use_parentheses=true
-lines_after_imports=2
-include_trailing_comma=True
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7a19c313..31549ae0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,7 +10,8 @@ repos:
     # Ruff version.
     rev: v0.12.7
     hooks:
-      - id: ruff
+      - id: ruff-check
         args: [--fix, --exit-non-zero-on-fix]
+      - id: ruff-format
   - repo: https://github.com/codespell-project/codespell
     rev: v2.2.5
diff --git a/.prettierignore b/.prettierignore
deleted file mode 100644
index 66a1eb7c..00000000
--- a/.prettierignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# Patch files use specific whitespace formatting required by unified diff format
-patches/**/*.patch
diff --git a/integrations/adk-py/examples/mcp_tracing/agent.py b/integrations/adk-py/examples/mcp_tracing/agent.py
index 40ca8869..eb687f7f 100644
--- a/integrations/adk-py/examples/mcp_tracing/agent.py
+++ b/integrations/adk-py/examples/mcp_tracing/agent.py
@@ -61,9 +61,7 @@ async def main():
     SESSION_ID = "demo-session"
 
     session_service = InMemorySessionService()
-    await session_service.create_session(
-        app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID
-    )
+    await session_service.create_session(app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID)
 
     runner = Runner(agent=agent, app_name=APP_NAME, session_service=session_service)
 
@@ -74,15 +72,9 @@ async def main():
         parts=[types.Part(text="What files are in /tmp? Just list a few.")],
     )
 
-    async for event in runner.run_async(
-        user_id=USER_ID, session_id=SESSION_ID, new_message=user_msg
-    ):
+    async for event in runner.run_async(user_id=USER_ID, session_id=SESSION_ID, new_message=user_msg):
         if event.is_final_response():
-            text = (
-                event.content.parts[0].text
-                if event.content and event.content.parts
-                else "No response"
-            )
+            text = event.content.parts[0].text if event.content and event.content.parts else "No response"
             print(f"Agent response: {text}\n")
 
     print("=== Trace complete ===")
diff --git a/integrations/adk-py/pyproject.toml b/integrations/adk-py/pyproject.toml
index 7e93963e..c988e970 100644
--- a/integrations/adk-py/pyproject.toml
+++ b/integrations/adk-py/pyproject.toml
@@ -49,10 +49,6 @@ dev = [
     "ruff>=0.12.9",
 ]
 
-[tool.isort]
-profile = "black"
-line_length = 120
-
 [tool.ruff]
 line-length = 120
 
diff --git a/integrations/langchain-py/pyproject.toml b/integrations/langchain-py/pyproject.toml
index 6aa1e850..9bbf9d7a 100644
--- a/integrations/langchain-py/pyproject.toml
+++ b/integrations/langchain-py/pyproject.toml
@@ -44,12 +44,8 @@ members = [
 
 [dependency-groups]
 dev = [
-    "black",
     "build",
-    "flake8",
-    "flake8-isort",
     "httpx",
-    "isort==5.12.0",
     "langchain-anthropic>=0.3.20",
     "langchain-openai",
     "langgraph>=0.2.1,<0.4.0",
@@ -62,16 +58,6 @@ dev = [
     "twine",
 ]
 
-[tool.black]
-line-length = 120
-target-version = ['py310']
-
-[tool.isort]
-profile = "black"
-line_length = 120
-known_first_party = ["braintrust_langchain"]
-known_third_party = ["braintrust", "langchain"]
-
 [tool.ruff]
 line-length = 120
 
diff --git a/integrations/langchain-py/src/braintrust_langchain/callbacks.py b/integrations/langchain-py/src/braintrust_langchain/callbacks.py
index 871253a1..016a1268 100644
--- a/integrations/langchain-py/src/braintrust_langchain/callbacks.py
+++ b/integrations/langchain-py/src/braintrust_langchain/callbacks.py
@@ -6,10 +6,6 @@
 from re import Pattern
 from typing import (
     Any,
-    Dict,
-    List,
-    Optional,
-    Set,
     TypedDict,
     Union,
 )
diff --git a/integrations/langchain-py/src/braintrust_langchain/context.py b/integrations/langchain-py/src/braintrust_langchain/context.py
index 0c997de0..5c6bb4e8 100644
--- a/integrations/langchain-py/src/braintrust_langchain/context.py
+++ b/integrations/langchain-py/src/braintrust_langchain/context.py
@@ -1,5 +1,4 @@
 from contextvars import ContextVar
-from typing import Optional
 
 from langchain_core.tracers.context import register_configure_hook
 
diff --git a/integrations/langchain-py/src/tests/test_callbacks.py b/integrations/langchain-py/src/tests/test_callbacks.py
index ab17182a..8cc9f926 100644
--- a/integrations/langchain-py/src/tests/test_callbacks.py
+++ b/integrations/langchain-py/src/tests/test_callbacks.py
@@ -149,6 +149,7 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger):
         ],
     )
 
+
 @pytest.mark.vcr
 def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger):
     logger, memory_logger = logger_memory_logger
diff --git a/internal/golden/adk-py-v1/google_adk.py b/internal/golden/adk-py-v1/google_adk.py
index 5fcff824..a0b82811 100644
--- a/internal/golden/adk-py-v1/google_adk.py
+++ b/internal/golden/adk-py-v1/google_adk.py
@@ -13,6 +13,7 @@
 from google.adk.sessions import InMemorySessionService
 from google.genai import types
 
+
 setup_adk(project_name="golden-py-adk")
 
 FIXTURES_DIR = Path(__file__).parent.parent / "fixtures"
diff --git a/internal/golden/genai-py-v1/google_genai.py b/internal/golden/genai-py-v1/google_genai.py
index 79284096..46ad47e8 100644
--- a/internal/golden/genai-py-v1/google_genai.py
+++ b/internal/golden/genai-py-v1/google_genai.py
@@ -11,6 +11,7 @@
 from google.genai import types
 from google.genai.client import Client
 
+
 setup_genai(project_name="golden-py-genai")
 
 FIXTURES_DIR = Path(__file__).parent.parent / "fixtures"
diff --git a/internal/golden/langchain-py-v0/langchain.py b/internal/golden/langchain-py-v0/langchain.py
index 93eac31e..c8070089 100644
--- a/internal/golden/langchain-py-v0/langchain.py
+++ b/internal/golden/langchain-py-v0/langchain.py
@@ -11,6 +11,7 @@
 from langchain_core.tools import tool
 from langchain_openai import ChatOpenAI
 
+
 init_logger(project="golden-py-langchain-v0")
 
 handler = BraintrustCallbackHandler()
diff --git a/internal/golden/langchain-py-v1/langchain.py b/internal/golden/langchain-py-v1/langchain.py
index 37e220dd..bf2c3dd0 100644
--- a/internal/golden/langchain-py-v1/langchain.py
+++ b/internal/golden/langchain-py-v1/langchain.py
@@ -11,6 +11,7 @@
 from langchain_core.tools import tool
 from langchain_openai import ChatOpenAI
 
+
 init_logger(project="golden-py-langchain-v1")
 
 handler = BraintrustCallbackHandler()
diff --git a/internal/golden/pydantic-ai-v1/pydantic_ai_test.py b/internal/golden/pydantic-ai-v1/pydantic_ai_test.py
index c6d80a75..8a6fde51 100644
--- a/internal/golden/pydantic-ai-v1/pydantic_ai_test.py
+++ b/internal/golden/pydantic-ai-v1/pydantic_ai_test.py
@@ -21,6 +21,7 @@
 )
 from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel, OpenAIResponsesModelSettings
 
+
 setup_pydantic_ai(project_name="golden-py-pydantic_ai")
 
 FIXTURES_DIR = Path(__file__).parent.parent / "fixtures"
diff --git a/py/benchmarks/perf.py b/py/benchmarks/perf.py
index a29d7c24..128ae2f2 100644
--- a/py/benchmarks/perf.py
+++ b/py/benchmarks/perf.py
@@ -3,6 +3,7 @@
 import braintrust
 from braintrust import traced
 
+
 LOOPS = 2000
 
 braintrust.init_logger(project="perf_test")
diff --git a/py/examples/adk/auto.py b/py/examples/adk/auto.py
index 60ef737f..fe21bbfb 100644
--- a/py/examples/adk/auto.py
+++ b/py/examples/adk/auto.py
@@ -10,6 +10,7 @@
 
 import braintrust
 
+
 # Auto-instrument all supported libraries including Google ADK
 braintrust.auto_instrument()
 
diff --git a/py/examples/adk/manual_patching.py b/py/examples/adk/manual_patching.py
index b35e1131..332eb8a8 100644
--- a/py/examples/adk/manual_patching.py
+++ b/py/examples/adk/manual_patching.py
@@ -10,6 +10,7 @@
 
 from braintrust.wrappers.adk import setup_adk
 
+
 # Setup ADK tracing with a specific project
 setup_adk(project_name="my-adk-project")
 
diff --git a/py/examples/agno/async_simple_agent_stream.py b/py/examples/agno/async_simple_agent_stream.py
index e3531609..b6c3717a 100644
--- a/py/examples/agno/async_simple_agent_stream.py
+++ b/py/examples/agno/async_simple_agent_stream.py
@@ -2,6 +2,7 @@
 
 from braintrust.wrappers.agno import setup_agno
 
+
 setup_agno(project_name="simple-agent-project")
 
 from agno.agent import Agent
diff --git a/py/examples/agno/async_team_agent.py b/py/examples/agno/async_team_agent.py
index 47a9b83d..723e7be7 100644
--- a/py/examples/agno/async_team_agent.py
+++ b/py/examples/agno/async_team_agent.py
@@ -2,6 +2,7 @@
 
 from braintrust.wrappers.agno import setup_agno
 
+
 # Set up Braintrust observability
 setup_agno(project_name="async-team-agent-project")
 
diff --git a/py/examples/agno/simple_agent.py b/py/examples/agno/simple_agent.py
index ce8822a2..a69ed279 100644
--- a/py/examples/agno/simple_agent.py
+++ b/py/examples/agno/simple_agent.py
@@ -1,11 +1,13 @@
 from braintrust.wrappers.agno import setup_agno
 
+
 setup_agno(project_name="simple-agent-project")
 
 from agno.agent import Agent
 from agno.models.openai import OpenAIChat
 from agno.tools.yfinance import YFinanceTools
 
+
 # Create and configure the agent
 agent = Agent(
     name="Stock Price Agent",
diff --git a/py/examples/agno/simple_agent_stream.py b/py/examples/agno/simple_agent_stream.py
index d842649c..7e6961b1 100644
--- a/py/examples/agno/simple_agent_stream.py
+++ b/py/examples/agno/simple_agent_stream.py
@@ -1,11 +1,13 @@
 from braintrust.wrappers.agno import setup_agno
 
+
 setup_agno(project_name="simple-agent-project")
 
 from agno.agent import Agent
 from agno.models.openai import OpenAIChat
 from agno.tools.yfinance import YFinanceTools
 
+
 # Create and configure the agent
 agent = Agent(
     name="Stock Price Agent",
diff --git a/py/examples/agno/team_agent.py b/py/examples/agno/team_agent.py
index c9bc86f6..db154e06 100644
--- a/py/examples/agno/team_agent.py
+++ b/py/examples/agno/team_agent.py
@@ -1,5 +1,6 @@
 from braintrust.wrappers.agno import setup_agno
 
+
 # Set up Braintrust observability
 setup_agno(project_name="team-agent-project")
 
@@ -8,6 +9,7 @@
 from agno.team import Team
 from agno.tools.yfinance import YFinanceTools
 
+
 # Create specialized agents for the team
 research_agent = Agent(
     name="Research Analyst",
diff --git a/py/examples/anthropic_async.py b/py/examples/anthropic_async.py
index 772084bb..c9cacdef 100755
--- a/py/examples/anthropic_async.py
+++ b/py/examples/anthropic_async.py
@@ -5,6 +5,7 @@
 import braintrust
 from anthropic import AsyncAnthropic
 
+
 # Initialize Anthropic client (needs ANTHROPIC_API_KEY)
 client = braintrust.wrap_anthropic(AsyncAnthropic())
 
diff --git a/py/examples/anthropic_sync.py b/py/examples/anthropic_sync.py
index 23e70fdb..16bb34f5 100755
--- a/py/examples/anthropic_sync.py
+++ b/py/examples/anthropic_sync.py
@@ -6,6 +6,7 @@
 import anthropic
 import braintrust
 
+
 # Initialize Anthropic client (needs ANTHROPIC_API_KEY)
 client = braintrust.wrap_anthropic(anthropic.Anthropic())
 braintrust.init_logger(project="example-anthropic-app")
diff --git a/py/examples/auto_instrument.py b/py/examples/auto_instrument.py
index 33fa278a..7215826f 100644
--- a/py/examples/auto_instrument.py
+++ b/py/examples/auto_instrument.py
@@ -17,6 +17,7 @@
 
 import braintrust
 
+
 # One-line instrumentation - call this BEFORE importing AI libraries
 # This patches all supported libraries automatically
 results = braintrust.auto_instrument()
@@ -36,6 +37,7 @@
 import anthropic
 import openai
 
+
 # Create clients - they're automatically wrapped
 openai_client = openai.OpenAI()
 anthropic_client = anthropic.Anthropic()
diff --git a/py/examples/dspy/example.py b/py/examples/dspy/example.py
index 8a53b9ae..257c4a20 100644
--- a/py/examples/dspy/example.py
+++ b/py/examples/dspy/example.py
@@ -11,6 +11,7 @@
 # IMPORTANT: Patch LiteLLM BEFORE importing DSPy to get detailed token metrics
 from braintrust.wrappers.litellm import patch_litellm
 
+
 patch_litellm()
 
 # Now import DSPy
diff --git a/py/examples/evals/eval_example.py b/py/examples/evals/eval_example.py
index 1d605a08..ced8a09c 100644
--- a/py/examples/evals/eval_example.py
+++ b/py/examples/evals/eval_example.py
@@ -2,6 +2,7 @@
 
 from braintrust import Eval
 
+
 NUM_EXAMPLES = 10
 
 
@@ -12,9 +13,9 @@ async def exact_match_scorer(input, output, expected, trace=None):
         score = 1.0 if output == expected else 0.0
 
     if trace:
-        print("\n" + "="*80)
+        print("\n" + "=" * 80)
         print(f"🔍 TRACE INFO for input: {input}")
-        print("="*80)
+        print("=" * 80)
 
         # Print trace configuration
         config = trace.get_configuration()
@@ -27,13 +28,13 @@ async def exact_match_scorer(input, output, expected, trace=None):
         try:
             spans = await trace.get_spans()
             print(f"\n✨ Found {len(spans)} spans:")
-            print("-"*80)
+            print("-" * 80)
 
             for i, span in enumerate(spans, 1):
                 print(f"\n  Span {i}:")
                 print(f"    ID:         {span.span_id}")
-                span_type = span.span_attributes.get('type', 'N/A') if span.span_attributes else 'N/A'
-                span_name = span.span_attributes.get('name', 'N/A') if span.span_attributes else 'N/A'
+                span_type = span.span_attributes.get("type", "N/A") if span.span_attributes else "N/A"
+                span_name = span.span_attributes.get("name", "N/A") if span.span_attributes else "N/A"
                 print(f"    Type:       {span_type}")
                 print(f"    Name:       {span_name}")
 
@@ -50,10 +51,11 @@ async def exact_match_scorer(input, output, expected, trace=None):
                 if span.metadata:
                     print(f"    Metadata:   {list(span.metadata.keys())}")
 
-            print("\n" + "="*80 + "\n")
+            print("\n" + "=" * 80 + "\n")
         except Exception as e:
             print(f"\n⚠️  Error fetching spans: {e}")
             import traceback
+
             traceback.print_exc()
     else:
         print(f"⚠️  No trace available for input: {input}")
diff --git a/py/examples/langsmith/eval_example.py b/py/examples/langsmith/eval_example.py
index 68c55152..944358ff 100644
--- a/py/examples/langsmith/eval_example.py
+++ b/py/examples/langsmith/eval_example.py
@@ -11,6 +11,7 @@
 
 import os
 
+
 # Enable LangSmith tracing (required for traces to be sent to LangSmith)
 os.environ.setdefault("LANGCHAIN_TRACING_V2", "true")
 os.environ.setdefault("LANGCHAIN_PROJECT", "examples-wrappers-langsmith-eval")
@@ -18,6 +19,7 @@
 # IMPORTANT: Call setup_langsmith BEFORE importing from langsmith
 from braintrust.wrappers.langsmith_wrapper import setup_langsmith
 
+
 # Set BRAINTRUST_STANDALONE=1 to completely replace LangSmith with Braintrust
 standalone = os.environ.get("BRAINTRUST_STANDALONE", "").lower() in ("1", "true", "yes")
 
diff --git a/py/examples/langsmith/tracing_example.py b/py/examples/langsmith/tracing_example.py
index 242609f1..b8bfbd87 100644
--- a/py/examples/langsmith/tracing_example.py
+++ b/py/examples/langsmith/tracing_example.py
@@ -10,6 +10,7 @@
 
 import os
 
+
 # Enable LangSmith tracing (required for traces to be sent to LangSmith)
 os.environ.setdefault("LANGCHAIN_TRACING_V2", "true")
 os.environ.setdefault("LANGCHAIN_PROJECT", "examples-wrappers-langsmith-tracing")
@@ -17,6 +18,7 @@
 # IMPORTANT: Call setup_langsmith BEFORE importing from langsmith
 from braintrust.wrappers.langsmith_wrapper import setup_langsmith
 
+
 # Set BRAINTRUST_STANDALONE=1 to completely replace LangSmith with Braintrust
 standalone = os.environ.get("BRAINTRUST_STANDALONE", "").lower() in ("1", "true", "yes")
 
diff --git a/py/examples/openai_example.py b/py/examples/openai_example.py
index a9c731eb..a0ead8d4 100755
--- a/py/examples/openai_example.py
+++ b/py/examples/openai_example.py
@@ -3,6 +3,7 @@
 from braintrust import init_logger, traced, wrap_openai
 from openai import OpenAI
 
+
 logger = init_logger(project="example-openai-project")
 client = wrap_openai(OpenAI())
 
diff --git a/py/examples/otel/basic_otel_example.py b/py/examples/otel/basic_otel_example.py
index cdec195c..1cdd56b0 100755
--- a/py/examples/otel/basic_otel_example.py
+++ b/py/examples/otel/basic_otel_example.py
@@ -9,6 +9,7 @@
 import os
 import time
 
+
 # Set environment variables
 os.environ.setdefault("BRAINTRUST_PARENT", "project_name:otel-examples")
 
@@ -18,6 +19,7 @@
 from opentelemetry.instrumentation.openai import OpenAIInstrumentor
 from opentelemetry.sdk.trace import TracerProvider
 
+
 # Set up the tracer provider
 provider = TracerProvider()
 trace.set_tracer_provider(provider)
diff --git a/py/examples/otel/bt-otel-context.py b/py/examples/otel/bt-otel-context.py
index cd3084e4..bf1c9bec 100644
--- a/py/examples/otel/bt-otel-context.py
+++ b/py/examples/otel/bt-otel-context.py
@@ -11,11 +11,13 @@
 
 import os
 
-os.environ['BRAINTRUST_OTEL_COMPAT'] = 'true'
+
+os.environ["BRAINTRUST_OTEL_COMPAT"] = "true"
 
 import braintrust
 from braintrust.otel import add_braintrust_span_processor
 
+
 PROJECT_NAME = "mixed-otel-braintrust-python-2"
 
 from opentelemetry import trace
@@ -30,14 +32,13 @@ def setup_otel():
 
     return trace.get_tracer(__name__, "1.0.0")
 
+
 def main():
     # Setup
     braintrust.login()
 
     tracer = setup_otel()
-    project = braintrust.init_logger(
-        project=PROJECT_NAME
-    )
+    project = braintrust.init_logger(project=PROJECT_NAME)
 
     # Demo 1: BT project as root span with OTEL instrumentation inside
     with project.start_span("trace1_root_bt") as session_span:
@@ -67,10 +68,9 @@ def trace1_child_bt_traced():
 
         trace1_child_bt_traced()
 
-
     # Demo 2: OTEL as root span with BT spans inside
     with tracer.start_as_current_span("trace2_root_otel") as otel_root:
-        otel_trace_id = format(otel_root.get_span_context().trace_id, '032x')
+        otel_trace_id = format(otel_root.get_span_context().trace_id, "032x")
         otel_root.set_attribute("type", "otel_root")
         otel_root.add_event("otel_root_start")
 
@@ -86,6 +86,7 @@ def trace1_child_bt_traced():
             @braintrust.traced
             def trace2_grandchild_bt1():
                 pass
+
             trace2_grandchild_bt1()
 
             # Nested BT span should also inherit same trace ID
@@ -95,6 +96,7 @@ def trace2_grandchild_bt1():
         @braintrust.traced
         def trace2_child_bt_traced():
             pass
+
         trace2_child_bt_traced()
 
         otel_root.add_event("otel_root_end")
@@ -103,7 +105,7 @@ def trace2_child_bt_traced():
     project.flush()
 
     # Then flush OTEL spans so they can attach to existing parents
-    if hasattr(trace.get_tracer_provider(), 'force_flush'):
+    if hasattr(trace.get_tracer_provider(), "force_flush"):
         trace.get_tracer_provider().force_flush(timeout_millis=5000)
 
 
diff --git a/py/examples/otel/distributed-tracing.py b/py/examples/otel/distributed-tracing.py
index f1c0bfeb..1499db29 100644
--- a/py/examples/otel/distributed-tracing.py
+++ b/py/examples/otel/distributed-tracing.py
@@ -16,8 +16,9 @@
 
 import os
 
+
 # Enable OTEL compatibility mode
-os.environ['BRAINTRUST_OTEL_COMPAT'] = 'true'
+os.environ["BRAINTRUST_OTEL_COMPAT"] = "true"
 
 import braintrust
 from braintrust.otel import (
@@ -31,14 +32,14 @@
 from opentelemetry.propagate import inject
 from opentelemetry.sdk.trace import TracerProvider
 
+
 PROJECT_NAME = "distributed-tracing-demo"
 
 
 def setup_otel():
     """Setup OTEL instrumentation with Braintrust processor."""
     provider = TracerProvider()
-    add_braintrust_span_processor(provider,
-                                  parent=f"project_name:different-project")
+    add_braintrust_span_processor(provider, parent="project_name:different-project")
     trace.set_tracer_provider(provider)
     return trace.get_tracer(__name__, "1.0.0")
 
@@ -61,10 +62,9 @@ def service_b_process_request(exported_context: str, tracer, project):
         with tracer.start_as_current_span("service_b.root") as fetch_span:
             # Nested operation in Service B
             with tracer.start_as_current_span("service_b.child"):
-                trace_id = format(fetch_span.get_span_context().trace_id, '032x')
+                trace_id = format(fetch_span.get_span_context().trace_id, "032x")
                 print(f"  Created OTEL child spans (trace_id: {trace_id})")
 
-
             # Ensure 'braintrust.parent' is set on the baggage.
             add_span_parent_to_baggage(fetch_span)
 
@@ -94,9 +94,7 @@ def service_c_process_request(headers: dict, project):
         span_id = analytics_span.span_id
         print(f"  Created BT span as child of OTEL parent (span_id: {span_id[:16]}...)")
         analytics_span.log(
-            input="Analytics data from Service B",
-            output="Processed analytics",
-            metadata={"service": "analytics"}
+            input="Analytics data from Service B", output="Processed analytics", metadata={"service": "analytics"}
         )
 
 
@@ -130,7 +128,7 @@ def main():
 
     # Flush all data
     project.flush()
-    if hasattr(trace.get_tracer_provider(), 'force_flush'):
+    if hasattr(trace.get_tracer_provider(), "force_flush"):
         trace.get_tracer_provider().force_flush(timeout_millis=5000)
 
     print(f"\n✓ Trace complete! All 3 services share trace_id: {trace_id[:16]}...")
diff --git a/py/examples/otel/filtered_otel_example.py b/py/examples/otel/filtered_otel_example.py
index 25910d22..e7518889 100755
--- a/py/examples/otel/filtered_otel_example.py
+++ b/py/examples/otel/filtered_otel_example.py
@@ -9,6 +9,7 @@
 import os
 import time
 
+
 # Set environment variables
 os.environ.setdefault("BRAINTRUST_PARENT", "project_name:otel-examples")
 os.environ.setdefault("BRAINTRUST_OTEL_FILTER_AI_SPANS", "false")
@@ -19,6 +20,7 @@
 from opentelemetry.instrumentation.openai import OpenAIInstrumentor
 from opentelemetry.sdk.trace import TracerProvider
 
+
 # Set up the tracer provider
 provider = TracerProvider()
 trace.set_tracer_provider(provider)
diff --git a/py/examples/otel/otel_eval.py b/py/examples/otel/otel_eval.py
index 8fb72bfe..db201432 100644
--- a/py/examples/otel/otel_eval.py
+++ b/py/examples/otel/otel_eval.py
@@ -7,8 +7,9 @@
 
 import os
 
+
 # Enable OTEL compatibility
-os.environ['BRAINTRUST_OTEL_COMPAT'] = 'true'
+os.environ["BRAINTRUST_OTEL_COMPAT"] = "true"
 
 from autoevals import Levenshtein
 from braintrust import Eval
@@ -16,12 +17,14 @@
 from opentelemetry import trace
 from opentelemetry.sdk.trace import TracerProvider
 
+
 # Setup OTEL tracing
 provider = TracerProvider()
 processor = BraintrustSpanProcessor(parent="project_name:otel-eval-example")
 provider.add_span_processor(processor)
 trace.set_tracer_provider(provider)
 
+
 def task_with_otel_tracing(input):
     tracer = trace.get_tracer(__name__)
 
@@ -34,6 +37,7 @@ def task_with_otel_tracing(input):
         span.set_attribute("output", result)
         return result
 
+
 # Run evaluation with OTEL tracing
 Eval(
     "Say Hi Bot",
diff --git a/py/examples/pydantic_ai_example.py b/py/examples/pydantic_ai_example.py
index 0092278d..da72a41b 100644
--- a/py/examples/pydantic_ai_example.py
+++ b/py/examples/pydantic_ai_example.py
@@ -4,11 +4,13 @@
 
 import braintrust
 
+
 braintrust.auto_instrument()
 logger = braintrust.init_logger(project="example-pydantic-ai-project")
 
 from pydantic_ai import Agent
 
+
 agent = Agent("openai:gpt-4o", system_prompt="You are a helpful assistant.")
 
 
diff --git a/py/examples/temporal/worker.py b/py/examples/temporal/worker.py
index 631d6c21..847a2786 100644
--- a/py/examples/temporal/worker.py
+++ b/py/examples/temporal/worker.py
@@ -6,6 +6,7 @@
 # Import only what we need to avoid loading optional dependencies
 from braintrust.logger import init_logger
 
+
 # Initialize logger at module level before importing plugin
 init_logger(project="temporal-example")
 
diff --git a/py/examples/temporal/workflow.py b/py/examples/temporal/workflow.py
index db64a679..0c0fe6d0 100644
--- a/py/examples/temporal/workflow.py
+++ b/py/examples/temporal/workflow.py
@@ -7,6 +7,7 @@
 from temporalio import activity, workflow
 from temporalio.common import RetryPolicy
 
+
 TASK_QUEUE_NAME = "braintrust-example-task-queue"
 
 
diff --git a/py/noxfile.py b/py/noxfile.py
index d5dd376c..be0e7798 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -17,6 +17,7 @@
 
 import nox
 
+
 # much faster than pip
 nox.options.default_venv_backend = "uv"
 
diff --git a/py/requirements-dev.txt b/py/requirements-dev.txt
index 48020b85..f8bc52e0 100644
--- a/py/requirements-dev.txt
+++ b/py/requirements-dev.txt
@@ -1,9 +1,5 @@
 # Also include build dependencies
-black
 datamodel-code-generator>=0.53.0
-flake8
-flake8-isort
-isort==5.12.0
 nox
 pre-commit
 pydoc-markdown
diff --git a/py/scripts/generate_types.py b/py/scripts/generate_types.py
index dcaa9e40..45670af6 100755
--- a/py/scripts/generate_types.py
+++ b/py/scripts/generate_types.py
@@ -6,6 +6,7 @@
 import subprocess
 import sys
 
+
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 OPENAPI_SPEC_PATH = os.path.join(SCRIPT_DIR, "../../generated_types.json")
 INTERNAL_TYPES_OUTPUT_PATH = os.path.join(SCRIPT_DIR, "../src/braintrust/_generated_types.py")
@@ -61,7 +62,9 @@ def cleanup_internal_types():
     # optional-but-not-nullable TypedDicts.
     contents = re.sub(
         r"(\s[A-Za-z0-9_]+: NotRequired\[)(.+?)(\])\n",
-        lambda m: m.group(0) if m.group(2).rstrip().endswith("None") else f"{m.group(1)}{m.group(2)} | None{m.group(3)}\n",
+        lambda m: m.group(0)
+        if m.group(2).rstrip().endswith("None")
+        else f"{m.group(1)}{m.group(2)} | None{m.group(3)}\n",
         contents,
     )
 
diff --git a/py/scripts/validate-release.py b/py/scripts/validate-release.py
index cb60ed7b..7a22a9e8 100644
--- a/py/scripts/validate-release.py
+++ b/py/scripts/validate-release.py
@@ -11,6 +11,7 @@
 import urllib.error
 import urllib.request
 
+
 STABLE_VERSION_RE = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$")
 PRERELEASE_VERSION_RE = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(a|b|rc)[0-9]+$")
 
@@ -39,9 +40,7 @@ def validate_release_type(release_type: str, version: str) -> None:
     if release_type == "stable" and not STABLE_VERSION_RE.fullmatch(version):
         raise ValueError(f"Stable releases require a version like X.Y.Z; found '{version}'")
     if release_type == "prerelease" and not PRERELEASE_VERSION_RE.fullmatch(version):
-        raise ValueError(
-            f"Prereleases require a version like X.Y.Zrc1, X.Y.Za1, or X.Y.Zb1; found '{version}'"
-        )
+        raise ValueError(f"Prereleases require a version like X.Y.Zrc1, X.Y.Za1, or X.Y.Zb1; found '{version}'")
 
 
 def check_tag_does_not_exist(tag: str) -> None:
diff --git a/py/setup.py b/py/setup.py
index 2b7826a3..8730a45e 100644
--- a/py/setup.py
+++ b/py/setup.py
@@ -2,6 +2,7 @@
 
 import setuptools
 
+
 dir_name = os.path.abspath(os.path.dirname(__file__))
 
 version_contents = {}
diff --git a/py/src/braintrust/__init__.py b/py/src/braintrust/__init__.py
index 26dd1e2e..32ef4999 100644
--- a/py/src/braintrust/__init__.py
+++ b/py/src/braintrust/__init__.py
@@ -52,6 +52,7 @@ def is_equal(expected, output):
 # Check env var at import time for auto-instrumentation
 import os
 
+
 if os.getenv("BRAINTRUST_INSTRUMENT_THREADS", "").lower() in ("true", "1", "yes"):
     try:
         from .wrappers.threads import setup_threads
diff --git a/py/src/braintrust/_generated_types.py b/py/src/braintrust/_generated_types.py
index bf621221..e3b19c24 100644
--- a/py/src/braintrust/_generated_types.py
+++ b/py/src/braintrust/_generated_types.py
@@ -11,18 +11,19 @@
 
 from typing_extensions import NotRequired
 
+
 AclObjectType: TypeAlias = Literal[
-    'organization',
-    'project',
-    'experiment',
-    'dataset',
-    'prompt',
-    'prompt_session',
-    'group',
-    'role',
-    'org_member',
-    'project_log',
-    'org_project',
+    "organization",
+    "project",
+    "experiment",
+    "dataset",
+    "prompt",
+    "prompt_session",
+    "group",
+    "role",
+    "org_member",
+    "project_log",
+    "org_project",
 ]
 """
 The object type that the ACL applies to
@@ -60,7 +61,7 @@ class AnyModelParamsToolChoiceFunction(TypedDict):
 
 
 class AnyModelParamsToolChoice(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     function: AnyModelParamsToolChoiceFunction
 
 
@@ -105,24 +106,24 @@ class ApiKey(TypedDict):
 
 
 class AsyncScoringControlAsyncScoringControl(TypedDict):
-    kind: Literal['score_update']
+    kind: Literal["score_update"]
     token: NotRequired[str | None]
 
 
 class AsyncScoringControlAsyncScoringControl2(TypedDict):
-    kind: Literal['state_force_reselect']
+    kind: Literal["state_force_reselect"]
 
 
 class AsyncScoringControlAsyncScoringControl3(TypedDict):
-    kind: Literal['state_enabled_force_rescore']
+    kind: Literal["state_enabled_force_rescore"]
 
 
 class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope(TypedDict):
-    type: Literal['span']
+    type: Literal["span"]
 
 
 class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope1(TypedDict):
-    type: Literal['trace']
+    type: Literal["trace"]
 
 
 class AsyncScoringControlAsyncScoringControl4TriggeredFunction(TypedDict):
@@ -135,23 +136,23 @@ class AsyncScoringControlAsyncScoringControl4TriggeredFunction(TypedDict):
 
 
 class AsyncScoringControlAsyncScoringControl4(TypedDict):
-    kind: Literal['trigger_functions']
+    kind: Literal["trigger_functions"]
     triggered_functions: Sequence[AsyncScoringControlAsyncScoringControl4TriggeredFunction]
 
 
 class AsyncScoringControlAsyncScoringControl5(TypedDict):
-    kind: Literal['complete_triggered_functions']
+    kind: Literal["complete_triggered_functions"]
     function_ids: Sequence[Any]
     triggered_xact_id: str
 
 
 class AsyncScoringControlAsyncScoringControl6(TypedDict):
-    kind: Literal['mark_attempt_failed']
+    kind: Literal["mark_attempt_failed"]
     function_ids: Sequence[Any]
 
 
 class AsyncScoringStateAsyncScoringState(TypedDict):
-    status: Literal['enabled']
+    status: Literal["enabled"]
     token: str
     function_ids: Sequence[Any]
     skip_logging: NotRequired[bool | None]
@@ -159,14 +160,14 @@ class AsyncScoringStateAsyncScoringState(TypedDict):
 
 
 class AsyncScoringStateAsyncScoringState1(TypedDict):
-    status: Literal['disabled']
+    status: Literal["disabled"]
 
 
 AsyncScoringState: TypeAlias = AsyncScoringStateAsyncScoringState | AsyncScoringStateAsyncScoringState1 | None
 
 
 class PreprocessorPreprocessor(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     id: str
     version: NotRequired[str | None]
     """
@@ -206,7 +207,7 @@ class BatchedFacetDataFacet(TypedDict):
 
 
 class BraintrustAttachmentReference(TypedDict):
-    type: Literal['braintrust_attachment']
+    type: Literal["braintrust_attachment"]
     """
     An identifier to help disambiguate parsing.
     """
@@ -233,49 +234,49 @@ class BraintrustModelParams(TypedDict):
 class CallEventCallEvent(TypedDict):
     id: NotRequired[str | None]
     data: str
-    event: Literal['text_delta']
+    event: Literal["text_delta"]
 
 
 class CallEventCallEvent1(TypedDict):
     id: NotRequired[str | None]
     data: str
-    event: Literal['reasoning_delta']
+    event: Literal["reasoning_delta"]
 
 
 class CallEventCallEvent2(TypedDict):
     id: NotRequired[str | None]
     data: str
-    event: Literal['json_delta']
+    event: Literal["json_delta"]
 
 
 class CallEventCallEvent3(TypedDict):
     id: NotRequired[str | None]
     data: str
-    event: Literal['progress']
+    event: Literal["progress"]
 
 
 class CallEventCallEvent4(TypedDict):
     id: NotRequired[str | None]
     data: str
-    event: Literal['error']
+    event: Literal["error"]
 
 
 class CallEventCallEvent5(TypedDict):
     id: NotRequired[str | None]
     data: str
-    event: Literal['console']
+    event: Literal["console"]
 
 
 class CallEventCallEvent6(TypedDict):
     id: NotRequired[str | None]
-    event: Literal['start']
-    data: Literal['']
+    event: Literal["start"]
+    data: Literal[""]
 
 
 class CallEventCallEvent7(TypedDict):
     id: NotRequired[str | None]
-    event: Literal['done']
-    data: Literal['']
+    event: Literal["done"]
+    data: Literal[""]
 
 
 CallEvent: TypeAlias = (
@@ -298,42 +299,42 @@ class ChatCompletionContentPartFileFile(TypedDict):
 
 class ChatCompletionContentPartFileWithTitle(TypedDict):
     file: ChatCompletionContentPartFileFile
-    type: Literal['file']
+    type: Literal["file"]
 
 
 class ChatCompletionContentPartImageWithTitleImageUrl(TypedDict):
     url: str
-    detail: NotRequired[Literal['auto'] | Literal['low'] | Literal['high'] | None]
+    detail: NotRequired[Literal["auto"] | Literal["low"] | Literal["high"] | None]
 
 
 class ChatCompletionContentPartImageWithTitle(TypedDict):
     image_url: ChatCompletionContentPartImageWithTitleImageUrl
-    type: Literal['image_url']
+    type: Literal["image_url"]
 
 
 class ChatCompletionContentPartTextCacheControl(TypedDict):
-    type: Literal['ephemeral']
+    type: Literal["ephemeral"]
 
 
 class ChatCompletionContentPartText(TypedDict):
     text: str
-    type: Literal['text']
+    type: Literal["text"]
     cache_control: NotRequired[ChatCompletionContentPartTextCacheControl | None]
 
 
 class ChatCompletionContentPartTextWithTitleCacheControl(TypedDict):
-    type: Literal['ephemeral']
+    type: Literal["ephemeral"]
 
 
 class ChatCompletionContentPartTextWithTitle(TypedDict):
     text: str
-    type: Literal['text']
+    type: Literal["text"]
     cache_control: NotRequired[ChatCompletionContentPartTextWithTitleCacheControl | None]
 
 
 class ChatCompletionMessageParamChatCompletionMessageParam(TypedDict):
     content: str | Sequence[ChatCompletionContentPartText]
-    role: Literal['system']
+    role: Literal["system"]
     name: NotRequired[str | None]
 
 
@@ -344,24 +345,24 @@ class ChatCompletionMessageParamChatCompletionMessageParam2FunctionCall(TypedDic
 
 class ChatCompletionMessageParamChatCompletionMessageParam3(TypedDict):
     content: str | Sequence[ChatCompletionContentPartText]
-    role: Literal['tool']
+    role: Literal["tool"]
     tool_call_id: str
 
 
 class ChatCompletionMessageParamChatCompletionMessageParam4(TypedDict):
     content: str | None
     name: str
-    role: Literal['function']
+    role: Literal["function"]
 
 
 class ChatCompletionMessageParamChatCompletionMessageParam5(TypedDict):
     content: str | Sequence[ChatCompletionContentPartText]
-    role: Literal['developer']
+    role: Literal["developer"]
     name: NotRequired[str | None]
 
 
 class ChatCompletionMessageParamChatCompletionMessageParam6(TypedDict):
-    role: Literal['model']
+    role: Literal["model"]
     content: NotRequired[str | None]
 
 
@@ -378,12 +379,12 @@ class ChatCompletionMessageToolCallFunction(TypedDict):
 class ChatCompletionMessageToolCall(TypedDict):
     id: str
     function: ChatCompletionMessageToolCallFunction
-    type: Literal['function']
+    type: Literal["function"]
 
 
 class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam(TypedDict):
     content: str | Sequence[ChatCompletionContentPartText]
-    role: Literal['system']
+    role: Literal["system"]
     name: NotRequired[str | None]
 
 
@@ -393,7 +394,7 @@ class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam2FunctionC
 
 
 class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam2(TypedDict):
-    role: Literal['assistant']
+    role: Literal["assistant"]
     content: NotRequired[str | Sequence[ChatCompletionContentPartText] | None]
     function_call: NotRequired[ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam2FunctionCall | None]
     name: NotRequired[str | None]
@@ -403,19 +404,19 @@ class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam2(TypedDic
 
 class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam3(TypedDict):
     content: str | Sequence[ChatCompletionContentPartText]
-    role: Literal['tool']
+    role: Literal["tool"]
     tool_call_id: str
 
 
 class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam4(TypedDict):
     content: str | None
     name: str
-    role: Literal['function']
+    role: Literal["function"]
 
 
 class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam5(TypedDict):
     content: str | Sequence[ChatCompletionContentPartText]
-    role: Literal['developer']
+    role: Literal["developer"]
     name: NotRequired[str | None]
 
 
@@ -427,36 +428,36 @@ class ChatCompletionToolFunction(TypedDict):
 
 class ChatCompletionTool(TypedDict):
     function: ChatCompletionToolFunction
-    type: Literal['function']
+    type: Literal["function"]
 
 
 class CodeBundleRuntimeContext(TypedDict):
-    runtime: Literal['node', 'python', 'browser', 'quickjs']
+    runtime: Literal["node", "python", "browser", "quickjs"]
     version: str
 
 
 class CodeBundleLocationPosition(TypedDict):
-    type: Literal['task']
+    type: Literal["task"]
 
 
 class CodeBundleLocationPosition1(TypedDict):
-    type: Literal['scorer']
+    type: Literal["scorer"]
     index: int
 
 
 class CodeBundleLocation(TypedDict):
-    type: Literal['experiment']
+    type: Literal["experiment"]
     eval_name: str
     position: CodeBundleLocationPosition | CodeBundleLocationPosition1
 
 
 class CodeBundleLocation1(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     index: int
 
 
 class CodeBundleLocation2SandboxSpec(TypedDict):
-    provider: Literal['modal']
+    provider: Literal["modal"]
     snapshot_ref: str
     """
     sandbox snapshot ref
@@ -464,11 +465,11 @@ class CodeBundleLocation2SandboxSpec(TypedDict):
 
 
 class CodeBundleLocation2SandboxSpec1(TypedDict):
-    provider: Literal['lambda']
+    provider: Literal["lambda"]
 
 
 class CodeBundleLocation2(TypedDict):
-    type: Literal['sandbox']
+    type: Literal["sandbox"]
     sandbox_spec: CodeBundleLocation2SandboxSpec | CodeBundleLocation2SandboxSpec1
     entrypoints: NotRequired[Sequence[str] | None]
     """
@@ -546,7 +547,7 @@ class EnvVar(TypedDict):
     """
     Unique identifier for the environment variable
     """
-    object_type: Literal['organization', 'project', 'function']
+    object_type: Literal["organization", "project", "function"]
     """
     The type of the object the environment variable is scoped for
     """
@@ -574,7 +575,7 @@ class EnvVar(TypedDict):
     """
     Optional classification for the secret (for example, the AI provider name)
     """
-    secret_category: NotRequired[Literal['env_var', 'ai_provider', 'sandbox_provider'] | None]
+    secret_category: NotRequired[Literal["env_var", "ai_provider", "sandbox_provider"] | None]
     """
     The category of the secret: env_var for regular environment variables, ai_provider for AI provider API keys
     """
@@ -601,7 +602,7 @@ class EvalStatusPageConfig(TypedDict):
     """
     Field to sort results by (format: 'score:<name>' or 'metric:<name>')
     """
-    sort_order: NotRequired[Literal['asc', 'desc'] | None]
+    sort_order: NotRequired[Literal["asc", "desc"] | None]
     """
     Sort order (ascending or descending)
     """
@@ -611,7 +612,7 @@ class EvalStatusPageConfig(TypedDict):
     """
 
 
-EvalStatusPageTheme: TypeAlias = Literal['light', 'dark']
+EvalStatusPageTheme: TypeAlias = Literal["light", "dark"]
 """
 The theme for the page
 """
@@ -675,7 +676,7 @@ class ExperimentEventContext(TypedDict):
 
 
 class ExtendedSavedFunctionIdExtendedSavedFunctionId(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     id: str
     version: NotRequired[str | None]
     """
@@ -684,13 +685,13 @@ class ExtendedSavedFunctionIdExtendedSavedFunctionId(TypedDict):
 
 
 class ExtendedSavedFunctionIdExtendedSavedFunctionId2(TypedDict):
-    type: Literal['slug']
+    type: Literal["slug"]
     project_id: str
     slug: str
 
 
 class ExternalAttachmentReference(TypedDict):
-    type: Literal['external_attachment']
+    type: Literal["external_attachment"]
     """
     An identifier to help disambiguate parsing.
     """
@@ -709,7 +710,7 @@ class ExternalAttachmentReference(TypedDict):
 
 
 class Preprocessor1Preprocessor1(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     id: str
     version: NotRequired[str | None]
     """
@@ -743,20 +744,20 @@ class FunctionFunctionSchema(TypedDict):
 
 
 class FunctionDataFunctionData(TypedDict):
-    type: Literal['prompt']
+    type: Literal["prompt"]
 
 
 class Data(CodeBundle):
-    type: Literal['bundle']
+    type: Literal["bundle"]
 
 
 class FunctionDataFunctionData1DataRuntimeContext(TypedDict):
-    runtime: Literal['node', 'python', 'browser', 'quickjs']
+    runtime: Literal["node", "python", "browser", "quickjs"]
     version: str
 
 
 class FunctionDataFunctionData1Data(TypedDict):
-    type: Literal['inline']
+    type: Literal["inline"]
     runtime_context: FunctionDataFunctionData1DataRuntimeContext
     code: str
     code_hash: NotRequired[str | None]
@@ -766,12 +767,12 @@ class FunctionDataFunctionData1Data(TypedDict):
 
 
 class FunctionDataFunctionData1(TypedDict):
-    type: Literal['code']
+    type: Literal["code"]
     data: Data | FunctionDataFunctionData1Data
 
 
 class FunctionDataFunctionData2(TypedDict):
-    type: Literal['remote_eval']
+    type: Literal["remote_eval"]
     endpoint: str
     eval_name: str
     parameters: Mapping[str, Any]
@@ -782,14 +783,14 @@ class FunctionDataFunctionData2(TypedDict):
 
 
 class FunctionDataFunctionData4Schema(TypedDict):
-    type: Literal['object']
+    type: Literal["object"]
     properties: Mapping[str, Mapping[str, Any]]
     required: NotRequired[Sequence[str] | None]
     additionalProperties: NotRequired[bool | None]
 
 
 class FunctionDataFunctionData4(TypedDict):
-    type: Literal['parameters']
+    type: Literal["parameters"]
     data: Mapping[str, Any]
     """
     The parameters data
@@ -800,7 +801,7 @@ class FunctionDataFunctionData4(TypedDict):
     """
 
 
-FunctionFormat: TypeAlias = Literal['llm', 'code', 'global', 'graph', 'topic_map']
+FunctionFormat: TypeAlias = Literal["llm", "code", "global", "graph", "topic_map"]
 
 
 class FunctionIdFunctionId(TypedDict):
@@ -845,7 +846,7 @@ class FunctionIdFunctionId3(TypedDict):
 
 
 class FunctionIdFunctionId4InlineContext(TypedDict):
-    runtime: Literal['node', 'python', 'browser', 'quickjs']
+    runtime: Literal["node", "python", "browser", "quickjs"]
     version: str
 
 
@@ -853,35 +854,35 @@ class FunctionIdFunctionId4InlineContext(TypedDict):
 
 
 FunctionObjectType: TypeAlias = Literal[
-    'prompt',
-    'tool',
-    'scorer',
-    'task',
-    'workflow',
-    'custom_view',
-    'preprocessor',
-    'facet',
-    'classifier',
-    'parameters',
-    'sandbox',
+    "prompt",
+    "tool",
+    "scorer",
+    "task",
+    "workflow",
+    "custom_view",
+    "preprocessor",
+    "facet",
+    "classifier",
+    "parameters",
+    "sandbox",
 ]
 
 
-FunctionOutputType: TypeAlias = Literal['completion', 'score', 'facet', 'classification', 'any']
+FunctionOutputType: TypeAlias = Literal["completion", "score", "facet", "classification", "any"]
 
 
 FunctionTypeEnum: TypeAlias = Literal[
-    'llm',
-    'scorer',
-    'task',
-    'tool',
-    'custom_view',
-    'preprocessor',
-    'facet',
-    'classifier',
-    'tag',
-    'parameters',
-    'sandbox',
+    "llm",
+    "scorer",
+    "task",
+    "tool",
+    "custom_view",
+    "preprocessor",
+    "facet",
+    "classifier",
+    "tag",
+    "parameters",
+    "sandbox",
 ]
 """
 The type of global function. Defaults to 'scorer'.
@@ -889,34 +890,34 @@ class FunctionIdFunctionId4InlineContext(TypedDict):
 
 
 FunctionTypeEnumNullish: TypeAlias = Literal[
-    'llm',
-    'scorer',
-    'task',
-    'tool',
-    'custom_view',
-    'preprocessor',
-    'facet',
-    'classifier',
-    'tag',
-    'parameters',
-    'sandbox',
+    "llm",
+    "scorer",
+    "task",
+    "tool",
+    "custom_view",
+    "preprocessor",
+    "facet",
+    "classifier",
+    "tag",
+    "parameters",
+    "sandbox",
 ]
 
 
 class GitMetadataSettings(TypedDict):
-    collect: Literal['all', 'none', 'some']
+    collect: Literal["all", "none", "some"]
     fields: NotRequired[
         Sequence[
             Literal[
-                'commit',
-                'branch',
-                'tag',
-                'dirty',
-                'author_name',
-                'author_email',
-                'commit_message',
-                'commit_time',
-                'git_diff',
+                "commit",
+                "branch",
+                "tag",
+                "dirty",
+                "author_name",
+                "author_email",
+                "commit_message",
+                "commit_time",
+                "git_diff",
             ]
         ]
     ]
@@ -941,7 +942,7 @@ class GraphEdgeTarget(TypedDict):
 class GraphEdge(TypedDict):
     source: GraphEdgeSource
     target: GraphEdgeTarget
-    purpose: Literal['control', 'data', 'messages']
+    purpose: Literal["control", "data", "messages"]
     """
     The purpose of the edge
     """
@@ -967,7 +968,7 @@ class GraphNodeGraphNode(TypedDict):
     """
     The position of the node
     """
-    type: Literal['function']
+    type: Literal["function"]
     function: FunctionIdRef
 
 
@@ -991,7 +992,7 @@ class GraphNodeGraphNode1(TypedDict):
     """
     The position of the node
     """
-    type: Literal['input']
+    type: Literal["input"]
     """
     The input to the graph
     """
@@ -1017,7 +1018,7 @@ class GraphNodeGraphNode2(TypedDict):
     """
     The position of the node
     """
-    type: Literal['output']
+    type: Literal["output"]
     """
     The output of the graph
     """
@@ -1043,7 +1044,7 @@ class GraphNodeGraphNode3(TypedDict):
     """
     The position of the node
     """
-    type: Literal['literal']
+    type: Literal["literal"]
     value: NotRequired[Any | None]
     """
     A literal value to be returned
@@ -1070,7 +1071,7 @@ class GraphNodeGraphNode4(TypedDict):
     """
     The position of the node
     """
-    type: Literal['btql']
+    type: Literal["btql"]
     expr: str
     """
     A BTQL expression to be evaluated
@@ -1097,7 +1098,7 @@ class GraphNodeGraphNode5(TypedDict):
     """
     The position of the node
     """
-    type: Literal['gate']
+    type: Literal["gate"]
     condition: NotRequired[str | None]
     """
     A BTQL expression to be evaluated
@@ -1124,7 +1125,7 @@ class GraphNodeGraphNode6(TypedDict):
     """
     The position of the node
     """
-    type: Literal['aggregator']
+    type: Literal["aggregator"]
 
 
 class GraphNodeGraphNode7Position(TypedDict):
@@ -1182,7 +1183,7 @@ class Group(TypedDict):
 
 
 class GroupScope(TypedDict):
-    type: Literal['group']
+    type: Literal["group"]
     group_by: str
     """
     Field path to group by, e.g. metadata.session_id
@@ -1193,10 +1194,10 @@ class GroupScope(TypedDict):
     """
 
 
-IfExists: TypeAlias = Literal['error', 'ignore', 'replace']
+IfExists: TypeAlias = Literal["error", "ignore", "replace"]
 
 
-ImageRenderingMode: TypeAlias = Literal['auto', 'click_to_load', 'blocked']
+ImageRenderingMode: TypeAlias = Literal["auto", "click_to_load", "blocked"]
 """
 Controls how images are rendered in the UI: 'auto' loads images automatically, 'click_to_load' shows a placeholder until clicked, 'blocked' prevents image loading entirely
 """
@@ -1252,7 +1253,7 @@ class InvokeFunctionInvokeFunction3(TypedDict):
 
 
 class InvokeFunctionInvokeFunction4InlineContext(TypedDict):
-    runtime: Literal['node', 'python', 'browser', 'quickjs']
+    runtime: Literal["node", "python", "browser", "quickjs"]
     version: str
 
 
@@ -1292,7 +1293,7 @@ class InvokeParentInvokeParentRowIds(TypedDict):
 
 
 class InvokeParentInvokeParent(TypedDict):
-    object_type: Literal['project_logs', 'experiment', 'playground_logs']
+    object_type: Literal["project_logs", "experiment", "playground_logs"]
     object_id: str
     """
     The id of the container object you are logging to
@@ -1348,7 +1349,7 @@ class MCPServer(TypedDict):
     """
 
 
-MessageRole: TypeAlias = Literal['system', 'user', 'assistant', 'function', 'tool', 'model', 'developer']
+MessageRole: TypeAlias = Literal["system", "user", "assistant", "function", "tool", "model", "developer"]
 
 
 class ModelParamsModelParamsToolChoiceFunction(TypedDict):
@@ -1356,7 +1357,7 @@ class ModelParamsModelParamsToolChoiceFunction(TypedDict):
 
 
 class ModelParamsModelParamsToolChoice(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     function: ModelParamsModelParamsToolChoiceFunction
 
 
@@ -1404,7 +1405,7 @@ class ModelParamsModelParams4(TypedDict):
 
 
 class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     id: str
     version: NotRequired[str | None]
     """
@@ -1413,7 +1414,7 @@ class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
 
 
 class NullableSavedFunctionIdNullableSavedFunctionId1(TypedDict):
-    type: Literal['global']
+    type: Literal["global"]
     name: str
     function_type: NotRequired[FunctionTypeEnum | None]
 
@@ -1427,7 +1428,7 @@ class NullableSavedFunctionIdNullableSavedFunctionId1(TypedDict):
 
 
 class ObjectReference(TypedDict):
-    object_type: Literal['project_logs', 'experiment', 'dataset', 'prompt', 'function', 'prompt_session']
+    object_type: Literal["project_logs", "experiment", "dataset", "prompt", "function", "prompt_session"]
     """
     Type of the object the event is originating from.
     """
@@ -1450,7 +1451,7 @@ class ObjectReference(TypedDict):
 
 
 class ObjectReferenceNullish(TypedDict):
-    object_type: Literal['project_logs', 'experiment', 'dataset', 'prompt', 'function', 'prompt_session']
+    object_type: Literal["project_logs", "experiment", "dataset", "prompt", "function", "prompt_session"]
     """
     Type of the object the event is originating from.
     """
@@ -1494,7 +1495,7 @@ class Organization(TypedDict):
 
 
 Permission: TypeAlias = Literal[
-    'create', 'read', 'update', 'delete', 'create_acls', 'read_acls', 'update_acls', 'delete_acls'
+    "create", "read", "update", "delete", "create_acls", "read_acls", "update_acls", "delete_acls"
 ]
 """
 Each permission permits a certain type of operation on an object in the system
@@ -1504,7 +1505,7 @@ class Organization(TypedDict):
 
 
 class ProjectAutomationConfigAction(TypedDict):
-    type: Literal['webhook']
+    type: Literal["webhook"]
     """
     The type of action to take
     """
@@ -1515,7 +1516,7 @@ class ProjectAutomationConfigAction(TypedDict):
 
 
 class ProjectAutomationConfigAction1(TypedDict):
-    type: Literal['slack']
+    type: Literal["slack"]
     """
     The type of action to take
     """
@@ -1534,7 +1535,7 @@ class ProjectAutomationConfigAction1(TypedDict):
 
 
 class ProjectAutomationConfig(TypedDict):
-    event_type: Literal['logs']
+    event_type: Literal["logs"]
     """
     The type of automation.
     """
@@ -1553,15 +1554,15 @@ class ProjectAutomationConfig(TypedDict):
 
 
 class ProjectAutomationConfig1ExportDefinition(TypedDict):
-    type: Literal['log_traces']
+    type: Literal["log_traces"]
 
 
 class ProjectAutomationConfig1ExportDefinition1(TypedDict):
-    type: Literal['log_spans']
+    type: Literal["log_spans"]
 
 
 class ProjectAutomationConfig1ExportDefinition2(TypedDict):
-    type: Literal['btql_query']
+    type: Literal["btql_query"]
     btql_query: str
     """
     The BTQL query to export
@@ -1569,7 +1570,7 @@ class ProjectAutomationConfig1ExportDefinition2(TypedDict):
 
 
 class ProjectAutomationConfig1Credentials(TypedDict):
-    type: Literal['aws_iam']
+    type: Literal["aws_iam"]
     role_arn: str
     """
     The ARN of the IAM role to use
@@ -1581,7 +1582,7 @@ class ProjectAutomationConfig1Credentials(TypedDict):
 
 
 class ProjectAutomationConfig1(TypedDict):
-    event_type: Literal['btql_export']
+    event_type: Literal["btql_export"]
     """
     The type of automation.
     """
@@ -1597,7 +1598,7 @@ class ProjectAutomationConfig1(TypedDict):
     """
     The path to export the results to. It should include the storage protocol and prefix, e.g. s3://bucket-name/path/to/export
     """
-    format: Literal['jsonl', 'parquet']
+    format: Literal["jsonl", "parquet"]
     """
     The format to export the results in
     """
@@ -1613,7 +1614,7 @@ class ProjectAutomationConfig1(TypedDict):
 
 
 class ProjectAutomationConfig3Action(TypedDict):
-    type: Literal['webhook']
+    type: Literal["webhook"]
     """
     The type of action to take
     """
@@ -1624,7 +1625,7 @@ class ProjectAutomationConfig3Action(TypedDict):
 
 
 class ProjectAutomationConfig3Action1(TypedDict):
-    type: Literal['slack']
+    type: Literal["slack"]
     """
     The type of action to take
     """
@@ -1643,7 +1644,7 @@ class ProjectAutomationConfig3Action1(TypedDict):
 
 
 class ProjectAutomationConfig3(TypedDict):
-    event_type: Literal['environment_update']
+    event_type: Literal["environment_update"]
     """
     The type of automation.
     """
@@ -1725,7 +1726,7 @@ class ProjectScoreCategory(TypedDict):
     """
 
 
-ProjectScoreType: TypeAlias = Literal['slider', 'categorical', 'weighted', 'minimum', 'maximum', 'online', 'free-form']
+ProjectScoreType: TypeAlias = Literal["slider", "categorical", "weighted", "minimum", "maximum", "online", "free-form"]
 """
 The type of the configured score
 """
@@ -1735,7 +1736,7 @@ class ProjectSettingsSpanFieldOrderItem(TypedDict):
     object_type: str
     column_id: str
     position: str
-    layout: NotRequired[Literal['full'] | Literal['two_column'] | None]
+    layout: NotRequired[Literal["full"] | Literal["two_column"] | None]
 
 
 class ProjectSettingsRemoteEvalSource(TypedDict):
@@ -1801,12 +1802,12 @@ class ProjectTag(TypedDict):
 
 
 class PromptBlockDataPromptBlockData1(TypedDict):
-    type: Literal['completion']
+    type: Literal["completion"]
     content: str
 
 
 class PromptBlockDataNullishPromptBlockDataNullish1(TypedDict):
-    type: Literal['completion']
+    type: Literal["completion"]
     content: str
 
 
@@ -1823,7 +1824,7 @@ class PromptDataNullishOrigin(TypedDict):
 
 
 class PromptParserNullish(TypedDict):
-    type: Literal['llm_classifier']
+    type: Literal["llm_classifier"]
     use_cot: bool
     choice_scores: NotRequired[Mapping[str, float] | None]
     """
@@ -1931,11 +1932,11 @@ class RepoInfo(TypedDict):
 
 
 class ResponseFormatResponseFormat(TypedDict):
-    type: Literal['json_object']
+    type: Literal["json_object"]
 
 
 class ResponseFormatResponseFormat2(TypedDict):
-    type: Literal['text']
+    type: Literal["text"]
 
 
 class ResponseFormatJsonSchema(TypedDict):
@@ -1946,16 +1947,16 @@ class ResponseFormatJsonSchema(TypedDict):
 
 
 class ResponseFormatNullishResponseFormatNullish(TypedDict):
-    type: Literal['json_object']
+    type: Literal["json_object"]
 
 
 class ResponseFormatNullishResponseFormatNullish1(TypedDict):
-    type: Literal['json_schema']
+    type: Literal["json_schema"]
     json_schema: ResponseFormatJsonSchema
 
 
 class ResponseFormatNullishResponseFormatNullish2(TypedDict):
-    type: Literal['text']
+    type: Literal["text"]
 
 
 ResponseFormatNullish: TypeAlias = (
@@ -1966,7 +1967,7 @@ class ResponseFormatNullishResponseFormatNullish2(TypedDict):
 )
 
 
-RetentionObjectType: TypeAlias = Literal['project_logs', 'experiment', 'dataset']
+RetentionObjectType: TypeAlias = Literal["project_logs", "experiment", "dataset"]
 """
 The object type that the retention policy applies to
 """
@@ -2087,7 +2088,7 @@ class TaskTask3(TypedDict):
 
 
 class TaskTask4InlineContext(TypedDict):
-    runtime: Literal['node', 'python', 'browser', 'quickjs']
+    runtime: Literal["node", "python", "browser", "quickjs"]
     version: str
 
 
@@ -2144,7 +2145,7 @@ class ParentParentRowIds(TypedDict):
 
 
 class ParentParent(TypedDict):
-    object_type: Literal['project_logs', 'experiment', 'playground_logs']
+    object_type: Literal["project_logs", "experiment", "playground_logs"]
     object_id: str
     """
     The id of the container object you are logging to
@@ -2178,7 +2179,7 @@ class RunEvalMcpAuth(TypedDict):
 
 
 class SavedFunctionIdSavedFunctionId(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     id: str
     version: NotRequired[str | None]
     """
@@ -2187,7 +2188,7 @@ class SavedFunctionIdSavedFunctionId(TypedDict):
 
 
 class SavedFunctionIdSavedFunctionId1(TypedDict):
-    type: Literal['global']
+    type: Literal["global"]
     name: str
     function_type: NotRequired[FunctionTypeEnum | None]
 
@@ -2267,11 +2268,11 @@ class SpanIFrame(TypedDict):
 
 
 class SpanScope(TypedDict):
-    type: Literal['span']
+    type: Literal["span"]
 
 
 SpanType: TypeAlias = Literal[
-    'llm', 'score', 'function', 'eval', 'task', 'tool', 'automation', 'facet', 'preprocessor', 'classifier', 'review'
+    "llm", "score", "function", "eval", "task", "tool", "automation", "facet", "preprocessor", "classifier", "review"
 ]
 """
 Type of the span, for display purposes only
@@ -2279,7 +2280,7 @@ class SpanScope(TypedDict):
 
 
 class SSEConsoleEventData(TypedDict):
-    stream: Literal['stderr', 'stdout']
+    stream: Literal["stderr", "stdout"]
     message: str
 
 
@@ -2293,11 +2294,11 @@ class SSEProgressEventData(TypedDict):
     format: FunctionFormat
     output_type: FunctionOutputType
     name: str
-    event: Literal['reasoning_delta', 'text_delta', 'json_delta', 'error', 'console', 'start', 'done', 'progress']
+    event: Literal["reasoning_delta", "text_delta", "json_delta", "error", "console", "start", "done", "progress"]
     data: str
 
 
-StreamingMode: TypeAlias = Literal['auto', 'parallel', 'json', 'text']
+StreamingMode: TypeAlias = Literal["auto", "parallel", "json", "text"]
 """
 The mode format of the returned value (defaults to 'auto')
 """
@@ -2311,29 +2312,29 @@ class ToolFunctionDefinitionFunction(TypedDict):
 
 
 class ToolFunctionDefinition(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     function: ToolFunctionDefinitionFunction
 
 
 TopicAutomationConfigBackfillTimeRange = TypedDict(
-    'TopicAutomationConfigBackfillTimeRange',
+    "TopicAutomationConfigBackfillTimeRange",
     {
-        'from': str,
-        'to': str,
+        "from": str,
+        "to": str,
     },
 )
 
 
 class TopicAutomationDataScopeTopicAutomationDataScope(TypedDict):
-    type: Literal['project_logs']
+    type: Literal["project_logs"]
 
 
 class TopicAutomationDataScopeTopicAutomationDataScope1(TypedDict):
-    type: Literal['project_experiments']
+    type: Literal["project_experiments"]
 
 
 class TopicAutomationDataScopeTopicAutomationDataScope2(TypedDict):
-    type: Literal['experiment']
+    type: Literal["experiment"]
     experiment_id: str
 
 
@@ -2349,7 +2350,7 @@ class TopicAutomationDataScopeTopicAutomationDataScope2(TypedDict):
 
 
 class TopicMapData(TypedDict):
-    type: Literal['topic_map']
+    type: Literal["topic_map"]
     source_facet: str
     """
     The facet field name to use as input for classification
@@ -2377,7 +2378,7 @@ class TopicMapData(TypedDict):
 
 
 class Function1Function1(TypedDict):
-    type: Literal['function']
+    type: Literal["function"]
     id: str
     version: NotRequired[str | None]
     """
@@ -2386,7 +2387,7 @@ class Function1Function1(TypedDict):
 
 
 class Function1Function11(TypedDict):
-    type: Literal['global']
+    type: Literal["global"]
     name: str
     function_type: NotRequired[FunctionTypeEnum | None]
 
@@ -2415,7 +2416,7 @@ class TopicMapFunctionAutomation(TypedDict):
 
 
 class TraceScope(TypedDict):
-    type: Literal['trace']
+    type: Literal["trace"]
     idle_seconds: NotRequired[float | None]
     """
     Consider trace complete after this many seconds of inactivity (default: 30)
@@ -2423,15 +2424,15 @@ class TraceScope(TypedDict):
 
 
 class TriggeredFunctionStateScope(TypedDict):
-    type: Literal['span']
+    type: Literal["span"]
 
 
 class TriggeredFunctionStateScope1(TypedDict):
-    type: Literal['trace']
+    type: Literal["trace"]
 
 
 class TriggeredFunctionStateScope2(TypedDict):
-    type: Literal['group']
+    type: Literal["group"]
     key: str
     value: str
 
@@ -2459,7 +2460,7 @@ class TriggeredFunctionState(TypedDict):
     """
 
 
-UploadStatus: TypeAlias = Literal['uploading', 'done', 'error']
+UploadStatus: TypeAlias = Literal["uploading", "done", "error"]
 
 
 class User(TypedDict):
@@ -2497,40 +2498,40 @@ class ViewDataSearch(TypedDict):
 
 
 class ViewOptionsViewOptionsOptions(TypedDict):
-    spanType: NotRequired[Literal['range', 'frame'] | None]
+    spanType: NotRequired[Literal["range", "frame"] | None]
     rangeValue: NotRequired[str | None]
     frameStart: NotRequired[str | None]
     frameEnd: NotRequired[str | None]
     tzUTC: NotRequired[bool | None]
     chartVisibility: NotRequired[Mapping[str, Any] | None]
     projectId: NotRequired[str | None]
-    type: NotRequired[Literal['project', 'experiment'] | None]
+    type: NotRequired[Literal["project", "experiment"] | None]
     groupBy: NotRequired[str | None]
 
 
 class ViewOptionsViewOptions(TypedDict):
-    viewType: Literal['monitor']
+    viewType: Literal["monitor"]
     options: ViewOptionsViewOptionsOptions
     freezeColumns: NotRequired[bool | None]
 
 
 class ViewOptionsViewOptions1ExcludedMeasure(TypedDict):
-    type: Literal['none', 'score', 'metric', 'metadata']
+    type: Literal["none", "score", "metric", "metadata"]
     value: str
 
 
 class ViewOptionsViewOptions1YMetric(TypedDict):
-    type: Literal['none', 'score', 'metric', 'metadata']
+    type: Literal["none", "score", "metric", "metadata"]
     value: str
 
 
 class ViewOptionsViewOptions1XAxis(TypedDict):
-    type: Literal['none', 'score', 'metric', 'metadata']
+    type: Literal["none", "score", "metric", "metadata"]
     value: str
 
 
 class ViewOptionsViewOptions1SymbolGrouping(TypedDict):
-    type: Literal['none', 'score', 'metric', 'metadata']
+    type: Literal["none", "score", "metric", "metadata"]
     value: str
 
 
@@ -2540,10 +2541,10 @@ class ViewOptionsViewOptions1ChartAnnotation(TypedDict):
 
 
 ViewOptionsViewOptions1TimeRangeFilter = TypedDict(
-    'ViewOptionsViewOptions1TimeRangeFilter',
+    "ViewOptionsViewOptions1TimeRangeFilter",
     {
-        'from': str,
-        'to': str,
+        "from": str,
+        "to": str,
     },
 )
 
@@ -2567,7 +2568,7 @@ class ViewOptionsViewOptions1(TypedDict):
     """
     chartAnnotations: NotRequired[Sequence[ViewOptionsViewOptions1ChartAnnotation] | None]
     timeRangeFilter: NotRequired[str | ViewOptionsViewOptions1TimeRangeFilter | None]
-    queryShape: NotRequired[Literal['traces', 'spans'] | None]
+    queryShape: NotRequired[Literal["traces", "spans"] | None]
     freezeColumns: NotRequired[bool | None]
 
 
@@ -2622,12 +2623,12 @@ class AnyModelParams(TypedDict):
     frequency_penalty: NotRequired[float | None]
     presence_penalty: NotRequired[float | None]
     response_format: NotRequired[ResponseFormatNullish | None]
-    tool_choice: NotRequired[Literal['auto'] | Literal['none'] | Literal['required'] | AnyModelParamsToolChoice | None]
-    function_call: NotRequired[Literal['auto'] | Literal['none'] | AnyModelParamsFunctionCall | None]
+    tool_choice: NotRequired[Literal["auto"] | Literal["none"] | Literal["required"] | AnyModelParamsToolChoice | None]
+    function_call: NotRequired[Literal["auto"] | Literal["none"] | AnyModelParamsFunctionCall | None]
     n: NotRequired[float | None]
     stop: NotRequired[Sequence[str] | None]
-    reasoning_effort: NotRequired[Literal['none', 'minimal', 'low', 'medium', 'high'] | None]
-    verbosity: NotRequired[Literal['low', 'medium', 'high'] | None]
+    reasoning_effort: NotRequired[Literal["none", "minimal", "low", "medium", "high"] | None]
+    verbosity: NotRequired[Literal["low", "medium", "high"] | None]
     top_k: NotRequired[float | None]
     stop_sequences: NotRequired[Sequence[str] | None]
     reasoning_enabled: NotRequired[bool | None]
@@ -2643,7 +2644,7 @@ class AnyModelParams(TypedDict):
 
 
 class AsyncScoringControlAsyncScoringControl1(TypedDict):
-    kind: Literal['state_override']
+    kind: Literal["state_override"]
     state: AsyncScoringState
 
 
@@ -2670,7 +2671,7 @@ class AttachmentStatus(TypedDict):
 
 
 class PreprocessorPreprocessor1(TypedDict):
-    type: Literal['global']
+    type: Literal["global"]
     name: str
     function_type: NotRequired[FunctionTypeEnum | None]
 
@@ -2695,7 +2696,7 @@ class BatchedFacetDataTopicMap(TypedDict):
 
 
 class BatchedFacetData(TypedDict):
-    type: Literal['batched_facet']
+    type: Literal["batched_facet"]
     preprocessor: NotRequired[Preprocessor | None]
     facets: Sequence[BatchedFacetDataFacet]
     topic_maps: NotRequired[Mapping[str, Sequence[BatchedFacetDataTopicMap]] | None]
@@ -2713,12 +2714,12 @@ class BatchedFacetData(TypedDict):
 
 class ChatCompletionMessageParamChatCompletionMessageParam1(TypedDict):
     content: str | Sequence[ChatCompletionContentPart]
-    role: Literal['user']
+    role: Literal["user"]
     name: NotRequired[str | None]
 
 
 class ChatCompletionMessageParamChatCompletionMessageParam2(TypedDict):
-    role: Literal['assistant']
+    role: Literal["assistant"]
     content: NotRequired[str | Sequence[ChatCompletionContentPartText] | None]
     function_call: NotRequired[ChatCompletionMessageParamChatCompletionMessageParam2FunctionCall | None]
     name: NotRequired[str | None]
@@ -2739,7 +2740,7 @@ class ChatCompletionMessageParamChatCompletionMessageParam2(TypedDict):
 
 class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam1(TypedDict):
     content: str | Sequence[ChatCompletionContentPart]
-    role: Literal['user']
+    role: Literal["user"]
     name: NotRequired[str | None]
 
 
@@ -2931,7 +2932,7 @@ class Experiment(TypedDict):
 
 
 class ExtendedSavedFunctionIdExtendedSavedFunctionId1(TypedDict):
-    type: Literal['global']
+    type: Literal["global"]
     name: str
     function_type: NotRequired[FunctionTypeEnum | None]
 
@@ -2944,7 +2945,7 @@ class ExtendedSavedFunctionIdExtendedSavedFunctionId1(TypedDict):
 
 
 class Preprocessor1Preprocessor11(TypedDict):
-    type: Literal['global']
+    type: Literal["global"]
     name: str
     function_type: NotRequired[FunctionTypeEnum | None]
 
@@ -2957,7 +2958,7 @@ class Preprocessor1Preprocessor14(Preprocessor1Preprocessor11, Preprocessor1Prep
 
 
 class FacetData(TypedDict):
-    type: Literal['facet']
+    type: Literal["facet"]
     preprocessor: NotRequired[Preprocessor1 | None]
     prompt: str
     """
@@ -2978,7 +2979,7 @@ class FacetData(TypedDict):
 
 
 class FunctionDataFunctionData3(TypedDict):
-    type: Literal['global']
+    type: Literal["global"]
     name: str
     function_type: NotRequired[FunctionTypeEnum | None]
     config: NotRequired[Mapping[str, Any] | None]
@@ -3084,13 +3085,13 @@ class ModelParamsModelParams(TypedDict):
     presence_penalty: NotRequired[float | None]
     response_format: NotRequired[ResponseFormatNullish | None]
     tool_choice: NotRequired[
-        Literal['auto'] | Literal['none'] | Literal['required'] | ModelParamsModelParamsToolChoice
+        Literal["auto"] | Literal["none"] | Literal["required"] | ModelParamsModelParamsToolChoice
     ]
-    function_call: NotRequired[Literal['auto'] | Literal['none'] | ModelParamsModelParamsFunctionCall | None]
+    function_call: NotRequired[Literal["auto"] | Literal["none"] | ModelParamsModelParamsFunctionCall | None]
     n: NotRequired[float | None]
     stop: NotRequired[Sequence[str] | None]
-    reasoning_effort: NotRequired[Literal['none', 'minimal', 'low', 'medium', 'high'] | None]
-    verbosity: NotRequired[Literal['low', 'medium', 'high'] | None]
+    reasoning_effort: NotRequired[Literal["none", "minimal", "low", "medium", "high"] | None]
+    verbosity: NotRequired[Literal["low", "medium", "high"] | None]
 
 
 ModelParams: TypeAlias = (
@@ -3166,7 +3167,7 @@ class Project(TypedDict):
 
 
 class ProjectAutomationConfig2(TypedDict):
-    event_type: Literal['retention']
+    event_type: Literal["retention"]
     """
     The type of automation.
     """
@@ -3187,7 +3188,7 @@ class ProjectScoreConfig(TypedDict):
 
 
 class PromptBlockDataPromptBlockData(TypedDict):
-    type: Literal['chat']
+    type: Literal["chat"]
     messages: Sequence[ChatCompletionMessageParam]
     tools: NotRequired[str | None]
 
@@ -3196,7 +3197,7 @@ class PromptBlockDataPromptBlockData(TypedDict):
 
 
 class PromptBlockDataNullishPromptBlockDataNullish(TypedDict):
-    type: Literal['chat']
+    type: Literal["chat"]
     messages: Sequence[ChatCompletionMessageParam]
     tools: NotRequired[str | None]
 
@@ -3219,7 +3220,7 @@ class PromptOptionsNullish(TypedDict):
 
 
 class ResponseFormatResponseFormat1(TypedDict):
-    type: Literal['json_schema']
+    type: Literal["json_schema"]
     json_schema: ResponseFormatJsonSchema
 
 
@@ -3237,7 +3238,7 @@ class SpanAttributes(TypedDict):
 
 
 class TopicAutomationConfig(TypedDict):
-    event_type: Literal['topic']
+    event_type: Literal["topic"]
     """
     The type of automation.
     """
@@ -3379,7 +3380,7 @@ class GraphNodeGraphNode7(TypedDict):
     """
     The position of the node
     """
-    type: Literal['prompt_template']
+    type: Literal["prompt_template"]
     prompt: PromptBlockData
 
 
@@ -3457,7 +3458,7 @@ class ProjectLogsEvent(TypedDict):
     """
     Unique identifier for the project
     """
-    log_id: Literal['g']
+    log_id: Literal["g"]
     """
     A literal 'g' which identifies the log as a project log
     """
@@ -3573,7 +3574,7 @@ class PromptData(TypedDict):
     options: NotRequired[PromptOptionsNullish | None]
     parser: NotRequired[PromptParserNullish | None]
     tool_functions: NotRequired[Sequence[SavedFunctionId] | None]
-    template_format: NotRequired[Literal['mustache', 'nunjucks', 'none'] | None]
+    template_format: NotRequired[Literal["mustache", "nunjucks", "none"] | None]
     mcp: NotRequired[Mapping[str, Any] | None]
     origin: NotRequired[PromptDataOrigin | None]
 
@@ -3583,7 +3584,7 @@ class PromptDataNullish(TypedDict):
     options: NotRequired[PromptOptionsNullish | None]
     parser: NotRequired[PromptParserNullish | None]
     tool_functions: NotRequired[Sequence[SavedFunctionId] | None]
-    template_format: NotRequired[Literal['mustache', 'nunjucks', 'none'] | None]
+    template_format: NotRequired[Literal["mustache", "nunjucks", "none"] | None]
     mcp: NotRequired[Mapping[str, Any] | None]
     origin: NotRequired[PromptDataNullishOrigin | None]
 
@@ -3629,23 +3630,23 @@ class View(TypedDict):
     The id of the object the view applies to
     """
     view_type: Literal[
-        'projects',
-        'experiments',
-        'experiment',
-        'playgrounds',
-        'playground',
-        'datasets',
-        'dataset',
-        'prompts',
-        'parameters',
-        'tools',
-        'scorers',
-        'classifiers',
-        'logs',
-        'monitor',
-        'for_review_project_log',
-        'for_review_experiments',
-        'for_review_datasets',
+        "projects",
+        "experiments",
+        "experiment",
+        "playgrounds",
+        "playground",
+        "datasets",
+        "dataset",
+        "prompts",
+        "parameters",
+        "tools",
+        "scorers",
+        "classifiers",
+        "logs",
+        "monitor",
+        "for_review_project_log",
+        "for_review_experiments",
+        "for_review_datasets",
     ]
     """
     Type of object that the view corresponds to.
@@ -3704,7 +3705,7 @@ class FunctionIdFunctionId6(TypedDict):
 
 
 class GraphData(TypedDict):
-    type: Literal['graph']
+    type: Literal["graph"]
     nodes: Mapping[str, GraphNode]
     edges: Mapping[str, GraphEdge]
 
@@ -3763,7 +3764,7 @@ class Prompt(TypedDict):
     """
     Unique identifier for the project that the prompt belongs under
     """
-    log_id: Literal['p']
+    log_id: Literal["p"]
     """
     A literal 'p' which identifies the object as a project prompt
     """
@@ -3905,7 +3906,7 @@ class Function(TypedDict):
     """
     Unique identifier for the project that the prompt belongs under
     """
-    log_id: Literal['p']
+    log_id: Literal["p"]
     """
     A literal 'p' which identifies the object as a project prompt
     """
@@ -3946,4 +3947,5 @@ class Function(TypedDict):
     JSON schema for the function's parameters and return type
     """
 
+
 __all__ = []
diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py
index fe336267..30dcc2b2 100644
--- a/py/src/braintrust/auto.py
+++ b/py/src/braintrust/auto.py
@@ -9,6 +9,7 @@
 import logging
 from contextlib import contextmanager
 
+
 __all__ = ["auto_instrument"]
 
 logger = logging.getLogger(__name__)
diff --git a/py/src/braintrust/bt_json.py b/py/src/braintrust/bt_json.py
index 4fc36f8c..00e8bfde 100644
--- a/py/src/braintrust/bt_json.py
+++ b/py/src/braintrust/bt_json.py
@@ -3,6 +3,7 @@
 import math
 from typing import Any, Callable, Mapping, NamedTuple, cast, overload
 
+
 # Try to import orjson for better performance
 # If not available, we'll use standard json
 try:
@@ -13,7 +14,6 @@
     _HAS_ORJSON = False
 
 
-
 def _to_bt_safe(v: Any) -> Any:
     """
     Converts the object to a Braintrust-safe representation (i.e. Attachment objects are safe (specially handled by background logger)).
@@ -87,24 +87,27 @@ def _to_bt_safe(v: Any) -> Any:
     # We pass `encoder=_str_encoder` since we've already tried converting rich objects to json safe objects.
     return bt_loads(bt_dumps(v, encoder=_str_encoder))
 
+
 @overload
 def bt_safe_deep_copy(
     obj: Mapping[str, Any],
     max_depth: int = ...,
 ) -> dict[str, Any]: ...
 
+
 @overload
 def bt_safe_deep_copy(
     obj: list[Any],
     max_depth: int = ...,
 ) -> list[Any]: ...
 
+
 @overload
 def bt_safe_deep_copy(
     obj: Any,
     max_depth: int = ...,
 ) -> Any: ...
-def bt_safe_deep_copy(obj: Any, max_depth: int=200):
+def bt_safe_deep_copy(obj: Any, max_depth: int = 200):
     """
     Creates a deep copy of the given object and converts rich objects to Braintrust-safe representations. See `_to_bt_safe` for more details.
 
@@ -161,6 +164,7 @@ def _deep_copy_object(v: Any, depth: int = 0) -> Any:
 
     return _deep_copy_object(obj)
 
+
 def _safe_str(obj: Any) -> str:
     try:
         return str(obj)
@@ -211,10 +215,12 @@ class Encoder(NamedTuple):
     native: type[json.JSONEncoder]
     orjson: Callable[[Any], Any]
 
+
 _json_encoder = Encoder(native=BraintrustJSONEncoder, orjson=_to_json_safe)
 _str_encoder = Encoder(native=BraintrustStrEncoder, orjson=_safe_str)
 
-def bt_dumps(obj: Any, encoder: Encoder | None=_json_encoder, **kwargs: Any) -> str:
+
+def bt_dumps(obj: Any, encoder: Encoder | None = _json_encoder, **kwargs: Any) -> str:
     """
     Serialize obj to a JSON-formatted string.
 
diff --git a/py/src/braintrust/cli/eval.py b/py/src/braintrust/cli/eval.py
index e5877856..1a3b09fd 100644
--- a/py/src/braintrust/cli/eval.py
+++ b/py/src/braintrust/cli/eval.py
@@ -24,6 +24,7 @@
 from ..logger import Dataset
 from ..util import eprint
 
+
 INCLUDE = [
     "**/eval_*.py",
 ]
diff --git a/py/src/braintrust/cli/install/__init__.py b/py/src/braintrust/cli/install/__init__.py
index d01ec808..849eda60 100644
--- a/py/src/braintrust/cli/install/__init__.py
+++ b/py/src/braintrust/cli/install/__init__.py
@@ -1,6 +1,7 @@
 import argparse
 import textwrap
 
+
 _module_not_found_error = None
 try:
     from . import api, bump_versions, logs, run_migrations
diff --git a/py/src/braintrust/cli/install/api.py b/py/src/braintrust/cli/install/api.py
index 2538c605..6c95774a 100644
--- a/py/src/braintrust/cli/install/api.py
+++ b/py/src/braintrust/cli/install/api.py
@@ -10,6 +10,7 @@
 from ...aws import cloudformation
 from ...util import response_raise_for_status
 
+
 _logger = logging.getLogger("braintrust.install.api")
 
 PARAMS = {
diff --git a/py/src/braintrust/cli/install/bump_versions.py b/py/src/braintrust/cli/install/bump_versions.py
index e0954aa5..04a2fc71 100644
--- a/py/src/braintrust/cli/install/bump_versions.py
+++ b/py/src/braintrust/cli/install/bump_versions.py
@@ -3,6 +3,7 @@
 # pylint: disable=no-name-in-module
 from ...aws import LazyClient, cloudformation
 
+
 _logger = logging.getLogger("braintrust.install.logs")
 
 
diff --git a/py/src/braintrust/cli/install/logs.py b/py/src/braintrust/cli/install/logs.py
index 3423c4c0..2b840aec 100644
--- a/py/src/braintrust/cli/install/logs.py
+++ b/py/src/braintrust/cli/install/logs.py
@@ -5,6 +5,7 @@
 # pylint: disable=no-name-in-module
 from ...aws import cloudformation, logs
 
+
 _logger = logging.getLogger("braintrust.install.logs")
 
 
diff --git a/py/src/braintrust/cli/install/redshift.py b/py/src/braintrust/cli/install/redshift.py
index c3011591..6571acbb 100644
--- a/py/src/braintrust/cli/install/redshift.py
+++ b/py/src/braintrust/cli/install/redshift.py
@@ -9,6 +9,7 @@
 # pylint: disable=no-name-in-module
 from ...aws import iam, redshift_serverless
 
+
 _logger = logging.getLogger("braintrust.install.redshift")
 
 
diff --git a/py/src/braintrust/cli/install/run_migrations.py b/py/src/braintrust/cli/install/run_migrations.py
index 5235f7e9..68fb29a7 100644
--- a/py/src/braintrust/cli/install/run_migrations.py
+++ b/py/src/braintrust/cli/install/run_migrations.py
@@ -3,6 +3,7 @@
 # pylint: disable=no-name-in-module
 from ...aws import LazyClient, cloudformation
 
+
 _logger = logging.getLogger("braintrust.install.logs")
 
 
diff --git a/py/src/braintrust/cli/test_push.py b/py/src/braintrust/cli/test_push.py
index 24207ca7..970883c1 100644
--- a/py/src/braintrust/cli/test_push.py
+++ b/py/src/braintrust/cli/test_push.py
@@ -2,6 +2,7 @@
 
 import pytest
 
+
 pydantic = pytest.importorskip("pydantic")
 
 from ..framework2 import (
diff --git a/py/src/braintrust/contrib/temporal/__init__.py b/py/src/braintrust/contrib/temporal/__init__.py
index 19cfc4ee..61e4397e 100644
--- a/py/src/braintrust/contrib/temporal/__init__.py
+++ b/py/src/braintrust/contrib/temporal/__init__.py
@@ -94,6 +94,7 @@
 from temporalio.worker import WorkflowRunner
 from temporalio.worker.workflow_sandbox import SandboxedWorkflowRunner
 
+
 # Braintrust dynamically chooses its context implementation at runtime based on
 # BRAINTRUST_OTEL_COMPAT environment variable. When first accessed, it reads
 # os.environ which is restricted in the sandbox. Therefore if the first use
diff --git a/py/src/braintrust/contrib/temporal/test_temporal.py b/py/src/braintrust/contrib/temporal/test_temporal.py
index 5034612e..8ed87264 100644
--- a/py/src/braintrust/contrib/temporal/test_temporal.py
+++ b/py/src/braintrust/contrib/temporal/test_temporal.py
@@ -9,6 +9,7 @@
 import pytest
 import pytest_asyncio
 
+
 pytest.importorskip("temporalio")
 
 import braintrust
diff --git a/py/src/braintrust/devserver/auth.py b/py/src/braintrust/devserver/auth.py
index d1af0338..70523720 100644
--- a/py/src/braintrust/devserver/auth.py
+++ b/py/src/braintrust/devserver/auth.py
@@ -7,6 +7,7 @@
 
 from ..logger import BraintrustState
 
+
 ORIGIN_HEADER = "origin"
 BRAINTRUST_AUTH_TOKEN_HEADER = "x-bt-auth-token"
 BRAINTRUST_ORG_NAME_HEADER = "x-bt-org-name"
diff --git a/py/src/braintrust/devserver/cors.py b/py/src/braintrust/devserver/cors.py
index e014d4a2..0b60f4e1 100644
--- a/py/src/braintrust/devserver/cors.py
+++ b/py/src/braintrust/devserver/cors.py
@@ -3,6 +3,7 @@
 from collections.abc import Awaitable, Callable
 from typing import Any
 
+
 # CORS configuration
 ALLOWED_ORIGINS: list[str | re.Pattern] = [
     "https://www.braintrust.dev",
diff --git a/py/src/braintrust/devserver/schemas.py b/py/src/braintrust/devserver/schemas.py
index 841daffd..cd8f49da 100644
--- a/py/src/braintrust/devserver/schemas.py
+++ b/py/src/braintrust/devserver/schemas.py
@@ -4,6 +4,7 @@
 
 from typing_extensions import TypedDict
 
+
 # This is not beautiful code, but it saves us from introducing Pydantic as a dependency, and it is fairly
 # straightforward for an LLM to keep it up to date with runEvalBodySchema in JS.
 
diff --git a/py/src/braintrust/devserver/server.py b/py/src/braintrust/devserver/server.py
index 96f981c4..46e81b5c 100644
--- a/py/src/braintrust/devserver/server.py
+++ b/py/src/braintrust/devserver/server.py
@@ -4,6 +4,7 @@
 import textwrap
 from typing import Any
 
+
 try:
     import uvicorn
     from starlette.applications import Starlette
@@ -36,6 +37,7 @@
 from .eval_hooks import SSEQueue
 from .schemas import ValidationError, parse_eval_body
 
+
 _all_evaluators: dict[str, Evaluator[Any, Any]] = {}
 
 
diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py
index 4794785a..1dc38f90 100644
--- a/py/src/braintrust/framework.py
+++ b/py/src/braintrust/framework.py
@@ -49,6 +49,7 @@
 from .span_types import SpanTypeAttribute
 from .util import bt_iscoroutinefunction, eprint, merge_dicts
 
+
 Input = TypeVar("Input")
 Output = TypeVar("Output")
 
@@ -1276,7 +1277,8 @@ async def run_evaluator(
 ) -> EvalResultWithSummary[Input, Output]:
     """Wrapper on _run_evaluator_internal that times out execution after evaluator.timeout."""
     results = await asyncio.wait_for(
-        _run_evaluator_internal(experiment, evaluator, position, filters, stream, state, enable_cache), evaluator.timeout
+        _run_evaluator_internal(experiment, evaluator, position, filters, stream, state, enable_cache),
+        evaluator.timeout,
     )
 
     if experiment:
@@ -1473,9 +1475,7 @@ def report_progress(event: TaskProgressEvent):
                 async def ensure_spans_flushed():
                     # Flush native Braintrust spans
                     if experiment:
-                        await asyncio.get_event_loop().run_in_executor(
-                            None, lambda: experiment.state.flush()
-                        )
+                        await asyncio.get_event_loop().run_in_executor(None, lambda: experiment.state.flush())
                     elif state:
                         await asyncio.get_event_loop().run_in_executor(None, lambda: state.flush())
                     else:
diff --git a/py/src/braintrust/functions/invoke.py b/py/src/braintrust/functions/invoke.py
index f0b1c3c0..b9597954 100644
--- a/py/src/braintrust/functions/invoke.py
+++ b/py/src/braintrust/functions/invoke.py
@@ -9,6 +9,7 @@
 from .constants import INVOKE_API_VERSION
 from .stream import BraintrustInvokeError, BraintrustStream
 
+
 T = TypeVar("T")
 ModeType = Literal["auto", "parallel", "json", "text"]
 ObjectType = Literal["project_logs", "experiment", "dataset", "playground_logs"]
diff --git a/py/src/braintrust/generated_types.py b/py/src/braintrust/generated_types.py
index 6f49a6ed..5528fe91 100644
--- a/py/src/braintrust/generated_types.py
+++ b/py/src/braintrust/generated_types.py
@@ -115,6 +115,7 @@
     ViewOptions,
 )
 
+
 __all__ = [
     "AISecret",
     "Acl",
diff --git a/py/src/braintrust/gitutil.py b/py/src/braintrust/gitutil.py
index 4ab875e5..416dc59b 100644
--- a/py/src/braintrust/gitutil.py
+++ b/py/src/braintrust/gitutil.py
@@ -7,6 +7,7 @@
 
 from .git_fields import GitMetadataSettings, RepoInfo
 
+
 # https://stackoverflow.com/questions/48399498/git-executable-not-found-in-python
 os.environ["GIT_PYTHON_REFRESH"] = "quiet"
 try:
diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index f2d20863..ff08052d 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -93,11 +93,13 @@
     response_raise_for_status,
 )
 
+
 # Fields that should be passed to the masking function
 # Note: "tags" field is intentionally excluded, but can be added if needed
 REDACTION_FIELDS = ["input", "output", "expected", "metadata", "context", "scores", "metrics"]
 from .xact_ids import prettify_xact
 
+
 Metadata = dict[str, Any]
 DATA_API_VERSION = 2
 LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow"
@@ -3510,17 +3512,17 @@ def _start_span_parent_args(
     if parent:
         assert parent_span_ids is None, "Cannot specify both parent and parent_span_ids"
         parent_components = SpanComponentsV4.from_str(parent)
-        assert (
-            parent_object_type == parent_components.object_type
-        ), f"Mismatch between expected span parent object type {parent_object_type} and provided type {parent_components.object_type}"
+        assert parent_object_type == parent_components.object_type, (
+            f"Mismatch between expected span parent object type {parent_object_type} and provided type {parent_components.object_type}"
+        )
 
         parent_components_object_id_lambda = _span_components_to_object_id_lambda(parent_components)
 
         def compute_parent_object_id():
             parent_components_object_id = parent_components_object_id_lambda()
-            assert (
-                parent_object_id.get() == parent_components_object_id
-            ), f"Mismatch between expected span parent object id {parent_object_id.get()} and provided id {parent_components_object_id}"
+            assert parent_object_id.get() == parent_components_object_id, (
+                f"Mismatch between expected span parent object id {parent_object_id.get()} and provided id {parent_components_object_id}"
+            )
             return parent_object_id.get()
 
         arg_parent_object_id = LazyValue(compute_parent_object_id, use_mutex=False)
diff --git a/py/src/braintrust/merge_row_batch.py b/py/src/braintrust/merge_row_batch.py
index 07d40578..c9047775 100644
--- a/py/src/braintrust/merge_row_batch.py
+++ b/py/src/braintrust/merge_row_batch.py
@@ -3,9 +3,11 @@
 
 from .db_fields import IS_MERGE_FIELD
 
+
 T = TypeVar("T")
 from .util import merge_dicts
 
+
 _MergedRowKey = tuple[Optional[Any], ...]
 
 
diff --git a/py/src/braintrust/oai.py b/py/src/braintrust/oai.py
index 52e07a78..80bb3e7a 100644
--- a/py/src/braintrust/oai.py
+++ b/py/src/braintrust/oai.py
@@ -11,6 +11,7 @@
 from .span_types import SpanTypeAttribute
 from .util import is_numeric, merge_dicts
 
+
 X_LEGACY_CACHED_HEADER = "x-cached"
 X_CACHED_HEADER = "x-bt-cached"
 
diff --git a/py/src/braintrust/object.py b/py/src/braintrust/object.py
index f241a589..e596c761 100644
--- a/py/src/braintrust/object.py
+++ b/py/src/braintrust/object.py
@@ -1,5 +1,6 @@
 from .generated_types import DatasetEvent
 
+
 DEFAULT_IS_LEGACY_DATASET = False
 
 
diff --git a/py/src/braintrust/otel/__init__.py b/py/src/braintrust/otel/__init__.py
index fec3a4cd..e6fe7f3e 100644
--- a/py/src/braintrust/otel/__init__.py
+++ b/py/src/braintrust/otel/__init__.py
@@ -3,6 +3,7 @@
 import warnings
 from urllib.parse import urljoin
 
+
 INSTALL_ERR_MSG = (
     "OpenTelemetry packages are not installed. "
     "Install optional OpenTelemetry dependencies with: pip install braintrust[otel]"
@@ -402,10 +403,12 @@ def _get_braintrust_parent(object_type, object_id: str | None = None, compute_ar
 
     return None
 
+
 def is_root_span(span) -> bool:
     """Returns True if the span is a root span (no parent span)."""
     return getattr(span, "parent", None) is None
 
+
 def context_from_span_export(export_str: str):
     """
     Create an OTEL context from a Braintrust span export string.
diff --git a/py/src/braintrust/otel/context.py b/py/src/braintrust/otel/context.py
index b0148384..bb65be77 100644
--- a/py/src/braintrust/otel/context.py
+++ b/py/src/braintrust/otel/context.py
@@ -8,6 +8,7 @@
 from opentelemetry import context, trace
 from opentelemetry.trace import SpanContext, TraceFlags
 
+
 log = logging.getLogger(__name__)
 
 
diff --git a/py/src/braintrust/otel/test_distributed_tracing.py b/py/src/braintrust/otel/test_distributed_tracing.py
index 2610f81b..a2fab2a2 100644
--- a/py/src/braintrust/otel/test_distributed_tracing.py
+++ b/py/src/braintrust/otel/test_distributed_tracing.py
@@ -12,6 +12,7 @@
 from braintrust.otel import BraintrustSpanProcessor, context_from_span_export
 from braintrust.test_helpers import init_test_logger, preserve_env_vars
 
+
 OTEL_AVAILABLE = True
 try:
     from opentelemetry.sdk.trace import TracerProvider
diff --git a/py/src/braintrust/otel/test_otel_bt_integration.py b/py/src/braintrust/otel/test_otel_bt_integration.py
index 9ca1acc9..579082d9 100644
--- a/py/src/braintrust/otel/test_otel_bt_integration.py
+++ b/py/src/braintrust/otel/test_otel_bt_integration.py
@@ -13,6 +13,7 @@
 from braintrust.otel import BraintrustSpanProcessor
 from braintrust.test_helpers import init_test_exp, init_test_logger, preserve_env_vars
 
+
 OTEL_AVAILABLE = True
 try:
     from opentelemetry.sdk.trace import TracerProvider
diff --git a/py/src/braintrust/prompt.py b/py/src/braintrust/prompt.py
index 242cee43..d4b7fa19 100644
--- a/py/src/braintrust/prompt.py
+++ b/py/src/braintrust/prompt.py
@@ -4,6 +4,7 @@
 from .generated_types import PromptOptions
 from .serializable_data_class import SerializableDataClass
 
+
 # Keep these definitions in sync with sdk/core/js/typespecs/prompt.ts.
 
 
diff --git a/py/src/braintrust/prompt_cache/disk_cache.py b/py/src/braintrust/prompt_cache/disk_cache.py
index 3bf400ef..b7408024 100644
--- a/py/src/braintrust/prompt_cache/disk_cache.py
+++ b/py/src/braintrust/prompt_cache/disk_cache.py
@@ -15,6 +15,7 @@
 from collections.abc import Callable
 from typing import Any, Generic, TypeVar
 
+
 T = TypeVar("T")
 
 
diff --git a/py/src/braintrust/prompt_cache/lru_cache.py b/py/src/braintrust/prompt_cache/lru_cache.py
index e6023f0b..126fbd27 100644
--- a/py/src/braintrust/prompt_cache/lru_cache.py
+++ b/py/src/braintrust/prompt_cache/lru_cache.py
@@ -10,6 +10,7 @@
 from collections import OrderedDict
 from typing import Generic, TypeVar
 
+
 K = TypeVar("K")
 V = TypeVar("V")
 
diff --git a/py/src/braintrust/prompt_cache/prompt_cache.py b/py/src/braintrust/prompt_cache/prompt_cache.py
index 27313849..ac6d8a33 100644
--- a/py/src/braintrust/prompt_cache/prompt_cache.py
+++ b/py/src/braintrust/prompt_cache/prompt_cache.py
@@ -9,7 +9,6 @@
 The cache is keyed by project identifier (ID or name), prompt slug, and version.
 """
 
-
 from braintrust import prompt
 from braintrust.prompt_cache import disk_cache, lru_cache
 
diff --git a/py/src/braintrust/queue.py b/py/src/braintrust/queue.py
index 4629c3a3..ff6fc6cf 100644
--- a/py/src/braintrust/queue.py
+++ b/py/src/braintrust/queue.py
@@ -4,6 +4,7 @@
 
 from .util import eprint
 
+
 T = TypeVar("T")
 
 DEFAULT_QUEUE_SIZE = 25000
diff --git a/py/src/braintrust/score.py b/py/src/braintrust/score.py
index cd5fe720..62f9ee7e 100644
--- a/py/src/braintrust/score.py
+++ b/py/src/braintrust/score.py
@@ -6,6 +6,7 @@
 
 from .serializable_data_class import SerializableDataClass
 
+
 # =========================================================================
 #  !!!!!!!!!!!!!!!! READ THIS BEFORE CHANGING THIS FILE !!!!!!!!!!!!!!!!
 #
diff --git a/py/src/braintrust/span_cache.py b/py/src/braintrust/span_cache.py
index 17148cde..1f6bde9c 100644
--- a/py/src/braintrust/span_cache.py
+++ b/py/src/braintrust/span_cache.py
@@ -15,6 +15,7 @@
 
 from braintrust.util import merge_dicts
 
+
 # Global registry of active span caches for process exit cleanup
 _active_caches: set["SpanCache"] = set()
 _exit_handlers_registered = False
diff --git a/py/src/braintrust/span_identifier_v4.py b/py/src/braintrust/span_identifier_v4.py
index a3db4c80..c881ef49 100644
--- a/py/src/braintrust/span_identifier_v4.py
+++ b/py/src/braintrust/span_identifier_v4.py
@@ -11,6 +11,7 @@
     SpanObjectTypeV3,
 )
 
+
 ENCODING_VERSION_NUMBER_V4 = 4
 
 
diff --git a/py/src/braintrust/test_bt_json.py b/py/src/braintrust/test_bt_json.py
index f67f7c69..f1d4e368 100644
--- a/py/src/braintrust/test_bt_json.py
+++ b/py/src/braintrust/test_bt_json.py
@@ -282,6 +282,7 @@ def test_deep_copy_numeric_and_special_keys(self):
         self.assertTrue("(1, 2)" in result or "1, 2" in result)
         self.assertIn("None", result)
 
+
 @pytest.mark.vcr
 def test_to_bt_safe_special_objects():
     """Test _to_bt_safe handling of Span, Experiment, Dataset, Logger objects."""
@@ -314,9 +315,7 @@ def test_to_bt_safe_attachments(self):
         self.assertIs(result, attachment)
 
         # Test ExternalAttachment preservation
-        ext_attachment = ExternalAttachment(
-            url="s3://bucket/key", filename="ext.pdf", content_type="application/pdf"
-        )
+        ext_attachment = ExternalAttachment(url="s3://bucket/key", filename="ext.pdf", content_type="application/pdf")
         result_ext = _to_bt_safe(ext_attachment)
         self.assertIs(result_ext, ext_attachment)
 
@@ -418,9 +417,7 @@ def __init__(self):
     def test_bt_safe_deep_copy_attachment_identity(self):
         """Test bt_safe_deep_copy preserves attachment object identity."""
         attachment1 = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
-        attachment2 = ExternalAttachment(
-            url="s3://bucket/key", filename="file2.pdf", content_type="application/pdf"
-        )
+        attachment2 = ExternalAttachment(url="s3://bucket/key", filename="file2.pdf", content_type="application/pdf")
 
         original = {
             "field1": attachment1,
diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py
index 6c3c1fbd..313756cf 100644
--- a/py/src/braintrust/test_context.py
+++ b/py/src/braintrust/test_context.py
@@ -32,6 +32,7 @@ def _threadpool_scenario(test_logger, with_memory_logger):
 from braintrust.test_helpers import init_test_logger, with_memory_logger  # noqa: F401
 from braintrust.wrappers.threads import setup_threads
 
+
 F = TypeVar("F", bound=Callable)
 
 
diff --git a/py/src/braintrust/test_framework.py b/py/src/braintrust/test_framework.py
index 9acf284b..87247b44 100644
--- a/py/src/braintrust/test_framework.py
+++ b/py/src/braintrust/test_framework.py
@@ -531,6 +531,7 @@ def task_with_hooks(input, hooks):
     assert len(root_span) == 1
     assert root_span[0].get("tags") == None
 
+
 @pytest.mark.asyncio
 async def test_eval_enable_cache():
     state = BraintrustState()
diff --git a/py/src/braintrust/test_framework2.py b/py/src/braintrust/test_framework2.py
index 9b06c5b5..d2329850 100644
--- a/py/src/braintrust/test_framework2.py
+++ b/py/src/braintrust/test_framework2.py
@@ -6,6 +6,7 @@
 
 from .framework2 import projects
 
+
 # Check if pydantic is available
 HAS_PYDANTIC = importlib.util.find_spec("pydantic") is not None
 
diff --git a/py/src/braintrust/test_helpers.py b/py/src/braintrust/test_helpers.py
index 7e24bb23..98a8e7e5 100644
--- a/py/src/braintrust/test_helpers.py
+++ b/py/src/braintrust/test_helpers.py
@@ -6,6 +6,7 @@
 from braintrust.logger import ObjectMetadata, OrgProjectMetadata, ProjectExperimentMetadata
 from braintrust.util import LazyValue
 
+
 # Fake API key for testing only - this will not work with actual API calls
 TEST_ORG_ID = "test-org-id"
 TEST_ORG_NAME = "test-org-name"
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index 9792ed2a..9e8829c7 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -68,7 +68,6 @@ def test_init_validation(self):
 
         assert str(cm.exception) == f"duplicate tag: {tag}"
 
-
     def test_init_with_dataset_id_only(self):
         """Test that init accepts dataset={'id': '...'} parameter"""
         # Test the logic that extracts dataset_id from the dict
@@ -125,6 +124,7 @@ def test_init_with_repo_info_does_not_raise(self):
         assert metadata.project.id == "test-project-id"
         assert metadata.experiment.name == "test-exp"
 
+
 class TestLogger(TestCase):
     def test_extract_attachments_no_op(self):
         attachments: List[BaseAttachment] = []
@@ -242,8 +242,6 @@ def test_extract_attachments_with_attachments(self):
             },
         )
 
-
-
     def test_prompt_build_with_structured_output_templating(self):
         self.maxDiff = None
         prompt = Prompt(
@@ -3076,7 +3074,7 @@ def test_extract_attachments_collects_and_replaces():
     event = {
         "input": {"file": attachment1},
         "output": {"file": attachment2},
-        "metadata": {"files": [attachment1, ext_attachment]}
+        "metadata": {"files": [attachment1, ext_attachment]},
     }
 
     attachments = []
@@ -3106,7 +3104,7 @@ def test_extract_attachments_preserves_identity():
     event = {
         "input": attachment,
         "output": attachment,  # Same instance
-        "metadata": {"file": attachment}  # Same instance again
+        "metadata": {"file": attachment},  # Same instance again
     }
 
     attachments = []
@@ -3145,10 +3143,7 @@ def test_multiple_attachments_upload_tracked(with_memory_logger, with_simulate_l
 
     logger = init_test_logger(__name__)
     span = logger.start_span(name="test_span")
-    span.log(
-        input={"file1": attachment1},
-        output={"file2": attachment2}
-    )
+    span.log(input={"file1": attachment1}, output={"file2": attachment2})
     span.end()
     logger.flush()
 
@@ -3178,9 +3173,7 @@ def test_same_attachment_logged_twice_tracked_twice(with_memory_logger, with_sim
 def test_external_attachment_upload_tracked(with_memory_logger, with_simulate_login):
     """Test that ExternalAttachment upload is also tracked."""
     ext_attachment = ExternalAttachment(
-        url="s3://bucket/key.pdf",
-        filename="external.pdf",
-        content_type="application/pdf"
+        url="s3://bucket/key.pdf", filename="external.pdf", content_type="application/pdf"
     )
 
     logger = init_test_logger(__name__)
@@ -3218,11 +3211,7 @@ def test_multiple_attachment_types_tracked(with_memory_logger, with_simulate_log
 
     logger = init_test_logger(__name__)
     span = logger.start_span(name="test_span")
-    span.log(
-        input=attachment,
-        output=json_attachment,
-        metadata={"file": ext_attachment}
-    )
+    span.log(input=attachment, output=json_attachment, metadata={"file": ext_attachment})
     span.end()
     logger.flush()
 
diff --git a/py/src/braintrust/test_otel.py b/py/src/braintrust/test_otel.py
index 2706cf62..68da6a72 100644
--- a/py/src/braintrust/test_otel.py
+++ b/py/src/braintrust/test_otel.py
@@ -576,6 +576,7 @@ def test_custom_filter_is_root_span(self):
         assert "root_span" in names
         assert "child_span" not in names
 
+
 def test_parent_from_headers_invalid_inputs():
     """Test parent_from_headers with various invalid inputs."""
     if not _check_otel_installed():
diff --git a/py/src/braintrust/test_sandbox.py b/py/src/braintrust/test_sandbox.py
index c170eead..3e5694b3 100644
--- a/py/src/braintrust/test_sandbox.py
+++ b/py/src/braintrust/test_sandbox.py
@@ -7,6 +7,7 @@
 from .logger import BraintrustState
 from .sandbox import RegisterSandboxResult, SandboxConfig, register_sandbox
 
+
 SNAPSHOT_REF = "im-icRxmsk1Sz9XPP2f8OblVU"
 PROJECT = "My Project"
 ENTRYPOINTS = ["./local/js/optimization/evals/btql-generation/btql-queries.eval.ts"]
diff --git a/py/src/braintrust/test_span_cache.py b/py/src/braintrust/test_span_cache.py
index fc0b6c7e..9b250d44 100644
--- a/py/src/braintrust/test_span_cache.py
+++ b/py/src/braintrust/test_span_cache.py
@@ -1,6 +1,5 @@
 """Tests for SpanCache (disk-based cache)."""
 
-
 from braintrust.span_cache import CachedSpan, SpanCache
 
 
diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py
index ef7044e8..f07b9ef0 100644
--- a/py/src/braintrust/trace.py
+++ b/py/src/braintrust/trace.py
@@ -154,7 +154,9 @@ def __init__(
         else:
             # Standard constructor with SpanFetcher
             if object_type is None or object_id is None or root_span_id is None or get_state is None:
-                raise ValueError("Must provide either fetch_fn or all of object_type, object_id, root_span_id, get_state")
+                raise ValueError(
+                    "Must provide either fetch_fn or all of object_type, object_id, root_span_id, get_state"
+                )
 
             async def _fetch_fn(span_type: Optional[list[str]]) -> list[SpanData]:
                 state = await get_state()
@@ -307,13 +309,15 @@ def __init__(
         state: BraintrustState,
     ):
         # Initialize dict with trace_ref for JSON serialization
-        super().__init__({
-            "trace_ref": {
-                "object_type": object_type,
-                "object_id": object_id,
-                "root_span_id": root_span_id,
+        super().__init__(
+            {
+                "trace_ref": {
+                    "object_type": object_type,
+                    "object_id": object_id,
+                    "root_span_id": root_span_id,
+                }
             }
-        })
+        )
 
         self._object_type = object_type
         self._object_id = object_id
diff --git a/py/src/braintrust/util.py b/py/src/braintrust/util.py
index 5ed1ccd2..516cb9b6 100644
--- a/py/src/braintrust/util.py
+++ b/py/src/braintrust/util.py
@@ -29,6 +29,7 @@ def parse_env_var_float(name: str, default: float) -> float:
     except (ValueError, TypeError):
         return default
 
+
 GLOBAL_PROJECT = "Global"
 BT_IS_ASYNC_ATTRIBUTE = "_BT_IS_ASYNC"
 
diff --git a/py/src/braintrust/wrappers/adk/__init__.py b/py/src/braintrust/wrappers/adk/__init__.py
index 18edc6a8..6c6b8a14 100644
--- a/py/src/braintrust/wrappers/adk/__init__.py
+++ b/py/src/braintrust/wrappers/adk/__init__.py
@@ -11,6 +11,7 @@
 from braintrust.span_types import SpanTypeAttribute
 from wrapt import wrap_function_wrapper
 
+
 logger = logging.getLogger(__name__)
 
 __all__ = ["setup_braintrust", "setup_adk", "wrap_agent", "wrap_runner", "wrap_flow", "wrap_mcp_tool"]
diff --git a/py/src/braintrust/wrappers/adk/test_adk.py b/py/src/braintrust/wrappers/adk/test_adk.py
index f1c1700a..4462d89d 100644
--- a/py/src/braintrust/wrappers/adk/test_adk.py
+++ b/py/src/braintrust/wrappers/adk/test_adk.py
@@ -10,6 +10,7 @@
 from braintrust.wrappers.adk import _wrap_create_thread, setup_adk
 from google.adk import Agent
 
+
 ADK_VERSION = tuple(int(x) for x in pkg_version("google-adk").split(".")[:3])
 from google.adk.agents import LlmAgent
 from google.adk.runners import Runner
@@ -17,6 +18,7 @@
 from google.genai import types
 from pydantic import BaseModel, Field
 
+
 PROJECT_NAME = "test_adk"
 
 setup_adk(project_name=PROJECT_NAME)
diff --git a/py/src/braintrust/wrappers/adk/test_auto_adk.py b/py/src/braintrust/wrappers/adk/test_auto_adk.py
index ead2ba4a..493bd1fb 100644
--- a/py/src/braintrust/wrappers/adk/test_auto_adk.py
+++ b/py/src/braintrust/wrappers/adk/test_auto_adk.py
@@ -2,6 +2,7 @@
 
 from braintrust.auto import auto_instrument
 
+
 # 1. Instrument
 results = auto_instrument()
 assert results.get("adk") == True, "auto_instrument should return True for adk"
@@ -14,6 +15,7 @@
 from google.adk import agents, runners
 from google.adk.flows.llm_flows import base_llm_flow
 
+
 assert getattr(agents.BaseAgent, "_braintrust_patched", False), "BaseAgent should be patched"
 assert getattr(runners.Runner, "_braintrust_patched", False), "Runner should be patched"
 assert getattr(base_llm_flow.BaseLlmFlow, "_braintrust_patched", False), "BaseLlmFlow should be patched"
diff --git a/py/src/braintrust/wrappers/agno/__init__.py b/py/src/braintrust/wrappers/agno/__init__.py
index 527c01ef..3345f28f 100644
--- a/py/src/braintrust/wrappers/agno/__init__.py
+++ b/py/src/braintrust/wrappers/agno/__init__.py
@@ -30,6 +30,7 @@
 from .team import wrap_team
 from .workflow import wrap_workflow
 
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py
index 1f67ee05..fcb926e1 100644
--- a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py
+++ b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py
@@ -7,6 +7,7 @@
 
 from braintrust.wrappers.agno.agent import wrap_agent
 
+
 PROJECT_NAME = "test-agno-app"
 
 
@@ -129,6 +130,7 @@ def __init__(self):
 
         def _execute_workflow_agent(self, user_input, session, execution_input, run_context, stream=False, **kwargs):
             if stream:
+
                 def _stream():
                     yield FakeEvent("WorkflowStarted")
                     yield FakeEvent(
diff --git a/py/src/braintrust/wrappers/agno/test_workflow.py b/py/src/braintrust/wrappers/agno/test_workflow.py
index 0a9e88ac..199a52f9 100644
--- a/py/src/braintrust/wrappers/agno/test_workflow.py
+++ b/py/src/braintrust/wrappers/agno/test_workflow.py
@@ -186,7 +186,9 @@ def test_agno_workflow_stream_prefers_final_workflow_output(memory_logger):
 
 
 def test_agno_workflow_stream_preserves_final_run_response_fields(memory_logger):
-    Workflow = wrap_workflow(make_fake_streaming_workflow_with_mutated_run_response("CompatWorkflowMutatedRunResponse"))
+    Workflow = wrap_workflow(
+        make_fake_streaming_workflow_with_mutated_run_response("CompatWorkflowMutatedRunResponse")
+    )
     workflow = Workflow()
 
     execution_input = FakeExecutionInput("hello world")
diff --git a/py/src/braintrust/wrappers/agno/utils.py b/py/src/braintrust/wrappers/agno/utils.py
index ecdf0646..7951ac7c 100644
--- a/py/src/braintrust/wrappers/agno/utils.py
+++ b/py/src/braintrust/wrappers/agno/utils.py
@@ -24,7 +24,6 @@ def get_args_kwargs(args: list[str], kwargs: dict[str, Any], keys: list[str]):
     return {k: args[i] if args else kwargs.get(k) for i, k in enumerate(keys)}, omit(kwargs, keys)
 
 
-
 def _try_to_dict(obj: Any) -> Any:
     """Convert object to dict, handling different object types like OpenAI wrapper."""
     if isinstance(obj, dict):
diff --git a/py/src/braintrust/wrappers/anthropic.py b/py/src/braintrust/wrappers/anthropic.py
index 26033536..8357fc1e 100644
--- a/py/src/braintrust/wrappers/anthropic.py
+++ b/py/src/braintrust/wrappers/anthropic.py
@@ -7,6 +7,7 @@
 from braintrust.wrappers._anthropic_utils import Wrapper, extract_anthropic_usage, finalize_anthropic_tokens
 from wrapt import wrap_function_wrapper
 
+
 log = logging.getLogger(__name__)
 
 
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py
index ecfca75e..d4129a2f 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py
@@ -3,6 +3,7 @@
 from braintrust.auto import auto_instrument
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 # 1. Instrument
 results = auto_instrument()
 assert results.get("agno") == True, "auto_instrument should return True for agno"
@@ -48,28 +49,28 @@ def check_wrapped(klass, private_method, public_method, required=True):
 # Model methods (all public, all required)
 assert hasattr(Model, "invoke") and hasattr(Model.invoke, "__wrapped__"), "Model.invoke should be wrapped"
 assert hasattr(Model, "ainvoke") and hasattr(Model.ainvoke, "__wrapped__"), "Model.ainvoke should be wrapped"
-assert hasattr(Model, "invoke_stream") and hasattr(
-    Model.invoke_stream, "__wrapped__"
-), "Model.invoke_stream should be wrapped"
-assert hasattr(Model, "ainvoke_stream") and hasattr(
-    Model.ainvoke_stream, "__wrapped__"
-), "Model.ainvoke_stream should be wrapped"
+assert hasattr(Model, "invoke_stream") and hasattr(Model.invoke_stream, "__wrapped__"), (
+    "Model.invoke_stream should be wrapped"
+)
+assert hasattr(Model, "ainvoke_stream") and hasattr(Model.ainvoke_stream, "__wrapped__"), (
+    "Model.ainvoke_stream should be wrapped"
+)
 assert hasattr(Model, "response") and hasattr(Model.response, "__wrapped__"), "Model.response should be wrapped"
 assert hasattr(Model, "aresponse") and hasattr(Model.aresponse, "__wrapped__"), "Model.aresponse should be wrapped"
-assert hasattr(Model, "response_stream") and hasattr(
-    Model.response_stream, "__wrapped__"
-), "Model.response_stream should be wrapped"
-assert hasattr(Model, "aresponse_stream") and hasattr(
-    Model.aresponse_stream, "__wrapped__"
-), "Model.aresponse_stream should be wrapped"
+assert hasattr(Model, "response_stream") and hasattr(Model.response_stream, "__wrapped__"), (
+    "Model.response_stream should be wrapped"
+)
+assert hasattr(Model, "aresponse_stream") and hasattr(Model.aresponse_stream, "__wrapped__"), (
+    "Model.aresponse_stream should be wrapped"
+)
 
 # FunctionCall methods (all public, all required)
-assert hasattr(FunctionCall, "execute") and hasattr(
-    FunctionCall.execute, "__wrapped__"
-), "FunctionCall.execute should be wrapped"
-assert hasattr(FunctionCall, "aexecute") and hasattr(
-    FunctionCall.aexecute, "__wrapped__"
-), "FunctionCall.aexecute should be wrapped"
+assert hasattr(FunctionCall, "execute") and hasattr(FunctionCall.execute, "__wrapped__"), (
+    "FunctionCall.execute should be wrapped"
+)
+assert hasattr(FunctionCall, "aexecute") and hasattr(FunctionCall.aexecute, "__wrapped__"), (
+    "FunctionCall.aexecute should be wrapped"
+)
 
 # 4. Make API call and verify spans
 with autoinstrument_test_context("test_auto_agno") as memory_logger:
@@ -108,7 +109,9 @@ def check_wrapped(klass, private_method, public_method, required=True):
     # Verify span hierarchy - LLM span should be child of agent span
     llm_parents = llm_span.get("span_parents", [])
     agent_span_id = agent_span.get("span_id")
-    assert agent_span_id in llm_parents, f"LLM span should be child of agent span. Agent ID: {agent_span_id}, LLM parents: {llm_parents}"
+    assert agent_span_id in llm_parents, (
+        f"LLM span should be child of agent span. Agent ID: {agent_span_id}, LLM parents: {llm_parents}"
+    )
 
     print("Agent span created (type: task)")
     print("Model span created (type: llm)")
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py
index 6a6b32f8..2c51d911 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py
@@ -4,6 +4,7 @@
 from braintrust.auto import auto_instrument
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 # 1. Verify not patched initially
 assert not getattr(anthropic, "__braintrust_wrapped__", False)
 
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py
index b4d69586..d9213cdb 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py
@@ -4,6 +4,7 @@
 from braintrust.wrappers.claude_agent_sdk._test_transport import make_cassette_transport
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 # 1. Instrument
 results = auto_instrument()
 assert results.get("claude_agent_sdk") == True
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py
index 4a2fccdf..924ceb46 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py
@@ -9,6 +9,7 @@
 from braintrust.auto import auto_instrument
 from braintrust.wrappers.dspy import BraintrustDSpyCallback
 
+
 # 1. Verify not patched initially
 assert not getattr(dspy, "__braintrust_wrapped__", False)
 
@@ -25,6 +26,7 @@
 dspy.configure(lm=None)
 from dspy.dsp.utils.settings import settings
 
+
 has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in settings.callbacks)
 assert has_bt_callback, f"Expected BraintrustDSpyCallback in callbacks after configure()"
 
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py
index 4645ae0d..b9a5b72b 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py
@@ -3,6 +3,7 @@
 from braintrust.auto import auto_instrument
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 # 1. Instrument
 results = auto_instrument()
 assert results.get("google_genai") == True
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py
index 2aeeb921..0d8db254 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py
@@ -4,6 +4,7 @@
 from braintrust.auto import auto_instrument
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 # 1. Verify not patched initially
 assert not hasattr(litellm, "_braintrust_wrapped")
 
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py
index 4fb5f4c8..ef5eaf8f 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py
@@ -4,6 +4,7 @@
 from braintrust.auto import auto_instrument
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 # 1. Verify not patched initially
 assert not getattr(openai, "__braintrust_wrapped__", False)
 
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py
index c6b87484..c45fd13b 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py
@@ -3,6 +3,7 @@
 from braintrust.auto import auto_instrument
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 # 1. Instrument
 results = auto_instrument()
 assert results.get("pydantic_ai") == True
@@ -23,6 +24,7 @@
     )
 
     import asyncio
+
     result = asyncio.run(agent.run("Say hi"))
     assert result.output
 
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_aresponses.py b/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_aresponses.py
index 49867b36..42191de0 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_aresponses.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_aresponses.py
@@ -6,6 +6,7 @@
 from braintrust.wrappers.litellm import patch_litellm
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 patch_litellm()
 
 
diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_responses.py b/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_responses.py
index e25b2f86..2b2eac38 100644
--- a/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_responses.py
+++ b/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_responses.py
@@ -4,6 +4,7 @@
 from braintrust.wrappers.litellm import patch_litellm
 from braintrust.wrappers.test_utils import autoinstrument_test_context
 
+
 patch_litellm()
 
 with autoinstrument_test_context("test_patch_litellm_responses") as memory_logger:
diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py b/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py
index 9c45bf7d..8b596860 100644
--- a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py
+++ b/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py
@@ -21,6 +21,7 @@
 
 from ._wrapper import _create_client_wrapper_class, _create_tool_wrapper_class, _wrap_tool_factory
 
+
 logger = logging.getLogger(__name__)
 
 __all__ = ["setup_claude_agent_sdk"]
diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py
index f8786ead..3a516568 100644
--- a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py
+++ b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py
@@ -11,6 +11,7 @@
 
 import anyio
 
+
 try:
     import claude_agent_sdk
     from claude_agent_sdk._internal.transport import Transport
@@ -230,9 +231,7 @@ async def connect(self) -> None:
             return
 
         if self._record_mode not in {"once", "all"}:
-            raise FileNotFoundError(
-                f"Cassette missing for {self._cassette_name}: {self._cassette_path}"
-            )
+            raise FileNotFoundError(f"Cassette missing for {self._cassette_name}: {self._cassette_path}")
 
         self._cassette_path.parent.mkdir(parents=True, exist_ok=True)
         prompt = _empty_stream() if self._prompt == "" else self._prompt
@@ -255,9 +254,7 @@ async def write(self, data: str) -> None:
         expected = _normalize_for_match(recorded["payload"])
         self._maybe_remap_control_request_id(recorded["payload"], actual_raw)
         if expected != actual:
-            raise AssertionError(
-                f"Write mismatch for {self._cassette_name}\nexpected: {expected}\nactual: {actual}"
-            )
+            raise AssertionError(f"Write mismatch for {self._cassette_name}\nexpected: {expected}\nactual: {actual}")
 
     def read_messages(self):
         return self._read_messages_impl()
@@ -306,17 +303,13 @@ def _should_replay(self) -> bool:
             return True
         return False
 
-    async def _wait_for_event(
-        self, op: str, *, allow_eof: bool = False
-    ) -> dict[str, Any] | None:
+    async def _wait_for_event(self, op: str, *, allow_eof: bool = False) -> dict[str, Any] | None:
         while True:
             async with self._cursor_lock:
                 if self._cursor >= len(self._events):
                     if allow_eof:
                         return None
-                    raise AssertionError(
-                        f"Replay for {self._cassette_name} exhausted before expected {op}"
-                    )
+                    raise AssertionError(f"Replay for {self._cassette_name} exhausted before expected {op}")
 
                 event = self._events[self._cursor]
                 if event["op"] == op:
diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py
index 43e5d8f5..e019241d 100644
--- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py
+++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py
@@ -24,6 +24,7 @@
     SerializedContentType,
 )
 
+
 log = logging.getLogger(__name__)
 _thread_local = threading.local()
 
@@ -81,6 +82,7 @@ def release(self) -> None:
 def _log_tracing_warning(exc: Exception) -> None:
     log.warning("Error in tracing code", exc_info=exc)
 
+
 def _parse_tool_name(tool_name: Any) -> ParsedToolName:
     raw_name = str(tool_name) if tool_name is not None else DEFAULT_TOOL_NAME
 
@@ -136,6 +138,7 @@ def _serialize_tool_result_output(tool_result_block: Any) -> dict[str, Any]:
 
     return output
 
+
 def _serialize_system_message(message: Any) -> dict[str, Any]:
     serialized = {"subtype": getattr(message, "subtype", None)}
 
@@ -319,7 +322,9 @@ def mark_task_started(self, tool_use_id: Any) -> None:
 
     def acquire_span_for_handler(self, tool_name: Any, args: Any) -> _ActiveToolSpan | None:
         parsed_tool_name = _parse_tool_name(tool_name)
-        candidate_names = list(dict.fromkeys((parsed_tool_name.raw_name, parsed_tool_name.display_name, str(tool_name))))
+        candidate_names = list(
+            dict.fromkeys((parsed_tool_name.raw_name, parsed_tool_name.display_name, str(tool_name)))
+        )
 
         candidates = [
             active_tool_span
@@ -335,7 +340,9 @@ def acquire_span_for_handler(self, tool_name: Any, args: Any) -> _ActiveToolSpan
         matched_span.activate()
         return matched_span
 
-    def _end_tool_span(self, tool_use_id: str, tool_result_block: Any | None = None, end_time: float | None = None) -> None:
+    def _end_tool_span(
+        self, tool_use_id: str, tool_result_block: Any | None = None, end_time: float | None = None
+    ) -> None:
         active_tool_span = self._active_spans.pop(tool_use_id, None)
         self._pending_task_link_tool_use_ids.discard(tool_use_id)
         if active_tool_span is None:
@@ -528,7 +535,9 @@ def process(self, message: Any) -> None:
                 self._task_span_by_tool_use_id.pop(str(tool_use_id), None)
             task_span.end()
             del self._active_spans[task_id]
-            self._active_task_order = [active_task_id for active_task_id in self._active_task_order if active_task_id != task_id]
+            self._active_task_order = [
+                active_task_id for active_task_id in self._active_task_order if active_task_id != task_id
+            ]
 
     @property
     def active_tool_use_ids(self) -> frozenset[str]:
@@ -568,11 +577,7 @@ def _parent_export(self, message: Any) -> str:
         return self._tool_tracker.get_span_export(getattr(message, "tool_use_id", None)) or self._root_span_export
 
     def _span_name(self, message: Any, task_id: str) -> str:
-        return (
-            getattr(message, "description", None)
-            or getattr(message, "task_type", None)
-            or f"Task {task_id}"
-        )
+        return getattr(message, "description", None) or getattr(message, "task_type", None) or f"Task {task_id}"
 
     def _metadata(self, message: Any) -> dict[str, Any]:
         metadata = {
@@ -711,7 +716,8 @@ async def receive_response(self) -> AsyncGenerator[Any, None]:
                         if message_type == MessageClassName.ASSISTANT:
                             if llm_tracker.current_span and tool_tracker.has_active_spans:
                                 active_subagent_tool_use_ids = (
-                                    task_event_span_tracker.active_tool_use_ids | tool_tracker.pending_task_link_tool_use_ids
+                                    task_event_span_tracker.active_tool_use_ids
+                                    | tool_tracker.pending_task_link_tool_use_ids
                                 )
                                 tool_tracker.cleanup(
                                     end_time=llm_tracker.get_next_start_time(),
@@ -729,7 +735,11 @@ async def receive_response(self) -> AsyncGenerator[Any, None]:
                             )
                             tool_tracker.start_tool_spans(message, llm_tracker.current_span_export)
                             if final_content:
-                                if extended_existing_span and final_results and final_results[-1].get("role") == "assistant":
+                                if (
+                                    extended_existing_span
+                                    and final_results
+                                    and final_results[-1].get("role") == "assistant"
+                                ):
                                     final_results[-1] = final_content
                                 else:
                                     final_results.append(final_content)
@@ -738,8 +748,7 @@ async def receive_response(self) -> AsyncGenerator[Any, None]:
                             has_tool_results = False
                             if hasattr(message, "content"):
                                 has_tool_results = any(
-                                    type(block).__name__ == BlockClassName.TOOL_RESULT
-                                    for block in message.content
+                                    type(block).__name__ == BlockClassName.TOOL_RESULT for block in message.content
                                 )
                                 content = _serialize_content_blocks(message.content)
                                 final_results.append({"content": content, "role": "user"})
diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py
index 9204968d..eb12fa3d 100644
--- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py
+++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py
@@ -11,6 +11,7 @@
 
 import pytest
 
+
 # Try to import the Claude Agent SDK - skip tests if not available
 try:
     import claude_agent_sdk as _claude_agent_sdk
@@ -42,6 +43,7 @@
 )
 from braintrust.wrappers.test_utils import verify_autoinstrument_script
 
+
 PROJECT_NAME = "test-claude-agent-sdk"
 TEST_MODEL = "claude-haiku-4-5-20251001"
 REPO_ROOT = Path(__file__).resolve().parents[5]
@@ -369,10 +371,16 @@ async def test_bundled_subagent_creates_task_span(memory_logger):
 
     llm_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.LLM]
     _assert_llm_spans_have_time_to_first_token(llm_spans)
-    assert any(subagent_span["span_id"] in llm_span["span_parents"] for subagent_span in subagent_spans for llm_span in llm_spans)
+    assert any(
+        subagent_span["span_id"] in llm_span["span_parents"]
+        for subagent_span in subagent_spans
+        for llm_span in llm_spans
+    )
 
     delegated_llm_spans = [
-        llm_span for llm_span in llm_spans if any(subagent_span["span_id"] in llm_span["span_parents"] for subagent_span in subagent_spans)
+        llm_span
+        for llm_span in llm_spans
+        if any(subagent_span["span_id"] in llm_span["span_parents"] for subagent_span in subagent_spans)
     ]
     assert delegated_llm_spans, "Expected at least one delegated LLM span nested under a subagent task span"
 
@@ -573,7 +581,9 @@ async def test_delegated_subagent_llm_and_tool_spans_nest_under_task_span(memory
             tool_use_id="call-agent",
             usage={"total_tokens": 42, "tool_uses": 1, "duration_ms": 250},
         ),
-        UserMessage(content=[ToolResultBlock(tool_use_id="call-agent", content=[TextBlock("2026.03.11 | sdk-platform")])]),
+        UserMessage(
+            content=[ToolResultBlock(tool_use_id="call-agent", content=[TextBlock("2026.03.11 | sdk-platform")])]
+        ),
         ResultMessage(),
     ]
 
@@ -684,8 +694,12 @@ async def test_multiple_subagent_orchestration_keeps_outer_agent_tool_calls_outs
         ),
         UserMessage(
             content=[
-                ToolResultBlock(tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")]),
-                ToolResultBlock(tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")]),
+                ToolResultBlock(
+                    tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")]
+                ),
+                ToolResultBlock(
+                    tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")]
+                ),
             ]
         ),
         ResultMessage(),
@@ -708,9 +722,7 @@ async def test_multiple_subagent_orchestration_keeps_outer_agent_tool_calls_outs
     agent_tool_spans = [span for span in tool_spans if span["span_attributes"]["name"] == "Agent"]
     assert len(agent_tool_spans) == 2
 
-    outer_llm_spans = [
-        llm_span for llm_span in llm_spans if root_task_span["span_id"] in llm_span["span_parents"]
-    ]
+    outer_llm_spans = [llm_span for llm_span in llm_spans if root_task_span["span_id"] in llm_span["span_parents"]]
     assert len(outer_llm_spans) == 1, f"Expected a single outer orchestration LLM span, got {len(outer_llm_spans)}"
     outer_llm_span = outer_llm_spans[0]
 
@@ -722,10 +734,12 @@ async def test_multiple_subagent_orchestration_keeps_outer_agent_tool_calls_outs
     delegated_llm_spans = [
         llm_span
         for llm_span in llm_spans
-        if alpha_task_span["span_id"] in llm_span["span_parents"] or beta_task_span["span_id"] in llm_span["span_parents"]
+        if alpha_task_span["span_id"] in llm_span["span_parents"]
+        or beta_task_span["span_id"] in llm_span["span_parents"]
     ]
     assert delegated_llm_spans, "Expected delegated LLM spans nested under delegated task spans"
 
+
 @pytest.mark.asyncio
 async def test_relay_user_messages_between_parallel_agent_calls_do_not_split_llm_span(memory_logger):
     """Relay UserMessages (subagent prompt echoes without ToolResultBlocks) between
@@ -815,8 +829,12 @@ async def test_relay_user_messages_between_parallel_agent_calls_do_not_split_llm
         # Final tool results (real turn boundary — has ToolResultBlocks)
         UserMessage(
             content=[
-                ToolResultBlock(tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")]),
-                ToolResultBlock(tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")]),
+                ToolResultBlock(
+                    tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")]
+                ),
+                ToolResultBlock(
+                    tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")]
+                ),
             ]
         ),
         # Final answer
@@ -852,7 +870,8 @@ async def test_relay_user_messages_between_parallel_agent_calls_do_not_split_llm
     # Exactly one outer LLM span should parent both Agent tool calls
     # (the final-answer LLM span is a separate, expected outer span)
     orchestration_llm_spans = [
-        llm_span for llm_span in llm_spans
+        llm_span
+        for llm_span in llm_spans
         if any(llm_span["span_id"] in agent_tool_span["span_parents"] for agent_tool_span in agent_tool_spans)
     ]
     assert len(orchestration_llm_spans) == 1, (
@@ -978,18 +997,13 @@ async def test_agent_tool_spans_encapsulate_child_task_spans(memory_logger):
     for agent_span in agent_tool_spans:
         agent_end = agent_span["metrics"]["end"]
         # Find child TASK span (parented under this Agent TOOL span)
-        children = [
-            ts for ts in child_task_spans
-            if agent_span["span_id"] in ts.get("span_parents", [])
-        ]
+        children = [ts for ts in child_task_spans if agent_span["span_id"] in ts.get("span_parents", [])]
         assert len(children) == 1, (
-            f"Agent span {agent_span['span_id']} should have exactly 1 child TASK span, "
-            f"got {len(children)}"
+            f"Agent span {agent_span['span_id']} should have exactly 1 child TASK span, got {len(children)}"
         )
         child_end = children[0]["metrics"]["end"]
         assert agent_end >= child_end, (
-            f"Agent TOOL span must encapsulate its child TASK span. "
-            f"Agent end={agent_end}, child TASK end={child_end}"
+            f"Agent TOOL span must encapsulate its child TASK span. Agent end={agent_end}, child TASK end={child_end}"
         )
 
 
@@ -1386,7 +1400,9 @@ def test_tool_span_tracker_logs_errors(memory_logger):
         )
         tracker.finish_tool_spans(
             UserMessage(
-                content=[ToolResultBlock(tool_use_id="call-err", content=[TextBlock("Division by zero")], is_error=True)]
+                content=[
+                    ToolResultBlock(tool_use_id="call-err", content=[TextBlock("Division by zero")], is_error=True)
+                ]
             )
         )
         llm_span.end()
@@ -1560,7 +1576,9 @@ def test_serialize_system_message_extracts_known_fields(message, expected):
 
 
 def test_extract_usage_from_result_message_normalizes_anthropic_tokens():
-    metrics = _extract_usage_from_result_message(ResultMessage(input_tokens=5, output_tokens=3, cache_creation_input_tokens=2))
+    metrics = _extract_usage_from_result_message(
+        ResultMessage(input_tokens=5, output_tokens=3, cache_creation_input_tokens=2)
+    )
 
     assert metrics == {
         "prompt_tokens": 7.0,
@@ -1715,7 +1733,7 @@ async def calculator_handler(args):
         nested_span = start_span(name=f"nested_tool_work_{args['a']}")
         nested_span.log(input=args)
         nested_span.end()
-        return {"content": [{"type": "text", "text": str(args['a'] + args['b'])}]}
+        return {"content": [{"type": "text", "text": str(args["a"] + args["b"])}]}
 
     calculator_tool = wrapped_tool_class(
         name="calculator",
@@ -1767,8 +1785,13 @@ async def calculator_handler(args):
     nested_span_first = _find_span_by_name(spans, "nested_tool_work_2")
     nested_span_second = _find_span_by_name(spans, "nested_tool_work_10")
 
-    assert tool_span_by_input[(("a", 2), ("b", 3), ("operation", "add"))]["span_id"] in nested_span_first["span_parents"]
-    assert tool_span_by_input[(("a", 10), ("b", 5), ("operation", "add"))]["span_id"] in nested_span_second["span_parents"]
+    assert (
+        tool_span_by_input[(("a", 2), ("b", 3), ("operation", "add"))]["span_id"] in nested_span_first["span_parents"]
+    )
+    assert (
+        tool_span_by_input[(("a", 10), ("b", 5), ("operation", "add"))]["span_id"]
+        in nested_span_second["span_parents"]
+    )
 
 
 class TestAutoInstrumentClaudeAgentSDK:
@@ -1779,6 +1802,7 @@ def test_auto_instrument_claude_agent_sdk(self):
         """Test auto_instrument patches Claude Agent SDK and creates spans."""
         verify_autoinstrument_script("test_auto_claude_agent_sdk.py")
 
+
 @pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed")
 @pytest.mark.asyncio
 async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, monkeypatch):
@@ -1809,7 +1833,9 @@ async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, m
 
         async def main() -> None:
             loop = asyncio.get_running_loop()
-            loop.set_exception_handler(lambda loop, ctx: loop_errors.append(ctx.get("exception") or ctx.get("message")))
+            loop.set_exception_handler(
+                lambda loop, ctx: loop_errors.append(ctx.get("exception") or ctx.get("message"))
+            )
 
             options = getattr(consumer_module, "ClaudeAgentOptions")(
                 model="claude-3-5-haiku-20241022",
diff --git a/py/src/braintrust/wrappers/dspy.py b/py/src/braintrust/wrappers/dspy.py
index 8fad6691..713b3cfe 100644
--- a/py/src/braintrust/wrappers/dspy.py
+++ b/py/src/braintrust/wrappers/dspy.py
@@ -53,6 +53,7 @@
 from braintrust.span_types import SpanTypeAttribute
 from wrapt import wrap_function_wrapper
 
+
 # Note: For detailed token and cost metrics, use patch_litellm() before importing DSPy.
 # The DSPy callback focuses on execution flow and span hierarchy.
 
diff --git a/py/src/braintrust/wrappers/google_genai/__init__.py b/py/src/braintrust/wrappers/google_genai/__init__.py
index f80db150..61df30ab 100644
--- a/py/src/braintrust/wrappers/google_genai/__init__.py
+++ b/py/src/braintrust/wrappers/google_genai/__init__.py
@@ -8,6 +8,7 @@
 from braintrust.span_types import SpanTypeAttribute
 from wrapt import wrap_function_wrapper
 
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/py/src/braintrust/wrappers/langchain.py b/py/src/braintrust/wrappers/langchain.py
index c723d062..6beeb578 100644
--- a/py/src/braintrust/wrappers/langchain.py
+++ b/py/src/braintrust/wrappers/langchain.py
@@ -5,6 +5,7 @@
 
 import braintrust
 
+
 _logger = logging.getLogger("braintrust.wrappers.langchain")
 
 try:
diff --git a/py/src/braintrust/wrappers/langsmith_wrapper.py b/py/src/braintrust/wrappers/langsmith_wrapper.py
index a00a9b40..b22117df 100644
--- a/py/src/braintrust/wrappers/langsmith_wrapper.py
+++ b/py/src/braintrust/wrappers/langsmith_wrapper.py
@@ -46,6 +46,7 @@ def my_function(inputs: dict) -> dict:
 from braintrust.logger import NOOP_SPAN, current_span, init_logger, traced
 from wrapt import wrap_function_wrapper
 
+
 logger = logging.getLogger(__name__)
 
 # Global list to store Braintrust eval results when running in tandem mode
diff --git a/py/src/braintrust/wrappers/litellm.py b/py/src/braintrust/wrappers/litellm.py
index 526b222f..236df998 100644
--- a/py/src/braintrust/wrappers/litellm.py
+++ b/py/src/braintrust/wrappers/litellm.py
@@ -9,6 +9,7 @@
 from braintrust.span_types import SpanTypeAttribute
 from braintrust.util import is_numeric, merge_dicts
 
+
 X_LEGACY_CACHED_HEADER = "x-cached"
 X_CACHED_HEADER = "x-bt-cached"
 
@@ -572,7 +573,6 @@ def _parse_metrics_from_usage(usage: Any) -> dict[str, Any]:
     return metrics
 
 
-
 def prettify_params(params: dict[str, Any]) -> dict[str, Any]:
     """Clean up parameters by filtering out NOT_GIVEN values and serializing response_format."""
     # Filter out NOT_GIVEN parameters
diff --git a/py/src/braintrust/wrappers/openai.py b/py/src/braintrust/wrappers/openai.py
index 82bb3903..484a769d 100644
--- a/py/src/braintrust/wrappers/openai.py
+++ b/py/src/braintrust/wrappers/openai.py
@@ -217,7 +217,7 @@ def _mcp_list_tools_log_data(self, span: tracing.Span[tracing.MCPListToolsSpanDa
             "output": span.span_data.result,
             "metadata": {
                 "server": span.span_data.server,
-            }
+            },
         }
 
     def _transcription_log_data(self, span: tracing.Span[tracing.TranscriptionSpanData]) -> dict[str, Any]:
@@ -227,7 +227,7 @@ def _transcription_log_data(self, span: tracing.Span[tracing.TranscriptionSpanDa
             "metadata": {
                 "model": span.span_data.model,
                 "model_config": span.span_data.model_config,
-            }
+            },
         }
 
     def _speech_log_data(self, span: tracing.Span[tracing.SpeechSpanData]) -> dict[str, Any]:
@@ -237,7 +237,7 @@ def _speech_log_data(self, span: tracing.Span[tracing.SpeechSpanData]) -> dict[s
             "metadata": {
                 "model": span.span_data.model,
                 "model_config": span.span_data.model_config,
-            }
+            },
         }
 
     def _speech_group_log_data(self, span: tracing.Span[tracing.SpeechGroupSpanData]) -> dict[str, Any]:
diff --git a/py/src/braintrust/wrappers/pydantic_ai.py b/py/src/braintrust/wrappers/pydantic_ai.py
index 9ed61462..e3442b85 100644
--- a/py/src/braintrust/wrappers/pydantic_ai.py
+++ b/py/src/braintrust/wrappers/pydantic_ai.py
@@ -11,6 +11,7 @@
 from braintrust.span_types import SpanTypeAttribute
 from wrapt import wrap_function_wrapper
 
+
 logger = logging.getLogger(__name__)
 
 __all__ = ["setup_pydantic_ai"]
diff --git a/py/src/braintrust/wrappers/pytest_plugin/plugin.py b/py/src/braintrust/wrappers/pytest_plugin/plugin.py
index 12eb3113..3ad84a7e 100644
--- a/py/src/braintrust/wrappers/pytest_plugin/plugin.py
+++ b/py/src/braintrust/wrappers/pytest_plugin/plugin.py
@@ -14,6 +14,7 @@
 
 import pytest
 
+
 if TYPE_CHECKING:
     from braintrust.logger import Span
 
diff --git a/py/src/braintrust/wrappers/pytest_plugin/test_plugin.py b/py/src/braintrust/wrappers/pytest_plugin/test_plugin.py
index 4baaac49..4f5bc4ce 100644
--- a/py/src/braintrust/wrappers/pytest_plugin/test_plugin.py
+++ b/py/src/braintrust/wrappers/pytest_plugin/test_plugin.py
@@ -13,13 +13,13 @@
 
 import pytest
 
+
 # ---------------------------------------------------------------------------
 # Tell pytest we need the pytester plugin.
 # ---------------------------------------------------------------------------
 pytest_plugins = ["pytester"]
 
 
-
 # ---------------------------------------------------------------------------
 # Helper: inline conftest that mocks braintrust.init for child tests.
 #
diff --git a/py/src/braintrust/wrappers/test_anthropic.py b/py/src/braintrust/wrappers/test_anthropic.py
index 5d8da9f3..54182597 100644
--- a/py/src/braintrust/wrappers/test_anthropic.py
+++ b/py/src/braintrust/wrappers/test_anthropic.py
@@ -11,6 +11,7 @@
 from braintrust.wrappers.anthropic import wrap_anthropic
 from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
 
+
 TEST_ORG_ID = "test-org-123"
 PROJECT_NAME = "test-anthropic-app"
 MODEL = "claude-3-haiku-20240307"  # use the cheapest model since answers dont matter
@@ -448,7 +449,9 @@ async def test_anthropic_beta_messages_create_async(memory_logger):
     assert "10" in span["output"]["content"][0]["text"]
 
 
-@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path", "body"])  # exclude query - varies by SDK version
+@pytest.mark.vcr(
+    match_on=["method", "scheme", "host", "port", "path", "body"]
+)  # exclude query - varies by SDK version
 @pytest.mark.asyncio
 async def test_anthropic_beta_messages_streaming_async(memory_logger):
     assert not memory_logger.pop()
diff --git a/py/src/braintrust/wrappers/test_dspy.py b/py/src/braintrust/wrappers/test_dspy.py
index a9faa6af..edbc6334 100644
--- a/py/src/braintrust/wrappers/test_dspy.py
+++ b/py/src/braintrust/wrappers/test_dspy.py
@@ -9,6 +9,7 @@
 from braintrust.wrappers.dspy import BraintrustDSpyCallback
 from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
 
+
 PROJECT_NAME = "test-dspy-app"
 MODEL = "openai/gpt-4o-mini"
 
diff --git a/py/src/braintrust/wrappers/test_google_genai.py b/py/src/braintrust/wrappers/test_google_genai.py
index 02fc21f5..51b3b090 100644
--- a/py/src/braintrust/wrappers/test_google_genai.py
+++ b/py/src/braintrust/wrappers/test_google_genai.py
@@ -10,6 +10,7 @@
 from google.genai import types
 from google.genai.client import Client
 
+
 PROJECT_NAME = "test-genai-app"
 MODEL = "gemini-2.0-flash-001"
 FIXTURES_DIR = Path(__file__).parent.parent.parent.parent.parent / "internal/golden/fixtures"
diff --git a/py/src/braintrust/wrappers/test_langsmith_wrapper.py b/py/src/braintrust/wrappers/test_langsmith_wrapper.py
index c009a410..c25256c2 100644
--- a/py/src/braintrust/wrappers/test_langsmith_wrapper.py
+++ b/py/src/braintrust/wrappers/test_langsmith_wrapper.py
@@ -9,7 +9,6 @@
 Tests for the LangSmith wrapper to ensure compatibility with LangSmith's API.
 """
 
-
 from braintrust.wrappers.langsmith_wrapper import (
     _convert_langsmith_data,
     _is_patched,
@@ -292,7 +291,11 @@ def test_make_braintrust_scorer_handles_wrapped_outputs(self):
         def langsmith_evaluator(inputs, outputs, reference_outputs):
             # outputs will be wrapped as {"output": value} for non-dict results
             actual = outputs.get("output", outputs)
-            expected = reference_outputs.get("output", reference_outputs) if isinstance(reference_outputs, dict) else reference_outputs
+            expected = (
+                reference_outputs.get("output", reference_outputs)
+                if isinstance(reference_outputs, dict)
+                else reference_outputs
+            )
             return {"key": "match", "score": 1.0 if actual == expected else 0.0}
 
         converted = _make_braintrust_scorer(langsmith_evaluator)
diff --git a/py/src/braintrust/wrappers/test_litellm.py b/py/src/braintrust/wrappers/test_litellm.py
index 4639d809..6020634c 100644
--- a/py/src/braintrust/wrappers/test_litellm.py
+++ b/py/src/braintrust/wrappers/test_litellm.py
@@ -8,6 +8,7 @@
 from braintrust.wrappers.litellm import wrap_litellm
 from braintrust.wrappers.test_utils import assert_metrics_are_valid, verify_autoinstrument_script
 
+
 TEST_ORG_ID = "test-org-litellm-py-tracing"
 PROJECT_NAME = "test-project-litellm-py-tracing"
 TEST_MODEL = "gpt-4o-mini"  # cheapest model for tests
diff --git a/py/src/braintrust/wrappers/test_oai_attachments.py b/py/src/braintrust/wrappers/test_oai_attachments.py
index 737b20da..3e065f3c 100644
--- a/py/src/braintrust/wrappers/test_oai_attachments.py
+++ b/py/src/braintrust/wrappers/test_oai_attachments.py
@@ -1,4 +1,5 @@
 """Tests for OpenAI wrapper attachment processing."""
+
 import time
 
 import openai
@@ -7,6 +8,7 @@
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.test_utils import assert_metrics_are_valid
 
+
 PROJECT_NAME = "test-project-openai-attachment-processing"
 TEST_MODEL = "gpt-4o-mini"
 
diff --git a/py/src/braintrust/wrappers/test_openai.py b/py/src/braintrust/wrappers/test_openai.py
index e832993d..6ab9b343 100644
--- a/py/src/braintrust/wrappers/test_openai.py
+++ b/py/src/braintrust/wrappers/test_openai.py
@@ -12,6 +12,7 @@
 from openai._types import NOT_GIVEN
 from pydantic import BaseModel
 
+
 TEST_ORG_ID = "test-org-openai-py-tracing"
 PROJECT_NAME = "test-project-openai-py-tracing"
 TEST_MODEL = "gpt-4o-mini"  # cheapest model for tests
diff --git a/py/src/braintrust/wrappers/test_openrouter.py b/py/src/braintrust/wrappers/test_openrouter.py
index 1d750659..39365e1e 100644
--- a/py/src/braintrust/wrappers/test_openrouter.py
+++ b/py/src/braintrust/wrappers/test_openrouter.py
@@ -15,6 +15,7 @@
 from braintrust.wrappers.test_utils import assert_metrics_are_valid
 from openai import AsyncOpenAI, OpenAI
 
+
 PROJECT_NAME = "test-openrouter"
 TEST_MODEL = "openai/gpt-4o-mini"
 
diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
index 14088f74..b794b18b 100644
--- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
+++ b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
@@ -16,6 +16,7 @@
 from pydantic_ai.messages import ModelRequest, UserPromptPart
 from pydantic_ai.usage import UsageLimits
 
+
 PROJECT_NAME = "test-pydantic-ai-integration"
 MODEL = "openai:gpt-4o-mini"  # Use cheaper model for tests
 TEST_PROMPT = "What is 2+2? Answer with just the number."
diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py b/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py
index 5bbe252b..661b7bf7 100644
--- a/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py
+++ b/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py
@@ -13,6 +13,7 @@
 from braintrust.test_helpers import init_test_logger
 from pydantic_ai import Agent, ModelSettings
 
+
 PROJECT_NAME = "test-pydantic-ai-logfire"
 MODEL = "openai:gpt-4o-mini"
 TEST_PROMPT = "What is 2+2? Answer with just the number."
diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py b/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py
index b7e2bd9c..c1dfceb3 100644
--- a/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py
+++ b/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py
@@ -5,6 +5,7 @@
 from openai import AsyncOpenAI
 from pydantic_ai import Agent  # pylint: disable=import-error
 
+
 try:
     # Try new API first (pydantic_ai >= 1.0)
     from pydantic_ai.models.openai import OpenAIChatModel  # pylint: disable=import-error
@@ -20,6 +21,7 @@
 from braintrust.test_helpers import init_test_logger
 from pydantic_ai.providers.openai import OpenAIProvider  # pylint: disable=import-error
 
+
 PROJECT_NAME = "test-pydantic-ai"
 MODEL = "gpt-3.5-turbo"  # Use a cheaper model for testing
 TEST_PROMPT = "What is the capital of Italy?"
diff --git a/py/src/braintrust/wrappers/test_utils.py b/py/src/braintrust/wrappers/test_utils.py
index be3b92f9..80d9d661 100644
--- a/py/src/braintrust/wrappers/test_utils.py
+++ b/py/src/braintrust/wrappers/test_utils.py
@@ -10,6 +10,7 @@
 from braintrust.conftest import get_vcr_config
 from braintrust.test_helpers import init_test_logger
 
+
 # Source directory paths (resolved to handle installed vs source locations)
 _SOURCE_DIR = Path(__file__).resolve().parent
 AUTO_TEST_SCRIPTS_DIR = _SOURCE_DIR / "auto_test_scripts"
@@ -18,9 +19,7 @@
 CASSETTES_DIR = Path(os.environ.get("BRAINTRUST_CASSETTES_DIR", _SOURCE_DIR / "cassettes"))
 
 
-def run_in_subprocess(
-    code: str, timeout: int = 30, env: dict[str, str] | None = None
-) -> subprocess.CompletedProcess:
+def run_in_subprocess(code: str, timeout: int = 30, env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
     """Run Python code in a fresh subprocess."""
     run_env = os.environ.copy()
     if env:
@@ -43,9 +42,7 @@ def verify_autoinstrument_script(script_name: str, timeout: int = 30) -> subproc
     # Pass cassettes dir to subprocess since it may use installed package
     env = os.environ.copy()
     env["BRAINTRUST_CASSETTES_DIR"] = str(_SOURCE_DIR / "cassettes")
-    env["BRAINTRUST_CLAUDE_AGENT_SDK_CASSETTES_DIR"] = str(
-        _SOURCE_DIR / "claude_agent_sdk" / "cassettes"
-    )
+    env["BRAINTRUST_CLAUDE_AGENT_SDK_CASSETTES_DIR"] = str(_SOURCE_DIR / "claude_agent_sdk" / "cassettes")
     result = subprocess.run(
         [sys.executable, str(script_path)],
         capture_output=True,
diff --git a/py/src/braintrust/wrappers/threads.py b/py/src/braintrust/wrappers/threads.py
index 4572e638..71c58f25 100644
--- a/py/src/braintrust/wrappers/threads.py
+++ b/py/src/braintrust/wrappers/threads.py
@@ -7,6 +7,7 @@
 
 from wrapt import wrap_function_wrapper  # pyright: ignore[reportUnknownVariableType, reportMissingTypeStubs]
 
+
 logger = logging.getLogger(__name__)
 
 __all__ = ["setup_threads", "patch_thread", "patch_thread_pool_executor"]
diff --git a/pyproject.toml b/pyproject.toml
index f0618c8f..ec2cdf1f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,3 @@
-[tool.black]
-line-length = 119
-
 [tool.ruff]
 line-length = 119
 
@@ -11,6 +8,8 @@ select = [
 ]
 [tool.ruff.lint.isort]
 known-third-party = ["braintrust", "braintrust_local", "autoevals"]
+lines-after-imports = 2
+split-on-trailing-comma = true
 
 [tool.pytest.ini_options]
 asyncio_mode = "strict"