diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 71ccaac7..00000000 --- a/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 119 -ignore = E402, E203, E501, W503 diff --git a/.isort.cfg b/.isort.cfg deleted file mode 100644 index 2a4365a4..00000000 --- a/.isort.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[settings] -line_length=119 -multi_line_output=3 -use_parentheses=true -lines_after_imports=2 -include_trailing_comma=True diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a19c313..31549ae0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,8 @@ repos: # Ruff version. rev: v0.12.7 hooks: - - id: ruff + - id: ruff-format + - id: ruff-check args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/codespell-project/codespell rev: v2.2.5 diff --git a/.prettierignore b/.prettierignore deleted file mode 100644 index 66a1eb7c..00000000 --- a/.prettierignore +++ /dev/null @@ -1,2 +0,0 @@ -# Patch files use specific whitespace formatting required by unified diff format -patches/**/*.patch diff --git a/integrations/adk-py/examples/mcp_tracing/agent.py b/integrations/adk-py/examples/mcp_tracing/agent.py index 40ca8869..eb687f7f 100644 --- a/integrations/adk-py/examples/mcp_tracing/agent.py +++ b/integrations/adk-py/examples/mcp_tracing/agent.py @@ -61,9 +61,7 @@ async def main(): SESSION_ID = "demo-session" session_service = InMemorySessionService() - await session_service.create_session( - app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID - ) + await session_service.create_session(app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID) runner = Runner(agent=agent, app_name=APP_NAME, session_service=session_service) @@ -74,15 +72,9 @@ async def main(): parts=[types.Part(text="What files are in /tmp? Just list a few.")], ) - async for event in runner.run_async( - user_id=USER_ID, session_id=SESSION_ID, new_message=user_msg - ): + async for event in runner.run_async(user_id=USER_ID, session_id=SESSION_ID, new_message=user_msg): if event.is_final_response(): - text = ( - event.content.parts[0].text - if event.content and event.content.parts - else "No response" - ) + text = event.content.parts[0].text if event.content and event.content.parts else "No response" print(f"Agent response: {text}\n") print("=== Trace complete ===") diff --git a/integrations/adk-py/pyproject.toml b/integrations/adk-py/pyproject.toml index 7e93963e..c988e970 100644 --- a/integrations/adk-py/pyproject.toml +++ b/integrations/adk-py/pyproject.toml @@ -49,10 +49,6 @@ dev = [ "ruff>=0.12.9", ] -[tool.isort] -profile = "black" -line_length = 120 - [tool.ruff] line-length = 120 diff --git a/integrations/langchain-py/pyproject.toml b/integrations/langchain-py/pyproject.toml index 6aa1e850..9bbf9d7a 100644 --- a/integrations/langchain-py/pyproject.toml +++ b/integrations/langchain-py/pyproject.toml @@ -44,12 +44,8 @@ members = [ [dependency-groups] dev = [ - "black", "build", - "flake8", - "flake8-isort", "httpx", - "isort==5.12.0", "langchain-anthropic>=0.3.20", "langchain-openai", "langgraph>=0.2.1,<0.4.0", @@ -62,16 +58,6 @@ dev = [ "twine", ] -[tool.black] -line-length = 120 -target-version = ['py310'] - -[tool.isort] -profile = "black" -line_length = 120 -known_first_party = ["braintrust_langchain"] -known_third_party = ["braintrust", "langchain"] - [tool.ruff] line-length = 120 diff --git a/integrations/langchain-py/src/braintrust_langchain/callbacks.py b/integrations/langchain-py/src/braintrust_langchain/callbacks.py index 871253a1..016a1268 100644 --- a/integrations/langchain-py/src/braintrust_langchain/callbacks.py +++ b/integrations/langchain-py/src/braintrust_langchain/callbacks.py @@ -6,10 +6,6 @@ from re import Pattern from typing import ( Any, - Dict, - List, - Optional, - Set, TypedDict, Union, ) diff --git a/integrations/langchain-py/src/braintrust_langchain/context.py b/integrations/langchain-py/src/braintrust_langchain/context.py index 0c997de0..5c6bb4e8 100644 --- a/integrations/langchain-py/src/braintrust_langchain/context.py +++ b/integrations/langchain-py/src/braintrust_langchain/context.py @@ -1,5 +1,4 @@ from contextvars import ContextVar -from typing import Optional from langchain_core.tracers.context import register_configure_hook diff --git a/integrations/langchain-py/src/tests/test_callbacks.py b/integrations/langchain-py/src/tests/test_callbacks.py index ab17182a..8cc9f926 100644 --- a/integrations/langchain-py/src/tests/test_callbacks.py +++ b/integrations/langchain-py/src/tests/test_callbacks.py @@ -149,6 +149,7 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger): ], ) + @pytest.mark.vcr def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger): logger, memory_logger = logger_memory_logger diff --git a/internal/golden/adk-py-v1/google_adk.py b/internal/golden/adk-py-v1/google_adk.py index 5fcff824..a0b82811 100644 --- a/internal/golden/adk-py-v1/google_adk.py +++ b/internal/golden/adk-py-v1/google_adk.py @@ -13,6 +13,7 @@ from google.adk.sessions import InMemorySessionService from google.genai import types + setup_adk(project_name="golden-py-adk") FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" diff --git a/internal/golden/genai-py-v1/google_genai.py b/internal/golden/genai-py-v1/google_genai.py index 79284096..46ad47e8 100644 --- a/internal/golden/genai-py-v1/google_genai.py +++ b/internal/golden/genai-py-v1/google_genai.py @@ -11,6 +11,7 @@ from google.genai import types from google.genai.client import Client + setup_genai(project_name="golden-py-genai") FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" diff --git a/internal/golden/langchain-py-v0/langchain.py b/internal/golden/langchain-py-v0/langchain.py index 93eac31e..c8070089 100644 --- a/internal/golden/langchain-py-v0/langchain.py +++ b/internal/golden/langchain-py-v0/langchain.py @@ -11,6 +11,7 @@ from langchain_core.tools import tool from langchain_openai import ChatOpenAI + init_logger(project="golden-py-langchain-v0") handler = BraintrustCallbackHandler() diff --git a/internal/golden/langchain-py-v1/langchain.py b/internal/golden/langchain-py-v1/langchain.py index 37e220dd..bf2c3dd0 100644 --- a/internal/golden/langchain-py-v1/langchain.py +++ b/internal/golden/langchain-py-v1/langchain.py @@ -11,6 +11,7 @@ from langchain_core.tools import tool from langchain_openai import ChatOpenAI + init_logger(project="golden-py-langchain-v1") handler = BraintrustCallbackHandler() diff --git a/internal/golden/pydantic-ai-v1/pydantic_ai_test.py b/internal/golden/pydantic-ai-v1/pydantic_ai_test.py index c6d80a75..8a6fde51 100644 --- a/internal/golden/pydantic-ai-v1/pydantic_ai_test.py +++ b/internal/golden/pydantic-ai-v1/pydantic_ai_test.py @@ -21,6 +21,7 @@ ) from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel, OpenAIResponsesModelSettings + setup_pydantic_ai(project_name="golden-py-pydantic_ai") FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" diff --git a/py/benchmarks/perf.py b/py/benchmarks/perf.py index a29d7c24..128ae2f2 100644 --- a/py/benchmarks/perf.py +++ b/py/benchmarks/perf.py @@ -3,6 +3,7 @@ import braintrust from braintrust import traced + LOOPS = 2000 braintrust.init_logger(project="perf_test") diff --git a/py/examples/adk/auto.py b/py/examples/adk/auto.py index 60ef737f..fe21bbfb 100644 --- a/py/examples/adk/auto.py +++ b/py/examples/adk/auto.py @@ -10,6 +10,7 @@ import braintrust + # Auto-instrument all supported libraries including Google ADK braintrust.auto_instrument() diff --git a/py/examples/adk/manual_patching.py b/py/examples/adk/manual_patching.py index b35e1131..332eb8a8 100644 --- a/py/examples/adk/manual_patching.py +++ b/py/examples/adk/manual_patching.py @@ -10,6 +10,7 @@ from braintrust.wrappers.adk import setup_adk + # Setup ADK tracing with a specific project setup_adk(project_name="my-adk-project") diff --git a/py/examples/agno/async_simple_agent_stream.py b/py/examples/agno/async_simple_agent_stream.py index e3531609..b6c3717a 100644 --- a/py/examples/agno/async_simple_agent_stream.py +++ b/py/examples/agno/async_simple_agent_stream.py @@ -2,6 +2,7 @@ from braintrust.wrappers.agno import setup_agno + setup_agno(project_name="simple-agent-project") from agno.agent import Agent diff --git a/py/examples/agno/async_team_agent.py b/py/examples/agno/async_team_agent.py index 47a9b83d..723e7be7 100644 --- a/py/examples/agno/async_team_agent.py +++ b/py/examples/agno/async_team_agent.py @@ -2,6 +2,7 @@ from braintrust.wrappers.agno import setup_agno + # Set up Braintrust observability setup_agno(project_name="async-team-agent-project") diff --git a/py/examples/agno/simple_agent.py b/py/examples/agno/simple_agent.py index ce8822a2..a69ed279 100644 --- a/py/examples/agno/simple_agent.py +++ b/py/examples/agno/simple_agent.py @@ -1,11 +1,13 @@ from braintrust.wrappers.agno import setup_agno + setup_agno(project_name="simple-agent-project") from agno.agent import Agent from agno.models.openai import OpenAIChat from agno.tools.yfinance import YFinanceTools + # Create and configure the agent agent = Agent( name="Stock Price Agent", diff --git a/py/examples/agno/simple_agent_stream.py b/py/examples/agno/simple_agent_stream.py index d842649c..7e6961b1 100644 --- a/py/examples/agno/simple_agent_stream.py +++ b/py/examples/agno/simple_agent_stream.py @@ -1,11 +1,13 @@ from braintrust.wrappers.agno import setup_agno + setup_agno(project_name="simple-agent-project") from agno.agent import Agent from agno.models.openai import OpenAIChat from agno.tools.yfinance import YFinanceTools + # Create and configure the agent agent = Agent( name="Stock Price Agent", diff --git a/py/examples/agno/team_agent.py b/py/examples/agno/team_agent.py index c9bc86f6..db154e06 100644 --- a/py/examples/agno/team_agent.py +++ b/py/examples/agno/team_agent.py @@ -1,5 +1,6 @@ from braintrust.wrappers.agno import setup_agno + # Set up Braintrust observability setup_agno(project_name="team-agent-project") @@ -8,6 +9,7 @@ from agno.team import Team from agno.tools.yfinance import YFinanceTools + # Create specialized agents for the team research_agent = Agent( name="Research Analyst", diff --git a/py/examples/anthropic_async.py b/py/examples/anthropic_async.py index 772084bb..c9cacdef 100755 --- a/py/examples/anthropic_async.py +++ b/py/examples/anthropic_async.py @@ -5,6 +5,7 @@ import braintrust from anthropic import AsyncAnthropic + # Initialize Anthropic client (needs ANTHROPIC_API_KEY) client = braintrust.wrap_anthropic(AsyncAnthropic()) diff --git a/py/examples/anthropic_sync.py b/py/examples/anthropic_sync.py index 23e70fdb..16bb34f5 100755 --- a/py/examples/anthropic_sync.py +++ b/py/examples/anthropic_sync.py @@ -6,6 +6,7 @@ import anthropic import braintrust + # Initialize Anthropic client (needs ANTHROPIC_API_KEY) client = braintrust.wrap_anthropic(anthropic.Anthropic()) braintrust.init_logger(project="example-anthropic-app") diff --git a/py/examples/auto_instrument.py b/py/examples/auto_instrument.py index 33fa278a..7215826f 100644 --- a/py/examples/auto_instrument.py +++ b/py/examples/auto_instrument.py @@ -17,6 +17,7 @@ import braintrust + # One-line instrumentation - call this BEFORE importing AI libraries # This patches all supported libraries automatically results = braintrust.auto_instrument() @@ -36,6 +37,7 @@ import anthropic import openai + # Create clients - they're automatically wrapped openai_client = openai.OpenAI() anthropic_client = anthropic.Anthropic() diff --git a/py/examples/dspy/example.py b/py/examples/dspy/example.py index 8a53b9ae..257c4a20 100644 --- a/py/examples/dspy/example.py +++ b/py/examples/dspy/example.py @@ -11,6 +11,7 @@ # IMPORTANT: Patch LiteLLM BEFORE importing DSPy to get detailed token metrics from braintrust.wrappers.litellm import patch_litellm + patch_litellm() # Now import DSPy diff --git a/py/examples/evals/eval_example.py b/py/examples/evals/eval_example.py index 1d605a08..ced8a09c 100644 --- a/py/examples/evals/eval_example.py +++ b/py/examples/evals/eval_example.py @@ -2,6 +2,7 @@ from braintrust import Eval + NUM_EXAMPLES = 10 @@ -12,9 +13,9 @@ async def exact_match_scorer(input, output, expected, trace=None): score = 1.0 if output == expected else 0.0 if trace: - print("\n" + "="*80) + print("\n" + "=" * 80) print(f"🔍 TRACE INFO for input: {input}") - print("="*80) + print("=" * 80) # Print trace configuration config = trace.get_configuration() @@ -27,13 +28,13 @@ async def exact_match_scorer(input, output, expected, trace=None): try: spans = await trace.get_spans() print(f"\n✨ Found {len(spans)} spans:") - print("-"*80) + print("-" * 80) for i, span in enumerate(spans, 1): print(f"\n Span {i}:") print(f" ID: {span.span_id}") - span_type = span.span_attributes.get('type', 'N/A') if span.span_attributes else 'N/A' - span_name = span.span_attributes.get('name', 'N/A') if span.span_attributes else 'N/A' + span_type = span.span_attributes.get("type", "N/A") if span.span_attributes else "N/A" + span_name = span.span_attributes.get("name", "N/A") if span.span_attributes else "N/A" print(f" Type: {span_type}") print(f" Name: {span_name}") @@ -50,10 +51,11 @@ async def exact_match_scorer(input, output, expected, trace=None): if span.metadata: print(f" Metadata: {list(span.metadata.keys())}") - print("\n" + "="*80 + "\n") + print("\n" + "=" * 80 + "\n") except Exception as e: print(f"\n⚠️ Error fetching spans: {e}") import traceback + traceback.print_exc() else: print(f"⚠️ No trace available for input: {input}") diff --git a/py/examples/langsmith/eval_example.py b/py/examples/langsmith/eval_example.py index 68c55152..944358ff 100644 --- a/py/examples/langsmith/eval_example.py +++ b/py/examples/langsmith/eval_example.py @@ -11,6 +11,7 @@ import os + # Enable LangSmith tracing (required for traces to be sent to LangSmith) os.environ.setdefault("LANGCHAIN_TRACING_V2", "true") os.environ.setdefault("LANGCHAIN_PROJECT", "examples-wrappers-langsmith-eval") @@ -18,6 +19,7 @@ # IMPORTANT: Call setup_langsmith BEFORE importing from langsmith from braintrust.wrappers.langsmith_wrapper import setup_langsmith + # Set BRAINTRUST_STANDALONE=1 to completely replace LangSmith with Braintrust standalone = os.environ.get("BRAINTRUST_STANDALONE", "").lower() in ("1", "true", "yes") diff --git a/py/examples/langsmith/tracing_example.py b/py/examples/langsmith/tracing_example.py index 242609f1..b8bfbd87 100644 --- a/py/examples/langsmith/tracing_example.py +++ b/py/examples/langsmith/tracing_example.py @@ -10,6 +10,7 @@ import os + # Enable LangSmith tracing (required for traces to be sent to LangSmith) os.environ.setdefault("LANGCHAIN_TRACING_V2", "true") os.environ.setdefault("LANGCHAIN_PROJECT", "examples-wrappers-langsmith-tracing") @@ -17,6 +18,7 @@ # IMPORTANT: Call setup_langsmith BEFORE importing from langsmith from braintrust.wrappers.langsmith_wrapper import setup_langsmith + # Set BRAINTRUST_STANDALONE=1 to completely replace LangSmith with Braintrust standalone = os.environ.get("BRAINTRUST_STANDALONE", "").lower() in ("1", "true", "yes") diff --git a/py/examples/openai_example.py b/py/examples/openai_example.py index a9c731eb..a0ead8d4 100755 --- a/py/examples/openai_example.py +++ b/py/examples/openai_example.py @@ -3,6 +3,7 @@ from braintrust import init_logger, traced, wrap_openai from openai import OpenAI + logger = init_logger(project="example-openai-project") client = wrap_openai(OpenAI()) diff --git a/py/examples/otel/basic_otel_example.py b/py/examples/otel/basic_otel_example.py index cdec195c..1cdd56b0 100755 --- a/py/examples/otel/basic_otel_example.py +++ b/py/examples/otel/basic_otel_example.py @@ -9,6 +9,7 @@ import os import time + # Set environment variables os.environ.setdefault("BRAINTRUST_PARENT", "project_name:otel-examples") @@ -18,6 +19,7 @@ from opentelemetry.instrumentation.openai import OpenAIInstrumentor from opentelemetry.sdk.trace import TracerProvider + # Set up the tracer provider provider = TracerProvider() trace.set_tracer_provider(provider) diff --git a/py/examples/otel/bt-otel-context.py b/py/examples/otel/bt-otel-context.py index cd3084e4..bf1c9bec 100644 --- a/py/examples/otel/bt-otel-context.py +++ b/py/examples/otel/bt-otel-context.py @@ -11,11 +11,13 @@ import os -os.environ['BRAINTRUST_OTEL_COMPAT'] = 'true' + +os.environ["BRAINTRUST_OTEL_COMPAT"] = "true" import braintrust from braintrust.otel import add_braintrust_span_processor + PROJECT_NAME = "mixed-otel-braintrust-python-2" from opentelemetry import trace @@ -30,14 +32,13 @@ def setup_otel(): return trace.get_tracer(__name__, "1.0.0") + def main(): # Setup braintrust.login() tracer = setup_otel() - project = braintrust.init_logger( - project=PROJECT_NAME - ) + project = braintrust.init_logger(project=PROJECT_NAME) # Demo 1: BT project as root span with OTEL instrumentation inside with project.start_span("trace1_root_bt") as session_span: @@ -67,10 +68,9 @@ def trace1_child_bt_traced(): trace1_child_bt_traced() - # Demo 2: OTEL as root span with BT spans inside with tracer.start_as_current_span("trace2_root_otel") as otel_root: - otel_trace_id = format(otel_root.get_span_context().trace_id, '032x') + otel_trace_id = format(otel_root.get_span_context().trace_id, "032x") otel_root.set_attribute("type", "otel_root") otel_root.add_event("otel_root_start") @@ -86,6 +86,7 @@ def trace1_child_bt_traced(): @braintrust.traced def trace2_grandchild_bt1(): pass + trace2_grandchild_bt1() # Nested BT span should also inherit same trace ID @@ -95,6 +96,7 @@ def trace2_grandchild_bt1(): @braintrust.traced def trace2_child_bt_traced(): pass + trace2_child_bt_traced() otel_root.add_event("otel_root_end") @@ -103,7 +105,7 @@ def trace2_child_bt_traced(): project.flush() # Then flush OTEL spans so they can attach to existing parents - if hasattr(trace.get_tracer_provider(), 'force_flush'): + if hasattr(trace.get_tracer_provider(), "force_flush"): trace.get_tracer_provider().force_flush(timeout_millis=5000) diff --git a/py/examples/otel/distributed-tracing.py b/py/examples/otel/distributed-tracing.py index f1c0bfeb..1499db29 100644 --- a/py/examples/otel/distributed-tracing.py +++ b/py/examples/otel/distributed-tracing.py @@ -16,8 +16,9 @@ import os + # Enable OTEL compatibility mode -os.environ['BRAINTRUST_OTEL_COMPAT'] = 'true' +os.environ["BRAINTRUST_OTEL_COMPAT"] = "true" import braintrust from braintrust.otel import ( @@ -31,14 +32,14 @@ from opentelemetry.propagate import inject from opentelemetry.sdk.trace import TracerProvider + PROJECT_NAME = "distributed-tracing-demo" def setup_otel(): """Setup OTEL instrumentation with Braintrust processor.""" provider = TracerProvider() - add_braintrust_span_processor(provider, - parent=f"project_name:different-project") + add_braintrust_span_processor(provider, parent=f"project_name:different-project") trace.set_tracer_provider(provider) return trace.get_tracer(__name__, "1.0.0") @@ -61,10 +62,9 @@ def service_b_process_request(exported_context: str, tracer, project): with tracer.start_as_current_span("service_b.root") as fetch_span: # Nested operation in Service B with tracer.start_as_current_span("service_b.child"): - trace_id = format(fetch_span.get_span_context().trace_id, '032x') + trace_id = format(fetch_span.get_span_context().trace_id, "032x") print(f" Created OTEL child spans (trace_id: {trace_id})") - # Ensure 'braintrust.parent' is set on the baggage. add_span_parent_to_baggage(fetch_span) @@ -94,9 +94,7 @@ def service_c_process_request(headers: dict, project): span_id = analytics_span.span_id print(f" Created BT span as child of OTEL parent (span_id: {span_id[:16]}...)") analytics_span.log( - input="Analytics data from Service B", - output="Processed analytics", - metadata={"service": "analytics"} + input="Analytics data from Service B", output="Processed analytics", metadata={"service": "analytics"} ) @@ -130,7 +128,7 @@ def main(): # Flush all data project.flush() - if hasattr(trace.get_tracer_provider(), 'force_flush'): + if hasattr(trace.get_tracer_provider(), "force_flush"): trace.get_tracer_provider().force_flush(timeout_millis=5000) print(f"\n✓ Trace complete! All 3 services share trace_id: {trace_id[:16]}...") diff --git a/py/examples/otel/filtered_otel_example.py b/py/examples/otel/filtered_otel_example.py index 25910d22..e7518889 100755 --- a/py/examples/otel/filtered_otel_example.py +++ b/py/examples/otel/filtered_otel_example.py @@ -9,6 +9,7 @@ import os import time + # Set environment variables os.environ.setdefault("BRAINTRUST_PARENT", "project_name:otel-examples") os.environ.setdefault("BRAINTRUST_OTEL_FILTER_AI_SPANS", "false") @@ -19,6 +20,7 @@ from opentelemetry.instrumentation.openai import OpenAIInstrumentor from opentelemetry.sdk.trace import TracerProvider + # Set up the tracer provider provider = TracerProvider() trace.set_tracer_provider(provider) diff --git a/py/examples/otel/otel_eval.py b/py/examples/otel/otel_eval.py index 8fb72bfe..db201432 100644 --- a/py/examples/otel/otel_eval.py +++ b/py/examples/otel/otel_eval.py @@ -7,8 +7,9 @@ import os + # Enable OTEL compatibility -os.environ['BRAINTRUST_OTEL_COMPAT'] = 'true' +os.environ["BRAINTRUST_OTEL_COMPAT"] = "true" from autoevals import Levenshtein from braintrust import Eval @@ -16,12 +17,14 @@ from opentelemetry import trace from opentelemetry.sdk.trace import TracerProvider + # Setup OTEL tracing provider = TracerProvider() processor = BraintrustSpanProcessor(parent="project_name:otel-eval-example") provider.add_span_processor(processor) trace.set_tracer_provider(provider) + def task_with_otel_tracing(input): tracer = trace.get_tracer(__name__) @@ -34,6 +37,7 @@ def task_with_otel_tracing(input): span.set_attribute("output", result) return result + # Run evaluation with OTEL tracing Eval( "Say Hi Bot", diff --git a/py/examples/pydantic_ai_example.py b/py/examples/pydantic_ai_example.py index 0092278d..da72a41b 100644 --- a/py/examples/pydantic_ai_example.py +++ b/py/examples/pydantic_ai_example.py @@ -4,11 +4,13 @@ import braintrust + braintrust.auto_instrument() logger = braintrust.init_logger(project="example-pydantic-ai-project") from pydantic_ai import Agent + agent = Agent("openai:gpt-4o", system_prompt="You are a helpful assistant.") diff --git a/py/examples/temporal/worker.py b/py/examples/temporal/worker.py index 631d6c21..847a2786 100644 --- a/py/examples/temporal/worker.py +++ b/py/examples/temporal/worker.py @@ -6,6 +6,7 @@ # Import only what we need to avoid loading optional dependencies from braintrust.logger import init_logger + # Initialize logger at module level before importing plugin init_logger(project="temporal-example") diff --git a/py/examples/temporal/workflow.py b/py/examples/temporal/workflow.py index db64a679..0c0fe6d0 100644 --- a/py/examples/temporal/workflow.py +++ b/py/examples/temporal/workflow.py @@ -7,6 +7,7 @@ from temporalio import activity, workflow from temporalio.common import RetryPolicy + TASK_QUEUE_NAME = "braintrust-example-task-queue" diff --git a/py/noxfile.py b/py/noxfile.py index d5dd376c..be0e7798 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -17,6 +17,7 @@ import nox + # much faster than pip nox.options.default_venv_backend = "uv" diff --git a/py/requirements-dev.txt b/py/requirements-dev.txt index 48020b85..f8bc52e0 100644 --- a/py/requirements-dev.txt +++ b/py/requirements-dev.txt @@ -1,9 +1,5 @@ # Also include build dependencies -black datamodel-code-generator>=0.53.0 -flake8 -flake8-isort -isort==5.12.0 nox pre-commit pydoc-markdown diff --git a/py/scripts/generate_types.py b/py/scripts/generate_types.py index dcaa9e40..45670af6 100755 --- a/py/scripts/generate_types.py +++ b/py/scripts/generate_types.py @@ -6,6 +6,7 @@ import subprocess import sys + SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) OPENAPI_SPEC_PATH = os.path.join(SCRIPT_DIR, "../../generated_types.json") INTERNAL_TYPES_OUTPUT_PATH = os.path.join(SCRIPT_DIR, "../src/braintrust/_generated_types.py") @@ -61,7 +62,9 @@ def cleanup_internal_types(): # optional-but-not-nullable TypedDicts. contents = re.sub( r"(\s[A-Za-z0-9_]+: NotRequired\[)(.+?)(\])\n", - lambda m: m.group(0) if m.group(2).rstrip().endswith("None") else f"{m.group(1)}{m.group(2)} | None{m.group(3)}\n", + lambda m: m.group(0) + if m.group(2).rstrip().endswith("None") + else f"{m.group(1)}{m.group(2)} | None{m.group(3)}\n", contents, ) diff --git a/py/scripts/validate-release.py b/py/scripts/validate-release.py index cb60ed7b..7a22a9e8 100644 --- a/py/scripts/validate-release.py +++ b/py/scripts/validate-release.py @@ -11,6 +11,7 @@ import urllib.error import urllib.request + STABLE_VERSION_RE = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$") PRERELEASE_VERSION_RE = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(a|b|rc)[0-9]+$") @@ -39,9 +40,7 @@ def validate_release_type(release_type: str, version: str) -> None: if release_type == "stable" and not STABLE_VERSION_RE.fullmatch(version): raise ValueError(f"Stable releases require a version like X.Y.Z; found '{version}'") if release_type == "prerelease" and not PRERELEASE_VERSION_RE.fullmatch(version): - raise ValueError( - f"Prereleases require a version like X.Y.Zrc1, X.Y.Za1, or X.Y.Zb1; found '{version}'" - ) + raise ValueError(f"Prereleases require a version like X.Y.Zrc1, X.Y.Za1, or X.Y.Zb1; found '{version}'") def check_tag_does_not_exist(tag: str) -> None: diff --git a/py/setup.py b/py/setup.py index 2b7826a3..8730a45e 100644 --- a/py/setup.py +++ b/py/setup.py @@ -2,6 +2,7 @@ import setuptools + dir_name = os.path.abspath(os.path.dirname(__file__)) version_contents = {} diff --git a/py/src/braintrust/__init__.py b/py/src/braintrust/__init__.py index 26dd1e2e..32ef4999 100644 --- a/py/src/braintrust/__init__.py +++ b/py/src/braintrust/__init__.py @@ -52,6 +52,7 @@ def is_equal(expected, output): # Check env var at import time for auto-instrumentation import os + if os.getenv("BRAINTRUST_INSTRUMENT_THREADS", "").lower() in ("true", "1", "yes"): try: from .wrappers.threads import setup_threads diff --git a/py/src/braintrust/_generated_types.py b/py/src/braintrust/_generated_types.py index bf621221..e3b19c24 100644 --- a/py/src/braintrust/_generated_types.py +++ b/py/src/braintrust/_generated_types.py @@ -11,18 +11,19 @@ from typing_extensions import NotRequired + AclObjectType: TypeAlias = Literal[ - 'organization', - 'project', - 'experiment', - 'dataset', - 'prompt', - 'prompt_session', - 'group', - 'role', - 'org_member', - 'project_log', - 'org_project', + "organization", + "project", + "experiment", + "dataset", + "prompt", + "prompt_session", + "group", + "role", + "org_member", + "project_log", + "org_project", ] """ The object type that the ACL applies to @@ -60,7 +61,7 @@ class AnyModelParamsToolChoiceFunction(TypedDict): class AnyModelParamsToolChoice(TypedDict): - type: Literal['function'] + type: Literal["function"] function: AnyModelParamsToolChoiceFunction @@ -105,24 +106,24 @@ class ApiKey(TypedDict): class AsyncScoringControlAsyncScoringControl(TypedDict): - kind: Literal['score_update'] + kind: Literal["score_update"] token: NotRequired[str | None] class AsyncScoringControlAsyncScoringControl2(TypedDict): - kind: Literal['state_force_reselect'] + kind: Literal["state_force_reselect"] class AsyncScoringControlAsyncScoringControl3(TypedDict): - kind: Literal['state_enabled_force_rescore'] + kind: Literal["state_enabled_force_rescore"] class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope(TypedDict): - type: Literal['span'] + type: Literal["span"] class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope1(TypedDict): - type: Literal['trace'] + type: Literal["trace"] class AsyncScoringControlAsyncScoringControl4TriggeredFunction(TypedDict): @@ -135,23 +136,23 @@ class AsyncScoringControlAsyncScoringControl4TriggeredFunction(TypedDict): class AsyncScoringControlAsyncScoringControl4(TypedDict): - kind: Literal['trigger_functions'] + kind: Literal["trigger_functions"] triggered_functions: Sequence[AsyncScoringControlAsyncScoringControl4TriggeredFunction] class AsyncScoringControlAsyncScoringControl5(TypedDict): - kind: Literal['complete_triggered_functions'] + kind: Literal["complete_triggered_functions"] function_ids: Sequence[Any] triggered_xact_id: str class AsyncScoringControlAsyncScoringControl6(TypedDict): - kind: Literal['mark_attempt_failed'] + kind: Literal["mark_attempt_failed"] function_ids: Sequence[Any] class AsyncScoringStateAsyncScoringState(TypedDict): - status: Literal['enabled'] + status: Literal["enabled"] token: str function_ids: Sequence[Any] skip_logging: NotRequired[bool | None] @@ -159,14 +160,14 @@ class AsyncScoringStateAsyncScoringState(TypedDict): class AsyncScoringStateAsyncScoringState1(TypedDict): - status: Literal['disabled'] + status: Literal["disabled"] AsyncScoringState: TypeAlias = AsyncScoringStateAsyncScoringState | AsyncScoringStateAsyncScoringState1 | None class PreprocessorPreprocessor(TypedDict): - type: Literal['function'] + type: Literal["function"] id: str version: NotRequired[str | None] """ @@ -206,7 +207,7 @@ class BatchedFacetDataFacet(TypedDict): class BraintrustAttachmentReference(TypedDict): - type: Literal['braintrust_attachment'] + type: Literal["braintrust_attachment"] """ An identifier to help disambiguate parsing. """ @@ -233,49 +234,49 @@ class BraintrustModelParams(TypedDict): class CallEventCallEvent(TypedDict): id: NotRequired[str | None] data: str - event: Literal['text_delta'] + event: Literal["text_delta"] class CallEventCallEvent1(TypedDict): id: NotRequired[str | None] data: str - event: Literal['reasoning_delta'] + event: Literal["reasoning_delta"] class CallEventCallEvent2(TypedDict): id: NotRequired[str | None] data: str - event: Literal['json_delta'] + event: Literal["json_delta"] class CallEventCallEvent3(TypedDict): id: NotRequired[str | None] data: str - event: Literal['progress'] + event: Literal["progress"] class CallEventCallEvent4(TypedDict): id: NotRequired[str | None] data: str - event: Literal['error'] + event: Literal["error"] class CallEventCallEvent5(TypedDict): id: NotRequired[str | None] data: str - event: Literal['console'] + event: Literal["console"] class CallEventCallEvent6(TypedDict): id: NotRequired[str | None] - event: Literal['start'] - data: Literal[''] + event: Literal["start"] + data: Literal[""] class CallEventCallEvent7(TypedDict): id: NotRequired[str | None] - event: Literal['done'] - data: Literal[''] + event: Literal["done"] + data: Literal[""] CallEvent: TypeAlias = ( @@ -298,42 +299,42 @@ class ChatCompletionContentPartFileFile(TypedDict): class ChatCompletionContentPartFileWithTitle(TypedDict): file: ChatCompletionContentPartFileFile - type: Literal['file'] + type: Literal["file"] class ChatCompletionContentPartImageWithTitleImageUrl(TypedDict): url: str - detail: NotRequired[Literal['auto'] | Literal['low'] | Literal['high'] | None] + detail: NotRequired[Literal["auto"] | Literal["low"] | Literal["high"] | None] class ChatCompletionContentPartImageWithTitle(TypedDict): image_url: ChatCompletionContentPartImageWithTitleImageUrl - type: Literal['image_url'] + type: Literal["image_url"] class ChatCompletionContentPartTextCacheControl(TypedDict): - type: Literal['ephemeral'] + type: Literal["ephemeral"] class ChatCompletionContentPartText(TypedDict): text: str - type: Literal['text'] + type: Literal["text"] cache_control: NotRequired[ChatCompletionContentPartTextCacheControl | None] class ChatCompletionContentPartTextWithTitleCacheControl(TypedDict): - type: Literal['ephemeral'] + type: Literal["ephemeral"] class ChatCompletionContentPartTextWithTitle(TypedDict): text: str - type: Literal['text'] + type: Literal["text"] cache_control: NotRequired[ChatCompletionContentPartTextWithTitleCacheControl | None] class ChatCompletionMessageParamChatCompletionMessageParam(TypedDict): content: str | Sequence[ChatCompletionContentPartText] - role: Literal['system'] + role: Literal["system"] name: NotRequired[str | None] @@ -344,24 +345,24 @@ class ChatCompletionMessageParamChatCompletionMessageParam2FunctionCall(TypedDic class ChatCompletionMessageParamChatCompletionMessageParam3(TypedDict): content: str | Sequence[ChatCompletionContentPartText] - role: Literal['tool'] + role: Literal["tool"] tool_call_id: str class ChatCompletionMessageParamChatCompletionMessageParam4(TypedDict): content: str | None name: str - role: Literal['function'] + role: Literal["function"] class ChatCompletionMessageParamChatCompletionMessageParam5(TypedDict): content: str | Sequence[ChatCompletionContentPartText] - role: Literal['developer'] + role: Literal["developer"] name: NotRequired[str | None] class ChatCompletionMessageParamChatCompletionMessageParam6(TypedDict): - role: Literal['model'] + role: Literal["model"] content: NotRequired[str | None] @@ -378,12 +379,12 @@ class ChatCompletionMessageToolCallFunction(TypedDict): class ChatCompletionMessageToolCall(TypedDict): id: str function: ChatCompletionMessageToolCallFunction - type: Literal['function'] + type: Literal["function"] class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam(TypedDict): content: str | Sequence[ChatCompletionContentPartText] - role: Literal['system'] + role: Literal["system"] name: NotRequired[str | None] @@ -393,7 +394,7 @@ class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam2FunctionC class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam2(TypedDict): - role: Literal['assistant'] + role: Literal["assistant"] content: NotRequired[str | Sequence[ChatCompletionContentPartText] | None] function_call: NotRequired[ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam2FunctionCall | None] name: NotRequired[str | None] @@ -403,19 +404,19 @@ class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam2(TypedDic class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam3(TypedDict): content: str | Sequence[ChatCompletionContentPartText] - role: Literal['tool'] + role: Literal["tool"] tool_call_id: str class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam4(TypedDict): content: str | None name: str - role: Literal['function'] + role: Literal["function"] class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam5(TypedDict): content: str | Sequence[ChatCompletionContentPartText] - role: Literal['developer'] + role: Literal["developer"] name: NotRequired[str | None] @@ -427,36 +428,36 @@ class ChatCompletionToolFunction(TypedDict): class ChatCompletionTool(TypedDict): function: ChatCompletionToolFunction - type: Literal['function'] + type: Literal["function"] class CodeBundleRuntimeContext(TypedDict): - runtime: Literal['node', 'python', 'browser', 'quickjs'] + runtime: Literal["node", "python", "browser", "quickjs"] version: str class CodeBundleLocationPosition(TypedDict): - type: Literal['task'] + type: Literal["task"] class CodeBundleLocationPosition1(TypedDict): - type: Literal['scorer'] + type: Literal["scorer"] index: int class CodeBundleLocation(TypedDict): - type: Literal['experiment'] + type: Literal["experiment"] eval_name: str position: CodeBundleLocationPosition | CodeBundleLocationPosition1 class CodeBundleLocation1(TypedDict): - type: Literal['function'] + type: Literal["function"] index: int class CodeBundleLocation2SandboxSpec(TypedDict): - provider: Literal['modal'] + provider: Literal["modal"] snapshot_ref: str """ sandbox snapshot ref @@ -464,11 +465,11 @@ class CodeBundleLocation2SandboxSpec(TypedDict): class CodeBundleLocation2SandboxSpec1(TypedDict): - provider: Literal['lambda'] + provider: Literal["lambda"] class CodeBundleLocation2(TypedDict): - type: Literal['sandbox'] + type: Literal["sandbox"] sandbox_spec: CodeBundleLocation2SandboxSpec | CodeBundleLocation2SandboxSpec1 entrypoints: NotRequired[Sequence[str] | None] """ @@ -546,7 +547,7 @@ class EnvVar(TypedDict): """ Unique identifier for the environment variable """ - object_type: Literal['organization', 'project', 'function'] + object_type: Literal["organization", "project", "function"] """ The type of the object the environment variable is scoped for """ @@ -574,7 +575,7 @@ class EnvVar(TypedDict): """ Optional classification for the secret (for example, the AI provider name) """ - secret_category: NotRequired[Literal['env_var', 'ai_provider', 'sandbox_provider'] | None] + secret_category: NotRequired[Literal["env_var", "ai_provider", "sandbox_provider"] | None] """ The category of the secret: env_var for regular environment variables, ai_provider for AI provider API keys """ @@ -601,7 +602,7 @@ class EvalStatusPageConfig(TypedDict): """ Field to sort results by (format: 'score:' or 'metric:') """ - sort_order: NotRequired[Literal['asc', 'desc'] | None] + sort_order: NotRequired[Literal["asc", "desc"] | None] """ Sort order (ascending or descending) """ @@ -611,7 +612,7 @@ class EvalStatusPageConfig(TypedDict): """ -EvalStatusPageTheme: TypeAlias = Literal['light', 'dark'] +EvalStatusPageTheme: TypeAlias = Literal["light", "dark"] """ The theme for the page """ @@ -675,7 +676,7 @@ class ExperimentEventContext(TypedDict): class ExtendedSavedFunctionIdExtendedSavedFunctionId(TypedDict): - type: Literal['function'] + type: Literal["function"] id: str version: NotRequired[str | None] """ @@ -684,13 +685,13 @@ class ExtendedSavedFunctionIdExtendedSavedFunctionId(TypedDict): class ExtendedSavedFunctionIdExtendedSavedFunctionId2(TypedDict): - type: Literal['slug'] + type: Literal["slug"] project_id: str slug: str class ExternalAttachmentReference(TypedDict): - type: Literal['external_attachment'] + type: Literal["external_attachment"] """ An identifier to help disambiguate parsing. """ @@ -709,7 +710,7 @@ class ExternalAttachmentReference(TypedDict): class Preprocessor1Preprocessor1(TypedDict): - type: Literal['function'] + type: Literal["function"] id: str version: NotRequired[str | None] """ @@ -743,20 +744,20 @@ class FunctionFunctionSchema(TypedDict): class FunctionDataFunctionData(TypedDict): - type: Literal['prompt'] + type: Literal["prompt"] class Data(CodeBundle): - type: Literal['bundle'] + type: Literal["bundle"] class FunctionDataFunctionData1DataRuntimeContext(TypedDict): - runtime: Literal['node', 'python', 'browser', 'quickjs'] + runtime: Literal["node", "python", "browser", "quickjs"] version: str class FunctionDataFunctionData1Data(TypedDict): - type: Literal['inline'] + type: Literal["inline"] runtime_context: FunctionDataFunctionData1DataRuntimeContext code: str code_hash: NotRequired[str | None] @@ -766,12 +767,12 @@ class FunctionDataFunctionData1Data(TypedDict): class FunctionDataFunctionData1(TypedDict): - type: Literal['code'] + type: Literal["code"] data: Data | FunctionDataFunctionData1Data class FunctionDataFunctionData2(TypedDict): - type: Literal['remote_eval'] + type: Literal["remote_eval"] endpoint: str eval_name: str parameters: Mapping[str, Any] @@ -782,14 +783,14 @@ class FunctionDataFunctionData2(TypedDict): class FunctionDataFunctionData4Schema(TypedDict): - type: Literal['object'] + type: Literal["object"] properties: Mapping[str, Mapping[str, Any]] required: NotRequired[Sequence[str] | None] additionalProperties: NotRequired[bool | None] class FunctionDataFunctionData4(TypedDict): - type: Literal['parameters'] + type: Literal["parameters"] data: Mapping[str, Any] """ The parameters data @@ -800,7 +801,7 @@ class FunctionDataFunctionData4(TypedDict): """ -FunctionFormat: TypeAlias = Literal['llm', 'code', 'global', 'graph', 'topic_map'] +FunctionFormat: TypeAlias = Literal["llm", "code", "global", "graph", "topic_map"] class FunctionIdFunctionId(TypedDict): @@ -845,7 +846,7 @@ class FunctionIdFunctionId3(TypedDict): class FunctionIdFunctionId4InlineContext(TypedDict): - runtime: Literal['node', 'python', 'browser', 'quickjs'] + runtime: Literal["node", "python", "browser", "quickjs"] version: str @@ -853,35 +854,35 @@ class FunctionIdFunctionId4InlineContext(TypedDict): FunctionObjectType: TypeAlias = Literal[ - 'prompt', - 'tool', - 'scorer', - 'task', - 'workflow', - 'custom_view', - 'preprocessor', - 'facet', - 'classifier', - 'parameters', - 'sandbox', + "prompt", + "tool", + "scorer", + "task", + "workflow", + "custom_view", + "preprocessor", + "facet", + "classifier", + "parameters", + "sandbox", ] -FunctionOutputType: TypeAlias = Literal['completion', 'score', 'facet', 'classification', 'any'] +FunctionOutputType: TypeAlias = Literal["completion", "score", "facet", "classification", "any"] FunctionTypeEnum: TypeAlias = Literal[ - 'llm', - 'scorer', - 'task', - 'tool', - 'custom_view', - 'preprocessor', - 'facet', - 'classifier', - 'tag', - 'parameters', - 'sandbox', + "llm", + "scorer", + "task", + "tool", + "custom_view", + "preprocessor", + "facet", + "classifier", + "tag", + "parameters", + "sandbox", ] """ The type of global function. Defaults to 'scorer'. @@ -889,34 +890,34 @@ class FunctionIdFunctionId4InlineContext(TypedDict): FunctionTypeEnumNullish: TypeAlias = Literal[ - 'llm', - 'scorer', - 'task', - 'tool', - 'custom_view', - 'preprocessor', - 'facet', - 'classifier', - 'tag', - 'parameters', - 'sandbox', + "llm", + "scorer", + "task", + "tool", + "custom_view", + "preprocessor", + "facet", + "classifier", + "tag", + "parameters", + "sandbox", ] class GitMetadataSettings(TypedDict): - collect: Literal['all', 'none', 'some'] + collect: Literal["all", "none", "some"] fields: NotRequired[ Sequence[ Literal[ - 'commit', - 'branch', - 'tag', - 'dirty', - 'author_name', - 'author_email', - 'commit_message', - 'commit_time', - 'git_diff', + "commit", + "branch", + "tag", + "dirty", + "author_name", + "author_email", + "commit_message", + "commit_time", + "git_diff", ] ] ] @@ -941,7 +942,7 @@ class GraphEdgeTarget(TypedDict): class GraphEdge(TypedDict): source: GraphEdgeSource target: GraphEdgeTarget - purpose: Literal['control', 'data', 'messages'] + purpose: Literal["control", "data", "messages"] """ The purpose of the edge """ @@ -967,7 +968,7 @@ class GraphNodeGraphNode(TypedDict): """ The position of the node """ - type: Literal['function'] + type: Literal["function"] function: FunctionIdRef @@ -991,7 +992,7 @@ class GraphNodeGraphNode1(TypedDict): """ The position of the node """ - type: Literal['input'] + type: Literal["input"] """ The input to the graph """ @@ -1017,7 +1018,7 @@ class GraphNodeGraphNode2(TypedDict): """ The position of the node """ - type: Literal['output'] + type: Literal["output"] """ The output of the graph """ @@ -1043,7 +1044,7 @@ class GraphNodeGraphNode3(TypedDict): """ The position of the node """ - type: Literal['literal'] + type: Literal["literal"] value: NotRequired[Any | None] """ A literal value to be returned @@ -1070,7 +1071,7 @@ class GraphNodeGraphNode4(TypedDict): """ The position of the node """ - type: Literal['btql'] + type: Literal["btql"] expr: str """ A BTQL expression to be evaluated @@ -1097,7 +1098,7 @@ class GraphNodeGraphNode5(TypedDict): """ The position of the node """ - type: Literal['gate'] + type: Literal["gate"] condition: NotRequired[str | None] """ A BTQL expression to be evaluated @@ -1124,7 +1125,7 @@ class GraphNodeGraphNode6(TypedDict): """ The position of the node """ - type: Literal['aggregator'] + type: Literal["aggregator"] class GraphNodeGraphNode7Position(TypedDict): @@ -1182,7 +1183,7 @@ class Group(TypedDict): class GroupScope(TypedDict): - type: Literal['group'] + type: Literal["group"] group_by: str """ Field path to group by, e.g. metadata.session_id @@ -1193,10 +1194,10 @@ class GroupScope(TypedDict): """ -IfExists: TypeAlias = Literal['error', 'ignore', 'replace'] +IfExists: TypeAlias = Literal["error", "ignore", "replace"] -ImageRenderingMode: TypeAlias = Literal['auto', 'click_to_load', 'blocked'] +ImageRenderingMode: TypeAlias = Literal["auto", "click_to_load", "blocked"] """ Controls how images are rendered in the UI: 'auto' loads images automatically, 'click_to_load' shows a placeholder until clicked, 'blocked' prevents image loading entirely """ @@ -1252,7 +1253,7 @@ class InvokeFunctionInvokeFunction3(TypedDict): class InvokeFunctionInvokeFunction4InlineContext(TypedDict): - runtime: Literal['node', 'python', 'browser', 'quickjs'] + runtime: Literal["node", "python", "browser", "quickjs"] version: str @@ -1292,7 +1293,7 @@ class InvokeParentInvokeParentRowIds(TypedDict): class InvokeParentInvokeParent(TypedDict): - object_type: Literal['project_logs', 'experiment', 'playground_logs'] + object_type: Literal["project_logs", "experiment", "playground_logs"] object_id: str """ The id of the container object you are logging to @@ -1348,7 +1349,7 @@ class MCPServer(TypedDict): """ -MessageRole: TypeAlias = Literal['system', 'user', 'assistant', 'function', 'tool', 'model', 'developer'] +MessageRole: TypeAlias = Literal["system", "user", "assistant", "function", "tool", "model", "developer"] class ModelParamsModelParamsToolChoiceFunction(TypedDict): @@ -1356,7 +1357,7 @@ class ModelParamsModelParamsToolChoiceFunction(TypedDict): class ModelParamsModelParamsToolChoice(TypedDict): - type: Literal['function'] + type: Literal["function"] function: ModelParamsModelParamsToolChoiceFunction @@ -1404,7 +1405,7 @@ class ModelParamsModelParams4(TypedDict): class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict): - type: Literal['function'] + type: Literal["function"] id: str version: NotRequired[str | None] """ @@ -1413,7 +1414,7 @@ class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict): class NullableSavedFunctionIdNullableSavedFunctionId1(TypedDict): - type: Literal['global'] + type: Literal["global"] name: str function_type: NotRequired[FunctionTypeEnum | None] @@ -1427,7 +1428,7 @@ class NullableSavedFunctionIdNullableSavedFunctionId1(TypedDict): class ObjectReference(TypedDict): - object_type: Literal['project_logs', 'experiment', 'dataset', 'prompt', 'function', 'prompt_session'] + object_type: Literal["project_logs", "experiment", "dataset", "prompt", "function", "prompt_session"] """ Type of the object the event is originating from. """ @@ -1450,7 +1451,7 @@ class ObjectReference(TypedDict): class ObjectReferenceNullish(TypedDict): - object_type: Literal['project_logs', 'experiment', 'dataset', 'prompt', 'function', 'prompt_session'] + object_type: Literal["project_logs", "experiment", "dataset", "prompt", "function", "prompt_session"] """ Type of the object the event is originating from. """ @@ -1494,7 +1495,7 @@ class Organization(TypedDict): Permission: TypeAlias = Literal[ - 'create', 'read', 'update', 'delete', 'create_acls', 'read_acls', 'update_acls', 'delete_acls' + "create", "read", "update", "delete", "create_acls", "read_acls", "update_acls", "delete_acls" ] """ Each permission permits a certain type of operation on an object in the system @@ -1504,7 +1505,7 @@ class Organization(TypedDict): class ProjectAutomationConfigAction(TypedDict): - type: Literal['webhook'] + type: Literal["webhook"] """ The type of action to take """ @@ -1515,7 +1516,7 @@ class ProjectAutomationConfigAction(TypedDict): class ProjectAutomationConfigAction1(TypedDict): - type: Literal['slack'] + type: Literal["slack"] """ The type of action to take """ @@ -1534,7 +1535,7 @@ class ProjectAutomationConfigAction1(TypedDict): class ProjectAutomationConfig(TypedDict): - event_type: Literal['logs'] + event_type: Literal["logs"] """ The type of automation. """ @@ -1553,15 +1554,15 @@ class ProjectAutomationConfig(TypedDict): class ProjectAutomationConfig1ExportDefinition(TypedDict): - type: Literal['log_traces'] + type: Literal["log_traces"] class ProjectAutomationConfig1ExportDefinition1(TypedDict): - type: Literal['log_spans'] + type: Literal["log_spans"] class ProjectAutomationConfig1ExportDefinition2(TypedDict): - type: Literal['btql_query'] + type: Literal["btql_query"] btql_query: str """ The BTQL query to export @@ -1569,7 +1570,7 @@ class ProjectAutomationConfig1ExportDefinition2(TypedDict): class ProjectAutomationConfig1Credentials(TypedDict): - type: Literal['aws_iam'] + type: Literal["aws_iam"] role_arn: str """ The ARN of the IAM role to use @@ -1581,7 +1582,7 @@ class ProjectAutomationConfig1Credentials(TypedDict): class ProjectAutomationConfig1(TypedDict): - event_type: Literal['btql_export'] + event_type: Literal["btql_export"] """ The type of automation. """ @@ -1597,7 +1598,7 @@ class ProjectAutomationConfig1(TypedDict): """ The path to export the results to. It should include the storage protocol and prefix, e.g. s3://bucket-name/path/to/export """ - format: Literal['jsonl', 'parquet'] + format: Literal["jsonl", "parquet"] """ The format to export the results in """ @@ -1613,7 +1614,7 @@ class ProjectAutomationConfig1(TypedDict): class ProjectAutomationConfig3Action(TypedDict): - type: Literal['webhook'] + type: Literal["webhook"] """ The type of action to take """ @@ -1624,7 +1625,7 @@ class ProjectAutomationConfig3Action(TypedDict): class ProjectAutomationConfig3Action1(TypedDict): - type: Literal['slack'] + type: Literal["slack"] """ The type of action to take """ @@ -1643,7 +1644,7 @@ class ProjectAutomationConfig3Action1(TypedDict): class ProjectAutomationConfig3(TypedDict): - event_type: Literal['environment_update'] + event_type: Literal["environment_update"] """ The type of automation. """ @@ -1725,7 +1726,7 @@ class ProjectScoreCategory(TypedDict): """ -ProjectScoreType: TypeAlias = Literal['slider', 'categorical', 'weighted', 'minimum', 'maximum', 'online', 'free-form'] +ProjectScoreType: TypeAlias = Literal["slider", "categorical", "weighted", "minimum", "maximum", "online", "free-form"] """ The type of the configured score """ @@ -1735,7 +1736,7 @@ class ProjectSettingsSpanFieldOrderItem(TypedDict): object_type: str column_id: str position: str - layout: NotRequired[Literal['full'] | Literal['two_column'] | None] + layout: NotRequired[Literal["full"] | Literal["two_column"] | None] class ProjectSettingsRemoteEvalSource(TypedDict): @@ -1801,12 +1802,12 @@ class ProjectTag(TypedDict): class PromptBlockDataPromptBlockData1(TypedDict): - type: Literal['completion'] + type: Literal["completion"] content: str class PromptBlockDataNullishPromptBlockDataNullish1(TypedDict): - type: Literal['completion'] + type: Literal["completion"] content: str @@ -1823,7 +1824,7 @@ class PromptDataNullishOrigin(TypedDict): class PromptParserNullish(TypedDict): - type: Literal['llm_classifier'] + type: Literal["llm_classifier"] use_cot: bool choice_scores: NotRequired[Mapping[str, float] | None] """ @@ -1931,11 +1932,11 @@ class RepoInfo(TypedDict): class ResponseFormatResponseFormat(TypedDict): - type: Literal['json_object'] + type: Literal["json_object"] class ResponseFormatResponseFormat2(TypedDict): - type: Literal['text'] + type: Literal["text"] class ResponseFormatJsonSchema(TypedDict): @@ -1946,16 +1947,16 @@ class ResponseFormatJsonSchema(TypedDict): class ResponseFormatNullishResponseFormatNullish(TypedDict): - type: Literal['json_object'] + type: Literal["json_object"] class ResponseFormatNullishResponseFormatNullish1(TypedDict): - type: Literal['json_schema'] + type: Literal["json_schema"] json_schema: ResponseFormatJsonSchema class ResponseFormatNullishResponseFormatNullish2(TypedDict): - type: Literal['text'] + type: Literal["text"] ResponseFormatNullish: TypeAlias = ( @@ -1966,7 +1967,7 @@ class ResponseFormatNullishResponseFormatNullish2(TypedDict): ) -RetentionObjectType: TypeAlias = Literal['project_logs', 'experiment', 'dataset'] +RetentionObjectType: TypeAlias = Literal["project_logs", "experiment", "dataset"] """ The object type that the retention policy applies to """ @@ -2087,7 +2088,7 @@ class TaskTask3(TypedDict): class TaskTask4InlineContext(TypedDict): - runtime: Literal['node', 'python', 'browser', 'quickjs'] + runtime: Literal["node", "python", "browser", "quickjs"] version: str @@ -2144,7 +2145,7 @@ class ParentParentRowIds(TypedDict): class ParentParent(TypedDict): - object_type: Literal['project_logs', 'experiment', 'playground_logs'] + object_type: Literal["project_logs", "experiment", "playground_logs"] object_id: str """ The id of the container object you are logging to @@ -2178,7 +2179,7 @@ class RunEvalMcpAuth(TypedDict): class SavedFunctionIdSavedFunctionId(TypedDict): - type: Literal['function'] + type: Literal["function"] id: str version: NotRequired[str | None] """ @@ -2187,7 +2188,7 @@ class SavedFunctionIdSavedFunctionId(TypedDict): class SavedFunctionIdSavedFunctionId1(TypedDict): - type: Literal['global'] + type: Literal["global"] name: str function_type: NotRequired[FunctionTypeEnum | None] @@ -2267,11 +2268,11 @@ class SpanIFrame(TypedDict): class SpanScope(TypedDict): - type: Literal['span'] + type: Literal["span"] SpanType: TypeAlias = Literal[ - 'llm', 'score', 'function', 'eval', 'task', 'tool', 'automation', 'facet', 'preprocessor', 'classifier', 'review' + "llm", "score", "function", "eval", "task", "tool", "automation", "facet", "preprocessor", "classifier", "review" ] """ Type of the span, for display purposes only @@ -2279,7 +2280,7 @@ class SpanScope(TypedDict): class SSEConsoleEventData(TypedDict): - stream: Literal['stderr', 'stdout'] + stream: Literal["stderr", "stdout"] message: str @@ -2293,11 +2294,11 @@ class SSEProgressEventData(TypedDict): format: FunctionFormat output_type: FunctionOutputType name: str - event: Literal['reasoning_delta', 'text_delta', 'json_delta', 'error', 'console', 'start', 'done', 'progress'] + event: Literal["reasoning_delta", "text_delta", "json_delta", "error", "console", "start", "done", "progress"] data: str -StreamingMode: TypeAlias = Literal['auto', 'parallel', 'json', 'text'] +StreamingMode: TypeAlias = Literal["auto", "parallel", "json", "text"] """ The mode format of the returned value (defaults to 'auto') """ @@ -2311,29 +2312,29 @@ class ToolFunctionDefinitionFunction(TypedDict): class ToolFunctionDefinition(TypedDict): - type: Literal['function'] + type: Literal["function"] function: ToolFunctionDefinitionFunction TopicAutomationConfigBackfillTimeRange = TypedDict( - 'TopicAutomationConfigBackfillTimeRange', + "TopicAutomationConfigBackfillTimeRange", { - 'from': str, - 'to': str, + "from": str, + "to": str, }, ) class TopicAutomationDataScopeTopicAutomationDataScope(TypedDict): - type: Literal['project_logs'] + type: Literal["project_logs"] class TopicAutomationDataScopeTopicAutomationDataScope1(TypedDict): - type: Literal['project_experiments'] + type: Literal["project_experiments"] class TopicAutomationDataScopeTopicAutomationDataScope2(TypedDict): - type: Literal['experiment'] + type: Literal["experiment"] experiment_id: str @@ -2349,7 +2350,7 @@ class TopicAutomationDataScopeTopicAutomationDataScope2(TypedDict): class TopicMapData(TypedDict): - type: Literal['topic_map'] + type: Literal["topic_map"] source_facet: str """ The facet field name to use as input for classification @@ -2377,7 +2378,7 @@ class TopicMapData(TypedDict): class Function1Function1(TypedDict): - type: Literal['function'] + type: Literal["function"] id: str version: NotRequired[str | None] """ @@ -2386,7 +2387,7 @@ class Function1Function1(TypedDict): class Function1Function11(TypedDict): - type: Literal['global'] + type: Literal["global"] name: str function_type: NotRequired[FunctionTypeEnum | None] @@ -2415,7 +2416,7 @@ class TopicMapFunctionAutomation(TypedDict): class TraceScope(TypedDict): - type: Literal['trace'] + type: Literal["trace"] idle_seconds: NotRequired[float | None] """ Consider trace complete after this many seconds of inactivity (default: 30) @@ -2423,15 +2424,15 @@ class TraceScope(TypedDict): class TriggeredFunctionStateScope(TypedDict): - type: Literal['span'] + type: Literal["span"] class TriggeredFunctionStateScope1(TypedDict): - type: Literal['trace'] + type: Literal["trace"] class TriggeredFunctionStateScope2(TypedDict): - type: Literal['group'] + type: Literal["group"] key: str value: str @@ -2459,7 +2460,7 @@ class TriggeredFunctionState(TypedDict): """ -UploadStatus: TypeAlias = Literal['uploading', 'done', 'error'] +UploadStatus: TypeAlias = Literal["uploading", "done", "error"] class User(TypedDict): @@ -2497,40 +2498,40 @@ class ViewDataSearch(TypedDict): class ViewOptionsViewOptionsOptions(TypedDict): - spanType: NotRequired[Literal['range', 'frame'] | None] + spanType: NotRequired[Literal["range", "frame"] | None] rangeValue: NotRequired[str | None] frameStart: NotRequired[str | None] frameEnd: NotRequired[str | None] tzUTC: NotRequired[bool | None] chartVisibility: NotRequired[Mapping[str, Any] | None] projectId: NotRequired[str | None] - type: NotRequired[Literal['project', 'experiment'] | None] + type: NotRequired[Literal["project", "experiment"] | None] groupBy: NotRequired[str | None] class ViewOptionsViewOptions(TypedDict): - viewType: Literal['monitor'] + viewType: Literal["monitor"] options: ViewOptionsViewOptionsOptions freezeColumns: NotRequired[bool | None] class ViewOptionsViewOptions1ExcludedMeasure(TypedDict): - type: Literal['none', 'score', 'metric', 'metadata'] + type: Literal["none", "score", "metric", "metadata"] value: str class ViewOptionsViewOptions1YMetric(TypedDict): - type: Literal['none', 'score', 'metric', 'metadata'] + type: Literal["none", "score", "metric", "metadata"] value: str class ViewOptionsViewOptions1XAxis(TypedDict): - type: Literal['none', 'score', 'metric', 'metadata'] + type: Literal["none", "score", "metric", "metadata"] value: str class ViewOptionsViewOptions1SymbolGrouping(TypedDict): - type: Literal['none', 'score', 'metric', 'metadata'] + type: Literal["none", "score", "metric", "metadata"] value: str @@ -2540,10 +2541,10 @@ class ViewOptionsViewOptions1ChartAnnotation(TypedDict): ViewOptionsViewOptions1TimeRangeFilter = TypedDict( - 'ViewOptionsViewOptions1TimeRangeFilter', + "ViewOptionsViewOptions1TimeRangeFilter", { - 'from': str, - 'to': str, + "from": str, + "to": str, }, ) @@ -2567,7 +2568,7 @@ class ViewOptionsViewOptions1(TypedDict): """ chartAnnotations: NotRequired[Sequence[ViewOptionsViewOptions1ChartAnnotation] | None] timeRangeFilter: NotRequired[str | ViewOptionsViewOptions1TimeRangeFilter | None] - queryShape: NotRequired[Literal['traces', 'spans'] | None] + queryShape: NotRequired[Literal["traces", "spans"] | None] freezeColumns: NotRequired[bool | None] @@ -2622,12 +2623,12 @@ class AnyModelParams(TypedDict): frequency_penalty: NotRequired[float | None] presence_penalty: NotRequired[float | None] response_format: NotRequired[ResponseFormatNullish | None] - tool_choice: NotRequired[Literal['auto'] | Literal['none'] | Literal['required'] | AnyModelParamsToolChoice | None] - function_call: NotRequired[Literal['auto'] | Literal['none'] | AnyModelParamsFunctionCall | None] + tool_choice: NotRequired[Literal["auto"] | Literal["none"] | Literal["required"] | AnyModelParamsToolChoice | None] + function_call: NotRequired[Literal["auto"] | Literal["none"] | AnyModelParamsFunctionCall | None] n: NotRequired[float | None] stop: NotRequired[Sequence[str] | None] - reasoning_effort: NotRequired[Literal['none', 'minimal', 'low', 'medium', 'high'] | None] - verbosity: NotRequired[Literal['low', 'medium', 'high'] | None] + reasoning_effort: NotRequired[Literal["none", "minimal", "low", "medium", "high"] | None] + verbosity: NotRequired[Literal["low", "medium", "high"] | None] top_k: NotRequired[float | None] stop_sequences: NotRequired[Sequence[str] | None] reasoning_enabled: NotRequired[bool | None] @@ -2643,7 +2644,7 @@ class AnyModelParams(TypedDict): class AsyncScoringControlAsyncScoringControl1(TypedDict): - kind: Literal['state_override'] + kind: Literal["state_override"] state: AsyncScoringState @@ -2670,7 +2671,7 @@ class AttachmentStatus(TypedDict): class PreprocessorPreprocessor1(TypedDict): - type: Literal['global'] + type: Literal["global"] name: str function_type: NotRequired[FunctionTypeEnum | None] @@ -2695,7 +2696,7 @@ class BatchedFacetDataTopicMap(TypedDict): class BatchedFacetData(TypedDict): - type: Literal['batched_facet'] + type: Literal["batched_facet"] preprocessor: NotRequired[Preprocessor | None] facets: Sequence[BatchedFacetDataFacet] topic_maps: NotRequired[Mapping[str, Sequence[BatchedFacetDataTopicMap]] | None] @@ -2713,12 +2714,12 @@ class BatchedFacetData(TypedDict): class ChatCompletionMessageParamChatCompletionMessageParam1(TypedDict): content: str | Sequence[ChatCompletionContentPart] - role: Literal['user'] + role: Literal["user"] name: NotRequired[str | None] class ChatCompletionMessageParamChatCompletionMessageParam2(TypedDict): - role: Literal['assistant'] + role: Literal["assistant"] content: NotRequired[str | Sequence[ChatCompletionContentPartText] | None] function_call: NotRequired[ChatCompletionMessageParamChatCompletionMessageParam2FunctionCall | None] name: NotRequired[str | None] @@ -2739,7 +2740,7 @@ class ChatCompletionMessageParamChatCompletionMessageParam2(TypedDict): class ChatCompletionOpenAIMessageParamChatCompletionOpenAIMessageParam1(TypedDict): content: str | Sequence[ChatCompletionContentPart] - role: Literal['user'] + role: Literal["user"] name: NotRequired[str | None] @@ -2931,7 +2932,7 @@ class Experiment(TypedDict): class ExtendedSavedFunctionIdExtendedSavedFunctionId1(TypedDict): - type: Literal['global'] + type: Literal["global"] name: str function_type: NotRequired[FunctionTypeEnum | None] @@ -2944,7 +2945,7 @@ class ExtendedSavedFunctionIdExtendedSavedFunctionId1(TypedDict): class Preprocessor1Preprocessor11(TypedDict): - type: Literal['global'] + type: Literal["global"] name: str function_type: NotRequired[FunctionTypeEnum | None] @@ -2957,7 +2958,7 @@ class Preprocessor1Preprocessor14(Preprocessor1Preprocessor11, Preprocessor1Prep class FacetData(TypedDict): - type: Literal['facet'] + type: Literal["facet"] preprocessor: NotRequired[Preprocessor1 | None] prompt: str """ @@ -2978,7 +2979,7 @@ class FacetData(TypedDict): class FunctionDataFunctionData3(TypedDict): - type: Literal['global'] + type: Literal["global"] name: str function_type: NotRequired[FunctionTypeEnum | None] config: NotRequired[Mapping[str, Any] | None] @@ -3084,13 +3085,13 @@ class ModelParamsModelParams(TypedDict): presence_penalty: NotRequired[float | None] response_format: NotRequired[ResponseFormatNullish | None] tool_choice: NotRequired[ - Literal['auto'] | Literal['none'] | Literal['required'] | ModelParamsModelParamsToolChoice + Literal["auto"] | Literal["none"] | Literal["required"] | ModelParamsModelParamsToolChoice ] - function_call: NotRequired[Literal['auto'] | Literal['none'] | ModelParamsModelParamsFunctionCall | None] + function_call: NotRequired[Literal["auto"] | Literal["none"] | ModelParamsModelParamsFunctionCall | None] n: NotRequired[float | None] stop: NotRequired[Sequence[str] | None] - reasoning_effort: NotRequired[Literal['none', 'minimal', 'low', 'medium', 'high'] | None] - verbosity: NotRequired[Literal['low', 'medium', 'high'] | None] + reasoning_effort: NotRequired[Literal["none", "minimal", "low", "medium", "high"] | None] + verbosity: NotRequired[Literal["low", "medium", "high"] | None] ModelParams: TypeAlias = ( @@ -3166,7 +3167,7 @@ class Project(TypedDict): class ProjectAutomationConfig2(TypedDict): - event_type: Literal['retention'] + event_type: Literal["retention"] """ The type of automation. """ @@ -3187,7 +3188,7 @@ class ProjectScoreConfig(TypedDict): class PromptBlockDataPromptBlockData(TypedDict): - type: Literal['chat'] + type: Literal["chat"] messages: Sequence[ChatCompletionMessageParam] tools: NotRequired[str | None] @@ -3196,7 +3197,7 @@ class PromptBlockDataPromptBlockData(TypedDict): class PromptBlockDataNullishPromptBlockDataNullish(TypedDict): - type: Literal['chat'] + type: Literal["chat"] messages: Sequence[ChatCompletionMessageParam] tools: NotRequired[str | None] @@ -3219,7 +3220,7 @@ class PromptOptionsNullish(TypedDict): class ResponseFormatResponseFormat1(TypedDict): - type: Literal['json_schema'] + type: Literal["json_schema"] json_schema: ResponseFormatJsonSchema @@ -3237,7 +3238,7 @@ class SpanAttributes(TypedDict): class TopicAutomationConfig(TypedDict): - event_type: Literal['topic'] + event_type: Literal["topic"] """ The type of automation. """ @@ -3379,7 +3380,7 @@ class GraphNodeGraphNode7(TypedDict): """ The position of the node """ - type: Literal['prompt_template'] + type: Literal["prompt_template"] prompt: PromptBlockData @@ -3457,7 +3458,7 @@ class ProjectLogsEvent(TypedDict): """ Unique identifier for the project """ - log_id: Literal['g'] + log_id: Literal["g"] """ A literal 'g' which identifies the log as a project log """ @@ -3573,7 +3574,7 @@ class PromptData(TypedDict): options: NotRequired[PromptOptionsNullish | None] parser: NotRequired[PromptParserNullish | None] tool_functions: NotRequired[Sequence[SavedFunctionId] | None] - template_format: NotRequired[Literal['mustache', 'nunjucks', 'none'] | None] + template_format: NotRequired[Literal["mustache", "nunjucks", "none"] | None] mcp: NotRequired[Mapping[str, Any] | None] origin: NotRequired[PromptDataOrigin | None] @@ -3583,7 +3584,7 @@ class PromptDataNullish(TypedDict): options: NotRequired[PromptOptionsNullish | None] parser: NotRequired[PromptParserNullish | None] tool_functions: NotRequired[Sequence[SavedFunctionId] | None] - template_format: NotRequired[Literal['mustache', 'nunjucks', 'none'] | None] + template_format: NotRequired[Literal["mustache", "nunjucks", "none"] | None] mcp: NotRequired[Mapping[str, Any] | None] origin: NotRequired[PromptDataNullishOrigin | None] @@ -3629,23 +3630,23 @@ class View(TypedDict): The id of the object the view applies to """ view_type: Literal[ - 'projects', - 'experiments', - 'experiment', - 'playgrounds', - 'playground', - 'datasets', - 'dataset', - 'prompts', - 'parameters', - 'tools', - 'scorers', - 'classifiers', - 'logs', - 'monitor', - 'for_review_project_log', - 'for_review_experiments', - 'for_review_datasets', + "projects", + "experiments", + "experiment", + "playgrounds", + "playground", + "datasets", + "dataset", + "prompts", + "parameters", + "tools", + "scorers", + "classifiers", + "logs", + "monitor", + "for_review_project_log", + "for_review_experiments", + "for_review_datasets", ] """ Type of object that the view corresponds to. @@ -3704,7 +3705,7 @@ class FunctionIdFunctionId6(TypedDict): class GraphData(TypedDict): - type: Literal['graph'] + type: Literal["graph"] nodes: Mapping[str, GraphNode] edges: Mapping[str, GraphEdge] @@ -3763,7 +3764,7 @@ class Prompt(TypedDict): """ Unique identifier for the project that the prompt belongs under """ - log_id: Literal['p'] + log_id: Literal["p"] """ A literal 'p' which identifies the object as a project prompt """ @@ -3905,7 +3906,7 @@ class Function(TypedDict): """ Unique identifier for the project that the prompt belongs under """ - log_id: Literal['p'] + log_id: Literal["p"] """ A literal 'p' which identifies the object as a project prompt """ @@ -3946,4 +3947,5 @@ class Function(TypedDict): JSON schema for the function's parameters and return type """ + __all__ = [] diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py index fe336267..30dcc2b2 100644 --- a/py/src/braintrust/auto.py +++ b/py/src/braintrust/auto.py @@ -9,6 +9,7 @@ import logging from contextlib import contextmanager + __all__ = ["auto_instrument"] logger = logging.getLogger(__name__) diff --git a/py/src/braintrust/bt_json.py b/py/src/braintrust/bt_json.py index 4fc36f8c..00e8bfde 100644 --- a/py/src/braintrust/bt_json.py +++ b/py/src/braintrust/bt_json.py @@ -3,6 +3,7 @@ import math from typing import Any, Callable, Mapping, NamedTuple, cast, overload + # Try to import orjson for better performance # If not available, we'll use standard json try: @@ -13,7 +14,6 @@ _HAS_ORJSON = False - def _to_bt_safe(v: Any) -> Any: """ Converts the object to a Braintrust-safe representation (i.e. Attachment objects are safe (specially handled by background logger)). @@ -87,24 +87,27 @@ def _to_bt_safe(v: Any) -> Any: # We pass `encoder=_str_encoder` since we've already tried converting rich objects to json safe objects. return bt_loads(bt_dumps(v, encoder=_str_encoder)) + @overload def bt_safe_deep_copy( obj: Mapping[str, Any], max_depth: int = ..., ) -> dict[str, Any]: ... + @overload def bt_safe_deep_copy( obj: list[Any], max_depth: int = ..., ) -> list[Any]: ... + @overload def bt_safe_deep_copy( obj: Any, max_depth: int = ..., ) -> Any: ... -def bt_safe_deep_copy(obj: Any, max_depth: int=200): +def bt_safe_deep_copy(obj: Any, max_depth: int = 200): """ Creates a deep copy of the given object and converts rich objects to Braintrust-safe representations. See `_to_bt_safe` for more details. @@ -161,6 +164,7 @@ def _deep_copy_object(v: Any, depth: int = 0) -> Any: return _deep_copy_object(obj) + def _safe_str(obj: Any) -> str: try: return str(obj) @@ -211,10 +215,12 @@ class Encoder(NamedTuple): native: type[json.JSONEncoder] orjson: Callable[[Any], Any] + _json_encoder = Encoder(native=BraintrustJSONEncoder, orjson=_to_json_safe) _str_encoder = Encoder(native=BraintrustStrEncoder, orjson=_safe_str) -def bt_dumps(obj: Any, encoder: Encoder | None=_json_encoder, **kwargs: Any) -> str: + +def bt_dumps(obj: Any, encoder: Encoder | None = _json_encoder, **kwargs: Any) -> str: """ Serialize obj to a JSON-formatted string. diff --git a/py/src/braintrust/cli/eval.py b/py/src/braintrust/cli/eval.py index e5877856..1a3b09fd 100644 --- a/py/src/braintrust/cli/eval.py +++ b/py/src/braintrust/cli/eval.py @@ -24,6 +24,7 @@ from ..logger import Dataset from ..util import eprint + INCLUDE = [ "**/eval_*.py", ] diff --git a/py/src/braintrust/cli/install/__init__.py b/py/src/braintrust/cli/install/__init__.py index d01ec808..849eda60 100644 --- a/py/src/braintrust/cli/install/__init__.py +++ b/py/src/braintrust/cli/install/__init__.py @@ -1,6 +1,7 @@ import argparse import textwrap + _module_not_found_error = None try: from . import api, bump_versions, logs, run_migrations diff --git a/py/src/braintrust/cli/install/api.py b/py/src/braintrust/cli/install/api.py index 2538c605..6c95774a 100644 --- a/py/src/braintrust/cli/install/api.py +++ b/py/src/braintrust/cli/install/api.py @@ -10,6 +10,7 @@ from ...aws import cloudformation from ...util import response_raise_for_status + _logger = logging.getLogger("braintrust.install.api") PARAMS = { diff --git a/py/src/braintrust/cli/install/bump_versions.py b/py/src/braintrust/cli/install/bump_versions.py index e0954aa5..04a2fc71 100644 --- a/py/src/braintrust/cli/install/bump_versions.py +++ b/py/src/braintrust/cli/install/bump_versions.py @@ -3,6 +3,7 @@ # pylint: disable=no-name-in-module from ...aws import LazyClient, cloudformation + _logger = logging.getLogger("braintrust.install.logs") diff --git a/py/src/braintrust/cli/install/logs.py b/py/src/braintrust/cli/install/logs.py index 3423c4c0..2b840aec 100644 --- a/py/src/braintrust/cli/install/logs.py +++ b/py/src/braintrust/cli/install/logs.py @@ -5,6 +5,7 @@ # pylint: disable=no-name-in-module from ...aws import cloudformation, logs + _logger = logging.getLogger("braintrust.install.logs") diff --git a/py/src/braintrust/cli/install/redshift.py b/py/src/braintrust/cli/install/redshift.py index c3011591..6571acbb 100644 --- a/py/src/braintrust/cli/install/redshift.py +++ b/py/src/braintrust/cli/install/redshift.py @@ -9,6 +9,7 @@ # pylint: disable=no-name-in-module from ...aws import iam, redshift_serverless + _logger = logging.getLogger("braintrust.install.redshift") diff --git a/py/src/braintrust/cli/install/run_migrations.py b/py/src/braintrust/cli/install/run_migrations.py index 5235f7e9..68fb29a7 100644 --- a/py/src/braintrust/cli/install/run_migrations.py +++ b/py/src/braintrust/cli/install/run_migrations.py @@ -3,6 +3,7 @@ # pylint: disable=no-name-in-module from ...aws import LazyClient, cloudformation + _logger = logging.getLogger("braintrust.install.logs") diff --git a/py/src/braintrust/cli/test_push.py b/py/src/braintrust/cli/test_push.py index 24207ca7..970883c1 100644 --- a/py/src/braintrust/cli/test_push.py +++ b/py/src/braintrust/cli/test_push.py @@ -2,6 +2,7 @@ import pytest + pydantic = pytest.importorskip("pydantic") from ..framework2 import ( diff --git a/py/src/braintrust/contrib/temporal/__init__.py b/py/src/braintrust/contrib/temporal/__init__.py index 19cfc4ee..61e4397e 100644 --- a/py/src/braintrust/contrib/temporal/__init__.py +++ b/py/src/braintrust/contrib/temporal/__init__.py @@ -94,6 +94,7 @@ from temporalio.worker import WorkflowRunner from temporalio.worker.workflow_sandbox import SandboxedWorkflowRunner + # Braintrust dynamically chooses its context implementation at runtime based on # BRAINTRUST_OTEL_COMPAT environment variable. When first accessed, it reads # os.environ which is restricted in the sandbox. Therefore if the first use diff --git a/py/src/braintrust/contrib/temporal/test_temporal.py b/py/src/braintrust/contrib/temporal/test_temporal.py index 5034612e..8ed87264 100644 --- a/py/src/braintrust/contrib/temporal/test_temporal.py +++ b/py/src/braintrust/contrib/temporal/test_temporal.py @@ -9,6 +9,7 @@ import pytest import pytest_asyncio + pytest.importorskip("temporalio") import braintrust diff --git a/py/src/braintrust/devserver/auth.py b/py/src/braintrust/devserver/auth.py index d1af0338..70523720 100644 --- a/py/src/braintrust/devserver/auth.py +++ b/py/src/braintrust/devserver/auth.py @@ -7,6 +7,7 @@ from ..logger import BraintrustState + ORIGIN_HEADER = "origin" BRAINTRUST_AUTH_TOKEN_HEADER = "x-bt-auth-token" BRAINTRUST_ORG_NAME_HEADER = "x-bt-org-name" diff --git a/py/src/braintrust/devserver/cors.py b/py/src/braintrust/devserver/cors.py index e014d4a2..0b60f4e1 100644 --- a/py/src/braintrust/devserver/cors.py +++ b/py/src/braintrust/devserver/cors.py @@ -3,6 +3,7 @@ from collections.abc import Awaitable, Callable from typing import Any + # CORS configuration ALLOWED_ORIGINS: list[str | re.Pattern] = [ "https://www.braintrust.dev", diff --git a/py/src/braintrust/devserver/schemas.py b/py/src/braintrust/devserver/schemas.py index 841daffd..cd8f49da 100644 --- a/py/src/braintrust/devserver/schemas.py +++ b/py/src/braintrust/devserver/schemas.py @@ -4,6 +4,7 @@ from typing_extensions import TypedDict + # This is not beautiful code, but it saves us from introducing Pydantic as a dependency, and it is fairly # straightforward for an LLM to keep it up to date with runEvalBodySchema in JS. diff --git a/py/src/braintrust/devserver/server.py b/py/src/braintrust/devserver/server.py index 96f981c4..46e81b5c 100644 --- a/py/src/braintrust/devserver/server.py +++ b/py/src/braintrust/devserver/server.py @@ -4,6 +4,7 @@ import textwrap from typing import Any + try: import uvicorn from starlette.applications import Starlette @@ -36,6 +37,7 @@ from .eval_hooks import SSEQueue from .schemas import ValidationError, parse_eval_body + _all_evaluators: dict[str, Evaluator[Any, Any]] = {} diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index 4794785a..1dc38f90 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -49,6 +49,7 @@ from .span_types import SpanTypeAttribute from .util import bt_iscoroutinefunction, eprint, merge_dicts + Input = TypeVar("Input") Output = TypeVar("Output") @@ -1276,7 +1277,8 @@ async def run_evaluator( ) -> EvalResultWithSummary[Input, Output]: """Wrapper on _run_evaluator_internal that times out execution after evaluator.timeout.""" results = await asyncio.wait_for( - _run_evaluator_internal(experiment, evaluator, position, filters, stream, state, enable_cache), evaluator.timeout + _run_evaluator_internal(experiment, evaluator, position, filters, stream, state, enable_cache), + evaluator.timeout, ) if experiment: @@ -1473,9 +1475,7 @@ def report_progress(event: TaskProgressEvent): async def ensure_spans_flushed(): # Flush native Braintrust spans if experiment: - await asyncio.get_event_loop().run_in_executor( - None, lambda: experiment.state.flush() - ) + await asyncio.get_event_loop().run_in_executor(None, lambda: experiment.state.flush()) elif state: await asyncio.get_event_loop().run_in_executor(None, lambda: state.flush()) else: diff --git a/py/src/braintrust/functions/invoke.py b/py/src/braintrust/functions/invoke.py index f0b1c3c0..b9597954 100644 --- a/py/src/braintrust/functions/invoke.py +++ b/py/src/braintrust/functions/invoke.py @@ -9,6 +9,7 @@ from .constants import INVOKE_API_VERSION from .stream import BraintrustInvokeError, BraintrustStream + T = TypeVar("T") ModeType = Literal["auto", "parallel", "json", "text"] ObjectType = Literal["project_logs", "experiment", "dataset", "playground_logs"] diff --git a/py/src/braintrust/generated_types.py b/py/src/braintrust/generated_types.py index 6f49a6ed..5528fe91 100644 --- a/py/src/braintrust/generated_types.py +++ b/py/src/braintrust/generated_types.py @@ -115,6 +115,7 @@ ViewOptions, ) + __all__ = [ "AISecret", "Acl", diff --git a/py/src/braintrust/gitutil.py b/py/src/braintrust/gitutil.py index 4ab875e5..416dc59b 100644 --- a/py/src/braintrust/gitutil.py +++ b/py/src/braintrust/gitutil.py @@ -7,6 +7,7 @@ from .git_fields import GitMetadataSettings, RepoInfo + # https://stackoverflow.com/questions/48399498/git-executable-not-found-in-python os.environ["GIT_PYTHON_REFRESH"] = "quiet" try: diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index f2d20863..ff08052d 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -93,11 +93,13 @@ response_raise_for_status, ) + # Fields that should be passed to the masking function # Note: "tags" field is intentionally excluded, but can be added if needed REDACTION_FIELDS = ["input", "output", "expected", "metadata", "context", "scores", "metrics"] from .xact_ids import prettify_xact + Metadata = dict[str, Any] DATA_API_VERSION = 2 LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow" @@ -3510,17 +3512,17 @@ def _start_span_parent_args( if parent: assert parent_span_ids is None, "Cannot specify both parent and parent_span_ids" parent_components = SpanComponentsV4.from_str(parent) - assert ( - parent_object_type == parent_components.object_type - ), f"Mismatch between expected span parent object type {parent_object_type} and provided type {parent_components.object_type}" + assert parent_object_type == parent_components.object_type, ( + f"Mismatch between expected span parent object type {parent_object_type} and provided type {parent_components.object_type}" + ) parent_components_object_id_lambda = _span_components_to_object_id_lambda(parent_components) def compute_parent_object_id(): parent_components_object_id = parent_components_object_id_lambda() - assert ( - parent_object_id.get() == parent_components_object_id - ), f"Mismatch between expected span parent object id {parent_object_id.get()} and provided id {parent_components_object_id}" + assert parent_object_id.get() == parent_components_object_id, ( + f"Mismatch between expected span parent object id {parent_object_id.get()} and provided id {parent_components_object_id}" + ) return parent_object_id.get() arg_parent_object_id = LazyValue(compute_parent_object_id, use_mutex=False) diff --git a/py/src/braintrust/merge_row_batch.py b/py/src/braintrust/merge_row_batch.py index 07d40578..c9047775 100644 --- a/py/src/braintrust/merge_row_batch.py +++ b/py/src/braintrust/merge_row_batch.py @@ -3,9 +3,11 @@ from .db_fields import IS_MERGE_FIELD + T = TypeVar("T") from .util import merge_dicts + _MergedRowKey = tuple[Optional[Any], ...] diff --git a/py/src/braintrust/oai.py b/py/src/braintrust/oai.py index 52e07a78..80bb3e7a 100644 --- a/py/src/braintrust/oai.py +++ b/py/src/braintrust/oai.py @@ -11,6 +11,7 @@ from .span_types import SpanTypeAttribute from .util import is_numeric, merge_dicts + X_LEGACY_CACHED_HEADER = "x-cached" X_CACHED_HEADER = "x-bt-cached" diff --git a/py/src/braintrust/object.py b/py/src/braintrust/object.py index f241a589..e596c761 100644 --- a/py/src/braintrust/object.py +++ b/py/src/braintrust/object.py @@ -1,5 +1,6 @@ from .generated_types import DatasetEvent + DEFAULT_IS_LEGACY_DATASET = False diff --git a/py/src/braintrust/otel/__init__.py b/py/src/braintrust/otel/__init__.py index fec3a4cd..e6fe7f3e 100644 --- a/py/src/braintrust/otel/__init__.py +++ b/py/src/braintrust/otel/__init__.py @@ -3,6 +3,7 @@ import warnings from urllib.parse import urljoin + INSTALL_ERR_MSG = ( "OpenTelemetry packages are not installed. " "Install optional OpenTelemetry dependencies with: pip install braintrust[otel]" @@ -402,10 +403,12 @@ def _get_braintrust_parent(object_type, object_id: str | None = None, compute_ar return None + def is_root_span(span) -> bool: """Returns True if the span is a root span (no parent span).""" return getattr(span, "parent", None) is None + def context_from_span_export(export_str: str): """ Create an OTEL context from a Braintrust span export string. diff --git a/py/src/braintrust/otel/context.py b/py/src/braintrust/otel/context.py index b0148384..bb65be77 100644 --- a/py/src/braintrust/otel/context.py +++ b/py/src/braintrust/otel/context.py @@ -8,6 +8,7 @@ from opentelemetry import context, trace from opentelemetry.trace import SpanContext, TraceFlags + log = logging.getLogger(__name__) diff --git a/py/src/braintrust/otel/test_distributed_tracing.py b/py/src/braintrust/otel/test_distributed_tracing.py index 2610f81b..a2fab2a2 100644 --- a/py/src/braintrust/otel/test_distributed_tracing.py +++ b/py/src/braintrust/otel/test_distributed_tracing.py @@ -12,6 +12,7 @@ from braintrust.otel import BraintrustSpanProcessor, context_from_span_export from braintrust.test_helpers import init_test_logger, preserve_env_vars + OTEL_AVAILABLE = True try: from opentelemetry.sdk.trace import TracerProvider diff --git a/py/src/braintrust/otel/test_otel_bt_integration.py b/py/src/braintrust/otel/test_otel_bt_integration.py index 9ca1acc9..579082d9 100644 --- a/py/src/braintrust/otel/test_otel_bt_integration.py +++ b/py/src/braintrust/otel/test_otel_bt_integration.py @@ -13,6 +13,7 @@ from braintrust.otel import BraintrustSpanProcessor from braintrust.test_helpers import init_test_exp, init_test_logger, preserve_env_vars + OTEL_AVAILABLE = True try: from opentelemetry.sdk.trace import TracerProvider diff --git a/py/src/braintrust/prompt.py b/py/src/braintrust/prompt.py index 242cee43..d4b7fa19 100644 --- a/py/src/braintrust/prompt.py +++ b/py/src/braintrust/prompt.py @@ -4,6 +4,7 @@ from .generated_types import PromptOptions from .serializable_data_class import SerializableDataClass + # Keep these definitions in sync with sdk/core/js/typespecs/prompt.ts. diff --git a/py/src/braintrust/prompt_cache/disk_cache.py b/py/src/braintrust/prompt_cache/disk_cache.py index 3bf400ef..b7408024 100644 --- a/py/src/braintrust/prompt_cache/disk_cache.py +++ b/py/src/braintrust/prompt_cache/disk_cache.py @@ -15,6 +15,7 @@ from collections.abc import Callable from typing import Any, Generic, TypeVar + T = TypeVar("T") diff --git a/py/src/braintrust/prompt_cache/lru_cache.py b/py/src/braintrust/prompt_cache/lru_cache.py index e6023f0b..126fbd27 100644 --- a/py/src/braintrust/prompt_cache/lru_cache.py +++ b/py/src/braintrust/prompt_cache/lru_cache.py @@ -10,6 +10,7 @@ from collections import OrderedDict from typing import Generic, TypeVar + K = TypeVar("K") V = TypeVar("V") diff --git a/py/src/braintrust/prompt_cache/prompt_cache.py b/py/src/braintrust/prompt_cache/prompt_cache.py index 27313849..ac6d8a33 100644 --- a/py/src/braintrust/prompt_cache/prompt_cache.py +++ b/py/src/braintrust/prompt_cache/prompt_cache.py @@ -9,7 +9,6 @@ The cache is keyed by project identifier (ID or name), prompt slug, and version. """ - from braintrust import prompt from braintrust.prompt_cache import disk_cache, lru_cache diff --git a/py/src/braintrust/queue.py b/py/src/braintrust/queue.py index 4629c3a3..ff6fc6cf 100644 --- a/py/src/braintrust/queue.py +++ b/py/src/braintrust/queue.py @@ -4,6 +4,7 @@ from .util import eprint + T = TypeVar("T") DEFAULT_QUEUE_SIZE = 25000 diff --git a/py/src/braintrust/score.py b/py/src/braintrust/score.py index cd5fe720..62f9ee7e 100644 --- a/py/src/braintrust/score.py +++ b/py/src/braintrust/score.py @@ -6,6 +6,7 @@ from .serializable_data_class import SerializableDataClass + # ========================================================================= # !!!!!!!!!!!!!!!! READ THIS BEFORE CHANGING THIS FILE !!!!!!!!!!!!!!!! # diff --git a/py/src/braintrust/span_cache.py b/py/src/braintrust/span_cache.py index 17148cde..1f6bde9c 100644 --- a/py/src/braintrust/span_cache.py +++ b/py/src/braintrust/span_cache.py @@ -15,6 +15,7 @@ from braintrust.util import merge_dicts + # Global registry of active span caches for process exit cleanup _active_caches: set["SpanCache"] = set() _exit_handlers_registered = False diff --git a/py/src/braintrust/span_identifier_v4.py b/py/src/braintrust/span_identifier_v4.py index a3db4c80..c881ef49 100644 --- a/py/src/braintrust/span_identifier_v4.py +++ b/py/src/braintrust/span_identifier_v4.py @@ -11,6 +11,7 @@ SpanObjectTypeV3, ) + ENCODING_VERSION_NUMBER_V4 = 4 diff --git a/py/src/braintrust/test_bt_json.py b/py/src/braintrust/test_bt_json.py index f67f7c69..f1d4e368 100644 --- a/py/src/braintrust/test_bt_json.py +++ b/py/src/braintrust/test_bt_json.py @@ -282,6 +282,7 @@ def test_deep_copy_numeric_and_special_keys(self): self.assertTrue("(1, 2)" in result or "1, 2" in result) self.assertIn("None", result) + @pytest.mark.vcr def test_to_bt_safe_special_objects(): """Test _to_bt_safe handling of Span, Experiment, Dataset, Logger objects.""" @@ -314,9 +315,7 @@ def test_to_bt_safe_attachments(self): self.assertIs(result, attachment) # Test ExternalAttachment preservation - ext_attachment = ExternalAttachment( - url="s3://bucket/key", filename="ext.pdf", content_type="application/pdf" - ) + ext_attachment = ExternalAttachment(url="s3://bucket/key", filename="ext.pdf", content_type="application/pdf") result_ext = _to_bt_safe(ext_attachment) self.assertIs(result_ext, ext_attachment) @@ -418,9 +417,7 @@ def __init__(self): def test_bt_safe_deep_copy_attachment_identity(self): """Test bt_safe_deep_copy preserves attachment object identity.""" attachment1 = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain") - attachment2 = ExternalAttachment( - url="s3://bucket/key", filename="file2.pdf", content_type="application/pdf" - ) + attachment2 = ExternalAttachment(url="s3://bucket/key", filename="file2.pdf", content_type="application/pdf") original = { "field1": attachment1, diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py index 6c3c1fbd..313756cf 100644 --- a/py/src/braintrust/test_context.py +++ b/py/src/braintrust/test_context.py @@ -32,6 +32,7 @@ def _threadpool_scenario(test_logger, with_memory_logger): from braintrust.test_helpers import init_test_logger, with_memory_logger # noqa: F401 from braintrust.wrappers.threads import setup_threads + F = TypeVar("F", bound=Callable) diff --git a/py/src/braintrust/test_framework.py b/py/src/braintrust/test_framework.py index 9acf284b..87247b44 100644 --- a/py/src/braintrust/test_framework.py +++ b/py/src/braintrust/test_framework.py @@ -531,6 +531,7 @@ def task_with_hooks(input, hooks): assert len(root_span) == 1 assert root_span[0].get("tags") == None + @pytest.mark.asyncio async def test_eval_enable_cache(): state = BraintrustState() diff --git a/py/src/braintrust/test_framework2.py b/py/src/braintrust/test_framework2.py index 9b06c5b5..d2329850 100644 --- a/py/src/braintrust/test_framework2.py +++ b/py/src/braintrust/test_framework2.py @@ -6,6 +6,7 @@ from .framework2 import projects + # Check if pydantic is available HAS_PYDANTIC = importlib.util.find_spec("pydantic") is not None diff --git a/py/src/braintrust/test_helpers.py b/py/src/braintrust/test_helpers.py index 7e24bb23..98a8e7e5 100644 --- a/py/src/braintrust/test_helpers.py +++ b/py/src/braintrust/test_helpers.py @@ -6,6 +6,7 @@ from braintrust.logger import ObjectMetadata, OrgProjectMetadata, ProjectExperimentMetadata from braintrust.util import LazyValue + # Fake API key for testing only - this will not work with actual API calls TEST_ORG_ID = "test-org-id" TEST_ORG_NAME = "test-org-name" diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py index 9792ed2a..9e8829c7 100644 --- a/py/src/braintrust/test_logger.py +++ b/py/src/braintrust/test_logger.py @@ -68,7 +68,6 @@ def test_init_validation(self): assert str(cm.exception) == f"duplicate tag: {tag}" - def test_init_with_dataset_id_only(self): """Test that init accepts dataset={'id': '...'} parameter""" # Test the logic that extracts dataset_id from the dict @@ -125,6 +124,7 @@ def test_init_with_repo_info_does_not_raise(self): assert metadata.project.id == "test-project-id" assert metadata.experiment.name == "test-exp" + class TestLogger(TestCase): def test_extract_attachments_no_op(self): attachments: List[BaseAttachment] = [] @@ -242,8 +242,6 @@ def test_extract_attachments_with_attachments(self): }, ) - - def test_prompt_build_with_structured_output_templating(self): self.maxDiff = None prompt = Prompt( @@ -3076,7 +3074,7 @@ def test_extract_attachments_collects_and_replaces(): event = { "input": {"file": attachment1}, "output": {"file": attachment2}, - "metadata": {"files": [attachment1, ext_attachment]} + "metadata": {"files": [attachment1, ext_attachment]}, } attachments = [] @@ -3106,7 +3104,7 @@ def test_extract_attachments_preserves_identity(): event = { "input": attachment, "output": attachment, # Same instance - "metadata": {"file": attachment} # Same instance again + "metadata": {"file": attachment}, # Same instance again } attachments = [] @@ -3145,10 +3143,7 @@ def test_multiple_attachments_upload_tracked(with_memory_logger, with_simulate_l logger = init_test_logger(__name__) span = logger.start_span(name="test_span") - span.log( - input={"file1": attachment1}, - output={"file2": attachment2} - ) + span.log(input={"file1": attachment1}, output={"file2": attachment2}) span.end() logger.flush() @@ -3178,9 +3173,7 @@ def test_same_attachment_logged_twice_tracked_twice(with_memory_logger, with_sim def test_external_attachment_upload_tracked(with_memory_logger, with_simulate_login): """Test that ExternalAttachment upload is also tracked.""" ext_attachment = ExternalAttachment( - url="s3://bucket/key.pdf", - filename="external.pdf", - content_type="application/pdf" + url="s3://bucket/key.pdf", filename="external.pdf", content_type="application/pdf" ) logger = init_test_logger(__name__) @@ -3218,11 +3211,7 @@ def test_multiple_attachment_types_tracked(with_memory_logger, with_simulate_log logger = init_test_logger(__name__) span = logger.start_span(name="test_span") - span.log( - input=attachment, - output=json_attachment, - metadata={"file": ext_attachment} - ) + span.log(input=attachment, output=json_attachment, metadata={"file": ext_attachment}) span.end() logger.flush() diff --git a/py/src/braintrust/test_otel.py b/py/src/braintrust/test_otel.py index 2706cf62..68da6a72 100644 --- a/py/src/braintrust/test_otel.py +++ b/py/src/braintrust/test_otel.py @@ -576,6 +576,7 @@ def test_custom_filter_is_root_span(self): assert "root_span" in names assert "child_span" not in names + def test_parent_from_headers_invalid_inputs(): """Test parent_from_headers with various invalid inputs.""" if not _check_otel_installed(): diff --git a/py/src/braintrust/test_sandbox.py b/py/src/braintrust/test_sandbox.py index c170eead..3e5694b3 100644 --- a/py/src/braintrust/test_sandbox.py +++ b/py/src/braintrust/test_sandbox.py @@ -7,6 +7,7 @@ from .logger import BraintrustState from .sandbox import RegisterSandboxResult, SandboxConfig, register_sandbox + SNAPSHOT_REF = "im-icRxmsk1Sz9XPP2f8OblVU" PROJECT = "My Project" ENTRYPOINTS = ["./local/js/optimization/evals/btql-generation/btql-queries.eval.ts"] diff --git a/py/src/braintrust/test_span_cache.py b/py/src/braintrust/test_span_cache.py index fc0b6c7e..9b250d44 100644 --- a/py/src/braintrust/test_span_cache.py +++ b/py/src/braintrust/test_span_cache.py @@ -1,6 +1,5 @@ """Tests for SpanCache (disk-based cache).""" - from braintrust.span_cache import CachedSpan, SpanCache diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py index ef7044e8..f07b9ef0 100644 --- a/py/src/braintrust/trace.py +++ b/py/src/braintrust/trace.py @@ -154,7 +154,9 @@ def __init__( else: # Standard constructor with SpanFetcher if object_type is None or object_id is None or root_span_id is None or get_state is None: - raise ValueError("Must provide either fetch_fn or all of object_type, object_id, root_span_id, get_state") + raise ValueError( + "Must provide either fetch_fn or all of object_type, object_id, root_span_id, get_state" + ) async def _fetch_fn(span_type: Optional[list[str]]) -> list[SpanData]: state = await get_state() @@ -307,13 +309,15 @@ def __init__( state: BraintrustState, ): # Initialize dict with trace_ref for JSON serialization - super().__init__({ - "trace_ref": { - "object_type": object_type, - "object_id": object_id, - "root_span_id": root_span_id, + super().__init__( + { + "trace_ref": { + "object_type": object_type, + "object_id": object_id, + "root_span_id": root_span_id, + } } - }) + ) self._object_type = object_type self._object_id = object_id diff --git a/py/src/braintrust/util.py b/py/src/braintrust/util.py index 5ed1ccd2..516cb9b6 100644 --- a/py/src/braintrust/util.py +++ b/py/src/braintrust/util.py @@ -29,6 +29,7 @@ def parse_env_var_float(name: str, default: float) -> float: except (ValueError, TypeError): return default + GLOBAL_PROJECT = "Global" BT_IS_ASYNC_ATTRIBUTE = "_BT_IS_ASYNC" diff --git a/py/src/braintrust/wrappers/adk/__init__.py b/py/src/braintrust/wrappers/adk/__init__.py index 18edc6a8..6c6b8a14 100644 --- a/py/src/braintrust/wrappers/adk/__init__.py +++ b/py/src/braintrust/wrappers/adk/__init__.py @@ -11,6 +11,7 @@ from braintrust.span_types import SpanTypeAttribute from wrapt import wrap_function_wrapper + logger = logging.getLogger(__name__) __all__ = ["setup_braintrust", "setup_adk", "wrap_agent", "wrap_runner", "wrap_flow", "wrap_mcp_tool"] diff --git a/py/src/braintrust/wrappers/adk/test_adk.py b/py/src/braintrust/wrappers/adk/test_adk.py index f1c1700a..4462d89d 100644 --- a/py/src/braintrust/wrappers/adk/test_adk.py +++ b/py/src/braintrust/wrappers/adk/test_adk.py @@ -10,6 +10,7 @@ from braintrust.wrappers.adk import _wrap_create_thread, setup_adk from google.adk import Agent + ADK_VERSION = tuple(int(x) for x in pkg_version("google-adk").split(".")[:3]) from google.adk.agents import LlmAgent from google.adk.runners import Runner @@ -17,6 +18,7 @@ from google.genai import types from pydantic import BaseModel, Field + PROJECT_NAME = "test_adk" setup_adk(project_name=PROJECT_NAME) diff --git a/py/src/braintrust/wrappers/adk/test_auto_adk.py b/py/src/braintrust/wrappers/adk/test_auto_adk.py index ead2ba4a..493bd1fb 100644 --- a/py/src/braintrust/wrappers/adk/test_auto_adk.py +++ b/py/src/braintrust/wrappers/adk/test_auto_adk.py @@ -2,6 +2,7 @@ from braintrust.auto import auto_instrument + # 1. Instrument results = auto_instrument() assert results.get("adk") == True, "auto_instrument should return True for adk" @@ -14,6 +15,7 @@ from google.adk import agents, runners from google.adk.flows.llm_flows import base_llm_flow + assert getattr(agents.BaseAgent, "_braintrust_patched", False), "BaseAgent should be patched" assert getattr(runners.Runner, "_braintrust_patched", False), "Runner should be patched" assert getattr(base_llm_flow.BaseLlmFlow, "_braintrust_patched", False), "BaseLlmFlow should be patched" diff --git a/py/src/braintrust/wrappers/agno/__init__.py b/py/src/braintrust/wrappers/agno/__init__.py index 527c01ef..3345f28f 100644 --- a/py/src/braintrust/wrappers/agno/__init__.py +++ b/py/src/braintrust/wrappers/agno/__init__.py @@ -30,6 +30,7 @@ from .team import wrap_team from .workflow import wrap_workflow + logger = logging.getLogger(__name__) diff --git a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py index 1f67ee05..fcb926e1 100644 --- a/py/src/braintrust/wrappers/agno/_test_agno_helpers.py +++ b/py/src/braintrust/wrappers/agno/_test_agno_helpers.py @@ -7,6 +7,7 @@ from braintrust.wrappers.agno.agent import wrap_agent + PROJECT_NAME = "test-agno-app" @@ -129,6 +130,7 @@ def __init__(self): def _execute_workflow_agent(self, user_input, session, execution_input, run_context, stream=False, **kwargs): if stream: + def _stream(): yield FakeEvent("WorkflowStarted") yield FakeEvent( diff --git a/py/src/braintrust/wrappers/agno/test_workflow.py b/py/src/braintrust/wrappers/agno/test_workflow.py index 0a9e88ac..199a52f9 100644 --- a/py/src/braintrust/wrappers/agno/test_workflow.py +++ b/py/src/braintrust/wrappers/agno/test_workflow.py @@ -186,7 +186,9 @@ def test_agno_workflow_stream_prefers_final_workflow_output(memory_logger): def test_agno_workflow_stream_preserves_final_run_response_fields(memory_logger): - Workflow = wrap_workflow(make_fake_streaming_workflow_with_mutated_run_response("CompatWorkflowMutatedRunResponse")) + Workflow = wrap_workflow( + make_fake_streaming_workflow_with_mutated_run_response("CompatWorkflowMutatedRunResponse") + ) workflow = Workflow() execution_input = FakeExecutionInput("hello world") diff --git a/py/src/braintrust/wrappers/agno/utils.py b/py/src/braintrust/wrappers/agno/utils.py index ecdf0646..7951ac7c 100644 --- a/py/src/braintrust/wrappers/agno/utils.py +++ b/py/src/braintrust/wrappers/agno/utils.py @@ -24,7 +24,6 @@ def get_args_kwargs(args: list[str], kwargs: dict[str, Any], keys: list[str]): return {k: args[i] if args else kwargs.get(k) for i, k in enumerate(keys)}, omit(kwargs, keys) - def _try_to_dict(obj: Any) -> Any: """Convert object to dict, handling different object types like OpenAI wrapper.""" if isinstance(obj, dict): diff --git a/py/src/braintrust/wrappers/anthropic.py b/py/src/braintrust/wrappers/anthropic.py index 26033536..8357fc1e 100644 --- a/py/src/braintrust/wrappers/anthropic.py +++ b/py/src/braintrust/wrappers/anthropic.py @@ -7,6 +7,7 @@ from braintrust.wrappers._anthropic_utils import Wrapper, extract_anthropic_usage, finalize_anthropic_tokens from wrapt import wrap_function_wrapper + log = logging.getLogger(__name__) diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py index ecfca75e..d4129a2f 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py @@ -3,6 +3,7 @@ from braintrust.auto import auto_instrument from braintrust.wrappers.test_utils import autoinstrument_test_context + # 1. Instrument results = auto_instrument() assert results.get("agno") == True, "auto_instrument should return True for agno" @@ -48,28 +49,28 @@ def check_wrapped(klass, private_method, public_method, required=True): # Model methods (all public, all required) assert hasattr(Model, "invoke") and hasattr(Model.invoke, "__wrapped__"), "Model.invoke should be wrapped" assert hasattr(Model, "ainvoke") and hasattr(Model.ainvoke, "__wrapped__"), "Model.ainvoke should be wrapped" -assert hasattr(Model, "invoke_stream") and hasattr( - Model.invoke_stream, "__wrapped__" -), "Model.invoke_stream should be wrapped" -assert hasattr(Model, "ainvoke_stream") and hasattr( - Model.ainvoke_stream, "__wrapped__" -), "Model.ainvoke_stream should be wrapped" +assert hasattr(Model, "invoke_stream") and hasattr(Model.invoke_stream, "__wrapped__"), ( + "Model.invoke_stream should be wrapped" +) +assert hasattr(Model, "ainvoke_stream") and hasattr(Model.ainvoke_stream, "__wrapped__"), ( + "Model.ainvoke_stream should be wrapped" +) assert hasattr(Model, "response") and hasattr(Model.response, "__wrapped__"), "Model.response should be wrapped" assert hasattr(Model, "aresponse") and hasattr(Model.aresponse, "__wrapped__"), "Model.aresponse should be wrapped" -assert hasattr(Model, "response_stream") and hasattr( - Model.response_stream, "__wrapped__" -), "Model.response_stream should be wrapped" -assert hasattr(Model, "aresponse_stream") and hasattr( - Model.aresponse_stream, "__wrapped__" -), "Model.aresponse_stream should be wrapped" +assert hasattr(Model, "response_stream") and hasattr(Model.response_stream, "__wrapped__"), ( + "Model.response_stream should be wrapped" +) +assert hasattr(Model, "aresponse_stream") and hasattr(Model.aresponse_stream, "__wrapped__"), ( + "Model.aresponse_stream should be wrapped" +) # FunctionCall methods (all public, all required) -assert hasattr(FunctionCall, "execute") and hasattr( - FunctionCall.execute, "__wrapped__" -), "FunctionCall.execute should be wrapped" -assert hasattr(FunctionCall, "aexecute") and hasattr( - FunctionCall.aexecute, "__wrapped__" -), "FunctionCall.aexecute should be wrapped" +assert hasattr(FunctionCall, "execute") and hasattr(FunctionCall.execute, "__wrapped__"), ( + "FunctionCall.execute should be wrapped" +) +assert hasattr(FunctionCall, "aexecute") and hasattr(FunctionCall.aexecute, "__wrapped__"), ( + "FunctionCall.aexecute should be wrapped" +) # 4. Make API call and verify spans with autoinstrument_test_context("test_auto_agno") as memory_logger: @@ -108,7 +109,9 @@ def check_wrapped(klass, private_method, public_method, required=True): # Verify span hierarchy - LLM span should be child of agent span llm_parents = llm_span.get("span_parents", []) agent_span_id = agent_span.get("span_id") - assert agent_span_id in llm_parents, f"LLM span should be child of agent span. Agent ID: {agent_span_id}, LLM parents: {llm_parents}" + assert agent_span_id in llm_parents, ( + f"LLM span should be child of agent span. Agent ID: {agent_span_id}, LLM parents: {llm_parents}" + ) print("Agent span created (type: task)") print("Model span created (type: llm)") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py index 6a6b32f8..2c51d911 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py @@ -4,6 +4,7 @@ from braintrust.auto import auto_instrument from braintrust.wrappers.test_utils import autoinstrument_test_context + # 1. Verify not patched initially assert not getattr(anthropic, "__braintrust_wrapped__", False) diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py index b4d69586..d9213cdb 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py @@ -4,6 +4,7 @@ from braintrust.wrappers.claude_agent_sdk._test_transport import make_cassette_transport from braintrust.wrappers.test_utils import autoinstrument_test_context + # 1. Instrument results = auto_instrument() assert results.get("claude_agent_sdk") == True diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py index 4a2fccdf..924ceb46 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py @@ -9,6 +9,7 @@ from braintrust.auto import auto_instrument from braintrust.wrappers.dspy import BraintrustDSpyCallback + # 1. Verify not patched initially assert not getattr(dspy, "__braintrust_wrapped__", False) @@ -25,6 +26,7 @@ dspy.configure(lm=None) from dspy.dsp.utils.settings import settings + has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in settings.callbacks) assert has_bt_callback, f"Expected BraintrustDSpyCallback in callbacks after configure()" diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py index 4645ae0d..b9a5b72b 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py @@ -3,6 +3,7 @@ from braintrust.auto import auto_instrument from braintrust.wrappers.test_utils import autoinstrument_test_context + # 1. Instrument results = auto_instrument() assert results.get("google_genai") == True diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py index 2aeeb921..0d8db254 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py @@ -4,6 +4,7 @@ from braintrust.auto import auto_instrument from braintrust.wrappers.test_utils import autoinstrument_test_context + # 1. Verify not patched initially assert not hasattr(litellm, "_braintrust_wrapped") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py index 4fb5f4c8..ef5eaf8f 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py @@ -4,6 +4,7 @@ from braintrust.auto import auto_instrument from braintrust.wrappers.test_utils import autoinstrument_test_context + # 1. Verify not patched initially assert not getattr(openai, "__braintrust_wrapped__", False) diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py index c6b87484..c45fd13b 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py @@ -3,6 +3,7 @@ from braintrust.auto import auto_instrument from braintrust.wrappers.test_utils import autoinstrument_test_context + # 1. Instrument results = auto_instrument() assert results.get("pydantic_ai") == True @@ -23,6 +24,7 @@ ) import asyncio + result = asyncio.run(agent.run("Say hi")) assert result.output diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_aresponses.py b/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_aresponses.py index 49867b36..42191de0 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_aresponses.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_aresponses.py @@ -6,6 +6,7 @@ from braintrust.wrappers.litellm import patch_litellm from braintrust.wrappers.test_utils import autoinstrument_test_context + patch_litellm() diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_responses.py b/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_responses.py index e25b2f86..2b2eac38 100644 --- a/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_responses.py +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_patch_litellm_responses.py @@ -4,6 +4,7 @@ from braintrust.wrappers.litellm import patch_litellm from braintrust.wrappers.test_utils import autoinstrument_test_context + patch_litellm() with autoinstrument_test_context("test_patch_litellm_responses") as memory_logger: diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py b/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py index 9c45bf7d..8b596860 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py @@ -21,6 +21,7 @@ from ._wrapper import _create_client_wrapper_class, _create_tool_wrapper_class, _wrap_tool_factory + logger = logging.getLogger(__name__) __all__ = ["setup_claude_agent_sdk"] diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py index f8786ead..3a516568 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py @@ -11,6 +11,7 @@ import anyio + try: import claude_agent_sdk from claude_agent_sdk._internal.transport import Transport @@ -230,9 +231,7 @@ async def connect(self) -> None: return if self._record_mode not in {"once", "all"}: - raise FileNotFoundError( - f"Cassette missing for {self._cassette_name}: {self._cassette_path}" - ) + raise FileNotFoundError(f"Cassette missing for {self._cassette_name}: {self._cassette_path}") self._cassette_path.parent.mkdir(parents=True, exist_ok=True) prompt = _empty_stream() if self._prompt == "" else self._prompt @@ -255,9 +254,7 @@ async def write(self, data: str) -> None: expected = _normalize_for_match(recorded["payload"]) self._maybe_remap_control_request_id(recorded["payload"], actual_raw) if expected != actual: - raise AssertionError( - f"Write mismatch for {self._cassette_name}\nexpected: {expected}\nactual: {actual}" - ) + raise AssertionError(f"Write mismatch for {self._cassette_name}\nexpected: {expected}\nactual: {actual}") def read_messages(self): return self._read_messages_impl() @@ -306,17 +303,13 @@ def _should_replay(self) -> bool: return True return False - async def _wait_for_event( - self, op: str, *, allow_eof: bool = False - ) -> dict[str, Any] | None: + async def _wait_for_event(self, op: str, *, allow_eof: bool = False) -> dict[str, Any] | None: while True: async with self._cursor_lock: if self._cursor >= len(self._events): if allow_eof: return None - raise AssertionError( - f"Replay for {self._cassette_name} exhausted before expected {op}" - ) + raise AssertionError(f"Replay for {self._cassette_name} exhausted before expected {op}") event = self._events[self._cursor] if event["op"] == op: diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py index 43e5d8f5..e019241d 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py @@ -24,6 +24,7 @@ SerializedContentType, ) + log = logging.getLogger(__name__) _thread_local = threading.local() @@ -81,6 +82,7 @@ def release(self) -> None: def _log_tracing_warning(exc: Exception) -> None: log.warning("Error in tracing code", exc_info=exc) + def _parse_tool_name(tool_name: Any) -> ParsedToolName: raw_name = str(tool_name) if tool_name is not None else DEFAULT_TOOL_NAME @@ -136,6 +138,7 @@ def _serialize_tool_result_output(tool_result_block: Any) -> dict[str, Any]: return output + def _serialize_system_message(message: Any) -> dict[str, Any]: serialized = {"subtype": getattr(message, "subtype", None)} @@ -319,7 +322,9 @@ def mark_task_started(self, tool_use_id: Any) -> None: def acquire_span_for_handler(self, tool_name: Any, args: Any) -> _ActiveToolSpan | None: parsed_tool_name = _parse_tool_name(tool_name) - candidate_names = list(dict.fromkeys((parsed_tool_name.raw_name, parsed_tool_name.display_name, str(tool_name)))) + candidate_names = list( + dict.fromkeys((parsed_tool_name.raw_name, parsed_tool_name.display_name, str(tool_name))) + ) candidates = [ active_tool_span @@ -335,7 +340,9 @@ def acquire_span_for_handler(self, tool_name: Any, args: Any) -> _ActiveToolSpan matched_span.activate() return matched_span - def _end_tool_span(self, tool_use_id: str, tool_result_block: Any | None = None, end_time: float | None = None) -> None: + def _end_tool_span( + self, tool_use_id: str, tool_result_block: Any | None = None, end_time: float | None = None + ) -> None: active_tool_span = self._active_spans.pop(tool_use_id, None) self._pending_task_link_tool_use_ids.discard(tool_use_id) if active_tool_span is None: @@ -528,7 +535,9 @@ def process(self, message: Any) -> None: self._task_span_by_tool_use_id.pop(str(tool_use_id), None) task_span.end() del self._active_spans[task_id] - self._active_task_order = [active_task_id for active_task_id in self._active_task_order if active_task_id != task_id] + self._active_task_order = [ + active_task_id for active_task_id in self._active_task_order if active_task_id != task_id + ] @property def active_tool_use_ids(self) -> frozenset[str]: @@ -568,11 +577,7 @@ def _parent_export(self, message: Any) -> str: return self._tool_tracker.get_span_export(getattr(message, "tool_use_id", None)) or self._root_span_export def _span_name(self, message: Any, task_id: str) -> str: - return ( - getattr(message, "description", None) - or getattr(message, "task_type", None) - or f"Task {task_id}" - ) + return getattr(message, "description", None) or getattr(message, "task_type", None) or f"Task {task_id}" def _metadata(self, message: Any) -> dict[str, Any]: metadata = { @@ -711,7 +716,8 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: if message_type == MessageClassName.ASSISTANT: if llm_tracker.current_span and tool_tracker.has_active_spans: active_subagent_tool_use_ids = ( - task_event_span_tracker.active_tool_use_ids | tool_tracker.pending_task_link_tool_use_ids + task_event_span_tracker.active_tool_use_ids + | tool_tracker.pending_task_link_tool_use_ids ) tool_tracker.cleanup( end_time=llm_tracker.get_next_start_time(), @@ -729,7 +735,11 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: ) tool_tracker.start_tool_spans(message, llm_tracker.current_span_export) if final_content: - if extended_existing_span and final_results and final_results[-1].get("role") == "assistant": + if ( + extended_existing_span + and final_results + and final_results[-1].get("role") == "assistant" + ): final_results[-1] = final_content else: final_results.append(final_content) @@ -738,8 +748,7 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: has_tool_results = False if hasattr(message, "content"): has_tool_results = any( - type(block).__name__ == BlockClassName.TOOL_RESULT - for block in message.content + type(block).__name__ == BlockClassName.TOOL_RESULT for block in message.content ) content = _serialize_content_blocks(message.content) final_results.append({"content": content, "role": "user"}) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index 9204968d..eb12fa3d 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -11,6 +11,7 @@ import pytest + # Try to import the Claude Agent SDK - skip tests if not available try: import claude_agent_sdk as _claude_agent_sdk @@ -42,6 +43,7 @@ ) from braintrust.wrappers.test_utils import verify_autoinstrument_script + PROJECT_NAME = "test-claude-agent-sdk" TEST_MODEL = "claude-haiku-4-5-20251001" REPO_ROOT = Path(__file__).resolve().parents[5] @@ -369,10 +371,16 @@ async def test_bundled_subagent_creates_task_span(memory_logger): llm_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.LLM] _assert_llm_spans_have_time_to_first_token(llm_spans) - assert any(subagent_span["span_id"] in llm_span["span_parents"] for subagent_span in subagent_spans for llm_span in llm_spans) + assert any( + subagent_span["span_id"] in llm_span["span_parents"] + for subagent_span in subagent_spans + for llm_span in llm_spans + ) delegated_llm_spans = [ - llm_span for llm_span in llm_spans if any(subagent_span["span_id"] in llm_span["span_parents"] for subagent_span in subagent_spans) + llm_span + for llm_span in llm_spans + if any(subagent_span["span_id"] in llm_span["span_parents"] for subagent_span in subagent_spans) ] assert delegated_llm_spans, "Expected at least one delegated LLM span nested under a subagent task span" @@ -573,7 +581,9 @@ async def test_delegated_subagent_llm_and_tool_spans_nest_under_task_span(memory tool_use_id="call-agent", usage={"total_tokens": 42, "tool_uses": 1, "duration_ms": 250}, ), - UserMessage(content=[ToolResultBlock(tool_use_id="call-agent", content=[TextBlock("2026.03.11 | sdk-platform")])]), + UserMessage( + content=[ToolResultBlock(tool_use_id="call-agent", content=[TextBlock("2026.03.11 | sdk-platform")])] + ), ResultMessage(), ] @@ -684,8 +694,12 @@ async def test_multiple_subagent_orchestration_keeps_outer_agent_tool_calls_outs ), UserMessage( content=[ - ToolResultBlock(tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")]), - ToolResultBlock(tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")]), + ToolResultBlock( + tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")] + ), + ToolResultBlock( + tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")] + ), ] ), ResultMessage(), @@ -708,9 +722,7 @@ async def test_multiple_subagent_orchestration_keeps_outer_agent_tool_calls_outs agent_tool_spans = [span for span in tool_spans if span["span_attributes"]["name"] == "Agent"] assert len(agent_tool_spans) == 2 - outer_llm_spans = [ - llm_span for llm_span in llm_spans if root_task_span["span_id"] in llm_span["span_parents"] - ] + outer_llm_spans = [llm_span for llm_span in llm_spans if root_task_span["span_id"] in llm_span["span_parents"]] assert len(outer_llm_spans) == 1, f"Expected a single outer orchestration LLM span, got {len(outer_llm_spans)}" outer_llm_span = outer_llm_spans[0] @@ -722,10 +734,12 @@ async def test_multiple_subagent_orchestration_keeps_outer_agent_tool_calls_outs delegated_llm_spans = [ llm_span for llm_span in llm_spans - if alpha_task_span["span_id"] in llm_span["span_parents"] or beta_task_span["span_id"] in llm_span["span_parents"] + if alpha_task_span["span_id"] in llm_span["span_parents"] + or beta_task_span["span_id"] in llm_span["span_parents"] ] assert delegated_llm_spans, "Expected delegated LLM spans nested under delegated task spans" + @pytest.mark.asyncio async def test_relay_user_messages_between_parallel_agent_calls_do_not_split_llm_span(memory_logger): """Relay UserMessages (subagent prompt echoes without ToolResultBlocks) between @@ -815,8 +829,12 @@ async def test_relay_user_messages_between_parallel_agent_calls_do_not_split_llm # Final tool results (real turn boundary — has ToolResultBlocks) UserMessage( content=[ - ToolResultBlock(tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")]), - ToolResultBlock(tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")]), + ToolResultBlock( + tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")] + ), + ToolResultBlock( + tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")] + ), ] ), # Final answer @@ -852,7 +870,8 @@ async def test_relay_user_messages_between_parallel_agent_calls_do_not_split_llm # Exactly one outer LLM span should parent both Agent tool calls # (the final-answer LLM span is a separate, expected outer span) orchestration_llm_spans = [ - llm_span for llm_span in llm_spans + llm_span + for llm_span in llm_spans if any(llm_span["span_id"] in agent_tool_span["span_parents"] for agent_tool_span in agent_tool_spans) ] assert len(orchestration_llm_spans) == 1, ( @@ -978,18 +997,13 @@ async def test_agent_tool_spans_encapsulate_child_task_spans(memory_logger): for agent_span in agent_tool_spans: agent_end = agent_span["metrics"]["end"] # Find child TASK span (parented under this Agent TOOL span) - children = [ - ts for ts in child_task_spans - if agent_span["span_id"] in ts.get("span_parents", []) - ] + children = [ts for ts in child_task_spans if agent_span["span_id"] in ts.get("span_parents", [])] assert len(children) == 1, ( - f"Agent span {agent_span['span_id']} should have exactly 1 child TASK span, " - f"got {len(children)}" + f"Agent span {agent_span['span_id']} should have exactly 1 child TASK span, got {len(children)}" ) child_end = children[0]["metrics"]["end"] assert agent_end >= child_end, ( - f"Agent TOOL span must encapsulate its child TASK span. " - f"Agent end={agent_end}, child TASK end={child_end}" + f"Agent TOOL span must encapsulate its child TASK span. Agent end={agent_end}, child TASK end={child_end}" ) @@ -1386,7 +1400,9 @@ def test_tool_span_tracker_logs_errors(memory_logger): ) tracker.finish_tool_spans( UserMessage( - content=[ToolResultBlock(tool_use_id="call-err", content=[TextBlock("Division by zero")], is_error=True)] + content=[ + ToolResultBlock(tool_use_id="call-err", content=[TextBlock("Division by zero")], is_error=True) + ] ) ) llm_span.end() @@ -1560,7 +1576,9 @@ def test_serialize_system_message_extracts_known_fields(message, expected): def test_extract_usage_from_result_message_normalizes_anthropic_tokens(): - metrics = _extract_usage_from_result_message(ResultMessage(input_tokens=5, output_tokens=3, cache_creation_input_tokens=2)) + metrics = _extract_usage_from_result_message( + ResultMessage(input_tokens=5, output_tokens=3, cache_creation_input_tokens=2) + ) assert metrics == { "prompt_tokens": 7.0, @@ -1715,7 +1733,7 @@ async def calculator_handler(args): nested_span = start_span(name=f"nested_tool_work_{args['a']}") nested_span.log(input=args) nested_span.end() - return {"content": [{"type": "text", "text": str(args['a'] + args['b'])}]} + return {"content": [{"type": "text", "text": str(args["a"] + args["b"])}]} calculator_tool = wrapped_tool_class( name="calculator", @@ -1767,8 +1785,13 @@ async def calculator_handler(args): nested_span_first = _find_span_by_name(spans, "nested_tool_work_2") nested_span_second = _find_span_by_name(spans, "nested_tool_work_10") - assert tool_span_by_input[(("a", 2), ("b", 3), ("operation", "add"))]["span_id"] in nested_span_first["span_parents"] - assert tool_span_by_input[(("a", 10), ("b", 5), ("operation", "add"))]["span_id"] in nested_span_second["span_parents"] + assert ( + tool_span_by_input[(("a", 2), ("b", 3), ("operation", "add"))]["span_id"] in nested_span_first["span_parents"] + ) + assert ( + tool_span_by_input[(("a", 10), ("b", 5), ("operation", "add"))]["span_id"] + in nested_span_second["span_parents"] + ) class TestAutoInstrumentClaudeAgentSDK: @@ -1779,6 +1802,7 @@ def test_auto_instrument_claude_agent_sdk(self): """Test auto_instrument patches Claude Agent SDK and creates spans.""" verify_autoinstrument_script("test_auto_claude_agent_sdk.py") + @pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed") @pytest.mark.asyncio async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, monkeypatch): @@ -1809,7 +1833,9 @@ async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, m async def main() -> None: loop = asyncio.get_running_loop() - loop.set_exception_handler(lambda loop, ctx: loop_errors.append(ctx.get("exception") or ctx.get("message"))) + loop.set_exception_handler( + lambda loop, ctx: loop_errors.append(ctx.get("exception") or ctx.get("message")) + ) options = getattr(consumer_module, "ClaudeAgentOptions")( model="claude-3-5-haiku-20241022", diff --git a/py/src/braintrust/wrappers/dspy.py b/py/src/braintrust/wrappers/dspy.py index 8fad6691..713b3cfe 100644 --- a/py/src/braintrust/wrappers/dspy.py +++ b/py/src/braintrust/wrappers/dspy.py @@ -53,6 +53,7 @@ from braintrust.span_types import SpanTypeAttribute from wrapt import wrap_function_wrapper + # Note: For detailed token and cost metrics, use patch_litellm() before importing DSPy. # The DSPy callback focuses on execution flow and span hierarchy. diff --git a/py/src/braintrust/wrappers/google_genai/__init__.py b/py/src/braintrust/wrappers/google_genai/__init__.py index f80db150..61df30ab 100644 --- a/py/src/braintrust/wrappers/google_genai/__init__.py +++ b/py/src/braintrust/wrappers/google_genai/__init__.py @@ -8,6 +8,7 @@ from braintrust.span_types import SpanTypeAttribute from wrapt import wrap_function_wrapper + logger = logging.getLogger(__name__) diff --git a/py/src/braintrust/wrappers/langchain.py b/py/src/braintrust/wrappers/langchain.py index c723d062..6beeb578 100644 --- a/py/src/braintrust/wrappers/langchain.py +++ b/py/src/braintrust/wrappers/langchain.py @@ -5,6 +5,7 @@ import braintrust + _logger = logging.getLogger("braintrust.wrappers.langchain") try: diff --git a/py/src/braintrust/wrappers/langsmith_wrapper.py b/py/src/braintrust/wrappers/langsmith_wrapper.py index a00a9b40..b22117df 100644 --- a/py/src/braintrust/wrappers/langsmith_wrapper.py +++ b/py/src/braintrust/wrappers/langsmith_wrapper.py @@ -46,6 +46,7 @@ def my_function(inputs: dict) -> dict: from braintrust.logger import NOOP_SPAN, current_span, init_logger, traced from wrapt import wrap_function_wrapper + logger = logging.getLogger(__name__) # Global list to store Braintrust eval results when running in tandem mode diff --git a/py/src/braintrust/wrappers/litellm.py b/py/src/braintrust/wrappers/litellm.py index 526b222f..236df998 100644 --- a/py/src/braintrust/wrappers/litellm.py +++ b/py/src/braintrust/wrappers/litellm.py @@ -9,6 +9,7 @@ from braintrust.span_types import SpanTypeAttribute from braintrust.util import is_numeric, merge_dicts + X_LEGACY_CACHED_HEADER = "x-cached" X_CACHED_HEADER = "x-bt-cached" @@ -572,7 +573,6 @@ def _parse_metrics_from_usage(usage: Any) -> dict[str, Any]: return metrics - def prettify_params(params: dict[str, Any]) -> dict[str, Any]: """Clean up parameters by filtering out NOT_GIVEN values and serializing response_format.""" # Filter out NOT_GIVEN parameters diff --git a/py/src/braintrust/wrappers/openai.py b/py/src/braintrust/wrappers/openai.py index 82bb3903..484a769d 100644 --- a/py/src/braintrust/wrappers/openai.py +++ b/py/src/braintrust/wrappers/openai.py @@ -217,7 +217,7 @@ def _mcp_list_tools_log_data(self, span: tracing.Span[tracing.MCPListToolsSpanDa "output": span.span_data.result, "metadata": { "server": span.span_data.server, - } + }, } def _transcription_log_data(self, span: tracing.Span[tracing.TranscriptionSpanData]) -> dict[str, Any]: @@ -227,7 +227,7 @@ def _transcription_log_data(self, span: tracing.Span[tracing.TranscriptionSpanDa "metadata": { "model": span.span_data.model, "model_config": span.span_data.model_config, - } + }, } def _speech_log_data(self, span: tracing.Span[tracing.SpeechSpanData]) -> dict[str, Any]: @@ -237,7 +237,7 @@ def _speech_log_data(self, span: tracing.Span[tracing.SpeechSpanData]) -> dict[s "metadata": { "model": span.span_data.model, "model_config": span.span_data.model_config, - } + }, } def _speech_group_log_data(self, span: tracing.Span[tracing.SpeechGroupSpanData]) -> dict[str, Any]: diff --git a/py/src/braintrust/wrappers/pydantic_ai.py b/py/src/braintrust/wrappers/pydantic_ai.py index 9ed61462..e3442b85 100644 --- a/py/src/braintrust/wrappers/pydantic_ai.py +++ b/py/src/braintrust/wrappers/pydantic_ai.py @@ -11,6 +11,7 @@ from braintrust.span_types import SpanTypeAttribute from wrapt import wrap_function_wrapper + logger = logging.getLogger(__name__) __all__ = ["setup_pydantic_ai"] diff --git a/py/src/braintrust/wrappers/pytest_plugin/plugin.py b/py/src/braintrust/wrappers/pytest_plugin/plugin.py index 12eb3113..3ad84a7e 100644 --- a/py/src/braintrust/wrappers/pytest_plugin/plugin.py +++ b/py/src/braintrust/wrappers/pytest_plugin/plugin.py @@ -14,6 +14,7 @@ import pytest + if TYPE_CHECKING: from braintrust.logger import Span diff --git a/py/src/braintrust/wrappers/pytest_plugin/test_plugin.py b/py/src/braintrust/wrappers/pytest_plugin/test_plugin.py index 4baaac49..4f5bc4ce 100644 --- a/py/src/braintrust/wrappers/pytest_plugin/test_plugin.py +++ b/py/src/braintrust/wrappers/pytest_plugin/test_plugin.py @@ -13,13 +13,13 @@ import pytest + # --------------------------------------------------------------------------- # Tell pytest we need the pytester plugin. # --------------------------------------------------------------------------- pytest_plugins = ["pytester"] - # --------------------------------------------------------------------------- # Helper: inline conftest that mocks braintrust.init for child tests. # diff --git a/py/src/braintrust/wrappers/test_anthropic.py b/py/src/braintrust/wrappers/test_anthropic.py index 5d8da9f3..54182597 100644 --- a/py/src/braintrust/wrappers/test_anthropic.py +++ b/py/src/braintrust/wrappers/test_anthropic.py @@ -11,6 +11,7 @@ from braintrust.wrappers.anthropic import wrap_anthropic from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script + TEST_ORG_ID = "test-org-123" PROJECT_NAME = "test-anthropic-app" MODEL = "claude-3-haiku-20240307" # use the cheapest model since answers dont matter @@ -448,7 +449,9 @@ async def test_anthropic_beta_messages_create_async(memory_logger): assert "10" in span["output"]["content"][0]["text"] -@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path", "body"]) # exclude query - varies by SDK version +@pytest.mark.vcr( + match_on=["method", "scheme", "host", "port", "path", "body"] +) # exclude query - varies by SDK version @pytest.mark.asyncio async def test_anthropic_beta_messages_streaming_async(memory_logger): assert not memory_logger.pop() diff --git a/py/src/braintrust/wrappers/test_dspy.py b/py/src/braintrust/wrappers/test_dspy.py index a9faa6af..edbc6334 100644 --- a/py/src/braintrust/wrappers/test_dspy.py +++ b/py/src/braintrust/wrappers/test_dspy.py @@ -9,6 +9,7 @@ from braintrust.wrappers.dspy import BraintrustDSpyCallback from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script + PROJECT_NAME = "test-dspy-app" MODEL = "openai/gpt-4o-mini" diff --git a/py/src/braintrust/wrappers/test_google_genai.py b/py/src/braintrust/wrappers/test_google_genai.py index 02fc21f5..51b3b090 100644 --- a/py/src/braintrust/wrappers/test_google_genai.py +++ b/py/src/braintrust/wrappers/test_google_genai.py @@ -10,6 +10,7 @@ from google.genai import types from google.genai.client import Client + PROJECT_NAME = "test-genai-app" MODEL = "gemini-2.0-flash-001" FIXTURES_DIR = Path(__file__).parent.parent.parent.parent.parent / "internal/golden/fixtures" diff --git a/py/src/braintrust/wrappers/test_langsmith_wrapper.py b/py/src/braintrust/wrappers/test_langsmith_wrapper.py index c009a410..c25256c2 100644 --- a/py/src/braintrust/wrappers/test_langsmith_wrapper.py +++ b/py/src/braintrust/wrappers/test_langsmith_wrapper.py @@ -9,7 +9,6 @@ Tests for the LangSmith wrapper to ensure compatibility with LangSmith's API. """ - from braintrust.wrappers.langsmith_wrapper import ( _convert_langsmith_data, _is_patched, @@ -292,7 +291,11 @@ def test_make_braintrust_scorer_handles_wrapped_outputs(self): def langsmith_evaluator(inputs, outputs, reference_outputs): # outputs will be wrapped as {"output": value} for non-dict results actual = outputs.get("output", outputs) - expected = reference_outputs.get("output", reference_outputs) if isinstance(reference_outputs, dict) else reference_outputs + expected = ( + reference_outputs.get("output", reference_outputs) + if isinstance(reference_outputs, dict) + else reference_outputs + ) return {"key": "match", "score": 1.0 if actual == expected else 0.0} converted = _make_braintrust_scorer(langsmith_evaluator) diff --git a/py/src/braintrust/wrappers/test_litellm.py b/py/src/braintrust/wrappers/test_litellm.py index 4639d809..6020634c 100644 --- a/py/src/braintrust/wrappers/test_litellm.py +++ b/py/src/braintrust/wrappers/test_litellm.py @@ -8,6 +8,7 @@ from braintrust.wrappers.litellm import wrap_litellm from braintrust.wrappers.test_utils import assert_metrics_are_valid, verify_autoinstrument_script + TEST_ORG_ID = "test-org-litellm-py-tracing" PROJECT_NAME = "test-project-litellm-py-tracing" TEST_MODEL = "gpt-4o-mini" # cheapest model for tests diff --git a/py/src/braintrust/wrappers/test_oai_attachments.py b/py/src/braintrust/wrappers/test_oai_attachments.py index 737b20da..3e065f3c 100644 --- a/py/src/braintrust/wrappers/test_oai_attachments.py +++ b/py/src/braintrust/wrappers/test_oai_attachments.py @@ -1,4 +1,5 @@ """Tests for OpenAI wrapper attachment processing.""" + import time import openai @@ -7,6 +8,7 @@ from braintrust.test_helpers import init_test_logger from braintrust.wrappers.test_utils import assert_metrics_are_valid + PROJECT_NAME = "test-project-openai-attachment-processing" TEST_MODEL = "gpt-4o-mini" diff --git a/py/src/braintrust/wrappers/test_openai.py b/py/src/braintrust/wrappers/test_openai.py index e832993d..6ab9b343 100644 --- a/py/src/braintrust/wrappers/test_openai.py +++ b/py/src/braintrust/wrappers/test_openai.py @@ -12,6 +12,7 @@ from openai._types import NOT_GIVEN from pydantic import BaseModel + TEST_ORG_ID = "test-org-openai-py-tracing" PROJECT_NAME = "test-project-openai-py-tracing" TEST_MODEL = "gpt-4o-mini" # cheapest model for tests diff --git a/py/src/braintrust/wrappers/test_openrouter.py b/py/src/braintrust/wrappers/test_openrouter.py index 1d750659..39365e1e 100644 --- a/py/src/braintrust/wrappers/test_openrouter.py +++ b/py/src/braintrust/wrappers/test_openrouter.py @@ -15,6 +15,7 @@ from braintrust.wrappers.test_utils import assert_metrics_are_valid from openai import AsyncOpenAI, OpenAI + PROJECT_NAME = "test-openrouter" TEST_MODEL = "openai/gpt-4o-mini" diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py index 14088f74..b794b18b 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py +++ b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py @@ -16,6 +16,7 @@ from pydantic_ai.messages import ModelRequest, UserPromptPart from pydantic_ai.usage import UsageLimits + PROJECT_NAME = "test-pydantic-ai-integration" MODEL = "openai:gpt-4o-mini" # Use cheaper model for tests TEST_PROMPT = "What is 2+2? Answer with just the number." diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py b/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py index 5bbe252b..661b7bf7 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py +++ b/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py @@ -13,6 +13,7 @@ from braintrust.test_helpers import init_test_logger from pydantic_ai import Agent, ModelSettings + PROJECT_NAME = "test-pydantic-ai-logfire" MODEL = "openai:gpt-4o-mini" TEST_PROMPT = "What is 2+2? Answer with just the number." diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py b/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py index b7e2bd9c..c1dfceb3 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py +++ b/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py @@ -5,6 +5,7 @@ from openai import AsyncOpenAI from pydantic_ai import Agent # pylint: disable=import-error + try: # Try new API first (pydantic_ai >= 1.0) from pydantic_ai.models.openai import OpenAIChatModel # pylint: disable=import-error @@ -20,6 +21,7 @@ from braintrust.test_helpers import init_test_logger from pydantic_ai.providers.openai import OpenAIProvider # pylint: disable=import-error + PROJECT_NAME = "test-pydantic-ai" MODEL = "gpt-3.5-turbo" # Use a cheaper model for testing TEST_PROMPT = "What is the capital of Italy?" diff --git a/py/src/braintrust/wrappers/test_utils.py b/py/src/braintrust/wrappers/test_utils.py index be3b92f9..80d9d661 100644 --- a/py/src/braintrust/wrappers/test_utils.py +++ b/py/src/braintrust/wrappers/test_utils.py @@ -10,6 +10,7 @@ from braintrust.conftest import get_vcr_config from braintrust.test_helpers import init_test_logger + # Source directory paths (resolved to handle installed vs source locations) _SOURCE_DIR = Path(__file__).resolve().parent AUTO_TEST_SCRIPTS_DIR = _SOURCE_DIR / "auto_test_scripts" @@ -18,9 +19,7 @@ CASSETTES_DIR = Path(os.environ.get("BRAINTRUST_CASSETTES_DIR", _SOURCE_DIR / "cassettes")) -def run_in_subprocess( - code: str, timeout: int = 30, env: dict[str, str] | None = None -) -> subprocess.CompletedProcess: +def run_in_subprocess(code: str, timeout: int = 30, env: dict[str, str] | None = None) -> subprocess.CompletedProcess: """Run Python code in a fresh subprocess.""" run_env = os.environ.copy() if env: @@ -43,9 +42,7 @@ def verify_autoinstrument_script(script_name: str, timeout: int = 30) -> subproc # Pass cassettes dir to subprocess since it may use installed package env = os.environ.copy() env["BRAINTRUST_CASSETTES_DIR"] = str(_SOURCE_DIR / "cassettes") - env["BRAINTRUST_CLAUDE_AGENT_SDK_CASSETTES_DIR"] = str( - _SOURCE_DIR / "claude_agent_sdk" / "cassettes" - ) + env["BRAINTRUST_CLAUDE_AGENT_SDK_CASSETTES_DIR"] = str(_SOURCE_DIR / "claude_agent_sdk" / "cassettes") result = subprocess.run( [sys.executable, str(script_path)], capture_output=True, diff --git a/py/src/braintrust/wrappers/threads.py b/py/src/braintrust/wrappers/threads.py index 4572e638..71c58f25 100644 --- a/py/src/braintrust/wrappers/threads.py +++ b/py/src/braintrust/wrappers/threads.py @@ -7,6 +7,7 @@ from wrapt import wrap_function_wrapper # pyright: ignore[reportUnknownVariableType, reportMissingTypeStubs] + logger = logging.getLogger(__name__) __all__ = ["setup_threads", "patch_thread", "patch_thread_pool_executor"] diff --git a/pyproject.toml b/pyproject.toml index f0618c8f..ec2cdf1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,3 @@ -[tool.black] -line-length = 119 - [tool.ruff] line-length = 119 @@ -11,6 +8,8 @@ select = [ ] [tool.ruff.lint.isort] known-third-party = ["braintrust", "braintrust_local", "autoevals"] +lines-after-imports = 2 +split-on-trailing-comma = true [tool.pytest.ini_options] asyncio_mode = "strict"