From f55118d56398422f3d7b05713b2de059577f6a1d Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Mon, 13 Apr 2026 15:23:33 -0400 Subject: [PATCH] feat(anthropic): trace beta managed agents apis Add tracing wrappers for beta agents, sessions, and session events across sync and async clients. Capture request input, result metadata, streamed usage, and tool spans for managed-agent tool events. Add VCR regression coverage for agent creation, session creation, and session event send/stream flows. --- .../test_anthropic_beta_agents_create.yaml | 171 ++++ .../test_anthropic_beta_sessions_create.yaml | 415 +++++++++ ..._beta_sessions_events_send_and_stream.yaml | 191 +++++ .../integrations/anthropic/test_anthropic.py | 228 +++++ .../integrations/anthropic/tracing.py | 794 ++++++++++++++++++ 5 files changed, 1799 insertions(+) create mode 100644 py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_agents_create.yaml create mode 100644 py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_sessions_create.yaml create mode 100644 py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_sessions_events_send_and_stream.yaml diff --git a/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_agents_create.yaml b/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_agents_create.yaml new file mode 100644 index 00000000..cd87103c --- /dev/null +++ b/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_agents_create.yaml @@ -0,0 +1,171 @@ +interactions: +- request: + body: '{"model":"claude-haiku-4-5","name":"braintrust-sdk-managed-agent","description":"Does + math","tools":[]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '103' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.94.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.94.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/agents?beta=true + response: + body: + string: '{"archived_at":null,"created_at":"2026-04-13T17:32:25.995104Z","description":"Does + math","id":"agent_011Ca2ApJohLXg95LeGVGeAi","mcp_servers":[],"metadata":{},"model":{"id":"claude-haiku-4-5","speed":"standard"},"name":"braintrust-sdk-managed-agent","skills":[],"system":null,"tools":[],"type":"agent","updated_at":"2026-04-13T17:32:25.995104Z","version":1}' + headers: + CF-RAY: + - 9ebc35c62abda241-YYZ + Connection: + - keep-alive + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Mon, 13 Apr 2026 17:32:26 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + accept-encoding: + - gzip + cf-cache-status: + - DYNAMIC + content-length: + - '356' + request-id: + - req_011Ca2ApJi11cqvB5DSV78Bu + server-timing: + - x-originResponse;dur=155 + set-cookie: + - _cfuvid=SF2vYfK17e8jzBxkyYB0.II1AE6BF_gRt6z72FerAlQ-1776101545.9504387-1.0.1.1-0b109w0pht0sh2BXwJVhJHV6YSAs3PLA0Sws9PGfNTA; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '153' + status: + code: 200 + message: OK +- request: + body: '' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '0' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.94.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.94.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/agents/agent_011Ca2ApJohLXg95LeGVGeAi/archive?beta=true + response: + body: + string: '{"archived_at":"2026-04-13T17:32:26.264468Z","created_at":"2026-04-13T17:32:25.995104Z","description":"Does + math","id":"agent_011Ca2ApJohLXg95LeGVGeAi","mcp_servers":[],"metadata":{},"model":{"id":"claude-haiku-4-5","speed":"standard"},"name":"braintrust-sdk-managed-agent","skills":[],"system":null,"tools":[],"type":"agent","updated_at":"2026-04-13T17:32:26.264468Z","version":1}' + headers: + CF-RAY: + - 9ebc35c7e909b0a3-YYZ + Connection: + - keep-alive + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Mon, 13 Apr 2026 17:32:26 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + accept-encoding: + - gzip + cf-cache-status: + - DYNAMIC + content-length: + - '381' + request-id: + - req_011Ca2ApKugAKn53gFjhxcy5 + server-timing: + - x-originResponse;dur=232 + set-cookie: + - _cfuvid=R432sOZDToh3fh3LNvZQynsWm_dY1Fj9k.nd6Rsk1lg-1776101546.2308397-1.0.1.1-7jHNop7vMdBOfl4a.020akwnxMi7TP69cl_mghpLPkE; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '231' + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_sessions_create.yaml b/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_sessions_create.yaml new file mode 100644 index 00000000..28c2632a --- /dev/null +++ b/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_sessions_create.yaml @@ -0,0 +1,415 @@ +interactions: +- request: + body: '' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.94.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.94.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: GET + uri: https://api.anthropic.com/v1/environments?beta=true&limit=1 + response: + body: + string: '{"data":[{"id":"env_012W55H8BqqsSgp2XJ3uQmBK","type":"environment","name":"braintrust-sdk-test-env","description":"","created_at":"2026-04-13T16:41:51.307182Z","updated_at":"2026-04-13T16:41:51.307182Z","archived_at":null,"state":"active","config":{"type":"cloud","packages":{"type":"packages","pip":[],"npm":[],"apt":[],"cargo":[],"gem":[],"go":[]},"networking":{"type":"unrestricted"},"init_script":"","environment":{}},"metadata":{},"scope":"organization"}],"next_page":"page_ZW52XzAxMlc1NUg4QnFxc1NncDJYSjN1UW1CS3wx"}' + headers: + CF-RAY: + - 9ebc35ca78391341-YYZ + Connection: + - keep-alive + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Mon, 13 Apr 2026 17:32:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + cf-cache-status: + - DYNAMIC + content-length: + - '521' + request-id: + - req_011Ca2ApMiKp9SGQETboBAPE + server-timing: + - x-originResponse;dur=343 + set-cookie: + - _cfuvid=dWwKFH.qwLRY9h6YoZUXgruDdniVPIID7GKTIOkSWtI-1776101546.6328456-1.0.1.1-l5Hq1vuSg0ybI_88CJfXftnTXR.qvalinNOUGCd.gVo; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '339' + status: + code: 200 + message: OK +- request: + body: '{"model":"claude-haiku-4-5","name":"braintrust-sdk-managed-agent","description":"Does + math","tools":[]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '103' + Content-Type: + - application/json + Cookie: + - _cfuvid=dWwKFH.qwLRY9h6YoZUXgruDdniVPIID7GKTIOkSWtI-1776101546.6328456-1.0.1.1-l5Hq1vuSg0ybI_88CJfXftnTXR.qvalinNOUGCd.gVo + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.94.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.94.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/agents?beta=true + response: + body: + string: '{"archived_at":null,"created_at":"2026-04-13T17:32:27.135331Z","description":"Does + math","id":"agent_011Ca2ApPgPdhN6bs1QJbQKN","mcp_servers":[],"metadata":{},"model":{"id":"claude-haiku-4-5","speed":"standard"},"name":"braintrust-sdk-managed-agent","skills":[],"system":null,"tools":[],"type":"agent","updated_at":"2026-04-13T17:32:27.135331Z","version":1}' + headers: + CF-RAY: + - 9ebc35cd680faa9b-YYZ + Connection: + - keep-alive + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Mon, 13 Apr 2026 17:32:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + accept-encoding: + - gzip + cf-cache-status: + - DYNAMIC + content-length: + - '356' + request-id: + - req_011Ca2ApPdRGhNDwzH5dgEKC + server-timing: + - x-originResponse;dur=143 + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '141' + status: + code: 200 + message: OK +- request: + body: '{"agent":"agent_011Ca2ApPgPdhN6bs1QJbQKN","environment_id":"env_012W55H8BqqsSgp2XJ3uQmBK","metadata":{"purpose":"test"},"title":"Issue + 259 test"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '145' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.94.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.94.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/sessions?beta=true + response: + body: + string: '{"agent":{"description":"Does math","id":"agent_011Ca2ApPgPdhN6bs1QJbQKN","mcp_servers":[],"model":{"id":"claude-haiku-4-5","speed":"standard"},"name":"braintrust-sdk-managed-agent","skills":[],"system":null,"tools":[],"type":"agent","version":1},"archived_at":null,"created_at":"2026-04-13T17:32:27.568084Z","environment_id":"env_012W55H8BqqsSgp2XJ3uQmBK","id":"sesn_011Ca2ApRV2z2dMSsvK4ouXQ","metadata":{"purpose":"test"},"resources":[],"stats":{"active_seconds":0,"duration_seconds":0},"status":"idle","title":"Issue + 259 test","type":"session","updated_at":"2026-04-13T17:32:27.568084Z","usage":{"cache_creation":null,"cache_read_input_tokens":0,"input_tokens":0,"output_tokens":0},"vault_ids":[]}' + headers: + CF-RAY: + - 9ebc35cfaad5d5a9-YYZ + Connection: + - keep-alive + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Mon, 13 Apr 2026 17:32:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + accept-encoding: + - gzip + cf-cache-status: + - DYNAMIC + content-length: + - '700' + request-id: + - req_011Ca2ApRBBTxYaN2fD6DhhJ + server-timing: + - x-originResponse;dur=261 + set-cookie: + - _cfuvid=kBzfr25zAEYODaYUn6i14ojwxINA.cKg7Eb5QWZX8rc-1776101547.4671574-1.0.1.1-wC9YDJNLF_NJeuEQ9WdihY_v4BCdp_CSiFM7opudi98; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '259' + status: + code: 200 + message: OK +- request: + body: '' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Type: + - application/json + Cookie: + - _cfuvid=dWwKFH.qwLRY9h6YoZUXgruDdniVPIID7GKTIOkSWtI-1776101546.6328456-1.0.1.1-l5Hq1vuSg0ybI_88CJfXftnTXR.qvalinNOUGCd.gVo + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.94.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.94.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: DELETE + uri: https://api.anthropic.com/v1/sessions/sesn_011Ca2ApRV2z2dMSsvK4ouXQ?beta=true + response: + body: + string: '{"id":"sesn_011Ca2ApRV2z2dMSsvK4ouXQ","type":"session_deleted"}' + headers: + CF-RAY: + - 9ebc35d1f82024ee-YYZ + Connection: + - keep-alive + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Mon, 13 Apr 2026 17:32:28 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + accept-encoding: + - gzip + cf-cache-status: + - DYNAMIC + content-length: + - '63' + request-id: + - req_011Ca2ApSnAnPg14bWJnzmVo + server-timing: + - x-originResponse;dur=569 + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '565' + status: + code: 200 + message: OK +- request: + body: '' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '0' + Content-Type: + - application/json + Cookie: + - _cfuvid=dWwKFH.qwLRY9h6YoZUXgruDdniVPIID7GKTIOkSWtI-1776101546.6328456-1.0.1.1-l5Hq1vuSg0ybI_88CJfXftnTXR.qvalinNOUGCd.gVo + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.94.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.94.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/agents/agent_011Ca2ApPgPdhN6bs1QJbQKN/archive?beta=true + response: + body: + string: '{"archived_at":"2026-04-13T17:32:28.568924Z","created_at":"2026-04-13T17:32:27.135331Z","description":"Does + math","id":"agent_011Ca2ApPgPdhN6bs1QJbQKN","mcp_servers":[],"metadata":{},"model":{"id":"claude-haiku-4-5","speed":"standard"},"name":"braintrust-sdk-managed-agent","skills":[],"system":null,"tools":[],"type":"agent","updated_at":"2026-04-13T17:32:28.568924Z","version":1}' + headers: + CF-RAY: + - 9ebc35d65d82ebb6-YYZ + Connection: + - keep-alive + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Mon, 13 Apr 2026 17:32:28 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + accept-encoding: + - gzip + cf-cache-status: + - DYNAMIC + content-length: + - '381' + request-id: + - req_011Ca2ApVm1fiGCBJ4EWfDnF + server-timing: + - x-originResponse;dur=240 + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '238' + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_sessions_events_send_and_stream.yaml b/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_sessions_events_send_and_stream.yaml new file mode 100644 index 00000000..f47c5dd8 --- /dev/null +++ b/py/src/braintrust/integrations/anthropic/cassettes/test_anthropic_beta_sessions_events_send_and_stream.yaml @@ -0,0 +1,191 @@ +interactions: +- request: + body: '' + headers: + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + method: GET + uri: https://api.anthropic.com/v1/environments?limit=1 + response: + body: + string: '{"data":[{"id":"env_012W55H8BqqsSgp2XJ3uQmBK","type":"environment","name":"braintrust-sdk-test-env","description":"","created_at":"2026-04-13T16:41:51.307182Z","updated_at":"2026-04-13T16:41:51.307182Z","archived_at":null,"state":"active","config":{"type":"cloud","packages":{"type":"packages","pip":[],"npm":[],"apt":[],"cargo":[],"gem":[],"go":[]},"networking":{"type":"unrestricted"},"init_script":"","environment":{}},"metadata":{},"scope":"organization"}],"next_page":"page_ZW52XzAxMlc1NUg4QnFxc1NncDJYSjN1UW1CS3wx"}' + headers: + Content-Type: + - application/json + status: + code: 200 + message: OK +- request: + body: '{"model":"claude-haiku-4-5","name":"braintrust-sdk-managed-agent-bash","description":"Uses bash for a single arithmetic command","tools":[{"type":"agent_toolset_20260401","default_config":{"enabled":false},"configs":[{"name":"bash","enabled":true,"permission_policy":{"type":"always_allow"}}]}],"system":"For arithmetic requests, use exactly one bash command and then answer with only the numeric result."}' + headers: + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + content-type: + - application/json + method: POST + uri: https://api.anthropic.com/v1/agents?beta=true + response: + body: + string: '{"id":"agent_011Ca2FF3gXLbMKrSqqZj8k4","archived_at":null,"created_at":"2026-04-13 18:30:28.891213+00:00","description":"Uses bash for a single arithmetic command","mcp_servers":[],"metadata":{},"model":{"id":"claude-haiku-4-5","speed":"standard"},"name":"braintrust-sdk-managed-agent-bash","skills":[],"system":"For arithmetic requests, use exactly one bash command and then answer with only the numeric result.","tools":[{"configs":[{"enabled":true,"name":"bash","permission_policy":{"type":"always_allow"}}],"default_config":{"enabled":false,"permission_policy":{"type":"always_allow"}},"type":"agent_toolset_20260401"}],"type":"agent","updated_at":"2026-04-13 18:30:28.891213+00:00","version":1}' + headers: + Content-Type: + - application/json + status: + code: 200 + message: OK +- request: + body: '{"agent":"agent_011Ca2FF3gXLbMKrSqqZj8k4","environment_id":"env_012W55H8BqqsSgp2XJ3uQmBK","metadata":{"purpose":"test"},"title":"Issue 259 event stream"}' + headers: + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + content-type: + - application/json + method: POST + uri: https://api.anthropic.com/v1/sessions?beta=true + response: + body: + string: '{"id":"sesn_011Ca2FF4rDgxQyGyz39EC2d","agent":{"id":"agent_011Ca2FF3gXLbMKrSqqZj8k4","description":"Uses bash for a single arithmetic command","mcp_servers":[],"model":{"id":"claude-haiku-4-5","speed":"standard"},"name":"braintrust-sdk-managed-agent-bash","skills":[],"system":"For arithmetic requests, use exactly one bash command and then answer with only the numeric result.","tools":[{"configs":[{"enabled":true,"name":"bash","permission_policy":{"type":"always_allow"}}],"default_config":{"enabled":false,"permission_policy":{"type":"always_allow"}},"type":"agent_toolset_20260401"}],"type":"agent","version":1},"archived_at":null,"created_at":"2026-04-13 18:30:29.173545+00:00","environment_id":"env_012W55H8BqqsSgp2XJ3uQmBK","metadata":{"purpose":"test"},"resources":[],"stats":{"active_seconds":0.0,"duration_seconds":0.0},"status":"idle","title":"Issue 259 event stream","type":"session","updated_at":"2026-04-13 18:30:29.173545+00:00","usage":{"cache_creation":null,"cache_read_input_tokens":0,"input_tokens":0,"output_tokens":0},"vault_ids":[]}' + headers: + Content-Type: + - application/json + status: + code: 200 + message: OK +- request: + body: '{"events":[{"type":"user.message","content":[{"type":"text","text":"Use bash once to print 2+2, then reply with only the number."}]}]}' + headers: + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + content-type: + - application/json + method: POST + uri: https://api.anthropic.com/v1/sessions/sesn_011Ca2FF4rDgxQyGyz39EC2d/events?beta=true + response: + body: + string: '{"data":[{"id":"sevt_015sLSDzRxyCtPYxH8as4oTZ","content":[{"text":"Use bash once to print 2+2, then reply with only the number.","type":"text"}],"type":"user.message"}]}' + headers: + Content-Type: + - application/json + status: + code: 200 + message: OK +- request: + body: '' + headers: + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + method: GET + uri: https://api.anthropic.com/v1/sessions/sesn_011Ca2FF4rDgxQyGyz39EC2d/events/stream?beta=true + response: + body: + string: 'event: message + + data: {"id":"sevt_01L2X1ULRQ3ChFRLzwxcekUy","processed_at":"2026-04-13T18:30:30.153Z","type":"session.status_running"} + + + event: message + + data: {"content":[{"text":"Use bash once to print 2+2, then reply with only the number.","type":"text"}],"id":"sevt_015sLSDzRxyCtPYxH8as4oTZ","processed_at":"2026-04-13T18:30:30.614Z","type":"user.message"} + + + event: message + + data: {"id":"sevt_01TXgSMcKrrJZ69EhenxVhzt","processed_at":"2026-04-13T18:30:30.720Z","type":"span.model_request_start"} + + + event: message + + data: {"id":"sevt_017DMzp5Xnfgjxz6o7bHT8WM","processed_at":"2026-04-13T18:30:32.342Z","type":"agent.thinking"} + + + event: message + + data: {"evaluated_permission":"allow","id":"sevt_01GrRZAZ8Nqcuv3VGmx2k765","input":{"command":"echo $((2+2))"},"name":"bash","processed_at":"2026-04-13T18:30:32.342Z","type":"agent.tool_use"} + + + event: message + + data: {"id":"sevt_01KLKXathDbA8urqgm6VyeG2","is_error":false,"model_request_start_id":"sevt_01TXgSMcKrrJZ69EhenxVhzt","model_usage":{"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"input_tokens":2012,"output_tokens":134},"processed_at":"2026-04-13T18:30:32.673Z","type":"span.model_request_end"} + + + event: message + + data: {"content":[{"text":"4\n","type":"text"}],"id":"sevt_0125HcaRaDq81rtK3DB1RUbc","is_error":false,"processed_at":"2026-04-13T18:30:36.038Z","tool_use_id":"sevt_01GrRZAZ8Nqcuv3VGmx2k765","type":"agent.tool_result"} + + + event: message + + data: {"id":"sevt_0164cqfsmw6YW4omCGExX88D","processed_at":"2026-04-13T18:30:36.164Z","type":"span.model_request_start"} + + + event: message + + data: {"content":[{"text":"4","type":"text"}],"id":"sevt_01UgzkbBHpf34iufDY2UrJL8","processed_at":"2026-04-13T18:30:37.217Z","type":"agent.message"} + + + event: message + + data: {"id":"sevt_01CWW7Yh1FrASa9K7DVMMGBQ","is_error":false,"model_request_start_id":"sevt_0164cqfsmw6YW4omCGExX88D","model_usage":{"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"input_tokens":2160,"output_tokens":4},"processed_at":"2026-04-13T18:30:37.338Z","type":"span.model_request_end"} + + + event: message + + data: {"id":"sevt_01JXMUBRBXXuCvcrNb2WJcEF","processed_at":"2026-04-13T18:30:37.497Z","stop_reason":{"type":"end_turn"},"type":"session.status_idle"} + + + ' + headers: + Content-Type: + - text/event-stream + status: + code: 200 + message: OK +- request: + body: '' + headers: + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + method: DELETE + uri: https://api.anthropic.com/v1/sessions/sesn_011Ca2FF4rDgxQyGyz39EC2d?beta=true + response: + body: + string: '{"id":"sesn_011Ca2FF4rDgxQyGyz39EC2d","type":"session_deleted"}' + headers: + Content-Type: + - application/json + status: + code: 200 + message: OK +- request: + body: '' + headers: + anthropic-beta: + - managed-agents-2026-04-01 + anthropic-version: + - '2023-06-01' + content-type: + - application/json + method: POST + uri: https://api.anthropic.com/v1/agents/agent_011Ca2FF3gXLbMKrSqqZj8k4/archive?beta=true + response: + body: + string: '{"id":"agent_011Ca2FF3gXLbMKrSqqZj8k4","archived_at":"2026-04-13 18:30:39.986792+00:00","created_at":"2026-04-13 18:30:28.891213+00:00","description":"Uses bash for a single arithmetic command","mcp_servers":[],"metadata":{},"model":{"id":"claude-haiku-4-5","speed":"standard"},"name":"braintrust-sdk-managed-agent-bash","skills":[],"system":"For arithmetic requests, use exactly one bash command and then answer with only the numeric result.","tools":[{"configs":[{"enabled":true,"name":"bash","permission_policy":{"type":"always_allow"}}],"default_config":{"enabled":false,"permission_policy":{"type":"always_allow"}},"type":"agent_toolset_20260401"}],"type":"agent","updated_at":"2026-04-13 18:30:39.986792+00:00","version":1}' + headers: + Content-Type: + - application/json + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/anthropic/test_anthropic.py b/py/src/braintrust/integrations/anthropic/test_anthropic.py index 5dcad1a1..a5b7c315 100644 --- a/py/src/braintrust/integrations/anthropic/test_anthropic.py +++ b/py/src/braintrust/integrations/anthropic/test_anthropic.py @@ -82,6 +82,61 @@ def _skip_if_server_tool_content_blocks_unsupported(): pytest.skip("Installed anthropic SDK does not support Anthropic server tool content blocks") +def _skip_if_managed_agents_unsupported(): + client = _get_client() + if not hasattr(client.beta, "agents"): + pytest.skip("Installed anthropic SDK does not support beta managed agents") + if not hasattr(client.beta, "sessions"): + pytest.skip("Installed anthropic SDK does not support beta managed agent sessions") + if not hasattr(client.beta.sessions, "events") or not hasattr(client.beta.sessions.events, "send"): + pytest.skip("Installed anthropic SDK does not support beta managed agent session events") + + +_MANAGED_AGENTS_EVENTS_PROMPT = "Use bash once to print 2+2, then reply with only the number." +_MANAGED_AGENTS_AGENT_NAME = "braintrust-sdk-managed-agent" +_MANAGED_AGENTS_BASH_AGENT_NAME = "braintrust-sdk-managed-agent-bash" +_MANAGED_AGENTS_BASH_SYSTEM_PROMPT = ( + "For arithmetic requests, use exactly one bash command and then answer with only the numeric result." +) + + +def _get_managed_agents_environment_id(client): + environments = client.beta.environments.list(limit=1) + for environment in environments: + return environment.id + pytest.skip("No Anthropic managed-agent environment available for re-recording") + + +def _create_managed_agent(client, *, with_bash: bool = False): + create_kwargs = { + "model": "claude-haiku-4-5", + "name": _MANAGED_AGENTS_BASH_AGENT_NAME if with_bash else _MANAGED_AGENTS_AGENT_NAME, + "description": "Does math", + "tools": [], + } + if with_bash: + create_kwargs["description"] = "Uses bash for a single arithmetic command" + create_kwargs["system"] = _MANAGED_AGENTS_BASH_SYSTEM_PROMPT + create_kwargs["tools"] = [ + { + "type": "agent_toolset_20260401", + "default_config": {"enabled": False}, + "configs": [ + {"name": "bash", "enabled": True, "permission_policy": {"type": "always_allow"}}, + ], + } + ] + + return client.beta.agents.create(**create_kwargs) + + +def _cleanup_managed_agent_resources(client, agent_id: str | None = None, session_id: str | None = None): + if session_id: + client.beta.sessions.delete(session_id) + if agent_id and hasattr(client.beta.agents, "archive"): + client.beta.agents.archive(agent_id) + + @pytest.fixture def memory_logger(): init_test_logger(PROJECT_NAME) @@ -1012,6 +1067,179 @@ async def test_anthropic_beta_messages_streaming_async(memory_logger): assert metrics["tokens"] == usage.input_tokens + usage.output_tokens +@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path", "body"]) +def test_anthropic_beta_agents_create(memory_logger): + _skip_if_managed_agents_unsupported() + assert not memory_logger.pop() + + raw_client = _get_client() + agent_name = _MANAGED_AGENTS_AGENT_NAME + agent = None + try: + client = wrap_anthropic(_get_client()) + agent = client.beta.agents.create( + model="claude-haiku-4-5", + name=agent_name, + description="Does math", + tools=[], + ) + + assert agent.id.startswith("agent_") + assert agent.version >= 1 + + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + assert span["span_attributes"]["name"] == "anthropic.beta.agents.create" + assert span["span_attributes"]["type"] == "task" + assert span["metadata"]["provider"] == "anthropic" + assert span["metadata"]["anthropic_api"] == "managed_agents" + assert span["metadata"]["model"] == "claude-haiku-4-5" + assert span["input"] == { + "model": "claude-haiku-4-5", + "name": agent_name, + "description": "Does math", + "tools": [], + } + assert span["output"]["id"] == agent.id + assert span["output"]["type"] == "agent" + assert span["output"]["model"]["id"] == "claude-haiku-4-5" + finally: + if agent is not None: + _cleanup_managed_agent_resources(raw_client, agent_id=agent.id) + + +@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path", "body"]) +def test_anthropic_beta_sessions_create(memory_logger): + _skip_if_managed_agents_unsupported() + assert not memory_logger.pop() + + raw_client = _get_client() + environment_id = _get_managed_agents_environment_id(raw_client) + agent = _create_managed_agent(raw_client) + session = None + try: + client = wrap_anthropic(_get_client()) + session = client.beta.sessions.create( + agent=agent.id, + environment_id=environment_id, + metadata={"purpose": "test"}, + title="Issue 259 test", + ) + + assert session.id.startswith("sesn_") + assert session.status == "idle" + + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + assert span["span_attributes"]["name"] == "anthropic.beta.sessions.create" + assert span["span_attributes"]["type"] == "task" + assert span["metadata"]["provider"] == "anthropic" + assert span["metadata"]["anthropic_api"] == "managed_agents" + assert span["metadata"]["session_status"] == "idle" + assert span["input"] == { + "agent": agent.id, + "environment_id": environment_id, + "metadata": {"purpose": "test"}, + "title": "Issue 259 test", + } + assert span["metrics"]["prompt_tokens"] >= 0 + assert span["metrics"]["completion_tokens"] >= 0 + assert span["metrics"]["tokens"] >= span["metrics"]["prompt_tokens"] + assert span["metrics"]["active_seconds"] >= 0 + assert span["metrics"]["duration_seconds"] >= span["metrics"]["active_seconds"] + assert span["output"]["id"] == session.id + assert span["output"]["status"] == "idle" + assert span["output"]["environment_id"] == environment_id + finally: + _cleanup_managed_agent_resources(raw_client, agent_id=agent.id, session_id=getattr(session, "id", None)) + + +@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path", "body"]) +def test_anthropic_beta_sessions_events_send_and_stream(memory_logger): + _skip_if_managed_agents_unsupported() + assert not memory_logger.pop() + + raw_client = _get_client() + environment_id = _get_managed_agents_environment_id(raw_client) + agent = _create_managed_agent(raw_client, with_bash=True) + session = raw_client.beta.sessions.create( + agent=agent.id, + environment_id=environment_id, + metadata={"purpose": "test"}, + title="Issue 259 event stream", + ) + try: + client = wrap_anthropic(_get_client()) + sent = client.beta.sessions.events.send( + session.id, + events=[ + { + "type": "user.message", + "content": [{"type": "text", "text": _MANAGED_AGENTS_EVENTS_PROMPT}], + } + ], + ) + streamed_events = [] + with client.beta.sessions.events.stream(session.id) as stream: + for event in stream: + streamed_events.append(event) + if event.type in {"session.status_idle", "session.status_terminated"}: + break + + assert sent.data and sent.data[0].type == "user.message" + event_types = [event.type for event in streamed_events] + assert event_types[0] == "session.status_running" + assert event_types[-1] == "session.status_idle" + assert "agent.tool_use" in event_types + assert "agent.tool_result" in event_types + assert "span.model_request_end" in event_types + + spans = memory_logger.pop() + task_spans = find_spans_by_type(spans, SpanTypeAttribute.TASK) + tool_spans = find_spans_by_type(spans, SpanTypeAttribute.TOOL) + + assert len(task_spans) == 2 + assert len(tool_spans) >= 1 + + send_span = find_span_by_name(task_spans, "anthropic.beta.sessions.events.send") + stream_span = find_span_by_name(task_spans, "anthropic.beta.sessions.events.stream") + tool_span = find_span_by_name(tool_spans, "bash") + + assert send_span["input"] == { + "session_id": session.id, + "events": [{"type": "user.message", "content": [{"type": "text", "text": _MANAGED_AGENTS_EVENTS_PROMPT}]}], + } + assert send_span["output"]["data"][0]["type"] == "user.message" + assert send_span["output"]["data"][0]["content"][0]["text"] == _MANAGED_AGENTS_EVENTS_PROMPT + + assert stream_span["input"] == {"session_id": session.id} + streamed_output_types = [event["type"] for event in stream_span["output"]] + assert streamed_output_types[0] == "session.status_running" + assert streamed_output_types[-1] == "session.status_idle" + assert "agent.tool_use" in streamed_output_types + assert "agent.tool_result" in streamed_output_types + assert "agent.message" in streamed_output_types + assert stream_span["metadata"]["provider"] == "anthropic" + assert stream_span["metadata"]["anthropic_api"] == "managed_agents" + assert stream_span["metadata"]["session_status"] == "idle" + assert stream_span["metadata"]["stop_reason"] == "end_turn" + assert stream_span["metrics"]["prompt_tokens"] > 0 + assert stream_span["metrics"]["completion_tokens"] > 0 + assert stream_span["metrics"]["tokens"] >= stream_span["metrics"]["prompt_tokens"] + + assert tool_span["input"]["command"] + assert tool_span["output"][0]["text"].strip() == "4" + assert tool_span["metadata"]["tool_call_type"] == "agent.tool_use" + assert tool_span["metadata"]["tool_result_type"] == "agent.tool_result" + assert tool_span["metadata"]["tool_use_id"] + assert tool_span["span_parents"] == [stream_span["span_id"]] + assert tool_span["root_span_id"] == stream_span["root_span_id"] + finally: + _cleanup_managed_agent_resources(raw_client, agent_id=agent.id, session_id=session.id) + + @pytest.mark.vcr def test_setup_creates_spans(memory_logger): """`AnthropicIntegration.setup()` should create spans when making API calls.""" diff --git a/py/src/braintrust/integrations/anthropic/tracing.py b/py/src/braintrust/integrations/anthropic/tracing.py index c110edc0..a95eaa17 100644 --- a/py/src/braintrust/integrations/anthropic/tracing.py +++ b/py/src/braintrust/integrations/anthropic/tracing.py @@ -8,6 +8,7 @@ from braintrust.integrations.utils import _materialize_attachment from braintrust.logger import log_exc_info_to_span, start_span from braintrust.span_types import SpanTypeAttribute +from braintrust.util import is_numeric log = logging.getLogger(__name__) @@ -132,6 +133,14 @@ def __init__(self, beta): def messages(self): return AsyncMessages(self.__beta.messages) + @property + def agents(self): + return AsyncAgents(self.__beta.agents) + + @property + def sessions(self): + return AsyncSessions(self.__beta.sessions) + class TracedAnthropic(Wrapper): def __init__(self, client): @@ -192,6 +201,366 @@ def __init__(self, beta): def messages(self): return Messages(self.__beta.messages) + @property + def agents(self): + return Agents(self.__beta.agents) + + @property + def sessions(self): + return Sessions(self.__beta.sessions) + + +class Agents(Wrapper): + def __init__(self, agents): + super().__init__(agents) + self.__agents = agents + + def create(self, *args, **kwargs): + return _trace_managed_agents_call(self.__agents.create, "anthropic.beta.agents.create", kwargs, kwargs) + + def retrieve(self, agent_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__agents.retrieve, + "anthropic.beta.agents.retrieve", + {"agent_id": agent_id, **kwargs}, + kwargs, + agent_id, + *args, + ) + + def list(self, *args, **kwargs): + return _trace_managed_agents_call( + self.__agents.list, + "anthropic.beta.agents.list", + kwargs, + kwargs, + output_factory=_managed_agents_paginator_output, + ) + + def update(self, agent_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__agents.update, + "anthropic.beta.agents.update", + {"agent_id": agent_id, **kwargs}, + kwargs, + agent_id, + *args, + ) + + def delete(self, agent_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__agents.delete, + "anthropic.beta.agents.delete", + {"agent_id": agent_id, **kwargs}, + kwargs, + agent_id, + *args, + ) + + def archive(self, agent_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__agents.archive, + "anthropic.beta.agents.archive", + {"agent_id": agent_id, **kwargs}, + kwargs, + agent_id, + *args, + ) + + +class AsyncAgents(Wrapper): + def __init__(self, agents): + super().__init__(agents) + self.__agents = agents + + async def create(self, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__agents.create, + "anthropic.beta.agents.create", + kwargs, + kwargs, + ) + + async def retrieve(self, agent_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__agents.retrieve, + "anthropic.beta.agents.retrieve", + {"agent_id": agent_id, **kwargs}, + kwargs, + agent_id, + *args, + ) + + def list(self, *args, **kwargs): + return _trace_managed_agents_call( + self.__agents.list, + "anthropic.beta.agents.list", + kwargs, + kwargs, + output_factory=_managed_agents_paginator_output, + ) + + async def update(self, agent_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__agents.update, + "anthropic.beta.agents.update", + {"agent_id": agent_id, **kwargs}, + kwargs, + agent_id, + *args, + ) + + async def delete(self, agent_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__agents.delete, + "anthropic.beta.agents.delete", + {"agent_id": agent_id, **kwargs}, + kwargs, + agent_id, + *args, + ) + + async def archive(self, agent_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__agents.archive, + "anthropic.beta.agents.archive", + {"agent_id": agent_id, **kwargs}, + kwargs, + agent_id, + *args, + ) + + +class Sessions(Wrapper): + def __init__(self, sessions): + super().__init__(sessions) + self.__sessions = sessions + + @property + def events(self): + return SessionEvents(self.__sessions.events) + + def create(self, *args, **kwargs): + return _trace_managed_agents_call(self.__sessions.create, "anthropic.beta.sessions.create", kwargs, kwargs) + + def retrieve(self, session_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__sessions.retrieve, + "anthropic.beta.sessions.retrieve", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + def list(self, *args, **kwargs): + return _trace_managed_agents_call( + self.__sessions.list, + "anthropic.beta.sessions.list", + kwargs, + kwargs, + output_factory=_managed_agents_paginator_output, + ) + + def update(self, session_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__sessions.update, + "anthropic.beta.sessions.update", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + def delete(self, session_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__sessions.delete, + "anthropic.beta.sessions.delete", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + def archive(self, session_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__sessions.archive, + "anthropic.beta.sessions.archive", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + +class AsyncSessions(Wrapper): + def __init__(self, sessions): + super().__init__(sessions) + self.__sessions = sessions + + @property + def events(self): + return AsyncSessionEvents(self.__sessions.events) + + async def create(self, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__sessions.create, + "anthropic.beta.sessions.create", + kwargs, + kwargs, + ) + + async def retrieve(self, session_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__sessions.retrieve, + "anthropic.beta.sessions.retrieve", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + def list(self, *args, **kwargs): + return _trace_managed_agents_call( + self.__sessions.list, + "anthropic.beta.sessions.list", + kwargs, + kwargs, + output_factory=_managed_agents_paginator_output, + ) + + async def update(self, session_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__sessions.update, + "anthropic.beta.sessions.update", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + async def delete(self, session_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__sessions.delete, + "anthropic.beta.sessions.delete", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + async def archive(self, session_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__sessions.archive, + "anthropic.beta.sessions.archive", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + +class SessionEvents(Wrapper): + def __init__(self, events): + super().__init__(events) + self.__events = events + + def list(self, session_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__events.list, + "anthropic.beta.sessions.events.list", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + output_factory=_managed_agents_paginator_output, + ) + + def send(self, session_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__events.send, + "anthropic.beta.sessions.events.send", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + def create(self, session_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__events.create, + "anthropic.beta.sessions.events.create", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + def stream(self, session_id, *args, **kwargs): + span = _start_managed_agents_span( + "anthropic.beta.sessions.events.stream", + {"session_id": session_id, **kwargs}, + request_kwargs=kwargs, + ) + try: + stream = self.__events.stream(session_id, *args, **kwargs) + return TracedManagedAgentsEventStream(stream, span) + except Exception as e: + span.log(error=e) + span.end() + raise + + +class AsyncSessionEvents(Wrapper): + def __init__(self, events): + super().__init__(events) + self.__events = events + + def list(self, session_id, *args, **kwargs): + return _trace_managed_agents_call( + self.__events.list, + "anthropic.beta.sessions.events.list", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + output_factory=_managed_agents_paginator_output, + ) + + async def send(self, session_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__events.send, + "anthropic.beta.sessions.events.send", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + async def create(self, session_id, *args, **kwargs): + return await _trace_async_managed_agents_call( + self.__events.create, + "anthropic.beta.sessions.events.create", + {"session_id": session_id, **kwargs}, + kwargs, + session_id, + *args, + ) + + async def stream(self, session_id, *args, **kwargs): + span = _start_managed_agents_span( + "anthropic.beta.sessions.events.stream", + {"session_id": session_id, **kwargs}, + request_kwargs=kwargs, + ) + try: + stream = await self.__events.stream(session_id, *args, **kwargs) + return AsyncTracedManagedAgentsEventStream(stream, span) + except Exception as e: + span.log(error=e) + span.end() + raise + class Batches(Wrapper): """Wrapper for sync Anthropic Messages Batches resource.""" @@ -362,6 +731,431 @@ def __process_message(self, m): self.__snapshot = accumulate_event(event=m, current_snapshot=self.__snapshot) +class TracedManagedAgentsEventStream(Wrapper): + def __init__(self, stream, span): + super().__init__(stream) + self.__stream = stream + self.__span = span + self.__events: list[dict[str, Any]] = [] + self.__finished = False + + def __iter__(self): + return self + + def __next__(self): + try: + event = next(self.__stream) + except StopIteration: + self._finish() + raise + except Exception as e: + self._finish(error=e) + raise + + self.__events.append(_normalize_anthropic_data(event)) + return event + + def __enter__(self): + entered = self.__stream.__enter__() + if entered is not self.__stream: + self.__stream = entered + return self + + def __exit__(self, exc_type, exc_value, traceback): + try: + return self.__stream.__exit__(exc_type, exc_value, traceback) + finally: + self._finish(exc_type=exc_type, exc_value=exc_value, traceback=traceback) + + def close(self): + try: + close = getattr(self.__stream, "close", None) + if callable(close): + close() + finally: + self._finish() + + def _finish(self, exc_type=None, exc_value=None, traceback=None, error=None): + if self.__finished: + return + self.__finished = True + + _log_managed_agents_stream_to_span(self.__events, self.__span) + if error is not None: + self.__span.log(error=error) + elif exc_type is not None: + log_exc_info_to_span(self.__span, exc_type, exc_value, traceback) + self.__span.end() + + +class AsyncTracedManagedAgentsEventStream(Wrapper): + def __init__(self, stream, span): + super().__init__(stream) + self.__stream = stream + self.__span = span + self.__events: list[dict[str, Any]] = [] + self.__finished = False + + def __aiter__(self): + return self + + async def __anext__(self): + try: + event = await self.__stream.__anext__() + except StopAsyncIteration: + await self._finish() + raise + except Exception as e: + await self._finish(error=e) + raise + + self.__events.append(_normalize_anthropic_data(event)) + return event + + async def __aenter__(self): + entered = await self.__stream.__aenter__() + if entered is not self.__stream: + self.__stream = entered + return self + + async def __aexit__(self, exc_type, exc_value, traceback): + try: + return await self.__stream.__aexit__(exc_type, exc_value, traceback) + finally: + await self._finish(exc_type=exc_type, exc_value=exc_value, traceback=traceback) + + async def close(self): + try: + close = getattr(self.__stream, "close", None) + if callable(close): + await close() + finally: + await self._finish() + + async def _finish(self, exc_type=None, exc_value=None, traceback=None, error=None): + if self.__finished: + return + self.__finished = True + + _log_managed_agents_stream_to_span(self.__events, self.__span) + if error is not None: + self.__span.log(error=error) + elif exc_type is not None: + log_exc_info_to_span(self.__span, exc_type, exc_value, traceback) + self.__span.end() + + +_MANAGED_AGENTS_CALL_TYPES = frozenset({"agent.tool_use", "agent.mcp_tool_use", "agent.custom_tool_use"}) +_MANAGED_AGENTS_RESULT_REF_KEYS = { + "agent.tool_result": "tool_use_id", + "agent.mcp_tool_result": "mcp_tool_use_id", + "user.custom_tool_result": "custom_tool_use_id", +} + + +def _normalize_anthropic_data(value: Any) -> Any: + converted = _try_to_dict(value) + if converted is not None: + value = converted + + if isinstance(value, tuple): + value = list(value) + + if isinstance(value, list): + return [_normalize_anthropic_data(item) for item in value] + + if isinstance(value, dict): + return {key: _normalize_anthropic_data(item) for key, item in value.items()} + + return value + + +def _normalize_anthropic_input(value: Any) -> Any: + return _process_input_attachments(_normalize_anthropic_data(bt_safe_deep_copy(value))) + + +def _managed_agents_model_name(value: Any) -> str | None: + value = _normalize_anthropic_data(value) + if isinstance(value, str): + return value + if isinstance(value, dict) and isinstance(value.get("id"), str): + return value["id"] + return None + + +def _managed_agents_request_metadata(request_kwargs: dict[str, Any] | None) -> dict[str, Any]: + metadata: dict[str, Any] = {"provider": "anthropic", "anthropic_api": "managed_agents"} + if not request_kwargs: + return metadata + + model_name = _managed_agents_model_name(request_kwargs.get("model")) + if model_name is not None: + metadata["model"] = model_name + return metadata + + +def _start_managed_agents_span(name: str, span_input: Any, request_kwargs: dict[str, Any] | None = None): + return start_span( + name=name, + type="task", + metadata=_managed_agents_request_metadata(request_kwargs), + input=_normalize_anthropic_input(span_input), + ) + + +def _managed_agents_paginator_output(result: Any) -> dict[str, Any]: + return {"type": type(result).__name__} + + +def _extract_managed_agents_result_metrics_and_metadata(result: Any) -> tuple[dict[str, float], dict[str, Any]]: + metrics: dict[str, float] = {} + metadata: dict[str, Any] = {} + + usage_metrics, usage_metadata = extract_anthropic_usage(getattr(result, "usage", None)) + metrics.update(usage_metrics) + metadata.update(usage_metadata) + + stats = _try_to_dict(getattr(result, "stats", None)) + if isinstance(stats, dict): + for key in ("active_seconds", "duration_seconds"): + value = stats.get(key) + if is_numeric(value): + metrics[key] = float(value) + + model_name = _managed_agents_model_name(getattr(result, "model", None)) + if model_name is None: + agent = _try_to_dict(getattr(result, "agent", None)) + if isinstance(agent, dict): + model_name = _managed_agents_model_name(agent.get("model")) + if model_name is not None: + metadata["model"] = model_name + + status = getattr(result, "status", None) + if isinstance(status, str): + metadata["session_status"] = status + + return metrics, metadata + + +def _log_managed_agents_result_to_span(result: Any, span, output_factory=None) -> None: + output = output_factory(result) if output_factory is not None else _normalize_anthropic_data(result) + metrics, metadata = _extract_managed_agents_result_metrics_and_metadata(result) + + span_log_kwargs = {} + if output is not None: + span_log_kwargs["output"] = output + if metrics: + span_log_kwargs["metrics"] = metrics + if metadata: + span_log_kwargs["metadata"] = metadata + if span_log_kwargs: + span.log(**span_log_kwargs) + + +def _trace_managed_agents_call(method, span_name, span_input, request_kwargs, *args, output_factory=None): + span = _start_managed_agents_span(span_name, span_input, request_kwargs=request_kwargs) + method_kwargs = dict(request_kwargs or {}) + try: + result = method(*args, **method_kwargs) + _log_managed_agents_result_to_span(result, span, output_factory=output_factory) + return result + except Exception as e: + span.log(error=e) + raise + finally: + span.end() + + +async def _trace_async_managed_agents_call(method, span_name, span_input, request_kwargs, *args, output_factory=None): + span = _start_managed_agents_span(span_name, span_input, request_kwargs=request_kwargs) + method_kwargs = dict(request_kwargs or {}) + try: + result = await method(*args, **method_kwargs) + _log_managed_agents_result_to_span(result, span, output_factory=output_factory) + return result + except Exception as e: + span.log(error=e) + raise + finally: + span.end() + + +def _managed_agents_stream_metrics_and_metadata( + events: list[dict[str, Any]], +) -> tuple[dict[str, float], dict[str, Any]]: + metrics: dict[str, float] = {} + metadata: dict[str, Any] = {} + + final_status: str | None = None + stop_reason: str | None = None + session_error: str | None = None + + for event in events: + event_type = event.get("type") + if event_type == "span.model_request_end": + event_metrics, _ = extract_anthropic_usage(event.get("model_usage")) + for key, value in event_metrics.items(): + metrics[key] = metrics.get(key, 0.0) + value + elif isinstance(event_type, str) and event_type.startswith("session.status_"): + final_status = event_type.removeprefix("session.status_") + if event_type == "session.status_idle": + stop_reason_data = event.get("stop_reason") + if isinstance(stop_reason_data, dict): + reason = stop_reason_data.get("type") + if isinstance(reason, str): + stop_reason = reason + elif event_type == "session.error": + error_data = event.get("error") + if isinstance(error_data, dict): + session_error = error_data.get("message") or error_data.get("type") + + if final_status is not None: + metadata["session_status"] = final_status + if stop_reason is not None: + metadata["stop_reason"] = stop_reason + if session_error is not None: + metadata["session_error"] = session_error + + return metrics, metadata + + +def _managed_agents_tool_ref_key(result_item: dict[str, Any] | None) -> str | None: + if not result_item: + return None + result_type = result_item.get("type") + if not isinstance(result_type, str): + return None + return _MANAGED_AGENTS_RESULT_REF_KEYS.get(result_type) + + +def _managed_agents_tool_span_name(call_item: dict[str, Any] | None, result_item: dict[str, Any] | None) -> str: + if isinstance((call_item or {}).get("name"), str): + return call_item["name"] + + result_type = (result_item or {}).get("type") + if isinstance(result_type, str): + return result_type.replace(".", "_") + + return "managed_agent_tool" + + +def _managed_agents_tool_span_input(call_item: dict[str, Any] | None) -> Any: + if not call_item: + return None + return call_item.get("input") + + +def _managed_agents_tool_span_output(result_item: dict[str, Any] | None) -> Any: + if not result_item: + return None + return result_item.get("content") + + +def _managed_agents_tool_span_error(result_item: dict[str, Any] | None) -> str | None: + if not result_item: + return None + if result_item.get("is_error"): + result_type = result_item.get("type") + if isinstance(result_type, str): + return result_type + return "tool_error" + return None + + +def _managed_agents_tool_span_metadata( + call_item: dict[str, Any] | None, result_item: dict[str, Any] | None +) -> dict[str, Any] | None: + ref_key = _managed_agents_tool_ref_key(result_item) + metadata = { + key: value + for key, value in { + "tool_use_id": (call_item or {}).get("id") or ((result_item or {}).get(ref_key) if ref_key else None), + "tool_call_type": (call_item or {}).get("type"), + "tool_result_type": (result_item or {}).get("type"), + "mcp_server_name": (call_item or {}).get("mcp_server_name"), + "evaluated_permission": (call_item or {}).get("evaluated_permission"), + }.items() + if value is not None + } + return metadata or None + + +def _log_managed_agents_tool_span( + parent_span, call_item: dict[str, Any] | None, result_item: dict[str, Any] | None +) -> None: + tool_span = start_span( + name=_managed_agents_tool_span_name(call_item, result_item), + type=SpanTypeAttribute.TOOL, + parent=parent_span.export(), + input=_managed_agents_tool_span_input(call_item), + metadata=_managed_agents_tool_span_metadata(call_item, result_item), + ) + try: + output = _managed_agents_tool_span_output(result_item) + error = _managed_agents_tool_span_error(result_item) + if output is None and error is None: + return + if error is not None: + tool_span.log(output=output, error=error) + else: + tool_span.log(output=output) + finally: + tool_span.end() + + +def _log_managed_agents_tool_spans(events: list[dict[str, Any]], parent_span) -> None: + calls_by_id: dict[str, dict[str, Any]] = {} + pending_results_by_id: dict[str, list[dict[str, Any]]] = {} + matched_call_ids: set[str] = set() + pairs: list[tuple[dict[str, Any] | None, dict[str, Any] | None]] = [] + + for event in events: + event_type = event.get("type") + if event_type in _MANAGED_AGENTS_CALL_TYPES: + call_id = event.get("id") + if isinstance(call_id, str): + calls_by_id[call_id] = event + for pending_result in pending_results_by_id.pop(call_id, []): + pairs.append((event, pending_result)) + matched_call_ids.add(call_id) + else: + pairs.append((event, None)) + continue + + ref_key = _managed_agents_tool_ref_key(event) + if ref_key is None: + continue + + call_id = event.get(ref_key) + if isinstance(call_id, str) and call_id in calls_by_id: + pairs.append((calls_by_id[call_id], event)) + matched_call_ids.add(call_id) + elif isinstance(call_id, str): + pending_results_by_id.setdefault(call_id, []).append(event) + else: + pairs.append((None, event)) + + for call_item, result_item in pairs: + _log_managed_agents_tool_span(parent_span, call_item, result_item) + + for call_id, call_item in calls_by_id.items(): + if call_id not in matched_call_ids: + _log_managed_agents_tool_span(parent_span, call_item, None) + + for pending_results in pending_results_by_id.values(): + for result_item in pending_results: + _log_managed_agents_tool_span(parent_span, None, result_item) + + +def _log_managed_agents_stream_to_span(events: list[dict[str, Any]], span) -> None: + if not events: + return + + metrics, metadata = _managed_agents_stream_metrics_and_metadata(events) + span.log(output=events, metrics=metrics or None, metadata=metadata or None) + _log_managed_agents_tool_spans(events, span) + + def _start_batch_create_span(kwargs): requests = list(kwargs.get("requests", [])) # Extract models from the batch requests for metadata