diff --git a/crates/browser-use-agent/src/config_overrides.rs b/crates/browser-use-agent/src/config_overrides.rs index c1918543..61420a52 100644 --- a/crates/browser-use-agent/src/config_overrides.rs +++ b/crates/browser-use-agent/src/config_overrides.rs @@ -101,6 +101,7 @@ impl Default for MultiAgentV2Options { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ProviderBackend { Codex, + BrowserUse, Openai, Anthropic, Openrouter, @@ -113,6 +114,7 @@ impl ProviderBackend { pub fn from_provider_id(value: &str) -> Option { match value.trim().to_ascii_lowercase().as_str() { "codex" => Some(Self::Codex), + "browser-use" | "browser_use" | "browseruse" => Some(Self::BrowserUse), "openai" => Some(Self::Openai), "anthropic" => Some(Self::Anthropic), "openrouter" => Some(Self::Openrouter), @@ -1957,6 +1959,7 @@ command = "profile-server" // full variant set matches `browser-use-core::ProviderBackend`. let all = [ ProviderBackend::Codex, + ProviderBackend::BrowserUse, ProviderBackend::Openai, ProviderBackend::Anthropic, ProviderBackend::Openrouter, @@ -1968,6 +1971,7 @@ command = "profile-server" let name = format!("{backend:?}"); let round_tripped = match name.as_str() { "Codex" => ProviderBackend::Codex, + "BrowserUse" => ProviderBackend::BrowserUse, "Openai" => ProviderBackend::Openai, "Anthropic" => ProviderBackend::Anthropic, "Openrouter" => ProviderBackend::Openrouter, @@ -1978,7 +1982,7 @@ command = "profile-server" }; assert_eq!(backend, round_tripped); } - assert_eq!(all.len(), 7); + assert_eq!(all.len(), 8); } #[test] diff --git a/crates/browser-use-agent/src/entrypoint/mod.rs b/crates/browser-use-agent/src/entrypoint/mod.rs index 4c743a69..5cfc6e3f 100644 --- a/crates/browser-use-agent/src/entrypoint/mod.rs +++ b/crates/browser-use-agent/src/entrypoint/mod.rs @@ -5865,6 +5865,7 @@ mod tests { id: "call-1".to_string(), name: "shell".to_string(), namespace: None, + provider_metadata: None, input: serde_json::json!({ "command": ["echo", "fusion-ok"] }), }, LlmEvent::Finish { diff --git a/crates/browser-use-agent/src/entrypoint/provider.rs b/crates/browser-use-agent/src/entrypoint/provider.rs index 3281a54c..47b5c2ed 100644 --- a/crates/browser-use-agent/src/entrypoint/provider.rs +++ b/crates/browser-use-agent/src/entrypoint/provider.rs @@ -19,6 +19,9 @@ //! * [`ProviderBackend::Openai`] → [`ProviderChoice::OpenAiResponses`] //! (key from `OPENAI_API_KEY` / `LLM_BROWSER_OPENAI_API_KEY`, optional //! `LLM_BROWSER_OPENAI_BASE_URL`), +//! * [`ProviderBackend::BrowserUse`] → [`ProviderChoice::OpenAiCompatibleCustom`] +//! id `"browser-use"` (key from `BROWSER_USE_API_KEY`, base override +//! `LLM_BROWSER_BROWSER_USE_BASE_URL`), //! * [`ProviderBackend::Anthropic`] → [`ProviderChoice::Anthropic`] //! (key from `ANTHROPIC_API_KEY` / `LLM_BROWSER_ANTHROPIC_API_KEY`), //! * [`ProviderBackend::Openrouter`] → [`ProviderChoice::OpenAiCompatibleProvider`] @@ -798,6 +801,27 @@ pub fn provider_choice_for_backend( base_url: env_first(&["LLM_BROWSER_OPENAI_BASE_URL"]), })) } + ProviderBackend::BrowserUse => { + let api_key = key_env_then_store( + &["LLM_BROWSER_BROWSER_USE_API_KEY", "BROWSER_USE_API_KEY"], + store, + "browser_use_cloud", + ) + .ok_or(ProviderResolveError::MissingCredentials( + "set BROWSER_USE_API_KEY (or run `auth login browser-use-cloud`) for the browser-use backend", + ))?; + Ok(Some(ProviderChoice::OpenAiCompatibleCustom { + provider_id: "browser-use".to_string(), + base_url: env_first(&["LLM_BROWSER_BROWSER_USE_BASE_URL"]) + .unwrap_or_else(|| "https://llm.api.browser-use.com/v1".to_string()), + api_key, + extra_headers: vec![( + "x-browser-use-request-type".to_string(), + env_first(&["LLM_BROWSER_BROWSER_USE_REQUEST_TYPE"]) + .unwrap_or_else(|| "rust_agent".to_string()), + )], + })) + } ProviderBackend::Anthropic => { let api_key = key_env_then_store( &["LLM_BROWSER_ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY"], @@ -2704,6 +2728,43 @@ mod tests { assert!(matches!(resolved, ResolvedProvider::Real(_))); } + #[test] + fn browser_use_backend_resolves_gateway_route_from_cloud_key() { + let _guard = ENV_LOCK.lock().unwrap(); + std::env::remove_var("BROWSER_USE_API_KEY"); + std::env::remove_var("LLM_BROWSER_BROWSER_USE_API_KEY"); + let dir = tempfile::tempdir().expect("tempdir"); + let store = Store::open(dir.path()).expect("store"); + store + .set_setting("auth.browser_use_cloud.api_key", "stored-browser-use-key") + .unwrap(); + + let choice = provider_choice_for_backend(ProviderBackend::BrowserUse, Some(&store)) + .expect("resolves") + .expect("browser-use is a real provider"); + + match choice { + ProviderChoice::OpenAiCompatibleCustom { + provider_id, + base_url, + api_key, + extra_headers, + } => { + assert_eq!(provider_id, "browser-use"); + assert_eq!(base_url, "https://llm.api.browser-use.com/v1"); + assert_eq!(api_key, "stored-browser-use-key"); + assert_eq!( + extra_headers, + vec![( + "x-browser-use-request-type".to_string(), + "rust_agent".to_string() + )] + ); + } + other => panic!("expected browser-use gateway choice, got {other:?}"), + } + } + /// A real Anthropic backend also constructs offline given its key. #[test] fn resolves_real_anthropic_driver_offline() { diff --git a/crates/browser-use-agent/src/events/map_tests.rs b/crates/browser-use-agent/src/events/map_tests.rs index 667a685d..175579a6 100644 --- a/crates/browser-use-agent/src/events/map_tests.rs +++ b/crates/browser-use-agent/src/events/map_tests.rs @@ -71,6 +71,7 @@ fn tool_call_maps_to_tool_started_with_parsed_arguments() { id: "c0".to_string(), name: "click".to_string(), namespace: None, + provider_metadata: None, input: json!({ "index": 5 }), }, ); diff --git a/crates/browser-use-agent/src/turn/fusion_tests.rs b/crates/browser-use-agent/src/turn/fusion_tests.rs index 1b467474..7aa5f685 100644 --- a/crates/browser-use-agent/src/turn/fusion_tests.rs +++ b/crates/browser-use-agent/src/turn/fusion_tests.rs @@ -251,6 +251,7 @@ fn tool_call_ev(id: &str, name: &str, input: serde_json::Value) -> LlmEvent { id: id.to_string(), name: name.to_string(), namespace: None, + provider_metadata: None, input, } } diff --git a/crates/browser-use-agent/src/turn/model_path.rs b/crates/browser-use-agent/src/turn/model_path.rs index 1e64a0bd..6854a6e7 100644 --- a/crates/browser-use-agent/src/turn/model_path.rs +++ b/crates/browser-use-agent/src/turn/model_path.rs @@ -28,7 +28,7 @@ use browser_use_llm::auth::{codex_route, CodexAuth}; use browser_use_llm::providers::{ Anthropic, AnthropicConfig, OpenAi, OpenAiCompatible, OpenAiConfig, }; -use browser_use_llm::route::{ModelClient, Route}; +use browser_use_llm::route::{Auth, ModelClient, Route}; use browser_use_llm::schema::{ContentPart, LlmRequest, Message, MessageRole, SystemPart}; use crate::events::{EventSink, TurnCtx}; @@ -71,6 +71,8 @@ pub enum ProviderChoice { base_url: String, /// API key. api_key: String, + /// Additional static headers to apply to every request for this route. + extra_headers: Vec<(String, String)>, }, /// The codex (chatgpt.com) backend, reached via the Codex CLI OAuth login. /// @@ -166,6 +168,7 @@ pub fn provider_choice_from_env() -> Result { provider_id: "openai-compatible".to_string(), base_url, api_key, + extra_headers: Vec::new(), }); } Err(ModelPathError::MissingCredentials( @@ -202,10 +205,17 @@ pub fn build_route(choice: &ProviderChoice, model: &str) -> Result { let provider = OpenAiCompatible::configure(provider_id.clone(), base_url.clone(), api_key.clone()); - Ok(provider.chat(model)) + let mut route = provider.chat(model); + for (name, value) in extra_headers { + route.auth = route + .auth + .and_then(Auth::header(name.clone(), value.clone())); + } + Ok(route) } ProviderChoice::Codex { access_token, @@ -249,9 +259,12 @@ pub fn build_transport( ), ); } + apply_browser_use_provider_options(&ctx.provider, &mut req); ModelClientTransport::new(client, route, req) } +pub(crate) fn apply_browser_use_provider_options(_provider: &str, _req: &mut LlmRequest) {} + /// Build the production text-only [`ModelSamplingDriver`] over a live transport. /// /// This is the real [`SamplingDriver`](crate::turn::SamplingDriver) the turn loop @@ -353,6 +366,7 @@ mod tests { provider_id: "internal".to_string(), base_url: "https://llm.internal/v1".to_string(), api_key: "k".to_string(), + extra_headers: Vec::new(), }; let route = build_route(&choice, "m").unwrap(); assert_eq!( @@ -361,6 +375,34 @@ mod tests { ); } + #[test] + fn openai_compatible_custom_applies_extra_headers() { + let choice = ProviderChoice::OpenAiCompatibleCustom { + provider_id: "browser-use".to_string(), + base_url: "https://llm.api.browser-use.com/v1".to_string(), + api_key: "k".to_string(), + extra_headers: vec![( + "x-browser-use-request-type".to_string(), + "rust_agent".to_string(), + )], + }; + let route = build_route(&choice, "bu-3-max").unwrap(); + + assert_eq!( + header(&route, "x-browser-use-request-type").as_deref(), + Some("rust_agent") + ); + } + + #[test] + fn browser_use_provider_options_do_not_tag_request_body() { + let mut req = LlmRequest::new("bu-3-max", "browseruse"); + + apply_browser_use_provider_options("browser-use", &mut req); + + assert_eq!(req.provider_options, None); + } + /// Only the `Codex` variant targets chatgpt.com: the env-keyed providers never /// route to the codex backend, while `Codex` does (and only it). #[test] @@ -379,6 +421,7 @@ mod tests { provider_id: "x".into(), base_url: "https://llm.internal/v1".into(), api_key: "k".into(), + extra_headers: Vec::new(), }, ] { let url = build_route(&choice, "m").unwrap().endpoint.url(); diff --git a/crates/browser-use-agent/src/turn/sampling.rs b/crates/browser-use-agent/src/turn/sampling.rs index 531739f4..95f92092 100644 --- a/crates/browser-use-agent/src/turn/sampling.rs +++ b/crates/browser-use-agent/src/turn/sampling.rs @@ -603,6 +603,7 @@ impl ModelSamplingDriver { id, name, namespace, + provider_metadata, input, } => { // Capture the actual call (model order) so the fused dispatch can @@ -611,8 +612,9 @@ impl ModelSamplingDriver { id, name, input, - provider_metadata: namespace - .map(|namespace| serde_json::json!({ "namespace": namespace })), + provider_metadata: provider_metadata.or_else(|| { + namespace.map(|namespace| serde_json::json!({ "namespace": namespace })) + }), }); Ok(StreamProgress::Continue) } @@ -1083,6 +1085,7 @@ fn build_request(ctx: &TurnCtx, input: Vec) -> LlmRequest { ), ); } + super::model_path::apply_browser_use_provider_options(&ctx.provider, &mut req); mark_message_cache_breakpoints(&mut req.messages); req } diff --git a/crates/browser-use-agent/src/turn/sampling_tests.rs b/crates/browser-use-agent/src/turn/sampling_tests.rs index 341c1c2e..c6c1257e 100644 --- a/crates/browser-use-agent/src/turn/sampling_tests.rs +++ b/crates/browser-use-agent/src/turn/sampling_tests.rs @@ -215,6 +215,7 @@ fn tool_call(name: &str) -> Result { id: "call-1".to_string(), name: name.to_string(), namespace: None, + provider_metadata: None, input: serde_json::json!({"arg": 1}), }) } @@ -224,6 +225,7 @@ fn tool_call_with_input(name: &str, input: serde_json::Value) -> Result, }, + RunBrowserUse { + text: String, + #[arg(long, default_value = "bu-3-max")] + model: String, + }, RunAnthropic { text: String, #[arg(long, default_value = "claude-sonnet-4-6")] @@ -255,6 +260,11 @@ enum Command { #[arg(long)] model: Option, }, + RunBrowserUseSession { + task_id: String, + #[arg(long, default_value = "bu-3-max")] + model: String, + }, RunAnthropicSession { task_id: String, #[arg(long, default_value = "claude-sonnet-4-6")] @@ -845,6 +855,15 @@ fn main() -> Result<()> { collaboration_mode, &runtime_options, ), + Command::RunBrowserUse { text, model } => run_browser_use( + &store, + text, + model, + config_profile.as_deref(), + &config_overrides, + collaboration_mode, + &runtime_options, + ), Command::RunAnthropic { text, model } => run_anthropic( &store, text, @@ -899,6 +918,15 @@ fn main() -> Result<()> { collaboration_mode, &runtime_options, ), + Command::RunBrowserUseSession { task_id, model } => run_browser_use_session( + &store, + &task_id, + model, + config_profile.as_deref(), + &config_overrides, + collaboration_mode, + &runtime_options, + ), Command::RunAnthropicSession { task_id, model } => run_anthropic_session( &store, &task_id, @@ -1204,12 +1232,14 @@ fn command_name(command: &Command) -> &'static str { Command::Start { .. } => "start", Command::RunFake { .. } => "run_fake", Command::RunOpenai { .. } => "run_openai", + Command::RunBrowserUse { .. } => "run_browser_use", Command::RunAnthropic { .. } => "run_anthropic", Command::RunOpenrouter { .. } => "run_openrouter", Command::RunDeepseek { .. } => "run_deepseek", Command::RunCodex { .. } => "run_codex", Command::RunCodexSession { .. } => "run_codex_session", Command::RunOpenaiSession { .. } => "run_openai_session", + Command::RunBrowserUseSession { .. } => "run_browser_use_session", Command::RunAnthropicSession { .. } => "run_anthropic_session", Command::RunOpenrouterSession { .. } => "run_openrouter_session", Command::RunDeepseekSession { .. } => "run_deepseek_session", @@ -2142,6 +2172,7 @@ fn default_cli_model_for_backend_with_overrides( ProviderBackend::Openai => { default_model_for_cwd_with_options(cwd, config_profile, config_overrides, false) } + ProviderBackend::BrowserUse => Ok("bu-3-max".to_string()), ProviderBackend::Anthropic => Ok("claude-sonnet-4-6".to_string()), ProviderBackend::Openrouter => Ok("openai/gpt-5.5".to_string()), ProviderBackend::Deepseek => Ok("deepseek-v4-pro".to_string()), @@ -2154,6 +2185,7 @@ fn default_cli_model_for_backend_with_overrides( fn default_provider_id_for_backend(backend: ProviderBackend) -> &'static str { match backend { ProviderBackend::Openai => "openai", + ProviderBackend::BrowserUse => "browser-use", ProviderBackend::Anthropic => "anthropic", ProviderBackend::Openrouter => "openrouter", ProviderBackend::Deepseek => "deepseek", @@ -2253,6 +2285,27 @@ fn run_anthropic( run_new_session_from_config(store, text, config) } +fn run_browser_use( + store: &Store, + text: String, + model: String, + config_profile: Option<&str>, + raw_config_overrides: &[String], + collaboration_mode: CollaborationModeKind, + runtime_options: &CliRuntimeOptions, +) -> Result<()> { + let config = ProviderRunConfig::new(ProviderBackend::BrowserUse, model).with_options( + cli_agent_options( + config_profile, + raw_config_overrides, + collaboration_mode, + runtime_options, + )? + .with_default_model_provider_id("browser-use"), + ); + run_new_session_from_config(store, text, config) +} + fn run_openrouter( store: &Store, text: String, @@ -2390,6 +2443,30 @@ fn run_anthropic_session( Ok(()) } +fn run_browser_use_session( + store: &Store, + task_id: &str, + model: String, + config_profile: Option<&str>, + raw_config_overrides: &[String], + collaboration_mode: CollaborationModeKind, + runtime_options: &CliRuntimeOptions, +) -> Result<()> { + ensure_task_exists(store, task_id)?; + let config = ProviderRunConfig::new(ProviderBackend::BrowserUse, model).with_options( + cli_agent_options( + config_profile, + raw_config_overrides, + collaboration_mode, + runtime_options, + )? + .with_default_model_provider_id("browser-use"), + ); + let session_id = run_existing_session_from_config_and_notify(store, task_id, config, None)?; + println!("{session_id}"); + Ok(()) +} + fn run_openrouter_session( store: &Store, task_id: &str, @@ -6986,9 +7063,7 @@ fn sdk_provider_backend(provider: &str, model: &str) -> Result } let normalized = provider.trim().to_ascii_lowercase(); if normalized == "browser-use" || normalized == "browser_use" { - bail!( - "Browser Use LLM gateway models are not supported by the Rust terminal SDK yet; use ChatOpenAI, ChatAnthropic, ChatOpenRouter, or ChatDeepSeek" - ); + return Ok(ProviderBackend::BrowserUse); } ProviderBackend::from_provider_id(&normalized) .filter(|backend| *backend != ProviderBackend::None) @@ -6999,7 +7074,7 @@ fn sdk_provider_id(provider: &str, backend: ProviderBackend) -> String { let normalized = provider.trim().to_ascii_lowercase(); if matches!( normalized.as_str(), - "openai" | "anthropic" | "openrouter" | "deepseek" | "codex" | "fake" + "browser-use" | "openai" | "anthropic" | "openrouter" | "deepseek" | "codex" | "fake" ) { return normalized; } @@ -9736,18 +9811,21 @@ command = "test-mcp" } #[test] - fn sdk_provider_run_config_rejects_browser_use_gateway_provider() { + fn sdk_provider_run_config_accepts_browser_use_gateway_provider() -> Result<()> { let params = serde_json::json!({ "task": "inspect", - "llm": {"provider": "browser-use", "model": "bu-2-0"} + "llm": {"provider": "browser-use", "model": "bu-3-max"} }); - let error = sdk_provider_run_config(¶ms, Some("inspect"), None) - .expect_err("browser-use gateway provider should not be misrouted to OpenAI"); + let config = sdk_provider_run_config(¶ms, Some("inspect"), None)?; - assert!(error - .to_string() - .contains("Browser Use LLM gateway models are not supported")); + assert_eq!(config.backend, ProviderBackend::BrowserUse); + assert_eq!(config.model, "bu-3-max"); + assert_eq!( + config.options.model_provider_id.as_deref(), + Some("browser-use") + ); + Ok(()) } #[test] @@ -11570,6 +11648,23 @@ command = "test-mcp" Ok(()) } + #[test] + fn cli_browser_use_backend_defaults_to_bu3_max() -> Result<()> { + assert_eq!( + default_cli_model_for_backend_with_overrides(ProviderBackend::BrowserUse, None, &[])?, + "bu-3-max" + ); + assert_eq!( + resolved_cli_provider_id_for_backend_with_overrides( + ProviderBackend::BrowserUse, + None, + &[] + )?, + "browser-use" + ); + Ok(()) + } + #[test] fn cli_model_source_treats_config_model_override_as_explicit() -> Result<()> { let (model, source) = resolve_cli_model_with_source( diff --git a/crates/browser-use-llm/src/protocols/anthropic_messages.rs b/crates/browser-use-llm/src/protocols/anthropic_messages.rs index c5c5f240..6789fe6a 100644 --- a/crates/browser-use-llm/src/protocols/anthropic_messages.rs +++ b/crates/browser-use-llm/src/protocols/anthropic_messages.rs @@ -1206,6 +1206,7 @@ mod tests { id: "toolu_1".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: json!({ "city": "Paris" }), }, LlmEvent::StepFinish { diff --git a/crates/browser-use-llm/src/protocols/openai_chat.rs b/crates/browser-use-llm/src/protocols/openai_chat.rs index 308f370b..794c1f8e 100644 --- a/crates/browser-use-llm/src/protocols/openai_chat.rs +++ b/crates/browser-use-llm/src/protocols/openai_chat.rs @@ -102,6 +102,12 @@ impl Protocol for OpenAiChatProtocol { apply_generation(&mut body, &req.generation); + if let Some(Value::Object(provider_options)) = &req.provider_options { + for (key, value) in provider_options { + body.entry(key.clone()).or_insert_with(|| value.clone()); + } + } + body.insert("stream".to_string(), Value::Bool(true)); body.insert( "stream_options".to_string(), @@ -205,7 +211,10 @@ fn build_assistant_message(message: &Message) -> Result { let mut tool_calls: Vec = Vec::new(); for part in &message.content { if let ContentPart::ToolCall { - id, name, input, .. + id, + name, + input, + provider_metadata, } = part { let arguments = serde_json::to_string(input).map_err(|e| { @@ -214,11 +223,17 @@ fn build_assistant_message(message: &Message) -> Result { format!("tool call arguments not serializable: {e}"), ) })?; - tool_calls.push(json!({ - "id": id, - "type": "function", - "function": { "name": name, "arguments": arguments }, - })); + let mut tool_call = Map::new(); + tool_call.insert("id".to_string(), json!(id)); + tool_call.insert("type".to_string(), json!("function")); + tool_call.insert( + "function".to_string(), + json!({ "name": name, "arguments": arguments }), + ); + if let Some(metadata) = provider_metadata { + tool_call.insert("provider_metadata".to_string(), metadata.clone()); + } + tool_calls.push(Value::Object(tool_call)); } } // Omit `content` for a tool-only assistant turn rather than sending an empty @@ -654,6 +669,8 @@ impl OpenAiChatStream { .and_then(|f| f.get("name")) .and_then(Value::as_str) .filter(|n| !n.is_empty()); + self.tools + .set_provider_metadata(&id, tool_call_provider_metadata(call)); let fragment = function .and_then(|f| f.get("arguments")) .and_then(Value::as_str) @@ -674,6 +691,13 @@ impl OpenAiChatStream { } } +fn tool_call_provider_metadata(call: &Value) -> Option { + call.get("provider_metadata") + .or_else(|| call.get("browser_use")) + .filter(|value| !value.is_null()) + .cloned() +} + /// Map a Chat Completions `finish_reason` string onto a [`FinishReason`]. fn map_finish_reason(reason: &str) -> FinishReason { match reason { @@ -809,6 +833,44 @@ mod tests { assert_eq!(body, expected); } + #[test] + fn build_body_replays_tool_call_provider_metadata() { + let mut req = LlmRequest::new("bu-3-max", "browser-use"); + req.messages.push(Message::new( + MessageRole::Assistant, + vec![ContentPart::ToolCall { + id: "call_1".into(), + name: "get_weather".into(), + input: json!({ "city": "Paris" }), + provider_metadata: Some(json!({ + "google": { "thought_signature": "sig-123" } + })), + }], + )); + + let body = OpenAiChatProtocol::new().build_body(&req).unwrap(); + let tool_call = &body["messages"][0]["tool_calls"][0]; + + assert_eq!( + tool_call["provider_metadata"], + json!({ "google": { "thought_signature": "sig-123" } }) + ); + } + + #[test] + fn build_body_merges_provider_options_without_overriding_core_fields() { + let mut req = LlmRequest::new("gpt-4o", "browser-use"); + req.provider_options = Some(json!({ + "request_type": "rust_agent", + "model": "wrong-model" + })); + + let body = OpenAiChatProtocol::new().build_body(&req).unwrap(); + + assert_eq!(body["request_type"], "rust_agent"); + assert_eq!(body["model"], "gpt-4o"); + } + #[test] fn build_body_preserves_user_image_content() { let mut req = LlmRequest::new("gpt-4o", "openai"); @@ -1014,6 +1076,7 @@ mod tests { id: "call_42".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: json!({ "city": "Paris" }), }, LlmEvent::StepFinish { @@ -1029,6 +1092,37 @@ mod tests { assert_eq!(events, expected); } + #[test] + fn decoder_preserves_tool_call_provider_metadata() { + let mut stream = OpenAiChatProtocol::new().decoder(); + let mut events = Vec::new(); + events.extend( + stream + .on_frame(&frame( + r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_42","type":"function","provider_metadata":{"google":{"thought_signature":"sig-123"}},"function":{"name":"get_weather","arguments":""}}]}}]}"#, + )) + .unwrap(), + ); + events.extend( + stream + .on_frame(&frame( + r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{}"}}]}}]}"#, + )) + .unwrap(), + ); + events.extend(stream.finish().unwrap()); + + assert!(events.contains(&LlmEvent::ToolCall { + id: "call_42".into(), + name: "get_weather".into(), + namespace: None, + provider_metadata: Some(json!({ + "google": { "thought_signature": "sig-123" } + })), + input: json!({}), + })); + } + #[test] fn decoder_plain_text_finish_computes_total() { let mut stream = OpenAiChatProtocol::new().decoder(); diff --git a/crates/browser-use-llm/src/protocols/openai_responses.rs b/crates/browser-use-llm/src/protocols/openai_responses.rs index b71deb6a..52020ada 100644 --- a/crates/browser-use-llm/src/protocols/openai_responses.rs +++ b/crates/browser-use-llm/src/protocols/openai_responses.rs @@ -966,6 +966,7 @@ mod tests { id: "call_1".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: json!({ "city": "NYC" }), }, LlmEvent::StepFinish { @@ -1129,6 +1130,7 @@ mod tests { id: "call_9".into(), name: "do_it".into(), namespace: None, + provider_metadata: None, input: json!({}), })); assert!(matches!(events.last(), Some(LlmEvent::Finish { .. }))); @@ -1154,6 +1156,7 @@ mod tests { id: "call_9".into(), name: "spawn_agent".into(), namespace: Some("agents".into()), + provider_metadata: None, input: json!({ "task_name": "audit", "message": "check" }), })); } diff --git a/crates/browser-use-llm/src/protocols/utils/tool_stream.rs b/crates/browser-use-llm/src/protocols/utils/tool_stream.rs index 618afa35..4ebef65a 100644 --- a/crates/browser-use-llm/src/protocols/utils/tool_stream.rs +++ b/crates/browser-use-llm/src/protocols/utils/tool_stream.rs @@ -15,6 +15,7 @@ use crate::schema::{LlmError, LlmErrorReason, LlmEvent}; struct Accum { name: String, namespace: Option, + provider_metadata: Option, args: String, started: bool, ended: bool, @@ -68,6 +69,18 @@ impl ToolStream { vec![LlmEvent::ToolInputStart { id, name: resolved }] } + /// Attach opaque provider metadata to a call. The next request can replay it + /// without the core understanding provider-specific fields. + pub fn set_provider_metadata(&mut self, id: impl AsRef, metadata: Option) { + if metadata.is_none() { + return; + } + let e = self.entry(id.as_ref()); + if e.provider_metadata.is_none() { + e.provider_metadata = metadata; + } + } + /// Argument fragment. `name` may be supplied here for providers that only /// reveal the tool name on the first delta. Emits `ToolInputStart` (if not /// already started) followed by `ToolInputDelta`. @@ -102,10 +115,15 @@ impl ToolStream { /// No-op if the id is unknown or already ended. pub fn end(&mut self, id: impl AsRef) -> Result, LlmError> { let id = id.as_ref().to_string(); - let (name, namespace, args) = match self.calls.get_mut(&id) { + let (name, namespace, provider_metadata, args) = match self.calls.get_mut(&id) { Some(e) if !e.ended => { e.ended = true; - (e.name.clone(), e.namespace.clone(), e.args.clone()) + ( + e.name.clone(), + e.namespace.clone(), + e.provider_metadata.clone(), + e.args.clone(), + ) } _ => return Ok(Vec::new()), }; @@ -116,6 +134,7 @@ impl ToolStream { id, name, namespace, + provider_metadata, input, }, ]) @@ -203,6 +222,7 @@ mod tests { id: "c0".into(), name: "shell".into(), namespace: None, + provider_metadata: None, input: json!({ "command": ["ls"] }), }, ] @@ -236,6 +256,7 @@ mod tests { id: "0".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: json!({ "city": "NYC" }), }, ] @@ -253,6 +274,7 @@ mod tests { id: "c0".into(), name: "now".into(), namespace: None, + provider_metadata: None, input: json!({}), } ); diff --git a/crates/browser-use-llm/src/route/client.rs b/crates/browser-use-llm/src/route/client.rs index 37290888..c5d3d0eb 100644 --- a/crates/browser-use-llm/src/route/client.rs +++ b/crates/browser-use-llm/src/route/client.rs @@ -483,14 +483,16 @@ fn aggregate(events: Vec) -> LlmResponse { id, name, namespace, + provider_metadata, input, } => { tool_calls.push(ContentPart::ToolCall { id, name, input, - provider_metadata: namespace - .map(|namespace| serde_json::json!({ "namespace": namespace })), + provider_metadata: provider_metadata.or_else(|| { + namespace.map(|namespace| serde_json::json!({ "namespace": namespace })) + }), }); } LlmEvent::Finish { @@ -979,6 +981,7 @@ mod tests { id: "call_1".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: serde_json::json!({ "city": "NYC" }), }, LlmEvent::StepFinish { diff --git a/crates/browser-use-llm/src/schema/event.rs b/crates/browser-use-llm/src/schema/event.rs index f4aeb228..542f5163 100644 --- a/crates/browser-use-llm/src/schema/event.rs +++ b/crates/browser-use-llm/src/schema/event.rs @@ -87,6 +87,8 @@ pub enum LlmEvent { name: String, #[serde(default, skip_serializing_if = "Option::is_none")] namespace: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + provider_metadata: Option, input: Value, }, StepFinish { diff --git a/crates/browser-use-llm/src/schema/mod.rs b/crates/browser-use-llm/src/schema/mod.rs index e883f65c..020fc278 100644 --- a/crates/browser-use-llm/src/schema/mod.rs +++ b/crates/browser-use-llm/src/schema/mod.rs @@ -101,6 +101,7 @@ mod tests { id: "c0".into(), name: "shell".into(), namespace: None, + provider_metadata: None, input: json!({}), }, LlmEvent::Finish { diff --git a/crates/browser-use-llm/src/tool_runtime.rs b/crates/browser-use-llm/src/tool_runtime.rs index 92d9fed7..a07290f7 100644 --- a/crates/browser-use-llm/src/tool_runtime.rs +++ b/crates/browser-use-llm/src/tool_runtime.rs @@ -108,15 +108,18 @@ fn reduce_turn(events: Vec) -> TurnOutcome { id, name, namespace, + provider_metadata, input, } => { assistant_tool_parts.push(ContentPart::ToolCall { id: id.clone(), name: name.clone(), input: input.clone(), - provider_metadata: namespace - .clone() - .map(|namespace| serde_json::json!({ "namespace": namespace })), + provider_metadata: provider_metadata.clone().or_else(|| { + namespace + .clone() + .map(|namespace| serde_json::json!({ "namespace": namespace })) + }), }); tool_calls.push(ToolCall { id, @@ -383,6 +386,7 @@ mod tests { id: id.into(), name: "add".into(), namespace: None, + provider_metadata: None, input: json!({ "a": a, "b": b }), }, LlmEvent::Finish { @@ -536,6 +540,7 @@ mod tests { id: "bad_1".into(), name: "add".into(), namespace: None, + provider_metadata: None, input: json!({ "a": "oops", "b": 3 }), }, LlmEvent::Finish { @@ -582,6 +587,7 @@ mod tests { id: "u1".into(), name: "nonexistent".into(), namespace: None, + provider_metadata: None, input: json!({}), }, LlmEvent::Finish {