From bd52a1ac53482efd1dc519e73dd2154073c14590 Mon Sep 17 00:00:00 2001 From: MagMueller Date: Sat, 6 Jun 2026 13:51:46 -0700 Subject: [PATCH 1/5] add browser-use provider --- .../browser-use-agent/src/config_overrides.rs | 6 +- .../src/entrypoint/provider.rs | 48 +++++++++ .../browser-use-agent/src/turn/model_path.rs | 21 ++++ crates/browser-use-agent/src/turn/sampling.rs | 1 + crates/browser-use-cli/src/main.rs | 98 ++++++++++++++++++- .../src/protocols/openai_chat.rs | 20 ++++ 6 files changed, 191 insertions(+), 3 deletions(-) diff --git a/crates/browser-use-agent/src/config_overrides.rs b/crates/browser-use-agent/src/config_overrides.rs index 5f7d2b2a..319968c0 100644 --- a/crates/browser-use-agent/src/config_overrides.rs +++ b/crates/browser-use-agent/src/config_overrides.rs @@ -101,6 +101,7 @@ impl Default for MultiAgentV2Options { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ProviderBackend { Codex, + BrowserUse, Openai, Anthropic, Openrouter, @@ -113,6 +114,7 @@ impl ProviderBackend { pub fn from_provider_id(value: &str) -> Option { match value.trim().to_ascii_lowercase().as_str() { "codex" => Some(Self::Codex), + "browser-use" | "browser_use" | "browseruse" => Some(Self::BrowserUse), "openai" => Some(Self::Openai), "anthropic" => Some(Self::Anthropic), "openrouter" => Some(Self::Openrouter), @@ -1936,6 +1938,7 @@ command = "profile-server" // full variant set matches `browser-use-core::ProviderBackend`. let all = [ ProviderBackend::Codex, + ProviderBackend::BrowserUse, ProviderBackend::Openai, ProviderBackend::Anthropic, ProviderBackend::Openrouter, @@ -1947,6 +1950,7 @@ command = "profile-server" let name = format!("{backend:?}"); let round_tripped = match name.as_str() { "Codex" => ProviderBackend::Codex, + "BrowserUse" => ProviderBackend::BrowserUse, "Openai" => ProviderBackend::Openai, "Anthropic" => ProviderBackend::Anthropic, "Openrouter" => ProviderBackend::Openrouter, @@ -1957,7 +1961,7 @@ command = "profile-server" }; assert_eq!(backend, round_tripped); } - assert_eq!(all.len(), 7); + assert_eq!(all.len(), 8); } #[test] diff --git a/crates/browser-use-agent/src/entrypoint/provider.rs b/crates/browser-use-agent/src/entrypoint/provider.rs index 1b64eb89..bb90c7e4 100644 --- a/crates/browser-use-agent/src/entrypoint/provider.rs +++ b/crates/browser-use-agent/src/entrypoint/provider.rs @@ -19,6 +19,9 @@ //! * [`ProviderBackend::Openai`] → [`ProviderChoice::OpenAiResponses`] //! (key from `OPENAI_API_KEY` / `LLM_BROWSER_OPENAI_API_KEY`, optional //! `LLM_BROWSER_OPENAI_BASE_URL`), +//! * [`ProviderBackend::BrowserUse`] → [`ProviderChoice::OpenAiCompatibleCustom`] +//! id `"browser-use"` (key from `BROWSER_USE_API_KEY`, base override +//! `LLM_BROWSER_BROWSER_USE_BASE_URL`), //! * [`ProviderBackend::Anthropic`] → [`ProviderChoice::Anthropic`] //! (key from `ANTHROPIC_API_KEY` / `LLM_BROWSER_ANTHROPIC_API_KEY`), //! * [`ProviderBackend::Openrouter`] → [`ProviderChoice::OpenAiCompatibleProvider`] @@ -798,6 +801,22 @@ pub fn provider_choice_for_backend( base_url: env_first(&["LLM_BROWSER_OPENAI_BASE_URL"]), })) } + ProviderBackend::BrowserUse => { + let api_key = key_env_then_store( + &["LLM_BROWSER_BROWSER_USE_API_KEY", "BROWSER_USE_API_KEY"], + store, + "browser_use_cloud", + ) + .ok_or(ProviderResolveError::MissingCredentials( + "set BROWSER_USE_API_KEY (or run `auth login browser-use-cloud`) for the browser-use backend", + ))?; + Ok(Some(ProviderChoice::OpenAiCompatibleCustom { + provider_id: "browser-use".to_string(), + base_url: env_first(&["LLM_BROWSER_BROWSER_USE_BASE_URL"]) + .unwrap_or_else(|| "https://llm.api.browser-use.com/v1".to_string()), + api_key, + })) + } ProviderBackend::Anthropic => { let api_key = key_env_then_store( &["LLM_BROWSER_ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY"], @@ -2702,6 +2721,35 @@ mod tests { assert!(matches!(resolved, ResolvedProvider::Real(_))); } + #[test] + fn browser_use_backend_resolves_gateway_route_from_cloud_key() { + let _guard = ENV_LOCK.lock().unwrap(); + std::env::remove_var("BROWSER_USE_API_KEY"); + std::env::remove_var("LLM_BROWSER_BROWSER_USE_API_KEY"); + let dir = tempfile::tempdir().expect("tempdir"); + let store = Store::open(dir.path()).expect("store"); + store + .set_setting("auth.browser_use_cloud.api_key", "stored-browser-use-key") + .unwrap(); + + let choice = provider_choice_for_backend(ProviderBackend::BrowserUse, Some(&store)) + .expect("resolves") + .expect("browser-use is a real provider"); + + match choice { + ProviderChoice::OpenAiCompatibleCustom { + provider_id, + base_url, + api_key, + } => { + assert_eq!(provider_id, "browser-use"); + assert_eq!(base_url, "https://llm.api.browser-use.com/v1"); + assert_eq!(api_key, "stored-browser-use-key"); + } + other => panic!("expected browser-use gateway choice, got {other:?}"), + } + } + /// A real Anthropic backend also constructs offline given its key. #[test] fn resolves_real_anthropic_driver_offline() { diff --git a/crates/browser-use-agent/src/turn/model_path.rs b/crates/browser-use-agent/src/turn/model_path.rs index 1e64a0bd..475c3f62 100644 --- a/crates/browser-use-agent/src/turn/model_path.rs +++ b/crates/browser-use-agent/src/turn/model_path.rs @@ -30,6 +30,7 @@ use browser_use_llm::providers::{ }; use browser_use_llm::route::{ModelClient, Route}; use browser_use_llm::schema::{ContentPart, LlmRequest, Message, MessageRole, SystemPart}; +use serde_json::json; use crate::events::{EventSink, TurnCtx}; use crate::turn::sampling::{ModelClientTransport, ModelSamplingDriver}; @@ -249,9 +250,17 @@ pub fn build_transport( ), ); } + apply_browser_use_provider_options(&ctx.provider, &mut req); ModelClientTransport::new(client, route, req) } +pub(crate) fn apply_browser_use_provider_options(provider: &str, req: &mut LlmRequest) { + let normalized = provider.trim().to_ascii_lowercase().replace(['_', '-'], ""); + if normalized == "browseruse" { + req.provider_options = Some(json!({ "request_type": "rust_agent" })); + } +} + /// Build the production text-only [`ModelSamplingDriver`] over a live transport. /// /// This is the real [`SamplingDriver`](crate::turn::SamplingDriver) the turn loop @@ -361,6 +370,18 @@ mod tests { ); } + #[test] + fn browser_use_provider_options_tag_rust_agent_requests() { + let mut req = LlmRequest::new("bu-3-max", "browseruse"); + + apply_browser_use_provider_options("browser-use", &mut req); + + assert_eq!( + req.provider_options, + Some(serde_json::json!({ "request_type": "rust_agent" })) + ); + } + /// Only the `Codex` variant targets chatgpt.com: the env-keyed providers never /// route to the codex backend, while `Codex` does (and only it). #[test] diff --git a/crates/browser-use-agent/src/turn/sampling.rs b/crates/browser-use-agent/src/turn/sampling.rs index 531739f4..5680b67e 100644 --- a/crates/browser-use-agent/src/turn/sampling.rs +++ b/crates/browser-use-agent/src/turn/sampling.rs @@ -1083,6 +1083,7 @@ fn build_request(ctx: &TurnCtx, input: Vec) -> LlmRequest { ), ); } + super::model_path::apply_browser_use_provider_options(&ctx.provider, &mut req); mark_message_cache_breakpoints(&mut req.messages); req } diff --git a/crates/browser-use-cli/src/main.rs b/crates/browser-use-cli/src/main.rs index 3274eef3..0a5626a6 100644 --- a/crates/browser-use-cli/src/main.rs +++ b/crates/browser-use-cli/src/main.rs @@ -182,6 +182,11 @@ enum Command { #[arg(long)] model: Option, }, + RunBrowserUse { + text: String, + #[arg(long, default_value = "bu-3-max")] + model: String, + }, RunAnthropic { text: String, #[arg(long, default_value = "claude-sonnet-4-6")] @@ -217,6 +222,11 @@ enum Command { #[arg(long)] model: Option, }, + RunBrowserUseSession { + task_id: String, + #[arg(long, default_value = "bu-3-max")] + model: String, + }, RunAnthropicSession { task_id: String, #[arg(long, default_value = "claude-sonnet-4-6")] @@ -724,6 +734,15 @@ fn main() -> Result<()> { collaboration_mode, &runtime_options, ), + Command::RunBrowserUse { text, model } => run_browser_use( + &store, + text, + model, + config_profile.as_deref(), + &config_overrides, + collaboration_mode, + &runtime_options, + ), Command::RunAnthropic { text, model } => run_anthropic( &store, text, @@ -778,6 +797,15 @@ fn main() -> Result<()> { collaboration_mode, &runtime_options, ), + Command::RunBrowserUseSession { task_id, model } => run_browser_use_session( + &store, + &task_id, + model, + config_profile.as_deref(), + &config_overrides, + collaboration_mode, + &runtime_options, + ), Command::RunAnthropicSession { task_id, model } => run_anthropic_session( &store, &task_id, @@ -1081,12 +1109,14 @@ fn command_name(command: &Command) -> &'static str { Command::Start { .. } => "start", Command::RunFake { .. } => "run_fake", Command::RunOpenai { .. } => "run_openai", + Command::RunBrowserUse { .. } => "run_browser_use", Command::RunAnthropic { .. } => "run_anthropic", Command::RunOpenrouter { .. } => "run_openrouter", Command::RunDeepseek { .. } => "run_deepseek", Command::RunCodex { .. } => "run_codex", Command::RunCodexSession { .. } => "run_codex_session", Command::RunOpenaiSession { .. } => "run_openai_session", + Command::RunBrowserUseSession { .. } => "run_browser_use_session", Command::RunAnthropicSession { .. } => "run_anthropic_session", Command::RunOpenrouterSession { .. } => "run_openrouter_session", Command::RunDeepseekSession { .. } => "run_deepseek_session", @@ -2017,6 +2047,7 @@ fn default_cli_model_for_backend_with_overrides( ProviderBackend::Openai => { default_model_for_cwd_with_options(cwd, config_profile, config_overrides, false) } + ProviderBackend::BrowserUse => Ok("bu-3-max".to_string()), ProviderBackend::Anthropic => Ok("claude-sonnet-4-6".to_string()), ProviderBackend::Openrouter => Ok("openai/gpt-5.5".to_string()), ProviderBackend::Deepseek => Ok("deepseek-v4-pro".to_string()), @@ -2029,6 +2060,7 @@ fn default_cli_model_for_backend_with_overrides( fn default_provider_id_for_backend(backend: ProviderBackend) -> &'static str { match backend { ProviderBackend::Openai => "openai", + ProviderBackend::BrowserUse => "browser-use", ProviderBackend::Anthropic => "anthropic", ProviderBackend::Openrouter => "openrouter", ProviderBackend::Deepseek => "deepseek", @@ -2128,6 +2160,27 @@ fn run_anthropic( run_new_session_from_config(store, text, config) } +fn run_browser_use( + store: &Store, + text: String, + model: String, + config_profile: Option<&str>, + raw_config_overrides: &[String], + collaboration_mode: CollaborationModeKind, + runtime_options: &CliRuntimeOptions, +) -> Result<()> { + let config = ProviderRunConfig::new(ProviderBackend::BrowserUse, model).with_options( + cli_agent_options( + config_profile, + raw_config_overrides, + collaboration_mode, + runtime_options, + )? + .with_default_model_provider_id("browser-use"), + ); + run_new_session_from_config(store, text, config) +} + fn run_openrouter( store: &Store, text: String, @@ -2265,6 +2318,30 @@ fn run_anthropic_session( Ok(()) } +fn run_browser_use_session( + store: &Store, + task_id: &str, + model: String, + config_profile: Option<&str>, + raw_config_overrides: &[String], + collaboration_mode: CollaborationModeKind, + runtime_options: &CliRuntimeOptions, +) -> Result<()> { + ensure_task_exists(store, task_id)?; + let config = ProviderRunConfig::new(ProviderBackend::BrowserUse, model).with_options( + cli_agent_options( + config_profile, + raw_config_overrides, + collaboration_mode, + runtime_options, + )? + .with_default_model_provider_id("browser-use"), + ); + let session_id = run_existing_session_from_config_and_notify(store, task_id, config, None)?; + println!("{session_id}"); + Ok(()) +} + fn run_openrouter_session( store: &Store, task_id: &str, @@ -5348,7 +5425,7 @@ fn sdk_provider_backend(provider: &str, model: &str) -> Result } let normalized = provider.trim().to_ascii_lowercase(); if normalized == "browser-use" || normalized == "browser_use" { - return Ok(ProviderBackend::Openai); + return Ok(ProviderBackend::BrowserUse); } ProviderBackend::from_provider_id(&normalized) .filter(|backend| *backend != ProviderBackend::None) @@ -5359,7 +5436,7 @@ fn sdk_provider_id(provider: &str, backend: ProviderBackend) -> String { let normalized = provider.trim().to_ascii_lowercase(); if matches!( normalized.as_str(), - "openai" | "anthropic" | "openrouter" | "deepseek" | "codex" | "fake" + "browser-use" | "openai" | "anthropic" | "openrouter" | "deepseek" | "codex" | "fake" ) { return normalized; } @@ -9705,6 +9782,23 @@ command = "test-mcp" Ok(()) } + #[test] + fn cli_browser_use_backend_defaults_to_bu3_max() -> Result<()> { + assert_eq!( + default_cli_model_for_backend_with_overrides(ProviderBackend::BrowserUse, None, &[])?, + "bu-3-max" + ); + assert_eq!( + resolved_cli_provider_id_for_backend_with_overrides( + ProviderBackend::BrowserUse, + None, + &[] + )?, + "browser-use" + ); + Ok(()) + } + #[test] fn cli_model_source_treats_config_model_override_as_explicit() -> Result<()> { let (model, source) = resolve_cli_model_with_source( diff --git a/crates/browser-use-llm/src/protocols/openai_chat.rs b/crates/browser-use-llm/src/protocols/openai_chat.rs index 308f370b..4a9577a5 100644 --- a/crates/browser-use-llm/src/protocols/openai_chat.rs +++ b/crates/browser-use-llm/src/protocols/openai_chat.rs @@ -102,6 +102,12 @@ impl Protocol for OpenAiChatProtocol { apply_generation(&mut body, &req.generation); + if let Some(Value::Object(provider_options)) = &req.provider_options { + for (key, value) in provider_options { + body.entry(key.clone()).or_insert_with(|| value.clone()); + } + } + body.insert("stream".to_string(), Value::Bool(true)); body.insert( "stream_options".to_string(), @@ -809,6 +815,20 @@ mod tests { assert_eq!(body, expected); } + #[test] + fn build_body_merges_provider_options_without_overriding_core_fields() { + let mut req = LlmRequest::new("gpt-4o", "browser-use"); + req.provider_options = Some(json!({ + "request_type": "rust_agent", + "model": "wrong-model" + })); + + let body = OpenAiChatProtocol::new().build_body(&req).unwrap(); + + assert_eq!(body["request_type"], "rust_agent"); + assert_eq!(body["model"], "gpt-4o"); + } + #[test] fn build_body_preserves_user_image_content() { let mut req = LlmRequest::new("gpt-4o", "openai"); From 3b94d1bbfdbdcd411fb91c8efd0125aa10b45f8f Mon Sep 17 00:00:00 2001 From: MagMueller Date: Mon, 8 Jun 2026 13:48:14 -0700 Subject: [PATCH 2/5] send browser use request type header --- .../src/entrypoint/provider.rs | 13 +++++ .../browser-use-agent/src/turn/model_path.rs | 50 +++++++++++++------ 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/crates/browser-use-agent/src/entrypoint/provider.rs b/crates/browser-use-agent/src/entrypoint/provider.rs index bb90c7e4..426eaa7a 100644 --- a/crates/browser-use-agent/src/entrypoint/provider.rs +++ b/crates/browser-use-agent/src/entrypoint/provider.rs @@ -815,6 +815,11 @@ pub fn provider_choice_for_backend( base_url: env_first(&["LLM_BROWSER_BROWSER_USE_BASE_URL"]) .unwrap_or_else(|| "https://llm.api.browser-use.com/v1".to_string()), api_key, + extra_headers: vec![( + "x-browser-use-request-type".to_string(), + env_first(&["LLM_BROWSER_BROWSER_USE_REQUEST_TYPE"]) + .unwrap_or_else(|| "rust_agent".to_string()), + )], })) } ProviderBackend::Anthropic => { @@ -2741,10 +2746,18 @@ mod tests { provider_id, base_url, api_key, + extra_headers, } => { assert_eq!(provider_id, "browser-use"); assert_eq!(base_url, "https://llm.api.browser-use.com/v1"); assert_eq!(api_key, "stored-browser-use-key"); + assert_eq!( + extra_headers, + vec![( + "x-browser-use-request-type".to_string(), + "rust_agent".to_string() + )] + ); } other => panic!("expected browser-use gateway choice, got {other:?}"), } diff --git a/crates/browser-use-agent/src/turn/model_path.rs b/crates/browser-use-agent/src/turn/model_path.rs index 475c3f62..6854a6e7 100644 --- a/crates/browser-use-agent/src/turn/model_path.rs +++ b/crates/browser-use-agent/src/turn/model_path.rs @@ -28,9 +28,8 @@ use browser_use_llm::auth::{codex_route, CodexAuth}; use browser_use_llm::providers::{ Anthropic, AnthropicConfig, OpenAi, OpenAiCompatible, OpenAiConfig, }; -use browser_use_llm::route::{ModelClient, Route}; +use browser_use_llm::route::{Auth, ModelClient, Route}; use browser_use_llm::schema::{ContentPart, LlmRequest, Message, MessageRole, SystemPart}; -use serde_json::json; use crate::events::{EventSink, TurnCtx}; use crate::turn::sampling::{ModelClientTransport, ModelSamplingDriver}; @@ -72,6 +71,8 @@ pub enum ProviderChoice { base_url: String, /// API key. api_key: String, + /// Additional static headers to apply to every request for this route. + extra_headers: Vec<(String, String)>, }, /// The codex (chatgpt.com) backend, reached via the Codex CLI OAuth login. /// @@ -167,6 +168,7 @@ pub fn provider_choice_from_env() -> Result { provider_id: "openai-compatible".to_string(), base_url, api_key, + extra_headers: Vec::new(), }); } Err(ModelPathError::MissingCredentials( @@ -203,10 +205,17 @@ pub fn build_route(choice: &ProviderChoice, model: &str) -> Result { let provider = OpenAiCompatible::configure(provider_id.clone(), base_url.clone(), api_key.clone()); - Ok(provider.chat(model)) + let mut route = provider.chat(model); + for (name, value) in extra_headers { + route.auth = route + .auth + .and_then(Auth::header(name.clone(), value.clone())); + } + Ok(route) } ProviderChoice::Codex { access_token, @@ -254,12 +263,7 @@ pub fn build_transport( ModelClientTransport::new(client, route, req) } -pub(crate) fn apply_browser_use_provider_options(provider: &str, req: &mut LlmRequest) { - let normalized = provider.trim().to_ascii_lowercase().replace(['_', '-'], ""); - if normalized == "browseruse" { - req.provider_options = Some(json!({ "request_type": "rust_agent" })); - } -} +pub(crate) fn apply_browser_use_provider_options(_provider: &str, _req: &mut LlmRequest) {} /// Build the production text-only [`ModelSamplingDriver`] over a live transport. /// @@ -362,6 +366,7 @@ mod tests { provider_id: "internal".to_string(), base_url: "https://llm.internal/v1".to_string(), api_key: "k".to_string(), + extra_headers: Vec::new(), }; let route = build_route(&choice, "m").unwrap(); assert_eq!( @@ -371,15 +376,31 @@ mod tests { } #[test] - fn browser_use_provider_options_tag_rust_agent_requests() { + fn openai_compatible_custom_applies_extra_headers() { + let choice = ProviderChoice::OpenAiCompatibleCustom { + provider_id: "browser-use".to_string(), + base_url: "https://llm.api.browser-use.com/v1".to_string(), + api_key: "k".to_string(), + extra_headers: vec![( + "x-browser-use-request-type".to_string(), + "rust_agent".to_string(), + )], + }; + let route = build_route(&choice, "bu-3-max").unwrap(); + + assert_eq!( + header(&route, "x-browser-use-request-type").as_deref(), + Some("rust_agent") + ); + } + + #[test] + fn browser_use_provider_options_do_not_tag_request_body() { let mut req = LlmRequest::new("bu-3-max", "browseruse"); apply_browser_use_provider_options("browser-use", &mut req); - assert_eq!( - req.provider_options, - Some(serde_json::json!({ "request_type": "rust_agent" })) - ); + assert_eq!(req.provider_options, None); } /// Only the `Codex` variant targets chatgpt.com: the env-keyed providers never @@ -400,6 +421,7 @@ mod tests { provider_id: "x".into(), base_url: "https://llm.internal/v1".into(), api_key: "k".into(), + extra_headers: Vec::new(), }, ] { let url = build_route(&choice, "m").unwrap().endpoint.url(); From 9e85b9f0d70a2e650b19e004a7867caeb7639cc0 Mon Sep 17 00:00:00 2001 From: MagMueller Date: Mon, 8 Jun 2026 14:32:39 -0700 Subject: [PATCH 3/5] forward rust output schemas --- crates/browser-use-agent/src/compact/tests.rs | 1 + .../src/compact/threshold_tests.rs | 1 + .../browser-use-agent/src/entrypoint/mod.rs | 19 ++++++++ .../src/entrypoint/provider.rs | 1 + .../browser-use-agent/src/events/map_tests.rs | 1 + crates/browser-use-agent/src/events/mod.rs | 1 + .../src/turn/fusion_tests.rs | 1 + .../browser-use-agent/src/turn/loop_tests.rs | 1 + .../browser-use-agent/src/turn/model_path.rs | 45 ++++++++++++++++++- crates/browser-use-agent/src/turn/sampling.rs | 1 + .../src/turn/sampling_tests.rs | 1 + .../src/protocols/openai_chat.rs | 40 ++++++++++++++++- .../src/protocols/openai_responses.rs | 37 +++++++++++++++ 13 files changed, 148 insertions(+), 2 deletions(-) diff --git a/crates/browser-use-agent/src/compact/tests.rs b/crates/browser-use-agent/src/compact/tests.rs index 8905ec56..3a0f41a0 100644 --- a/crates/browser-use-agent/src/compact/tests.rs +++ b/crates/browser-use-agent/src/compact/tests.rs @@ -473,6 +473,7 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, + response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/compact/threshold_tests.rs b/crates/browser-use-agent/src/compact/threshold_tests.rs index ca86265e..7a977ae1 100644 --- a/crates/browser-use-agent/src/compact/threshold_tests.rs +++ b/crates/browser-use-agent/src/compact/threshold_tests.rs @@ -255,6 +255,7 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, + response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/entrypoint/mod.rs b/crates/browser-use-agent/src/entrypoint/mod.rs index 4c743a69..56ac07f7 100644 --- a/crates/browser-use-agent/src/entrypoint/mod.rs +++ b/crates/browser-use-agent/src/entrypoint/mod.rs @@ -2485,6 +2485,7 @@ fn turn_ctx(session_id: &SessionId, config: &ProviderRunConfig) -> TurnCtx { .browser_mode .as_deref() .map(crate::prompts::browser_mode_instruction), + response_format: config.options.final_output_json_schema.clone(), turn_idx: 0, attempt: 0, } @@ -3261,6 +3262,23 @@ mod tests { } } + #[test] + fn turn_ctx_carries_final_output_schema_as_response_format() { + let schema = serde_json::json!({ + "type": "object", + "properties": { "answer": { "type": "string" } }, + "required": ["answer"] + }); + let config = ProviderRunConfig::new(ProviderBackend::BrowserUse, "bu-3-max").with_options( + AgentRunOptions::default().with_final_output_json_schema(schema.clone(), true), + ); + + let ctx = turn_ctx(&SessionId("sess-schema".to_string()), &config); + + assert_eq!(ctx.provider, "browseruse"); + assert_eq!(ctx.response_format, Some(schema)); + } + /// A tempdir-backed `SharedStore` with a fresh session row (the `events` table /// has a FK on `sessions(id)`, so the session must exist before we append). /// Returns the `TempDir` so the caller keeps the on-disk sqlite db alive. @@ -5850,6 +5868,7 @@ mod tests { provider: "fake".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, + response_format: None, turn_idx: 0, attempt: 0, }; diff --git a/crates/browser-use-agent/src/entrypoint/provider.rs b/crates/browser-use-agent/src/entrypoint/provider.rs index 47b5c2ed..d07d3657 100644 --- a/crates/browser-use-agent/src/entrypoint/provider.rs +++ b/crates/browser-use-agent/src/entrypoint/provider.rs @@ -2691,6 +2691,7 @@ mod tests { provider: "p".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, + response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/events/map_tests.rs b/crates/browser-use-agent/src/events/map_tests.rs index 667a685d..5e348fe0 100644 --- a/crates/browser-use-agent/src/events/map_tests.rs +++ b/crates/browser-use-agent/src/events/map_tests.rs @@ -18,6 +18,7 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, + response_format: None, turn_idx: 3, attempt: 0, } diff --git a/crates/browser-use-agent/src/events/mod.rs b/crates/browser-use-agent/src/events/mod.rs index d4f0d3df..058077f7 100644 --- a/crates/browser-use-agent/src/events/mod.rs +++ b/crates/browser-use-agent/src/events/mod.rs @@ -53,6 +53,7 @@ pub struct TurnCtx { pub provider: String, pub base_instructions: String, pub browser_mode_instruction: Option, + pub response_format: Option, pub turn_idx: usize, pub attempt: usize, } diff --git a/crates/browser-use-agent/src/turn/fusion_tests.rs b/crates/browser-use-agent/src/turn/fusion_tests.rs index 1b467474..f1ec9d9a 100644 --- a/crates/browser-use-agent/src/turn/fusion_tests.rs +++ b/crates/browser-use-agent/src/turn/fusion_tests.rs @@ -225,6 +225,7 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, + response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/turn/loop_tests.rs b/crates/browser-use-agent/src/turn/loop_tests.rs index 06a2097d..54a8d46e 100644 --- a/crates/browser-use-agent/src/turn/loop_tests.rs +++ b/crates/browser-use-agent/src/turn/loop_tests.rs @@ -228,6 +228,7 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, + response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/turn/model_path.rs b/crates/browser-use-agent/src/turn/model_path.rs index 6854a6e7..4b67a4e1 100644 --- a/crates/browser-use-agent/src/turn/model_path.rs +++ b/crates/browser-use-agent/src/turn/model_path.rs @@ -30,6 +30,7 @@ use browser_use_llm::providers::{ }; use browser_use_llm::route::{Auth, ModelClient, Route}; use browser_use_llm::schema::{ContentPart, LlmRequest, Message, MessageRole, SystemPart}; +use serde_json::{Map, Value}; use crate::events::{EventSink, TurnCtx}; use crate::turn::sampling::{ModelClientTransport, ModelSamplingDriver}; @@ -263,7 +264,24 @@ pub fn build_transport( ModelClientTransport::new(client, route, req) } -pub(crate) fn apply_browser_use_provider_options(_provider: &str, _req: &mut LlmRequest) {} +pub(crate) fn apply_browser_use_provider_options(provider: &str, req: &mut LlmRequest) { + let normalized = provider.trim().to_ascii_lowercase().replace('_', "-"); + if normalized != "browser-use" && normalized != "browseruse" { + return; + } + let Some(output_format) = req.response_format.clone() else { + return; + }; + + let mut options = match req.provider_options.take() { + Some(Value::Object(options)) => options, + _ => Map::new(), + }; + options + .entry("output_format".to_string()) + .or_insert(output_format); + req.provider_options = Some(Value::Object(options)); +} /// Build the production text-only [`ModelSamplingDriver`] over a live transport. /// @@ -403,6 +421,31 @@ mod tests { assert_eq!(req.provider_options, None); } + #[test] + fn browser_use_provider_options_forward_response_format_as_output_format() { + let mut req = LlmRequest::new("bu-3-max", "browseruse"); + req.response_format = Some(serde_json::json!({ + "type": "object", + "properties": { "answer": { "type": "string" } }, + "required": ["answer"] + })); + + apply_browser_use_provider_options("browser-use", &mut req); + + assert_eq!( + req.provider_options + .as_ref() + .and_then(|options| options.get("output_format")), + req.response_format.as_ref() + ); + assert_eq!( + req.provider_options + .as_ref() + .and_then(|options| options.get("request_type")), + None + ); + } + /// Only the `Codex` variant targets chatgpt.com: the env-keyed providers never /// route to the codex backend, while `Codex` does (and only it). #[test] diff --git a/crates/browser-use-agent/src/turn/sampling.rs b/crates/browser-use-agent/src/turn/sampling.rs index 5680b67e..8f8cc15a 100644 --- a/crates/browser-use-agent/src/turn/sampling.rs +++ b/crates/browser-use-agent/src/turn/sampling.rs @@ -1074,6 +1074,7 @@ fn build_request(ctx: &TurnCtx, input: Vec) -> LlmRequest { base_system.cache = Some(CacheHint::Ephemeral); req.system.push(base_system); req.messages = input; + req.response_format = ctx.response_format.clone(); if let Some(instruction) = ctx.browser_mode_instruction.as_deref() { req.messages.insert( 0, diff --git a/crates/browser-use-agent/src/turn/sampling_tests.rs b/crates/browser-use-agent/src/turn/sampling_tests.rs index 341c1c2e..1d5e3f92 100644 --- a/crates/browser-use-agent/src/turn/sampling_tests.rs +++ b/crates/browser-use-agent/src/turn/sampling_tests.rs @@ -126,6 +126,7 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, + response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-llm/src/protocols/openai_chat.rs b/crates/browser-use-llm/src/protocols/openai_chat.rs index 4a9577a5..72929b72 100644 --- a/crates/browser-use-llm/src/protocols/openai_chat.rs +++ b/crates/browser-use-llm/src/protocols/openai_chat.rs @@ -102,6 +102,20 @@ impl Protocol for OpenAiChatProtocol { apply_generation(&mut body, &req.generation); + if let Some(schema) = &req.response_format { + body.insert( + "response_format".to_string(), + json!({ + "type": "json_schema", + "json_schema": { + "name": "final_output", + "strict": true, + "schema": schema, + } + }), + ); + } + if let Some(Value::Object(provider_options)) = &req.provider_options { for (key, value) in provider_options { body.entry(key.clone()).or_insert_with(|| value.clone()); @@ -717,7 +731,7 @@ fn parse_usage(usage: &Value) -> Usage { #[cfg(test)] mod tests { use super::*; - use crate::schema::{LlmRequest, SystemPart, ToolDefinition}; + use crate::schema::{LlmRequest, Message, SystemPart, ToolDefinition}; fn frame(data: &str) -> SseFrame { SseFrame { @@ -829,6 +843,30 @@ mod tests { assert_eq!(body["model"], "gpt-4o"); } + #[test] + fn build_body_lowers_response_format_to_json_schema() { + let mut req = LlmRequest::new("gpt-4o", "openai"); + req.messages.push(Message::user_text("answer")); + req.response_format = Some(json!({ + "type": "object", + "properties": { "answer": { "type": "string" } }, + "required": ["answer"], + "additionalProperties": false + })); + + let body = OpenAiChatProtocol::new().build_body(&req).unwrap(); + + assert_eq!(body["response_format"]["type"], json!("json_schema")); + assert_eq!( + body["response_format"]["json_schema"]["schema"], + req.response_format.unwrap() + ); + assert_eq!( + body["response_format"]["json_schema"]["strict"], + json!(true) + ); + } + #[test] fn build_body_preserves_user_image_content() { let mut req = LlmRequest::new("gpt-4o", "openai"); diff --git a/crates/browser-use-llm/src/protocols/openai_responses.rs b/crates/browser-use-llm/src/protocols/openai_responses.rs index b71deb6a..c03b0d3d 100644 --- a/crates/browser-use-llm/src/protocols/openai_responses.rs +++ b/crates/browser-use-llm/src/protocols/openai_responses.rs @@ -95,6 +95,20 @@ impl Protocol for OpenAiResponsesProtocol { } } + if let Some(schema) = &request.response_format { + body.insert( + "text".to_string(), + json!({ + "format": { + "type": "json_schema", + "name": "final_output", + "strict": true, + "schema": schema, + } + }), + ); + } + Ok(Value::Object(body)) } @@ -1267,6 +1281,29 @@ mod tests { assert_eq!(namespace_tools[1]["name"], json!("wait_agent")); } + #[test] + fn response_format_lowers_to_text_json_schema() { + let mut request = LlmRequest::new("gpt-5.1", "openai"); + request.messages.push(Message::user_text("answer")); + request.response_format = Some(json!({ + "type": "object", + "properties": { "answer": { "type": "string" } }, + "required": ["answer"], + "additionalProperties": false + })); + + let body = OpenAiResponsesProtocol::new() + .build_body(&request) + .expect("build_body"); + + assert_eq!(body["text"]["format"]["type"], json!("json_schema")); + assert_eq!( + body["text"]["format"]["schema"], + request.response_format.unwrap() + ); + assert_eq!(body["text"]["format"]["strict"], json!(true)); + } + #[test] fn assistant_tool_call_history_preserves_namespace_metadata() { let mut request = LlmRequest::new("gpt-5.1-codex", "openai"); From dd7252977e792c5e73fb0836c724104d8d960335 Mon Sep 17 00:00:00 2001 From: MagMueller Date: Mon, 8 Jun 2026 14:46:44 -0700 Subject: [PATCH 4/5] Revert "forward rust output schemas" This reverts commit 9e85b9f0d70a2e650b19e004a7867caeb7639cc0. --- crates/browser-use-agent/src/compact/tests.rs | 1 - .../src/compact/threshold_tests.rs | 1 - .../browser-use-agent/src/entrypoint/mod.rs | 19 -------- .../src/entrypoint/provider.rs | 1 - .../browser-use-agent/src/events/map_tests.rs | 1 - crates/browser-use-agent/src/events/mod.rs | 1 - .../src/turn/fusion_tests.rs | 1 - .../browser-use-agent/src/turn/loop_tests.rs | 1 - .../browser-use-agent/src/turn/model_path.rs | 45 +------------------ crates/browser-use-agent/src/turn/sampling.rs | 1 - .../src/turn/sampling_tests.rs | 1 - .../src/protocols/openai_chat.rs | 40 +---------------- .../src/protocols/openai_responses.rs | 37 --------------- 13 files changed, 2 insertions(+), 148 deletions(-) diff --git a/crates/browser-use-agent/src/compact/tests.rs b/crates/browser-use-agent/src/compact/tests.rs index 3a0f41a0..8905ec56 100644 --- a/crates/browser-use-agent/src/compact/tests.rs +++ b/crates/browser-use-agent/src/compact/tests.rs @@ -473,7 +473,6 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, - response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/compact/threshold_tests.rs b/crates/browser-use-agent/src/compact/threshold_tests.rs index 7a977ae1..ca86265e 100644 --- a/crates/browser-use-agent/src/compact/threshold_tests.rs +++ b/crates/browser-use-agent/src/compact/threshold_tests.rs @@ -255,7 +255,6 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, - response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/entrypoint/mod.rs b/crates/browser-use-agent/src/entrypoint/mod.rs index 56ac07f7..4c743a69 100644 --- a/crates/browser-use-agent/src/entrypoint/mod.rs +++ b/crates/browser-use-agent/src/entrypoint/mod.rs @@ -2485,7 +2485,6 @@ fn turn_ctx(session_id: &SessionId, config: &ProviderRunConfig) -> TurnCtx { .browser_mode .as_deref() .map(crate::prompts::browser_mode_instruction), - response_format: config.options.final_output_json_schema.clone(), turn_idx: 0, attempt: 0, } @@ -3262,23 +3261,6 @@ mod tests { } } - #[test] - fn turn_ctx_carries_final_output_schema_as_response_format() { - let schema = serde_json::json!({ - "type": "object", - "properties": { "answer": { "type": "string" } }, - "required": ["answer"] - }); - let config = ProviderRunConfig::new(ProviderBackend::BrowserUse, "bu-3-max").with_options( - AgentRunOptions::default().with_final_output_json_schema(schema.clone(), true), - ); - - let ctx = turn_ctx(&SessionId("sess-schema".to_string()), &config); - - assert_eq!(ctx.provider, "browseruse"); - assert_eq!(ctx.response_format, Some(schema)); - } - /// A tempdir-backed `SharedStore` with a fresh session row (the `events` table /// has a FK on `sessions(id)`, so the session must exist before we append). /// Returns the `TempDir` so the caller keeps the on-disk sqlite db alive. @@ -5868,7 +5850,6 @@ mod tests { provider: "fake".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, - response_format: None, turn_idx: 0, attempt: 0, }; diff --git a/crates/browser-use-agent/src/entrypoint/provider.rs b/crates/browser-use-agent/src/entrypoint/provider.rs index d07d3657..47b5c2ed 100644 --- a/crates/browser-use-agent/src/entrypoint/provider.rs +++ b/crates/browser-use-agent/src/entrypoint/provider.rs @@ -2691,7 +2691,6 @@ mod tests { provider: "p".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, - response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/events/map_tests.rs b/crates/browser-use-agent/src/events/map_tests.rs index 5e348fe0..667a685d 100644 --- a/crates/browser-use-agent/src/events/map_tests.rs +++ b/crates/browser-use-agent/src/events/map_tests.rs @@ -18,7 +18,6 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, - response_format: None, turn_idx: 3, attempt: 0, } diff --git a/crates/browser-use-agent/src/events/mod.rs b/crates/browser-use-agent/src/events/mod.rs index 058077f7..d4f0d3df 100644 --- a/crates/browser-use-agent/src/events/mod.rs +++ b/crates/browser-use-agent/src/events/mod.rs @@ -53,7 +53,6 @@ pub struct TurnCtx { pub provider: String, pub base_instructions: String, pub browser_mode_instruction: Option, - pub response_format: Option, pub turn_idx: usize, pub attempt: usize, } diff --git a/crates/browser-use-agent/src/turn/fusion_tests.rs b/crates/browser-use-agent/src/turn/fusion_tests.rs index f1ec9d9a..1b467474 100644 --- a/crates/browser-use-agent/src/turn/fusion_tests.rs +++ b/crates/browser-use-agent/src/turn/fusion_tests.rs @@ -225,7 +225,6 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, - response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/turn/loop_tests.rs b/crates/browser-use-agent/src/turn/loop_tests.rs index 54a8d46e..06a2097d 100644 --- a/crates/browser-use-agent/src/turn/loop_tests.rs +++ b/crates/browser-use-agent/src/turn/loop_tests.rs @@ -228,7 +228,6 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, - response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-agent/src/turn/model_path.rs b/crates/browser-use-agent/src/turn/model_path.rs index 4b67a4e1..6854a6e7 100644 --- a/crates/browser-use-agent/src/turn/model_path.rs +++ b/crates/browser-use-agent/src/turn/model_path.rs @@ -30,7 +30,6 @@ use browser_use_llm::providers::{ }; use browser_use_llm::route::{Auth, ModelClient, Route}; use browser_use_llm::schema::{ContentPart, LlmRequest, Message, MessageRole, SystemPart}; -use serde_json::{Map, Value}; use crate::events::{EventSink, TurnCtx}; use crate::turn::sampling::{ModelClientTransport, ModelSamplingDriver}; @@ -264,24 +263,7 @@ pub fn build_transport( ModelClientTransport::new(client, route, req) } -pub(crate) fn apply_browser_use_provider_options(provider: &str, req: &mut LlmRequest) { - let normalized = provider.trim().to_ascii_lowercase().replace('_', "-"); - if normalized != "browser-use" && normalized != "browseruse" { - return; - } - let Some(output_format) = req.response_format.clone() else { - return; - }; - - let mut options = match req.provider_options.take() { - Some(Value::Object(options)) => options, - _ => Map::new(), - }; - options - .entry("output_format".to_string()) - .or_insert(output_format); - req.provider_options = Some(Value::Object(options)); -} +pub(crate) fn apply_browser_use_provider_options(_provider: &str, _req: &mut LlmRequest) {} /// Build the production text-only [`ModelSamplingDriver`] over a live transport. /// @@ -421,31 +403,6 @@ mod tests { assert_eq!(req.provider_options, None); } - #[test] - fn browser_use_provider_options_forward_response_format_as_output_format() { - let mut req = LlmRequest::new("bu-3-max", "browseruse"); - req.response_format = Some(serde_json::json!({ - "type": "object", - "properties": { "answer": { "type": "string" } }, - "required": ["answer"] - })); - - apply_browser_use_provider_options("browser-use", &mut req); - - assert_eq!( - req.provider_options - .as_ref() - .and_then(|options| options.get("output_format")), - req.response_format.as_ref() - ); - assert_eq!( - req.provider_options - .as_ref() - .and_then(|options| options.get("request_type")), - None - ); - } - /// Only the `Codex` variant targets chatgpt.com: the env-keyed providers never /// route to the codex backend, while `Codex` does (and only it). #[test] diff --git a/crates/browser-use-agent/src/turn/sampling.rs b/crates/browser-use-agent/src/turn/sampling.rs index 8f8cc15a..5680b67e 100644 --- a/crates/browser-use-agent/src/turn/sampling.rs +++ b/crates/browser-use-agent/src/turn/sampling.rs @@ -1074,7 +1074,6 @@ fn build_request(ctx: &TurnCtx, input: Vec) -> LlmRequest { base_system.cache = Some(CacheHint::Ephemeral); req.system.push(base_system); req.messages = input; - req.response_format = ctx.response_format.clone(); if let Some(instruction) = ctx.browser_mode_instruction.as_deref() { req.messages.insert( 0, diff --git a/crates/browser-use-agent/src/turn/sampling_tests.rs b/crates/browser-use-agent/src/turn/sampling_tests.rs index 1d5e3f92..341c1c2e 100644 --- a/crates/browser-use-agent/src/turn/sampling_tests.rs +++ b/crates/browser-use-agent/src/turn/sampling_tests.rs @@ -126,7 +126,6 @@ fn ctx() -> TurnCtx { provider: "openai".to_string(), base_instructions: crate::prompts::browser_agent_system_prompt(), browser_mode_instruction: None, - response_format: None, turn_idx: 0, attempt: 0, } diff --git a/crates/browser-use-llm/src/protocols/openai_chat.rs b/crates/browser-use-llm/src/protocols/openai_chat.rs index 72929b72..4a9577a5 100644 --- a/crates/browser-use-llm/src/protocols/openai_chat.rs +++ b/crates/browser-use-llm/src/protocols/openai_chat.rs @@ -102,20 +102,6 @@ impl Protocol for OpenAiChatProtocol { apply_generation(&mut body, &req.generation); - if let Some(schema) = &req.response_format { - body.insert( - "response_format".to_string(), - json!({ - "type": "json_schema", - "json_schema": { - "name": "final_output", - "strict": true, - "schema": schema, - } - }), - ); - } - if let Some(Value::Object(provider_options)) = &req.provider_options { for (key, value) in provider_options { body.entry(key.clone()).or_insert_with(|| value.clone()); @@ -731,7 +717,7 @@ fn parse_usage(usage: &Value) -> Usage { #[cfg(test)] mod tests { use super::*; - use crate::schema::{LlmRequest, Message, SystemPart, ToolDefinition}; + use crate::schema::{LlmRequest, SystemPart, ToolDefinition}; fn frame(data: &str) -> SseFrame { SseFrame { @@ -843,30 +829,6 @@ mod tests { assert_eq!(body["model"], "gpt-4o"); } - #[test] - fn build_body_lowers_response_format_to_json_schema() { - let mut req = LlmRequest::new("gpt-4o", "openai"); - req.messages.push(Message::user_text("answer")); - req.response_format = Some(json!({ - "type": "object", - "properties": { "answer": { "type": "string" } }, - "required": ["answer"], - "additionalProperties": false - })); - - let body = OpenAiChatProtocol::new().build_body(&req).unwrap(); - - assert_eq!(body["response_format"]["type"], json!("json_schema")); - assert_eq!( - body["response_format"]["json_schema"]["schema"], - req.response_format.unwrap() - ); - assert_eq!( - body["response_format"]["json_schema"]["strict"], - json!(true) - ); - } - #[test] fn build_body_preserves_user_image_content() { let mut req = LlmRequest::new("gpt-4o", "openai"); diff --git a/crates/browser-use-llm/src/protocols/openai_responses.rs b/crates/browser-use-llm/src/protocols/openai_responses.rs index c03b0d3d..b71deb6a 100644 --- a/crates/browser-use-llm/src/protocols/openai_responses.rs +++ b/crates/browser-use-llm/src/protocols/openai_responses.rs @@ -95,20 +95,6 @@ impl Protocol for OpenAiResponsesProtocol { } } - if let Some(schema) = &request.response_format { - body.insert( - "text".to_string(), - json!({ - "format": { - "type": "json_schema", - "name": "final_output", - "strict": true, - "schema": schema, - } - }), - ); - } - Ok(Value::Object(body)) } @@ -1281,29 +1267,6 @@ mod tests { assert_eq!(namespace_tools[1]["name"], json!("wait_agent")); } - #[test] - fn response_format_lowers_to_text_json_schema() { - let mut request = LlmRequest::new("gpt-5.1", "openai"); - request.messages.push(Message::user_text("answer")); - request.response_format = Some(json!({ - "type": "object", - "properties": { "answer": { "type": "string" } }, - "required": ["answer"], - "additionalProperties": false - })); - - let body = OpenAiResponsesProtocol::new() - .build_body(&request) - .expect("build_body"); - - assert_eq!(body["text"]["format"]["type"], json!("json_schema")); - assert_eq!( - body["text"]["format"]["schema"], - request.response_format.unwrap() - ); - assert_eq!(body["text"]["format"]["strict"], json!(true)); - } - #[test] fn assistant_tool_call_history_preserves_namespace_metadata() { let mut request = LlmRequest::new("gpt-5.1-codex", "openai"); From f11b65c6f96bf049b15139b3412cba6c9e297431 Mon Sep 17 00:00:00 2001 From: MagMueller Date: Mon, 8 Jun 2026 15:08:21 -0700 Subject: [PATCH 5/5] Preserve provider metadata on tool calls --- .../browser-use-agent/src/entrypoint/mod.rs | 1 + .../browser-use-agent/src/events/map_tests.rs | 1 + .../src/turn/fusion_tests.rs | 1 + crates/browser-use-agent/src/turn/sampling.rs | 6 +- .../src/turn/sampling_tests.rs | 2 + .../src/protocols/anthropic_messages.rs | 1 + .../src/protocols/openai_chat.rs | 86 +++++++++++++++++-- .../src/protocols/openai_responses.rs | 3 + .../src/protocols/utils/tool_stream.rs | 26 +++++- crates/browser-use-llm/src/route/client.rs | 7 +- crates/browser-use-llm/src/schema/event.rs | 2 + crates/browser-use-llm/src/schema/mod.rs | 1 + crates/browser-use-llm/src/tool_runtime.rs | 12 ++- 13 files changed, 134 insertions(+), 15 deletions(-) diff --git a/crates/browser-use-agent/src/entrypoint/mod.rs b/crates/browser-use-agent/src/entrypoint/mod.rs index 4c743a69..5cfc6e3f 100644 --- a/crates/browser-use-agent/src/entrypoint/mod.rs +++ b/crates/browser-use-agent/src/entrypoint/mod.rs @@ -5865,6 +5865,7 @@ mod tests { id: "call-1".to_string(), name: "shell".to_string(), namespace: None, + provider_metadata: None, input: serde_json::json!({ "command": ["echo", "fusion-ok"] }), }, LlmEvent::Finish { diff --git a/crates/browser-use-agent/src/events/map_tests.rs b/crates/browser-use-agent/src/events/map_tests.rs index 667a685d..175579a6 100644 --- a/crates/browser-use-agent/src/events/map_tests.rs +++ b/crates/browser-use-agent/src/events/map_tests.rs @@ -71,6 +71,7 @@ fn tool_call_maps_to_tool_started_with_parsed_arguments() { id: "c0".to_string(), name: "click".to_string(), namespace: None, + provider_metadata: None, input: json!({ "index": 5 }), }, ); diff --git a/crates/browser-use-agent/src/turn/fusion_tests.rs b/crates/browser-use-agent/src/turn/fusion_tests.rs index 1b467474..7aa5f685 100644 --- a/crates/browser-use-agent/src/turn/fusion_tests.rs +++ b/crates/browser-use-agent/src/turn/fusion_tests.rs @@ -251,6 +251,7 @@ fn tool_call_ev(id: &str, name: &str, input: serde_json::Value) -> LlmEvent { id: id.to_string(), name: name.to_string(), namespace: None, + provider_metadata: None, input, } } diff --git a/crates/browser-use-agent/src/turn/sampling.rs b/crates/browser-use-agent/src/turn/sampling.rs index 5680b67e..95f92092 100644 --- a/crates/browser-use-agent/src/turn/sampling.rs +++ b/crates/browser-use-agent/src/turn/sampling.rs @@ -603,6 +603,7 @@ impl ModelSamplingDriver { id, name, namespace, + provider_metadata, input, } => { // Capture the actual call (model order) so the fused dispatch can @@ -611,8 +612,9 @@ impl ModelSamplingDriver { id, name, input, - provider_metadata: namespace - .map(|namespace| serde_json::json!({ "namespace": namespace })), + provider_metadata: provider_metadata.or_else(|| { + namespace.map(|namespace| serde_json::json!({ "namespace": namespace })) + }), }); Ok(StreamProgress::Continue) } diff --git a/crates/browser-use-agent/src/turn/sampling_tests.rs b/crates/browser-use-agent/src/turn/sampling_tests.rs index 341c1c2e..c6c1257e 100644 --- a/crates/browser-use-agent/src/turn/sampling_tests.rs +++ b/crates/browser-use-agent/src/turn/sampling_tests.rs @@ -215,6 +215,7 @@ fn tool_call(name: &str) -> Result { id: "call-1".to_string(), name: name.to_string(), namespace: None, + provider_metadata: None, input: serde_json::json!({"arg": 1}), }) } @@ -224,6 +225,7 @@ fn tool_call_with_input(name: &str, input: serde_json::Value) -> Result Result { let mut tool_calls: Vec = Vec::new(); for part in &message.content { if let ContentPart::ToolCall { - id, name, input, .. + id, + name, + input, + provider_metadata, } = part { let arguments = serde_json::to_string(input).map_err(|e| { @@ -220,11 +223,17 @@ fn build_assistant_message(message: &Message) -> Result { format!("tool call arguments not serializable: {e}"), ) })?; - tool_calls.push(json!({ - "id": id, - "type": "function", - "function": { "name": name, "arguments": arguments }, - })); + let mut tool_call = Map::new(); + tool_call.insert("id".to_string(), json!(id)); + tool_call.insert("type".to_string(), json!("function")); + tool_call.insert( + "function".to_string(), + json!({ "name": name, "arguments": arguments }), + ); + if let Some(metadata) = provider_metadata { + tool_call.insert("provider_metadata".to_string(), metadata.clone()); + } + tool_calls.push(Value::Object(tool_call)); } } // Omit `content` for a tool-only assistant turn rather than sending an empty @@ -660,6 +669,8 @@ impl OpenAiChatStream { .and_then(|f| f.get("name")) .and_then(Value::as_str) .filter(|n| !n.is_empty()); + self.tools + .set_provider_metadata(&id, tool_call_provider_metadata(call)); let fragment = function .and_then(|f| f.get("arguments")) .and_then(Value::as_str) @@ -680,6 +691,13 @@ impl OpenAiChatStream { } } +fn tool_call_provider_metadata(call: &Value) -> Option { + call.get("provider_metadata") + .or_else(|| call.get("browser_use")) + .filter(|value| !value.is_null()) + .cloned() +} + /// Map a Chat Completions `finish_reason` string onto a [`FinishReason`]. fn map_finish_reason(reason: &str) -> FinishReason { match reason { @@ -815,6 +833,30 @@ mod tests { assert_eq!(body, expected); } + #[test] + fn build_body_replays_tool_call_provider_metadata() { + let mut req = LlmRequest::new("bu-3-max", "browser-use"); + req.messages.push(Message::new( + MessageRole::Assistant, + vec![ContentPart::ToolCall { + id: "call_1".into(), + name: "get_weather".into(), + input: json!({ "city": "Paris" }), + provider_metadata: Some(json!({ + "google": { "thought_signature": "sig-123" } + })), + }], + )); + + let body = OpenAiChatProtocol::new().build_body(&req).unwrap(); + let tool_call = &body["messages"][0]["tool_calls"][0]; + + assert_eq!( + tool_call["provider_metadata"], + json!({ "google": { "thought_signature": "sig-123" } }) + ); + } + #[test] fn build_body_merges_provider_options_without_overriding_core_fields() { let mut req = LlmRequest::new("gpt-4o", "browser-use"); @@ -1034,6 +1076,7 @@ mod tests { id: "call_42".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: json!({ "city": "Paris" }), }, LlmEvent::StepFinish { @@ -1049,6 +1092,37 @@ mod tests { assert_eq!(events, expected); } + #[test] + fn decoder_preserves_tool_call_provider_metadata() { + let mut stream = OpenAiChatProtocol::new().decoder(); + let mut events = Vec::new(); + events.extend( + stream + .on_frame(&frame( + r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_42","type":"function","provider_metadata":{"google":{"thought_signature":"sig-123"}},"function":{"name":"get_weather","arguments":""}}]}}]}"#, + )) + .unwrap(), + ); + events.extend( + stream + .on_frame(&frame( + r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{}"}}]}}]}"#, + )) + .unwrap(), + ); + events.extend(stream.finish().unwrap()); + + assert!(events.contains(&LlmEvent::ToolCall { + id: "call_42".into(), + name: "get_weather".into(), + namespace: None, + provider_metadata: Some(json!({ + "google": { "thought_signature": "sig-123" } + })), + input: json!({}), + })); + } + #[test] fn decoder_plain_text_finish_computes_total() { let mut stream = OpenAiChatProtocol::new().decoder(); diff --git a/crates/browser-use-llm/src/protocols/openai_responses.rs b/crates/browser-use-llm/src/protocols/openai_responses.rs index b71deb6a..52020ada 100644 --- a/crates/browser-use-llm/src/protocols/openai_responses.rs +++ b/crates/browser-use-llm/src/protocols/openai_responses.rs @@ -966,6 +966,7 @@ mod tests { id: "call_1".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: json!({ "city": "NYC" }), }, LlmEvent::StepFinish { @@ -1129,6 +1130,7 @@ mod tests { id: "call_9".into(), name: "do_it".into(), namespace: None, + provider_metadata: None, input: json!({}), })); assert!(matches!(events.last(), Some(LlmEvent::Finish { .. }))); @@ -1154,6 +1156,7 @@ mod tests { id: "call_9".into(), name: "spawn_agent".into(), namespace: Some("agents".into()), + provider_metadata: None, input: json!({ "task_name": "audit", "message": "check" }), })); } diff --git a/crates/browser-use-llm/src/protocols/utils/tool_stream.rs b/crates/browser-use-llm/src/protocols/utils/tool_stream.rs index 618afa35..4ebef65a 100644 --- a/crates/browser-use-llm/src/protocols/utils/tool_stream.rs +++ b/crates/browser-use-llm/src/protocols/utils/tool_stream.rs @@ -15,6 +15,7 @@ use crate::schema::{LlmError, LlmErrorReason, LlmEvent}; struct Accum { name: String, namespace: Option, + provider_metadata: Option, args: String, started: bool, ended: bool, @@ -68,6 +69,18 @@ impl ToolStream { vec![LlmEvent::ToolInputStart { id, name: resolved }] } + /// Attach opaque provider metadata to a call. The next request can replay it + /// without the core understanding provider-specific fields. + pub fn set_provider_metadata(&mut self, id: impl AsRef, metadata: Option) { + if metadata.is_none() { + return; + } + let e = self.entry(id.as_ref()); + if e.provider_metadata.is_none() { + e.provider_metadata = metadata; + } + } + /// Argument fragment. `name` may be supplied here for providers that only /// reveal the tool name on the first delta. Emits `ToolInputStart` (if not /// already started) followed by `ToolInputDelta`. @@ -102,10 +115,15 @@ impl ToolStream { /// No-op if the id is unknown or already ended. pub fn end(&mut self, id: impl AsRef) -> Result, LlmError> { let id = id.as_ref().to_string(); - let (name, namespace, args) = match self.calls.get_mut(&id) { + let (name, namespace, provider_metadata, args) = match self.calls.get_mut(&id) { Some(e) if !e.ended => { e.ended = true; - (e.name.clone(), e.namespace.clone(), e.args.clone()) + ( + e.name.clone(), + e.namespace.clone(), + e.provider_metadata.clone(), + e.args.clone(), + ) } _ => return Ok(Vec::new()), }; @@ -116,6 +134,7 @@ impl ToolStream { id, name, namespace, + provider_metadata, input, }, ]) @@ -203,6 +222,7 @@ mod tests { id: "c0".into(), name: "shell".into(), namespace: None, + provider_metadata: None, input: json!({ "command": ["ls"] }), }, ] @@ -236,6 +256,7 @@ mod tests { id: "0".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: json!({ "city": "NYC" }), }, ] @@ -253,6 +274,7 @@ mod tests { id: "c0".into(), name: "now".into(), namespace: None, + provider_metadata: None, input: json!({}), } ); diff --git a/crates/browser-use-llm/src/route/client.rs b/crates/browser-use-llm/src/route/client.rs index 37290888..c5d3d0eb 100644 --- a/crates/browser-use-llm/src/route/client.rs +++ b/crates/browser-use-llm/src/route/client.rs @@ -483,14 +483,16 @@ fn aggregate(events: Vec) -> LlmResponse { id, name, namespace, + provider_metadata, input, } => { tool_calls.push(ContentPart::ToolCall { id, name, input, - provider_metadata: namespace - .map(|namespace| serde_json::json!({ "namespace": namespace })), + provider_metadata: provider_metadata.or_else(|| { + namespace.map(|namespace| serde_json::json!({ "namespace": namespace })) + }), }); } LlmEvent::Finish { @@ -979,6 +981,7 @@ mod tests { id: "call_1".into(), name: "get_weather".into(), namespace: None, + provider_metadata: None, input: serde_json::json!({ "city": "NYC" }), }, LlmEvent::StepFinish { diff --git a/crates/browser-use-llm/src/schema/event.rs b/crates/browser-use-llm/src/schema/event.rs index f4aeb228..542f5163 100644 --- a/crates/browser-use-llm/src/schema/event.rs +++ b/crates/browser-use-llm/src/schema/event.rs @@ -87,6 +87,8 @@ pub enum LlmEvent { name: String, #[serde(default, skip_serializing_if = "Option::is_none")] namespace: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + provider_metadata: Option, input: Value, }, StepFinish { diff --git a/crates/browser-use-llm/src/schema/mod.rs b/crates/browser-use-llm/src/schema/mod.rs index e883f65c..020fc278 100644 --- a/crates/browser-use-llm/src/schema/mod.rs +++ b/crates/browser-use-llm/src/schema/mod.rs @@ -101,6 +101,7 @@ mod tests { id: "c0".into(), name: "shell".into(), namespace: None, + provider_metadata: None, input: json!({}), }, LlmEvent::Finish { diff --git a/crates/browser-use-llm/src/tool_runtime.rs b/crates/browser-use-llm/src/tool_runtime.rs index 92d9fed7..a07290f7 100644 --- a/crates/browser-use-llm/src/tool_runtime.rs +++ b/crates/browser-use-llm/src/tool_runtime.rs @@ -108,15 +108,18 @@ fn reduce_turn(events: Vec) -> TurnOutcome { id, name, namespace, + provider_metadata, input, } => { assistant_tool_parts.push(ContentPart::ToolCall { id: id.clone(), name: name.clone(), input: input.clone(), - provider_metadata: namespace - .clone() - .map(|namespace| serde_json::json!({ "namespace": namespace })), + provider_metadata: provider_metadata.clone().or_else(|| { + namespace + .clone() + .map(|namespace| serde_json::json!({ "namespace": namespace })) + }), }); tool_calls.push(ToolCall { id, @@ -383,6 +386,7 @@ mod tests { id: id.into(), name: "add".into(), namespace: None, + provider_metadata: None, input: json!({ "a": a, "b": b }), }, LlmEvent::Finish { @@ -536,6 +540,7 @@ mod tests { id: "bad_1".into(), name: "add".into(), namespace: None, + provider_metadata: None, input: json!({ "a": "oops", "b": 3 }), }, LlmEvent::Finish { @@ -582,6 +587,7 @@ mod tests { id: "u1".into(), name: "nonexistent".into(), namespace: None, + provider_metadata: None, input: json!({}), }, LlmEvent::Finish {