From 8b4ab5b057e8dd603fd6033ca877ffdaed9bf66f Mon Sep 17 00:00:00 2001 From: waple0820 Date: Tue, 12 May 2026 14:35:54 +0800 Subject: [PATCH] Add configurable browser API adapters --- browser_use/adapters.py | 247 ++++++++++++++++++++++++++++++++ browser_use/agent/service.py | 29 ++++ browser_use/tools/service.py | 121 ++++++++++++++++ browser_use/tools/views.py | 7 + tests/ci/test_agent_adapters.py | 65 +++++++++ 5 files changed, 469 insertions(+) create mode 100644 browser_use/adapters.py create mode 100644 tests/ci/test_agent_adapters.py diff --git a/browser_use/adapters.py b/browser_use/adapters.py new file mode 100644 index 0000000000..04e1038dd6 --- /dev/null +++ b/browser_use/adapters.py @@ -0,0 +1,247 @@ +"""Lightweight API adapter support for browser-use agents. + +Adapters describe website API endpoints that are safe to try before slower UI +navigation. They are intentionally data-driven so benchmark harnesses can pass +an adapter directory or generated catalog without changing library code. +""" + +from __future__ import annotations + +import json +import logging +import re +from datetime import datetime, timezone +from pathlib import Path +from urllib.parse import urlparse + +from pydantic import BaseModel, Field + +logger = logging.getLogger(__name__) + +_TWO_PART_TLDS = { + 'co.uk', + 'co.jp', + 'co.nz', + 'co.za', + 'co.kr', + 'co.in', + 'com.au', + 'com.br', + 'com.cn', + 'com.hk', + 'com.tw', + 'com.ar', + 'com.mx', + 'com.sg', + 'org.uk', + 'gov.uk', + 'net.au', + 'net.cn', + 'org.cn', + 'gov.cn', +} + + +class AdapterEndpoint(BaseModel): + """A known website API endpoint.""" + + domain: str = Field(description="Root domain this endpoint belongs to, e.g. 
def root_domain(url_or_domain: str) -> str:
    """Extract a registrable root domain from a URL or hostname.

    Accepts full URLs as well as bare hosts, optionally carrying a port, a path
    or userinfo (``example.com:8080/api``). The result is lower-cased and has no
    trailing dot, so ``Example.COM.`` and ``https://example.com/`` resolve to the
    same name. Hosts ending in one of ``_TWO_PART_TLDS`` keep three labels
    (``shop.example.co.uk`` -> ``example.co.uk``); IP literals are returned whole
    instead of being cut down to their last two octets.

    Args:
        url_or_domain: URL or hostname to reduce.

    Returns:
        The root domain, or the cleaned host itself when it has fewer than two
        dot-separated labels.
    """
    import ipaddress  # local import: only this helper needs it

    candidate = (url_or_domain or '').strip()
    # urlparse only fills ``hostname`` when it sees a network location, so give
    # scheme-less input a leading ``//``.
    target = candidate if '://' in candidate else f'//{candidate}'
    try:
        host = urlparse(target).hostname or candidate
    except ValueError:
        # Raised for malformed network locations (e.g. unbalanced IPv6 brackets).
        host = candidate
    # DNS names are case-insensitive and may be written with a trailing root dot.
    host = host.rstrip('.').lower()

    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass
    else:
        return host

    labels = host.split('.')
    if len(labels) >= 3 and '.'.join(labels[-2:]) in _TWO_PART_TLDS:
        return '.'.join(labels[-3:])
    return '.'.join(labels[-2:]) if len(labels) >= 2 else host
class AdapterLibrary:
    """Read and write API adapters stored as ``{base_dir}/{root_domain}.json`` files.

    Each file is a JSON object with a ``domain`` string and an ``endpoints`` list
    of serialized ``AdapterEndpoint`` records. Writes made through this class also
    stamp an ``updated_at`` ISO-8601 UTC timestamp.
    """

    # Usage bookkeeping kept on every endpoint record; re-seeding must not reset it.
    _COUNTER_FIELDS = ('usage_count', 'success_count', 'failure_count')
    _TIMESTAMP_FIELDS = ('last_used_at', 'last_used_successfully_at', 'last_used_failed_at')

    def __init__(self, base_dir: str | Path = 'adapters') -> None:
        """Remember *base_dir*; nothing is created on disk until the first write."""
        self._base_dir = Path(base_dir)

    @staticmethod
    def _as_int(value) -> int:
        """Coerce a stored counter to ``int``, treating missing or garbage values as 0."""
        try:
            return int(value or 0)
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _endpoint_key(raw: dict) -> tuple[str, str | None]:
        """Identify a record by (upper-cased method, url_pattern).

        ``mark_result`` matches on method as well as pattern, so two records that
        share a pattern but differ in method are distinct endpoints.
        """
        return (str(raw.get('method') or 'GET').upper(), raw.get('url_pattern'))

    def _empty_document(self, domain: str) -> dict:
        """Return the document used when no readable file exists for *domain*."""
        return {'domain': root_domain(domain), 'endpoints': []}

    def _domain_file(self, domain: str) -> Path:
        """Return the JSON path for the root domain of *domain* inside the base directory."""
        # Path separators (and colons, invalid on Windows) must never reach the
        # file name, otherwise a crafted domain could address files outside base_dir.
        safe_name = re.sub(r'[\\/:]', '_', root_domain(domain))
        return self._base_dir / f'{safe_name}.json'

    def _load_file(self, domain: str) -> dict:
        """Load the document for *domain*; unreadable or malformed files count as empty."""
        path = self._domain_file(domain)
        if not path.exists():
            return self._empty_document(domain)
        try:
            data = json.loads(path.read_text(encoding='utf-8'))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            logger.warning('AdapterLibrary: failed to parse %s, treating as empty: %s', path, exc)
            return self._empty_document(domain)
        if not isinstance(data, dict):
            logger.warning(
                'AdapterLibrary: failed to parse %s, treating as empty: %s',
                path,
                f'expected a JSON object, got {type(data).__name__}',
            )
            return self._empty_document(domain)
        if not isinstance(data.get('endpoints', []), list):
            data['endpoints'] = []
        return data

    def _save_file(self, domain: str, data: dict) -> None:
        """Serialize *data* to the file for *domain*, creating the directory if needed."""
        path = self._domain_file(domain)
        path.parent.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(data, indent=2, ensure_ascii=False)
        # Write to a sibling temp file and rename, so an interrupted write cannot
        # leave a truncated JSON document behind.
        tmp_path = path.with_name(f'{path.name}.tmp')
        tmp_path.write_text(payload, encoding='utf-8')
        tmp_path.replace(path)

    def find(self, url_or_domain: str) -> list[AdapterEndpoint]:
        """Return all known endpoints for the root domain of *url_or_domain*.

        Records that fail ``AdapterEndpoint`` validation are skipped (logged at
        debug level) rather than failing the whole lookup.
        """
        data = self._load_file(url_or_domain)
        endpoints = []
        for raw in data.get('endpoints', []):
            try:
                endpoints.append(AdapterEndpoint(**raw))
            except Exception as exc:  # deliberate best-effort: one bad record must not hide the rest
                logger.debug('AdapterLibrary: skipping malformed endpoint in %s: %s', url_or_domain, exc)
        return endpoints

    def load_all(self) -> dict[str, list[AdapterEndpoint]]:
        """Return ``{file stem: endpoints}`` for every ``*.json`` file in the base directory."""
        result: dict[str, list[AdapterEndpoint]] = {}
        if not self._base_dir.exists():
            return result
        for path in self._base_dir.glob('*.json'):
            result[path.stem] = self.find(path.stem)
        return result

    def _carry_over_usage(self, previous: dict, record: dict) -> None:
        """Merge usage bookkeeping from the stored *previous* record into *record* in place."""
        for field in self._COUNTER_FIELDS:
            # Counters only ever grow, so the larger value is the more complete one.
            record[field] = max(self._as_int(previous.get(field)), self._as_int(record.get(field)))
        for field in self._TIMESTAMP_FIELDS:
            if record.get(field) is None:
                record[field] = previous.get(field)
        if previous.get('discovered_at'):
            record['discovered_at'] = previous['discovered_at']

    def save_many(self, endpoints: list[AdapterEndpoint]) -> None:
        """Insert or replace *endpoints* in their per-domain files.

        Endpoints are grouped by root domain so each file is read and written
        once. A record replacing a stored one with the same method and
        ``url_pattern`` keeps the stored usage counters, last-used timestamps and
        ``discovered_at``; a freshly constructed endpoint would otherwise wipe
        the statistics collected by ``mark_result`` each time adapters are seeded.
        """
        grouped: dict[str, list[AdapterEndpoint]] = {}
        for endpoint in endpoints:
            grouped.setdefault(root_domain(endpoint.domain), []).append(endpoint)

        for domain, batch in grouped.items():
            data = self._load_file(domain)
            merged = {self._endpoint_key(raw): raw for raw in data.get('endpoints', []) if isinstance(raw, dict)}
            for endpoint in batch:
                record = endpoint.model_dump()
                key = self._endpoint_key(record)
                previous = merged.get(key)
                if previous is not None:
                    self._carry_over_usage(previous, record)
                merged[key] = record
            data['domain'] = domain
            data['endpoints'] = list(merged.values())
            data['updated_at'] = datetime.now(timezone.utc).isoformat()
            self._save_file(domain, data)

    def mark_result(self, url: str, method: str = 'GET', success: bool = True) -> int:
        """Record a browser_api_call result for matching adapters.

        Every stored endpoint of the URL's root domain whose method equals
        *method* (case-insensitively) and whose pattern matches *url* gets its
        success or failure counter incremented and its timestamps refreshed.

        Returns:
            The number of endpoint records updated; the file is rewritten only
            when this is non-zero.
        """
        domain = root_domain(url)
        data = self._load_file(domain)
        endpoints = data.get('endpoints', [])
        normalized_method = method.upper()
        matched = 0
        timestamp = datetime.now(timezone.utc).isoformat()
        for index, raw in enumerate(endpoints):
            try:
                endpoint = AdapterEndpoint(**raw)
            except Exception:  # malformed records are left untouched
                continue
            if endpoint.method.upper() != normalized_method or not endpoint.matches(url):
                continue
            updated = dict(raw)
            updated['last_used_at'] = timestamp
            if success:
                updated['success_count'] = self._as_int(raw.get('success_count')) + 1
                updated['last_used_successfully_at'] = timestamp
            else:
                updated['failure_count'] = self._as_int(raw.get('failure_count')) + 1
                updated['last_used_failed_at'] = timestamp
            endpoints[index] = updated
            matched += 1
        if matched:
            data['updated_at'] = timestamp
            self._save_file(domain, data)
        return matched
def load_adapter_catalog(catalog_path: str | Path) -> tuple[int, list[AdapterEndpoint]]:
    """Load a generated adapter catalog JSON file.

    The catalog is a JSON object with an optional integer ``source_version``
    (a missing or ``null`` value counts as 0) and an ``adapters`` list of
    endpoint records. Records that do not carry their own ``source_version``
    inherit the catalog's.

    Args:
        catalog_path: Path of the catalog file.

    Returns:
        ``(source_version, endpoints)``.

    Raises:
        ValueError: If the top level is not an object, ``adapters`` is not a
            list, or a record is not an object.
    """
    raw = json.loads(Path(catalog_path).read_text(encoding='utf-8'))
    if not isinstance(raw, dict):
        raise ValueError(f'Invalid adapter catalog at {catalog_path}: top level must be an object')
    source_version = int(raw.get('source_version') or 0)
    records = raw.get('adapters', [])
    if not isinstance(records, list):
        raise ValueError(f'Invalid adapter catalog at {catalog_path}: adapters must be a list')

    endpoints = []
    for index, record in enumerate(records):
        if not isinstance(record, dict):
            raise ValueError(f'Invalid adapter catalog at {catalog_path}: adapters[{index}] must be an object')
        # The record's own source_version, when present, wins over the catalog's.
        payload = {'source_version': source_version, **record}
        endpoints.append(AdapterEndpoint(**payload))
    return source_version, endpoints
def build_adapter_hint(adapters: list[AdapterEndpoint]) -> str:
    """Render the system-prompt section that advertises known API endpoints.

    Returns an empty string when *adapters* is empty. Otherwise the result is a
    fixed guidance preamble followed by two lines per endpoint: its method and
    example URL (flagged when it requires auth), then its description.
    """
    if not adapters:
        return ''

    preamble = (
        '## Prefer browser_api_call for known APIs',
        '',
        'These API endpoints are known for this site. Calling them directly can be faster and more reliable than navigating the UI.',
        'Try browser_api_call on the single most relevant endpoint first.',
        'If that API returns HTTP errors, auth/risk-control errors, repeated data, or insufficient data after one attempt, switch to browser UI navigation.',
        'Do not keep probing APIs once the visible page can answer the task.',
        'Never repeat the exact same API request.',
        '',
        'Available endpoints:',
    )

    entries: list[str] = []
    for ep in adapters:
        if ep.requires_auth:
            suffix = ' requires cookies'
        else:
            suffix = ''
        entries.extend(
            (
                f' [{ep.method}] {ep.example_url}{suffix}',
                f' -> {ep.description}',
            )
        )
    return '\n'.join([*preamble, *entries])
auto_enable_api_tool: bool = True, + enable_known_adapters: bool = False, + adapters_dir: str | Path | None = None, + adapter_catalog_path: str | Path | None = None, + known_adapters: list[dict[str, Any]] | None = None, # Initial agent run parameters sensitive_data: dict[str, str | dict[str, str]] | None = None, initial_actions: list[dict[str, dict[str, Any]]] | None = None, @@ -342,6 +349,28 @@ def _get_model_timeout(llm_model: BaseChatModel) -> int: self.skill_service = SkillService(skill_ids=skill_ids) + self.adapter_library = None + if enable_api_tool or enable_known_adapters or adapter_catalog_path or known_adapters: + from browser_use.adapters import AdapterLibrary, build_adapter_hint, find_adapters_for_task, seed_adapters + + self.adapter_library = AdapterLibrary(adapters_dir or 'adapters') + if enable_known_adapters or adapter_catalog_path or known_adapters: + seed_adapters( + self.adapter_library, + records=known_adapters, + catalog_path=adapter_catalog_path, + ) + relevant_adapters = find_adapters_for_task(task, self.adapter_library) + adapter_hint = build_adapter_hint(relevant_adapters) + if adapter_hint: + extend_system_message = f'{adapter_hint}\n\n{extend_system_message}' if extend_system_message else adapter_hint + if auto_enable_api_tool: + enable_api_tool = True + logger.info('AdapterLibrary: auto-enabled browser_api_call because known adapters matched the task') + logger.info('AdapterLibrary: injected %d known endpoint(s) into the system prompt', len(relevant_adapters)) + if enable_api_tool: + self.tools.register_browser_api_call(adapter_library=self.adapter_library) + # Structured output - use explicit param or detect from tools tools_output_model = self.tools.get_output_model() if output_model_schema is not None and tools_output_model is not None: diff --git a/browser_use/tools/service.py b/browser_use/tools/service.py index ea0302fb35..89d7314a74 100644 --- a/browser_use/tools/service.py +++ b/browser_use/tools/service.py @@ -4,6 +4,7 @@ import 
math import os from typing import Generic, TypeVar +from urllib.parse import urlsplit import anyio @@ -38,6 +39,7 @@ from browser_use.tools.registry.service import Registry from browser_use.tools.utils import get_click_description from browser_use.tools.views import ( + BrowserApiCallAction, ClickElementAction, ClickElementActionIndexOnly, CloseTabAction, @@ -63,6 +65,9 @@ logger = logging.getLogger(__name__) +_MAX_API_RESPONSE_CHARS = 8000 +_API_PRUNE_DEPTH = 4 + # Import EnhancedDOMTreeNode and rebuild event models that have forward references to it # This must be done after all imports are complete ClickElementEvent.model_rebuild() @@ -75,6 +80,23 @@ T = TypeVar('T', bound=BaseModel) +def _prune_deep_json(obj, depth: int = 0): + if depth >= _API_PRUNE_DEPTH: + if isinstance(obj, dict): + return f'{{...{len(obj)} keys}}' + if isinstance(obj, list): + return f'[...{len(obj)} items]' + return obj + if isinstance(obj, dict): + return {k: _prune_deep_json(v, depth + 1) for k, v in obj.items()} + if isinstance(obj, list): + head = [_prune_deep_json(v, depth + 1) for v in obj[:5]] + if len(obj) > 5: + head.append(f'...and {len(obj) - 5} more items') + return head + return obj + + # Global per-action timeout: last-resort guard against hung event handlers. # Individual CDP calls (Page.navigate etc.) have their own shorter timeouts, # but event-bus `await event` and `event_result()` calls have none — if a @@ -2032,6 +2054,105 @@ def action(self, description: str, **kwargs): """ return self.registry.action(description, **kwargs) + def register_browser_api_call(self, adapter_library=None) -> None: + """Register a browser-context API fetch action. + + The action runs ``fetch`` inside the current page so site cookies/session + are reused and the visible page is not navigated away from. 
def register_browser_api_call(self, adapter_library=None) -> None:
    """Register a browser-context API fetch action.

    The action runs ``fetch`` inside the current page so site cookies/session
    are reused and the visible page is not navigated away from. Registration is
    skipped when an action named ``browser_api_call`` already exists.

    Args:
        adapter_library: Optional object exposing
            ``mark_result(url=..., method=..., success=...)``. When given, the
            outcome of every executed request is reported to it; errors raised
            while recording are logged and otherwise ignored.
    """
    if 'browser_api_call' in self.registry.registry.actions:
        return

    # Calls allowed per (method, URL without query string) before the agent is
    # told to fall back to the UI.
    max_calls_per_endpoint = 2
    call_counts: dict[tuple[str, str], int] = {}

    def _record_adapter_result(url: str, method: str, success: bool) -> None:
        """Best-effort bookkeeping; must never fail the action itself."""
        if adapter_library is None:
            return
        try:
            adapter_library.mark_result(url=url, method=method, success=success)
        except Exception as exc:
            logger.warning(
                'browser_api_call failed to record adapter %s for %s %s: %s',
                'success' if success else 'failure',
                method,
                url[:80],
                exc,
            )

    @self.registry.action(
        (
            "Call a website's internal API directly from inside the current browser page, "
            'reusing its cookies/session. Use this only for known API endpoints from the system prompt; '
            'if it fails or returns insufficient data, switch to normal browser UI navigation.'
        ),
        param_model=BrowserApiCallAction,
    )
    async def browser_api_call(params: BrowserApiCallAction, browser_session: BrowserSession):
        method = params.method.upper()
        parsed = urlsplit(params.url)
        url_base = f'{parsed.scheme}://{parsed.netloc}{parsed.path}'
        # A GET and a POST to the same path are different requests, so the
        # method is part of the rate-limit key.
        call_key = (method, url_base)
        call_counts[call_key] = call_counts.get(call_key, 0) + 1
        if call_counts[call_key] > max_calls_per_endpoint:
            return ActionResult(
                error=(
                    f'This endpoint has been called {call_counts[call_key]} times. '
                    'Switch to browser UI navigation instead.'
                )
            )

        try:
            page = await browser_session.must_get_current_page()
            # NOTE(review): confirm page.evaluate accepts an `async (...) =>`
            # function expression and awaits the promise it returns.
            raw = await page.evaluate(
                """async (url, method, headers, body) => {
                    const opts = { method, credentials: 'include', headers };
                    if (body !== null && body !== undefined) opts.body = body;
                    const res = await fetch(url, opts);
                    const text = await res.text();
                    try { return { ok: res.ok, status: res.status, data: JSON.parse(text) }; }
                    catch (e) { return { ok: res.ok, status: res.status, data: text }; }
                }""",
                params.url,
                method,
                params.headers,
                params.body,
            )
        except Exception as exc:
            _record_adapter_result(params.url, method, False)
            logger.warning('browser_api_call %s %s failed: %s', method, params.url[:80], exc)
            return ActionResult(error=f'API call failed: {type(exc).__name__}. Use browser UI navigation instead.')

        result = raw
        if isinstance(raw, str) and raw:
            try:
                result = json.loads(raw)
            except ValueError:
                # Not JSON after all: hand the text back instead of crashing the action.
                result = raw
        if not isinstance(result, dict):
            return ActionResult(extracted_content=str(result))

        status = result.get('status')
        data = result.get('data')
        logger.info('browser_api_call %s %s -> HTTP %s', method, params.url[:80], status)
        if not result.get('ok'):
            _record_adapter_result(params.url, method, False)
            out = json.dumps({'error': f'HTTP {status}', 'data': data}, ensure_ascii=False)
            if len(out) > _MAX_API_RESPONSE_CHARS:
                out = out[:_MAX_API_RESPONSE_CHARS] + '\n... [truncated]'
            return ActionResult(error=out)

        if isinstance(data, dict):
            biz_code = data.get('code')
            # bool is a subclass of int; a JSON `true`/`false` is not an error code.
            is_numeric_code = isinstance(biz_code, int) and not isinstance(biz_code, bool)
            if is_numeric_code and biz_code not in (0, 200):
                _record_adapter_result(params.url, method, False)
                biz_msg = data.get('message') or data.get('msg') or ''
                return ActionResult(
                    error=(f'API returned error code {biz_code}: {biz_msg}. Switch to browser UI navigation instead.')
                )

        _record_adapter_result(params.url, method, True)
        out = json.dumps(_prune_deep_json(data), ensure_ascii=False, indent=2)
        if len(out) > _MAX_API_RESPONSE_CHARS:
            out = out[:_MAX_API_RESPONSE_CHARS] + (
                f'\n... [truncated, {len(out)} chars total; switch to browser UI if this is not useful]'
            )
        return ActionResult(extracted_content=out, long_term_memory=f'Called API {method} {params.url}')
# Parameter model of the `browser_api_call` action: one HTTP request that is
# issued with `fetch` from inside the current page.
# (Kept as comments on purpose: a class docstring would be picked up by the
# generated model schema.)
class BrowserApiCallAction(BaseModel):
    # Target of the request.
    url: str = Field(description='Full URL to fetch from inside the current browser page')
    # The action upper-cases this value before issuing the request.
    method: str = Field(default='GET', description='HTTP method, usually GET or POST')
    # Passed to fetch as its `headers` option.
    headers: dict[str, str] = Field(default_factory=dict, description='Extra request headers')
    # Only attached to the request when it is not None.
    body: str | None = Field(default=None, description='Optional request body string for POST/PUT requests')
'description': 'Returns the latest items.', + } + + +def test_agent_can_register_browser_api_tool_from_config(mock_llm, browser_session) -> None: + agent = Agent( + task='Use https://example.com to find the latest item', + llm=mock_llm, + browser_session=browser_session, + enable_api_tool=True, + ) + + assert 'browser_api_call' in agent.tools.registry.registry.actions + + +def test_agent_injects_adapter_hint_and_auto_enables_api_tool( + mock_llm, + browser_session, + tmp_path: Path, +) -> None: + agent = Agent( + task='Use https://example.com to find the latest item', + llm=mock_llm, + browser_session=browser_session, + enable_known_adapters=True, + adapters_dir=tmp_path / 'adapters', + known_adapters=[_adapter_record()], + ) + + system_prompt = agent.message_manager.system_prompt.content + assert 'Prefer browser_api_call for known APIs' in system_prompt + assert 'https://api.example.com/items?limit=5' in system_prompt + assert 'browser_api_call' in agent.tools.registry.registry.actions + + +def test_agent_keeps_api_tool_disabled_when_no_adapter_matches( + mock_llm, + browser_session, + tmp_path: Path, +) -> None: + agent = Agent( + task='Use https://other.example.org to find the latest item', + llm=mock_llm, + browser_session=browser_session, + enable_known_adapters=True, + adapters_dir=tmp_path / 'adapters', + known_adapters=[_adapter_record()], + ) + + assert 'Prefer browser_api_call for known APIs' not in agent.message_manager.system_prompt.content + assert 'browser_api_call' not in agent.tools.registry.registry.actions