From 76d838de0f22c9e7f9833f834b23043005480368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Stachowiak?= Date: Sun, 16 Nov 2025 12:02:45 +0100 Subject: [PATCH 1/2] Add OpenAI platform tools support with web_search integration - Add PlatformToolsOptions class with web_search configuration - Support openai_tools option for platform-provided tools - Add web_search_domains for domain filtering (up to 20 domains) - Add web_search_live to control live vs cached results - Add web_search_context_size for search context configuration - Add web_search_sources to include all retrieved URLs - Merge platform tools with user-defined tools in kwargs - Add web search documentation to README with examples Fix pyproject.toml license field syntax for modern setuptools --- README.md | 42 ++++++++++++++++++++++++ llm_openai.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 5 +-- 3 files changed, 126 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 164607b..1b9aee3 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,48 @@ Add `--options` to see a full list of options that can be provided to each model The `o3-streaming` model ID exists because o3 currently requires a verified organization in order to support streaming. If you have a verified organization you can use `o3-streaming` - everyone else should use `o3`. +## Web Search + +Enable the `web_search` OpenAI platform side tool to let models search the web using openai index. + +```bash +llm -m openai/gpt-5.1 -o openai_tools web_search "top 3 November 2025 news about Anthropic from leading news outlets" +``` +> Here are three of the biggest Anthropic stories from November 2025 (US time): +> - Nov 12, 2025 — Anthropic announces a $50 billion buildout of custom US data centers (Texas and New York) with Fluidstack to meet AI compute demand, coming online in 2026. 
([reuters.com](https://www.reuters.com/technology/anthropic-invest-50-billion-build-data-centers-us-2025-11-12/)) +> - Nov 13–14, 2025 — Anthropic says it disrupted a largely automated cyber‑espionage campaign it links to a Chinese state‑sponsored group that misused Claude Code to target ~30 organizations; coverage underscores the rise of AI‑driven attacks. ([apnews.com](https://apnews.com/article/4e7e5b1a7df946169c72c1df58f90295?utm_source=openai)) +> - Nov 11, 2025 — The Wall Street Journal reports Anthropic’s internal projections show a faster path to profitability than OpenAI (e.g., aiming to break even by 2028), highlighting sharply different spending strategies. ([wsj.com](https://www.wsj.com/tech/ai/openai-anthropic-profitability-e9f5bcd6?utm_source=openai)) + + +### Caveats + +- This is completly separate from local llm tools. +- You dont see whole tool_call/tool_response content. +- It costs additional API money (as of November $10 per 1k searches). + + +### Options + +- **`web_search_domains`**: Limit results to specific domains (comma-separated, up to 20): + ```bash + llm -m openai/gpt-5.1 -o openai_tools web_search \ + -o web_search_domains "openai.com,wikipedia.org" "Search query" + ``` + +- **`web_search_live`**: Control live internet access (default: true): + ```bash + llm -m openai/gpt-5.1 -o openai_tools web_search \ + -o web_search_live false "Cached results only" + ``` + +- **`web_search_sources`**: Include all retrieved URLs in sources field: + ```bash + llm -m openai/gpt-5.1 -o openai_tools web_search \ + -o web_search_sources true "Query with sources" + ``` + +**Note:** User location options (`user_location`) are not currently supported. + ## Development To set up this plugin locally, first checkout the code. 
Then create a new virtual environment: diff --git a/llm_openai.py b/llm_openai.py index 4013c31..d3953a8 100644 --- a/llm_openai.py +++ b/llm_openai.py @@ -151,6 +151,44 @@ class ReasoningOptions(Options): ) +class PlatformToolsOptions(Options): + openai_tools: Optional[str] = Field( + description=( + "Comma-separated list of OpenAI platform tools to enable (e.g., 'web_search'). " + "These are tools provided by OpenAI's API, not user-defined tools." + ), + default=None, + ) + web_search_domains: Optional[str] = Field( + description=( + "Comma-separated list of allowed domains for web_search tool (up to 20). " + "Example: 'openai.com,wikipedia.org'. Omit http/https prefix." + ), + default=None, + ) + web_search_live: Optional[bool] = Field( + description=( + "Enable live internet access for web_search. If False, uses only cached/indexed results. " + "Default is true (live access) if you do not set it (and is missing from request)." + ), + default=None, + ) + web_search_context_size: Optional[str] = Field( + description=( + "Private search_context_size parameter. TBD how it works, tested with 'low', 'medium' and 'high'. " + "Default is 'medium' (whatever it means) if you do not set it (and is missing from request)." + ), + default=None, + ) + web_search_sources: Optional[bool] = Field( + description=( + "Include all URLs retrieved during web search in the sources field. " + "When enabled, adds 'web_search_call.action.sources' to the include parameter." 
+ ), + default=None, + ) + + +class _SharedResponses: needs_key = "openai" key_env_var = "OPENAI_API_KEY" @@ -165,7 +203,7 @@ def __init__( self.model_name = model_name self.can_stream = streaming self.supports_schema = schemas - options = [BaseOptions] + options = [BaseOptions, PlatformToolsOptions] self.vision = vision if vision: self.attachment_types = { @@ -286,8 +324,39 @@ def _build_kwargs(self, prompt, conversation): if value is not None: kwargs[option] = value + # Handle platform tools (e.g., web_search) and user-defined tools + all_tools = [] + + # Add platform tools from options + openai_tools = getattr(prompt.options, "openai_tools", None) + if openai_tools: + openai_tools = [t.strip() for t in openai_tools.split(",") if t.strip()] + for tool_name in openai_tools: + tool_config = {"type": tool_name} + + # Configure web_search tool if specified + if tool_name == "web_search": + # Add allowed domains filter + domains = getattr(prompt.options, "web_search_domains", None) + if domains: + domain_list = [d.strip() for d in domains.split(",") if d.strip()] + if domain_list: + tool_config["filters"] = {"allowed_domains": domain_list} + + # defines context_size for web searches + context_size = getattr(prompt.options, "web_search_context_size", None) + if context_size: + tool_config["search_context_size"] = context_size + + # Forward the live web access setting; test against None so an explicit False is sent instead of dropped + live_access = getattr(prompt.options, "web_search_live", None) + if live_access is not None: + tool_config["external_web_access"] = live_access + + all_tools.append(tool_config) + + # Add user-defined tools (llm framework tools) if prompt.tools: - tool_defs = [] for tool in prompt.tools: if not getattr(tool, "name", None): continue @@ -295,7 +364,7 @@ def _build_kwargs(self, prompt, conversation): "type": "object", "properties": {}, } - tool_defs.append( + all_tools.append( { "type": "function", "name": tool.name, @@ -304,8 +373,17 @@ def _build_kwargs(self, prompt, conversation): "strict": False, } ) - if tool_defs: - 
kwargs["tools"] = tool_defs + + if all_tools: + kwargs["tools"] = all_tools + + # Handle web_search_sources option + if getattr(prompt.options, "web_search_sources", None): + if "include" not in kwargs: + kwargs["include"] = [] + if "web_search_call.action.sources" not in kwargs["include"]: + kwargs["include"].append("web_search_call.action.sources") + if self.supports_schema and prompt.schema: kwargs["text"] = { "format": { diff --git a/pyproject.toml b/pyproject.toml index 8f5874d..094a572 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,10 +4,7 @@ version = "0.7" description = "LLM plugin for OpenAI" readme = "README.md" authors = [{name = "Simon Willison"}] -license = {text = "Apache-2.0"} -classifiers = [ - "License :: OSI Approved :: Apache Software License" -] +license = "Apache-2.0" requires-python = ">=3.9" dependencies = [ "llm>=0.23", From 3cd7ef090d3fc65cc5df6679fafc07b403ddbdcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Stachowiak?= Date: Mon, 22 Dec 2025 00:09:39 +0100 Subject: [PATCH 2/2] fix small typos in README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1b9aee3..bbfa30e 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,8 @@ llm -m openai/gpt-5.1 -o openai_tools web_search "top 3 November 2025 news about ### Caveats -- This is completly separate from local llm tools. -- You dont see whole tool_call/tool_response content. +- This is completely separate from local llm tools. +- You don't see whole tool_call/tool_response content. - It costs additional API money (as of November $10 per 1k searches).