diff --git a/README.md b/README.md index 164607b..e245be7 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ This plugin **is a preview**. LLM currently ships with OpenAI models as part of This plugin implements those same models using the new [Responses API](https://platform.openai.com/docs/api-reference/responses). -Currently the only reason to use this plugin over the LLM defaults is to access [o1-pro](https://platform.openai.com/docs/models/o1-pro), which can only be used via the Responses API. +Use this plugin when you want direct access to the Responses API from LLM, including newer model IDs, reasoning controls, server-side conversation chaining, and OpenAI-hosted tools such as web search. ## Installation @@ -21,10 +21,10 @@ llm install llm-openai-plugin ``` ## Usage -To run a prompt against `o1-pro` do this: +To run a prompt against `gpt-5.4` do this: ```bash -llm -m openai/o1-pro "Convince me that pelicans are the most noble of birds" +llm -m openai/gpt-5.4 "Convince me that pelicans are the most noble of birds" ``` Run this to see a full list of models - they start with the `openai/` prefix: @@ -46,14 +46,24 @@ cog.out( ) ]]] --> ``` +OpenAI: openai/gpt-4 +OpenAI: openai/gpt-4-turbo +OpenAI: openai/gpt-4-turbo-2024-04-09 OpenAI: openai/gpt-4o +OpenAI: openai/gpt-4o-2024-05-13 +OpenAI: openai/gpt-4o-2024-08-06 +OpenAI: openai/gpt-4o-2024-11-20 OpenAI: openai/gpt-4o-mini +OpenAI: openai/gpt-4o-mini-2024-07-18 OpenAI: openai/gpt-4.5-preview OpenAI: openai/gpt-4.5-preview-2025-02-27 OpenAI: openai/o3-mini +OpenAI: openai/o3-mini-2025-01-31 OpenAI: openai/o1-mini OpenAI: openai/o1 +OpenAI: openai/o1-2024-12-17 OpenAI: openai/o1-pro +OpenAI: openai/o1-pro-2025-03-19 OpenAI: openai/gpt-4.1 OpenAI: openai/gpt-4.1-2025-04-14 OpenAI: openai/gpt-4.1-mini @@ -68,7 +78,9 @@ OpenAI: openai/o4-mini OpenAI: openai/o4-mini-2025-04-16 OpenAI: openai/codex-mini-latest OpenAI: openai/o3-pro +OpenAI: openai/o3-pro-2025-06-10 OpenAI: openai/gpt-5 +OpenAI: 
openai/gpt-5-chat-latest OpenAI: openai/gpt-5-mini OpenAI: openai/gpt-5-nano OpenAI: openai/gpt-5-2025-08-07 @@ -77,12 +89,47 @@ OpenAI: openai/gpt-5-nano-2025-08-07 OpenAI: openai/gpt-5-codex OpenAI: openai/gpt-5-pro OpenAI: openai/gpt-5-pro-2025-10-06 +OpenAI: openai/gpt-5.1 +OpenAI: openai/gpt-5.1-2025-11-13 +OpenAI: openai/gpt-5.1-chat-latest +OpenAI: openai/gpt-5.1-codex +OpenAI: openai/gpt-5.1-codex-mini +OpenAI: openai/gpt-5.1-codex-max +OpenAI: openai/gpt-5.2 +OpenAI: openai/gpt-5.2-2025-12-11 +OpenAI: openai/gpt-5.2-chat-latest +OpenAI: openai/gpt-5.2-codex +OpenAI: openai/gpt-5.2-pro +OpenAI: openai/gpt-5.2-pro-2025-12-11 +OpenAI: openai/gpt-5.3-chat-latest +OpenAI: openai/gpt-5.3-codex +OpenAI: openai/gpt-5.4 +OpenAI: openai/gpt-5.4-2026-03-05 +OpenAI: openai/gpt-5.4-pro +OpenAI: openai/gpt-5.4-pro-2026-03-05 ``` Add `--options` to see a full list of options that can be provided to each model. The `o3-streaming` model ID exists because o3 currently requires a verified organization in order to support streaming. If you have a verified organization you can use `o3-streaming` - everyone else should use `o3`. +## Web Search + +Enable OpenAI's hosted `web_search` tool like this: + +```bash +llm -m openai/gpt-5.4 \ + -o openai_tools web_search \ + "Top 3 OpenAI stories from this week" +``` + +Optional search controls: + +- `-o web_search_domains openai.com,wikipedia.org` +- `-o web_search_live false` +- `-o web_search_context_size high` +- `-o web_search_sources true` + ## Development To set up this plugin locally, first checkout the code. 
Then create a new virtual environment: diff --git a/llm_openai.py b/llm_openai.py index 4013c31..13dada8 100644 --- a/llm_openai.py +++ b/llm_openai.py @@ -19,14 +19,29 @@ @hookimpl def register_models(register): models = { + # Older GPT models supported by the Responses API + "gpt-4": {"schemas": False}, + "gpt-4-turbo": {"vision": True, "schemas": False}, + "gpt-4-turbo-2024-04-09": {"vision": True, "schemas": False}, "gpt-4o": {"vision": True}, + "gpt-4o-2024-05-13": {"vision": True}, + "gpt-4o-2024-08-06": {"vision": True}, + "gpt-4o-2024-11-20": {"vision": True}, "gpt-4o-mini": {"vision": True}, + "gpt-4o-mini-2024-07-18": {"vision": True}, "gpt-4.5-preview": {"vision": True}, "gpt-4.5-preview-2025-02-27": {"vision": True}, "o3-mini": {"reasoning": True}, + "o3-mini-2025-01-31": {"reasoning": True}, "o1-mini": {"reasoning": True, "schemas": False}, "o1": {"reasoning": True, "vision": True}, + "o1-2024-12-17": {"reasoning": True, "vision": True}, "o1-pro": {"reasoning": True, "vision": True, "streaming": False}, + "o1-pro-2025-03-19": { + "reasoning": True, + "vision": True, + "streaming": False, + }, # GPT-4.1 (all have vision: true, streaming: true) "gpt-4.1": {"vision": True}, "gpt-4.1-2025-04-14": {"vision": True}, @@ -44,9 +59,15 @@ def register_models(register): # May 16th 2025 "codex-mini-latest": {"vision": True, "reasoning": True}, # June 10th 2025 - "o3-pro": {"vision": True, "reasoning": True}, + "o3-pro": {"vision": True, "reasoning": True, "streaming": False}, + "o3-pro-2025-06-10": { + "vision": True, + "reasoning": True, + "streaming": False, + }, # GPT-5, August 7th 2025 "gpt-5": {"vision": True, "reasoning": True}, + "gpt-5-chat-latest": {"vision": True}, "gpt-5-mini": {"vision": True, "reasoning": True}, "gpt-5-nano": {"vision": True, "reasoning": True}, "gpt-5-2025-08-07": {"vision": True, "reasoning": True}, @@ -57,6 +78,36 @@ def register_models(register): # GPT-5 Pro, 6th October 2025 "gpt-5-pro": {"vision": True, "reasoning": True}, 
"gpt-5-pro-2025-10-06": {"vision": True, "reasoning": True}, + # GPT-5.1, 13th November 2025 + "gpt-5.1": {"vision": True, "reasoning": True}, + "gpt-5.1-2025-11-13": {"vision": True, "reasoning": True}, + "gpt-5.1-chat-latest": {"vision": True}, + "gpt-5.1-codex": {"vision": True, "reasoning": True}, + "gpt-5.1-codex-mini": {"vision": True, "reasoning": True}, + "gpt-5.1-codex-max": {"vision": True, "reasoning": True}, + # GPT-5.2, 11th December 2025 + "gpt-5.2": {"vision": True, "reasoning": True}, + "gpt-5.2-2025-12-11": {"vision": True, "reasoning": True}, + "gpt-5.2-chat-latest": {"vision": True}, + "gpt-5.2-codex": {"vision": True, "reasoning": True}, + "gpt-5.2-pro": {"vision": True, "reasoning": True, "schemas": False}, + "gpt-5.2-pro-2025-12-11": { + "vision": True, + "reasoning": True, + "schemas": False, + }, + # GPT-5.3, January 2026 + "gpt-5.3-chat-latest": {"vision": True}, + "gpt-5.3-codex": {"vision": True, "reasoning": True}, + # GPT-5.4, March 2026 + "gpt-5.4": {"vision": True, "reasoning": True}, + "gpt-5.4-2026-03-05": {"vision": True, "reasoning": True}, + "gpt-5.4-pro": {"vision": True, "reasoning": True, "schemas": False}, + "gpt-5.4-pro-2026-03-05": { + "vision": True, + "reasoning": True, + "schemas": False, + }, } for model_id, options in models.items(): register( @@ -77,6 +128,20 @@ class ImageDetailEnum(str, Enum): class ReasoningEffortEnum(str, Enum): + minimal = "minimal" + low = "low" + medium = "medium" + high = "high" + xhigh = "xhigh" + + +class ReasoningSummaryEnum(str, Enum): + auto = "auto" + concise = "concise" + detailed = "detailed" + + +class SearchContextSizeEnum(str, Enum): low = "low" medium = "medium" high = "high" @@ -143,9 +208,53 @@ class VisionOptions(Options): class ReasoningOptions(Options): reasoning_effort: Optional[ReasoningEffortEnum] = Field( description=( - "Constraints effort on reasoning for reasoning models. Currently supported " - "values are low, medium, and high. 
Reducing reasoning effort can result in " - "faster responses and fewer tokens used on reasoning in a response." + "Constraints effort on reasoning for reasoning models. Reducing reasoning " + "effort can result in faster responses and fewer reasoning tokens." + ), + default=None, + ) + reasoning_summary: Optional[ReasoningSummaryEnum] = Field( + description=( + "Ask reasoning models to include a reasoning summary in the response JSON. " + "Useful for debugging and understanding model behavior." + ), + default=None, + ) + + +class PlatformToolsOptions(Options): + openai_tools: Optional[str] = Field( + description=( + "Comma-separated OpenAI hosted tools to enable, for example 'web_search'. " + "Some tools need extra configuration that this plugin does not expose." + ), + default=None, + ) + web_search_domains: Optional[str] = Field( + description=( + "Comma-separated list of allowed domains for the web_search tool, up to 20 " + "entries. Example: 'openai.com,wikipedia.org'." + ), + default=None, + ) + web_search_live: Optional[bool] = Field( + description=( + "Control whether web_search can access the live internet. Defaults to true " + "if not specified." + ), + default=None, + ) + web_search_context_size: Optional[SearchContextSizeEnum] = Field( + description=( + "How much context window space web_search should spend on fetched results: " + "low, medium, or high." + ), + default=None, + ) + web_search_sources: Optional[bool] = Field( + description=( + "Include web_search source URLs in the response JSON under " + "web_search_call.action.sources." 
), default=None, ) @@ -165,7 +274,7 @@ def __init__( self.model_name = model_name self.can_stream = streaming self.supports_schema = schemas - options = [BaseOptions] + options = [BaseOptions, PlatformToolsOptions] self.vision = vision if vision: self.attachment_types = { @@ -196,12 +305,38 @@ def set_usage(self, response, usage): input=input_tokens, output=output_tokens, details=simplify_usage_dict(usage) ) + def _build_prompt_input(self, prompt, image_detail): + messages = [] + if not prompt.attachments: + messages.append({"role": "user", "content": prompt.prompt or ""}) + else: + attachment_message = [] + if prompt.prompt: + attachment_message.append({"type": "input_text", "text": prompt.prompt}) + for attachment in prompt.attachments: + attachment_message.append(_attachment(attachment, image_detail)) + messages.append({"role": "user", "content": attachment_message}) + for tool_result in getattr(prompt, "tool_results", []): + if not tool_result.tool_call_id: + continue + messages.append( + { + "type": "function_call_output", + "call_id": tool_result.tool_call_id, + "output": tool_result.output, + } + ) + return messages + + def _image_detail(self, prompt): + if not self.vision: + return None + return prompt.options.image_detail or "low" + def _build_messages(self, prompt, conversation): messages = [] current_system = None - image_detail = None - if self.vision: - image_detail = prompt.options.image_detail or "low" + image_detail = self._image_detail(prompt) if conversation is not None: for prev_response in conversation.responses: if ( @@ -212,29 +347,7 @@ def _build_messages(self, prompt, conversation): {"role": "system", "content": prev_response.prompt.system} ) current_system = prev_response.prompt.system - if prev_response.attachments: - attachment_message = [] - if prev_response.prompt.prompt: - attachment_message.append( - {"type": "input_text", "text": prev_response.prompt.prompt} - ) - for attachment in prev_response.attachments: - 
attachment_message.append(_attachment(attachment, image_detail)) - messages.append({"role": "user", "content": attachment_message}) - else: - messages.append( - {"role": "user", "content": prev_response.prompt.prompt} - ) - for tool_result in getattr(prev_response.prompt, "tool_results", []): - if not tool_result.tool_call_id: - continue - messages.append( - { - "type": "function_call_output", - "call_id": tool_result.tool_call_id, - "output": tool_result.output, - } - ) + messages.extend(self._build_prompt_input(prev_response.prompt, image_detail)) prev_text = prev_response.text_or_raise() if prev_text: messages.append({"role": "assistant", "content": prev_text}) @@ -249,32 +362,54 @@ def _build_messages(self, prompt, conversation): "arguments": json.dumps(tool_call.arguments), } ) - if prompt.system and prompt.system != current_system: - messages.append({"role": "system", "content": prompt.system}) - if not prompt.attachments: - messages.append({"role": "user", "content": prompt.prompt or ""}) - else: - attachment_message = [] - if prompt.prompt: - attachment_message.append({"type": "input_text", "text": prompt.prompt}) - for attachment in prompt.attachments: - attachment_message.append(_attachment(attachment, image_detail)) - messages.append({"role": "user", "content": attachment_message}) - for tool_result in getattr(prompt, "tool_results", []): - if not tool_result.tool_call_id: - continue - messages.append( - { - "type": "function_call_output", - "call_id": tool_result.tool_call_id, - "output": tool_result.output, - } - ) + messages.extend(self._build_prompt_input(prompt, image_detail)) return messages + def _build_reasoning(self, prompt): + reasoning = {} + reasoning_effort = getattr(prompt.options, "reasoning_effort", None) + reasoning_summary = getattr(prompt.options, "reasoning_summary", None) + if reasoning_effort is not None: + reasoning["effort"] = ( + reasoning_effort.value + if isinstance(reasoning_effort, Enum) + else reasoning_effort + ) + if 
reasoning_summary is not None: + reasoning["summary"] = ( + reasoning_summary.value + if isinstance(reasoning_summary, Enum) + else reasoning_summary + ) + return reasoning + + def _previous_response_id(self, prompt, conversation): + if conversation is None or not conversation.responses: + return None + if getattr(prompt.options, "store", None) is False: + return None + last_response = conversation.responses[-1] + if getattr(last_response.prompt.options, "store", None) is False: + return None + response_json = getattr(last_response, "response_json", None) or {} + return response_json.get("id") + def _build_kwargs(self, prompt, conversation): - messages = self._build_messages(prompt, conversation) - kwargs = {"model": self.model_name, "input": messages} + kwargs = {"model": self.model_name} + previous_response_id = self._previous_response_id(prompt, conversation) + if previous_response_id: + kwargs["previous_response_id"] = previous_response_id + kwargs["input"] = self._build_prompt_input( + prompt, self._image_detail(prompt) + ) + systems = [*(r.prompt.system for r in conversation.responses), prompt.system] + if any(systems): # the API does not inherit instructions via previous_response_id, so resend the latest system prompt + kwargs["instructions"] = [s for s in systems if s][-1] + else: + kwargs["input"] = self._build_messages(prompt, conversation) + if prompt.system: + kwargs["instructions"] = prompt.system + for option in ( "max_output_tokens", "temperature", @@ -285,9 +420,36 @@ def _build_kwargs(self, prompt, conversation): value = getattr(prompt.options, option, None) if value is not None: kwargs[option] = value - + reasoning = self._build_reasoning(prompt) + if reasoning: + kwargs["reasoning"] = reasoning + + all_tools = [] + openai_tools = getattr(prompt.options, "openai_tools", None) + if openai_tools: + openai_tools = [tool.strip() for tool in openai_tools.split(",") if tool.strip()] + for tool_name in openai_tools: + tool_config = {"type": tool_name} + if tool_name == "web_search": + domains = getattr(prompt.options, "web_search_domains", None) + if domains: + 
domain_list = [d.strip() for d in domains.split(",") if d.strip()] + if len(domain_list) > 20: + raise ValueError("web_search_domains supports at most 20 domains") + if domain_list: + tool_config["filters"] = {"allowed_domains": domain_list} + context_size = getattr(prompt.options, "web_search_context_size", None) + if context_size is not None: + tool_config["search_context_size"] = ( + context_size.value + if isinstance(context_size, Enum) + else context_size + ) + live_access = getattr(prompt.options, "web_search_live", None) + if live_access is not None: + tool_config["external_web_access"] = live_access + all_tools.append(tool_config) if prompt.tools: - tool_defs = [] for tool in prompt.tools: if not getattr(tool, "name", None): continue @@ -295,7 +457,7 @@ def _build_kwargs(self, prompt, conversation): "type": "object", "properties": {}, } - tool_defs.append( + all_tools.append( { "type": "function", "name": tool.name, @@ -304,8 +466,13 @@ def _build_kwargs(self, prompt, conversation): "strict": False, } ) - if tool_defs: - kwargs["tools"] = tool_defs + if all_tools: + kwargs["tools"] = all_tools + if getattr(prompt.options, "web_search_sources", None): + kwargs.setdefault("include", []) + include_item = "web_search_call.action.sources" + if include_item not in kwargs["include"]: + kwargs["include"].append(include_item) if self.supports_schema and prompt.schema: kwargs["text"] = { "format": { diff --git a/tests/conftest.py b/tests/conftest.py index 5ca2f88..67c5566 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,10 @@ +import os import pytest +import sys + + +os.environ["LLM_LOAD_PLUGINS"] = "llm-openai-plugin" +sys._called_from_test = True @pytest.fixture(scope="module") diff --git a/tests/test_openai.py b/tests/test_openai.py index bb83185..3e09ab7 100644 --- a/tests/test_openai.py +++ b/tests/test_openai.py @@ -3,6 +3,7 @@ import os from pydantic import BaseModel import pytest +from types import SimpleNamespace API_KEY = 
os.environ.get("PYTEST_OPENAI_API_KEY", None) or "badkey" @@ -12,6 +13,50 @@ def test_plugin_is_installed(): assert "openai/gpt-4o-mini" in model_ids +def make_response( + model, + *, + prompt_text, + response_text, + response_id="resp_123", + system=None, + store=None, +): + prompt = llm.Prompt( + prompt_text, + model=model, + system=system, + options=model.Options(store=store), + ) + return SimpleNamespace( + prompt=prompt, + response_json={"id": response_id} if response_id else {}, + text_or_raise=lambda: response_text, + tool_calls_or_raise=lambda: [], + ) + + +def test_latest_models_registered(): + model_ids = {model.model_id for model in llm.get_models()} + expected = { + "openai/gpt-4-turbo", + "openai/o3-pro-2025-06-10", + "openai/gpt-5-chat-latest", + "openai/gpt-5.1-codex-max", + "openai/gpt-5.2-codex", + "openai/gpt-5.2-pro", + "openai/gpt-5.3-codex", + "openai/gpt-5.4", + "openai/gpt-5.4-pro", + } + assert expected.issubset(model_ids) + + +def test_pro_models_disable_schema(): + assert llm.get_model("openai/gpt-5.4-pro").supports_schema is False + assert llm.get_model("openai/gpt-5.2-pro").supports_schema is False + + @pytest.mark.parametrize( "options", ( @@ -76,3 +121,164 @@ def simple_tool(number): ) output = chain_response.text() assert output == snapshot + + +def test_instructions_in_kwargs(): + model = llm.get_model("openai/gpt-4o-mini") + prompt = llm.Prompt( + "say hi", + model=model, + system="You are a friendly assistant who speaks like a pirate", + options=model.Options(), + ) + kwargs = model._build_kwargs(prompt, conversation=None) + + assert kwargs["instructions"] == "You are a friendly assistant who speaks like a pirate" + assert all(item.get("role") != "system" for item in kwargs["input"]) + + +def test_reasoning_options_in_kwargs(): + model = llm.get_model("openai/gpt-5.4") + prompt = llm.Prompt( + "What is 2+2?", + model=model, + options=model.Options(reasoning_effort="xhigh", reasoning_summary="auto"), + ) + kwargs = 
model._build_kwargs(prompt, conversation=None) + + assert kwargs["reasoning"] == {"effort": "xhigh", "summary": "auto"} + + +def test_web_search_options_in_kwargs(): + model = llm.get_model("openai/gpt-5.4") + prompt = llm.Prompt( + "Latest OpenAI news", + model=model, + options=model.Options( + openai_tools="web_search", + web_search_domains="openai.com,wikipedia.org", + web_search_live=False, + web_search_context_size="high", + web_search_sources=True, + ), + ) + kwargs = model._build_kwargs(prompt, conversation=None) + + assert kwargs["tools"] == [ + { + "type": "web_search", + "filters": {"allowed_domains": ["openai.com", "wikipedia.org"]}, + "search_context_size": "high", + "external_web_access": False, + } + ] + assert kwargs["include"] == ["web_search_call.action.sources"] + + +def test_web_search_domains_limit(): + model = llm.get_model("openai/gpt-5.4") + domains = ",".join(f"example{i}.com" for i in range(21)) + prompt = llm.Prompt( + "Latest OpenAI news", + model=model, + options=model.Options(openai_tools="web_search", web_search_domains=domains), + ) + + with pytest.raises(ValueError, match="at most 20 domains"): + model._build_kwargs(prompt, conversation=None) + + +def test_previous_response_id_in_kwargs(): + model = llm.get_model("openai/gpt-4o-mini") + conversation = llm.Conversation(model=model) + conversation.responses.append( + make_response( + model, + prompt_text="What is 7+7?", + response_text="14", + response_id="resp_previous", + system="Be concise", + ) + ) + prompt = llm.Prompt( + "Add 3 to that", + model=model, + system="Be concise", + options=model.Options(), + ) + kwargs = model._build_kwargs(prompt, conversation=conversation) + + assert kwargs["previous_response_id"] == "resp_previous" + assert kwargs["input"] == [{"role": "user", "content": "Add 3 to that"}] + assert kwargs["instructions"] == "Be concise" + + +def test_previous_response_id_resends_inherited_system(): + model = llm.get_model("openai/gpt-4o-mini") + conversation = 
llm.Conversation(model=model) + conversation.responses.append( + make_response( + model, + prompt_text="What is 7+7?", + response_text="14", + response_id="resp_previous", + system="Be concise", + ) + ) + prompt = llm.Prompt( + "Explain the answer", + model=model, + options=model.Options(), + ) + kwargs = model._build_kwargs(prompt, conversation=conversation) + + assert kwargs["previous_response_id"] == "resp_previous" + assert kwargs["instructions"] == "Be concise" + + +def test_previous_response_id_disabled_for_store_false(): + model = llm.get_model("openai/gpt-4o-mini") + conversation = llm.Conversation(model=model) + conversation.responses.append( + make_response( + model, + prompt_text="What is 8+8?", + response_text="16", + response_id="resp_previous", + ) + ) + prompt = llm.Prompt( + "Subtract 5", + model=model, + options=model.Options(store=False), + ) + kwargs = model._build_kwargs(prompt, conversation=conversation) + + assert "previous_response_id" not in kwargs + assert kwargs["input"] == [ + {"role": "user", "content": "What is 8+8?"}, + {"role": "assistant", "content": "16"}, + {"role": "user", "content": "Subtract 5"}, + ] + + +def test_previous_response_id_disabled_if_previous_response_was_not_stored(): + model = llm.get_model("openai/gpt-4o-mini") + conversation = llm.Conversation(model=model) + conversation.responses.append( + make_response( + model, + prompt_text="What is 8+8?", + response_text="16", + response_id="resp_previous", + store=False, + ) + ) + prompt = llm.Prompt( + "Subtract 5", + model=model, + options=model.Options(), + ) + kwargs = model._build_kwargs(prompt, conversation=conversation) + + assert "previous_response_id" not in kwargs