From cb275c79b2f159bbe9cf6479853f9902c79c2052 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 24 May 2026 01:40:48 +0000 Subject: [PATCH 1/9] Release v1.23.1 Co-authored-by: openhands --- .github/workflows/run-eval.yml | 3 ++- openhands-agent-server/pyproject.toml | 2 +- openhands-sdk/pyproject.toml | 2 +- openhands-tools/pyproject.toml | 2 +- openhands-workspace/pyproject.toml | 2 +- uv.lock | 10 +++++----- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml index 6039bc53b9..fe22baac1a 100644 --- a/.github/workflows/run-eval.yml +++ b/.github/workflows/run-eval.yml @@ -25,7 +25,8 @@ on: sdk_ref: description: SDK commit/ref to evaluate (must be a semantic version like v1.0.0 unless 'Allow unreleased branches' is checked) required: true - default: v1.23.0 + default: v1.23.1 + diff --git a/openhands-agent-server/pyproject.toml b/openhands-agent-server/pyproject.toml index cd415c0cab..f5569286c7 100644 --- a/openhands-agent-server/pyproject.toml +++ b/openhands-agent-server/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openhands-agent-server" -version = "1.23.0" +version = "1.23.1" description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent" requires-python = ">=3.12" diff --git a/openhands-sdk/pyproject.toml b/openhands-sdk/pyproject.toml index 5b1cd987cd..413482d913 100644 --- a/openhands-sdk/pyproject.toml +++ b/openhands-sdk/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openhands-sdk" -version = "1.23.0" +version = "1.23.1" description = "OpenHands SDK - Core functionality for building AI agents" requires-python = ">=3.12" diff --git a/openhands-tools/pyproject.toml b/openhands-tools/pyproject.toml index 7d055d7bc6..d98bfad756 100644 --- a/openhands-tools/pyproject.toml +++ b/openhands-tools/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openhands-tools" -version = "1.23.0" +version = "1.23.1" description = "OpenHands Tools - Runtime tools for AI agents" requires-python = ">=3.12" diff --git a/openhands-workspace/pyproject.toml b/openhands-workspace/pyproject.toml index abc794cbcc..94350ac3bf 100644 --- a/openhands-workspace/pyproject.toml +++ b/openhands-workspace/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openhands-workspace" -version = "1.23.0" +version = "1.23.1" description = "OpenHands Workspace - Docker and container-based workspace implementations" requires-python = ">=3.12" diff --git a/uv.lock b/uv.lock index f6e906ebf6..b51db58e3a 100644 --- a/uv.lock +++ b/uv.lock @@ -8,7 +8,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-05-12T18:17:37.961907255Z" +exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. exclude-newer-span = "P7D" [manifest] @@ -2454,7 +2454,7 @@ wheels = [ [[package]] name = "openhands-agent-server" -version = "1.23.0" +version = "1.23.1" source = { editable = "openhands-agent-server" } dependencies = [ { name = "aiosqlite" }, @@ -2485,7 +2485,7 @@ requires-dist = [ [[package]] name = "openhands-sdk" -version = "1.23.0" +version = "1.23.1" source = { editable = "openhands-sdk" } dependencies = [ { name = "agent-client-protocol" }, @@ -2537,7 +2537,7 @@ provides-extras = ["boto3"] [[package]] name = "openhands-tools" -version = "1.23.0" +version = "1.23.1" source = { editable = "openhands-tools" } dependencies = [ { name = "binaryornot" }, @@ -2568,7 +2568,7 @@ requires-dist = [ [[package]] name = "openhands-workspace" -version = "1.23.0" +version = "1.23.1" source = { editable = "openhands-workspace" } dependencies = [ { name = "openhands-agent-server" }, From a8a48e34b01e40649345dea255b6c23fe211d574 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 03:07:55 +0000 Subject: [PATCH 2/9] fix(sdk): support Gemini tool result history Co-authored-by: openhands --- .github/workflows/integration-runner.yml | 2 +- openhands-sdk/openhands/sdk/llm/llm.py | 30 +++++++++++ tests/sdk/llm/test_llm.py | 68 +++++++++++++++++++++++- 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml index 009c1006d9..b75ef4396b 100644 --- a/.github/workflows/integration-runner.yml +++ b/.github/workflows/integration-runner.yml @@ -50,7 +50,7 @@ on: env: N_PROCESSES: 4 # Global configuration for number of parallel processes for evaluation # Default models for scheduled/label-triggered runs (subset of models from resolve_model_config.py) - DEFAULT_MODEL_IDS: gpt-5.5,deepseek-v4-flash,kimi-k2.6,gemini-3.1-pro + DEFAULT_MODEL_IDS: gpt-5.5,deepseek-v4-flash,minimax-m2.5,gemini-3.1-pro jobs: setup-matrix: diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py index 3bee201e0b..9a83db42e2 100644 --- a/openhands-sdk/openhands/sdk/llm/llm.py +++ b/openhands-sdk/openhands/sdk/llm/llm.py @@ -153,6 +153,8 @@ LLM_PROFILE_SCHEMA_VERSION: Final[int] = 1 +TOOL_CALL_ID_UNSUPPORTED_PROVIDERS: Final[frozenset[str]] = frozenset({"gemini"}) + class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin): """Language model interface for OpenHands agents. @@ -1909,9 +1911,37 @@ def format_messages_for_llm(self, messages: list[Message]) -> list[dict]: ) for message in messages ] + self._strip_unsupported_tool_call_ids(formatted_messages) return formatted_messages + def _strip_unsupported_tool_call_ids(self, messages: list[dict[str, Any]]) -> None: + if not self._tool_call_ids_are_unsupported(): + return + + for message in messages: + if message.get("role") == "assistant": + tool_calls = message.get("tool_calls") + if isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict): + tool_call.pop("id", None) + elif message.get("role") == "tool": + message.pop("tool_call_id", None) + + def _tool_call_ids_are_unsupported(self) -> bool: + provider = self._infer_model_info_provider() + if provider in TOOL_CALL_ID_UNSUPPORTED_PROVIDERS: + return True + + model_names = [self._model_name_for_capabilities()] + if self._model_info is not None: + model_key = self._model_info.get("key") + if isinstance(model_key, str): + model_names.append(model_key) + + return any("gemini" in model_name.lower() for model_name in model_names) + def format_messages_for_responses( self, messages: list[Message] ) -> tuple[str | None, list[dict[str, Any]]]: diff --git a/tests/sdk/llm/test_llm.py b/tests/sdk/llm/test_llm.py index e2aa8d9d8a..11481bc7a5 100644 --- a/tests/sdk/llm/test_llm.py +++ b/tests/sdk/llm/test_llm.py @@ -10,7 +10,7 @@ from pydantic import SecretStr from openhands.sdk import ConversationStats, RegistryEvent -from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent +from openhands.sdk.llm import LLM, LLMResponse, Message, MessageToolCall, TextContent from openhands.sdk.llm.exceptions import LLMNoResponseError from openhands.sdk.llm.options.responses_options import select_responses_options from openhands.sdk.llm.utils.metrics import Metrics, TokenUsage @@ -32,6 +32,72 @@ def default_llm(): ) +def _tool_result_history() -> list[Message]: + return [ + Message( + role="assistant", + content=[], + tool_calls=[ + MessageToolCall( + id="call_123", + name="terminal", + arguments='{"command": "cat document.txt"}', + origin="completion", + ) + ], + ), + Message( + role="tool", + content=[TextContent(text="file contents")], + tool_call_id="call_123", + name="terminal", + ), + ] + + +def test_gemini_chat_format_omits_tool_call_ids(): + llm = LLM( + model="litellm_proxy/gemini-3.1-pro-preview", + api_key=SecretStr("test_key"), + usage_id="test-gemini-tool-call-ids", + ) + llm._model_info = {"litellm_provider": "vertex_ai"} + + formatted = llm.format_messages_for_llm(_tool_result_history()) + + assert "id" not in formatted[0]["tool_calls"][0] + assert "tool_call_id" not in formatted[1] + assert formatted[1]["name"] == "terminal" + + +def test_openai_chat_format_keeps_tool_call_ids(): + llm = LLM( + model="gpt-4o", + api_key=SecretStr("test_key"), + usage_id="test-openai-tool-call-ids", + ) + llm._model_info = {"litellm_provider": "openai"} + + formatted = llm.format_messages_for_llm(_tool_result_history()) + + assert formatted[0]["tool_calls"][0]["id"] == "call_123" + assert formatted[1]["tool_call_id"] == "call_123" + + +def test_non_gemini_vertex_chat_format_keeps_tool_call_ids(): + llm = LLM( + model="vertex_ai/claude-sonnet-4-5", + api_key=SecretStr("test_key"), + usage_id="test-vertex-tool-call-ids", + ) + llm._model_info = {"litellm_provider": "vertex_ai"} + + formatted = llm.format_messages_for_llm(_tool_result_history()) + + assert formatted[0]["tool_calls"][0]["id"] == "call_123" + assert formatted[1]["tool_call_id"] == "call_123" + + def test_llm_init_with_default_config(default_llm): """Test LLM initialization with default config using fixture.""" assert default_llm.model == "gpt-4o" From cca4e79e935aba0ba843b81dd23047b44ac4c098 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 03:10:49 +0000 Subject: [PATCH 3/9] fix(ci): use workflow token for dispatch comments Co-authored-by: openhands --- .github/workflows/integration-runner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml index b75ef4396b..12020f2821 100644 --- a/.github/workflows/integration-runner.yml +++ b/.github/workflows/integration-runner.yml @@ -176,7 +176,7 @@ jobs: steps: - name: Comment on issue/PR (workflow_dispatch) env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ github.token }} ISSUE_NUMBER: ${{ github.event.inputs.issue_number }} MODEL_IDS: ${{ github.event.inputs.model_ids || 'all models' }} TEST_TYPE: ${{ github.event.inputs.test_type || 'all' }} From 991324d48b07991e1482e11a5c9f79a678ae41ae Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 03:14:09 +0000 Subject: [PATCH 4/9] fix(ci): pass repository to dispatch comment Co-authored-by: openhands --- .github/workflows/integration-runner.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml index 12020f2821..d9d8ae09c7 100644 --- a/.github/workflows/integration-runner.yml +++ b/.github/workflows/integration-runner.yml @@ -196,7 +196,7 @@ jobs: Results will be posted here when complete. EOF ) - gh issue comment "$ISSUE_NUMBER" --body "$COMMENT_BODY" + gh issue comment "$ISSUE_NUMBER" --repo "${{ github.repository }}" --body "$COMMENT_BODY" run-integration-tests: # Security: Only run when integration-related labels are present, via workflow_dispatch, or on schedule From 7ff46f0c90e9ad1cf0008cf032df4febcb1730b2 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 03:15:36 +0000 Subject: [PATCH 5/9] fix(ci): allow dispatch comments on PRs Co-authored-by: openhands --- .github/workflows/integration-runner.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml index d9d8ae09c7..1cee19071f 100644 --- a/.github/workflows/integration-runner.yml +++ b/.github/workflows/integration-runner.yml @@ -173,6 +173,7 @@ jobs: runs-on: ubuntu-latest permissions: issues: write + pull-requests: write steps: - name: Comment on issue/PR (workflow_dispatch) env: From c3f13669f90f2b3a1a24b277144e1bfa19e7465d Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Sun, 24 May 2026 14:53:16 -0400 Subject: [PATCH 6/9] chore(release): keep v1.23.1 PR minimal Drop non-release Gemini and workflow-comment experiments from the release branch so the PR diff only carries release mechanics. Co-authored-by: openhands --- .github/workflows/integration-runner.yml | 7 ++- openhands-sdk/openhands/sdk/llm/llm.py | 30 ----------- tests/sdk/llm/test_llm.py | 68 +----------------------- 3 files changed, 4 insertions(+), 101 deletions(-) diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml index 1cee19071f..009c1006d9 100644 --- a/.github/workflows/integration-runner.yml +++ b/.github/workflows/integration-runner.yml @@ -50,7 +50,7 @@ on: env: N_PROCESSES: 4 # Global configuration for number of parallel processes for evaluation # Default models for scheduled/label-triggered runs (subset of models from resolve_model_config.py) - DEFAULT_MODEL_IDS: gpt-5.5,deepseek-v4-flash,minimax-m2.5,gemini-3.1-pro + DEFAULT_MODEL_IDS: gpt-5.5,deepseek-v4-flash,kimi-k2.6,gemini-3.1-pro jobs: setup-matrix: @@ -173,11 +173,10 @@ jobs: runs-on: ubuntu-latest permissions: issues: write - pull-requests: write steps: - name: Comment on issue/PR (workflow_dispatch) env: - GITHUB_TOKEN: ${{ github.token }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ISSUE_NUMBER: ${{ github.event.inputs.issue_number }} MODEL_IDS: ${{ github.event.inputs.model_ids || 'all models' }} TEST_TYPE: ${{ github.event.inputs.test_type || 'all' }} @@ -197,7 +196,7 @@ jobs: Results will be posted here when complete. EOF ) - gh issue comment "$ISSUE_NUMBER" --repo "${{ github.repository }}" --body "$COMMENT_BODY" + gh issue comment "$ISSUE_NUMBER" --body "$COMMENT_BODY" run-integration-tests: # Security: Only run when integration-related labels are present, via workflow_dispatch, or on schedule diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py index 9a83db42e2..3bee201e0b 100644 --- a/openhands-sdk/openhands/sdk/llm/llm.py +++ b/openhands-sdk/openhands/sdk/llm/llm.py @@ -153,8 +153,6 @@ LLM_PROFILE_SCHEMA_VERSION: Final[int] = 1 -TOOL_CALL_ID_UNSUPPORTED_PROVIDERS: Final[frozenset[str]] = frozenset({"gemini"}) - class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin): """Language model interface for OpenHands agents. @@ -1911,37 +1909,9 @@ def format_messages_for_llm(self, messages: list[Message]) -> list[dict]: ) for message in messages ] - self._strip_unsupported_tool_call_ids(formatted_messages) return formatted_messages - def _strip_unsupported_tool_call_ids(self, messages: list[dict[str, Any]]) -> None: - if not self._tool_call_ids_are_unsupported(): - return - - for message in messages: - if message.get("role") == "assistant": - tool_calls = message.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict): - tool_call.pop("id", None) - elif message.get("role") == "tool": - message.pop("tool_call_id", None) - - def _tool_call_ids_are_unsupported(self) -> bool: - provider = self._infer_model_info_provider() - if provider in TOOL_CALL_ID_UNSUPPORTED_PROVIDERS: - return True - - model_names = [self._model_name_for_capabilities()] - if self._model_info is not None: - model_key = self._model_info.get("key") - if isinstance(model_key, str): - model_names.append(model_key) - - return any("gemini" in model_name.lower() for model_name in model_names) - def format_messages_for_responses( self, messages: list[Message] ) -> tuple[str | None, list[dict[str, Any]]]: diff --git a/tests/sdk/llm/test_llm.py b/tests/sdk/llm/test_llm.py index 11481bc7a5..e2aa8d9d8a 100644 --- a/tests/sdk/llm/test_llm.py +++ b/tests/sdk/llm/test_llm.py @@ -10,7 +10,7 @@ from pydantic import SecretStr from openhands.sdk import ConversationStats, RegistryEvent -from openhands.sdk.llm import LLM, LLMResponse, Message, MessageToolCall, TextContent +from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent from openhands.sdk.llm.exceptions import LLMNoResponseError from openhands.sdk.llm.options.responses_options import select_responses_options from openhands.sdk.llm.utils.metrics import Metrics, TokenUsage @@ -32,72 +32,6 @@ def default_llm(): ) -def _tool_result_history() -> list[Message]: - return [ - Message( - role="assistant", - content=[], - tool_calls=[ - MessageToolCall( - id="call_123", - name="terminal", - arguments='{"command": "cat document.txt"}', - origin="completion", - ) - ], - ), - Message( - role="tool", - content=[TextContent(text="file contents")], - tool_call_id="call_123", - name="terminal", - ), - ] - - -def test_gemini_chat_format_omits_tool_call_ids(): - llm = LLM( - model="litellm_proxy/gemini-3.1-pro-preview", - api_key=SecretStr("test_key"), - usage_id="test-gemini-tool-call-ids", - ) - llm._model_info = {"litellm_provider": "vertex_ai"} - - formatted = llm.format_messages_for_llm(_tool_result_history()) - - assert "id" not in formatted[0]["tool_calls"][0] - assert "tool_call_id" not in formatted[1] - assert formatted[1]["name"] == "terminal" - - -def test_openai_chat_format_keeps_tool_call_ids(): - llm = LLM( - model="gpt-4o", - api_key=SecretStr("test_key"), - usage_id="test-openai-tool-call-ids", - ) - llm._model_info = {"litellm_provider": "openai"} - - formatted = llm.format_messages_for_llm(_tool_result_history()) - - assert formatted[0]["tool_calls"][0]["id"] == "call_123" - assert formatted[1]["tool_call_id"] == "call_123" - - -def test_non_gemini_vertex_chat_format_keeps_tool_call_ids(): - llm = LLM( - model="vertex_ai/claude-sonnet-4-5", - api_key=SecretStr("test_key"), - usage_id="test-vertex-tool-call-ids", - ) - llm._model_info = {"litellm_provider": "vertex_ai"} - - formatted = llm.format_messages_for_llm(_tool_result_history()) - - assert formatted[0]["tool_calls"][0]["id"] == "call_123" - assert formatted[1]["tool_call_id"] == "call_123" - - def test_llm_init_with_default_config(default_llm): """Test LLM initialization with default config using fixture.""" assert default_llm.model == "gpt-4o" From e4fe2ba57e15fb3112ed20d113e626a51c3fbf26 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 25 May 2026 16:10:34 +0000 Subject: [PATCH 7/9] fix(sdk): omit Gemini tool call ids Co-authored-by: openhands --- openhands-sdk/openhands/sdk/llm/llm.py | 30 ++++++++++++ tests/sdk/llm/test_llm.py | 68 +++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py index 3bee201e0b..9a83db42e2 100644 --- a/openhands-sdk/openhands/sdk/llm/llm.py +++ b/openhands-sdk/openhands/sdk/llm/llm.py @@ -153,6 +153,8 @@ LLM_PROFILE_SCHEMA_VERSION: Final[int] = 1 +TOOL_CALL_ID_UNSUPPORTED_PROVIDERS: Final[frozenset[str]] = frozenset({"gemini"}) + class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin): """Language model interface for OpenHands agents. @@ -1909,9 +1911,37 @@ def format_messages_for_llm(self, messages: list[Message]) -> list[dict]: ) for message in messages ] + self._strip_unsupported_tool_call_ids(formatted_messages) return formatted_messages + def _strip_unsupported_tool_call_ids(self, messages: list[dict[str, Any]]) -> None: + if not self._tool_call_ids_are_unsupported(): + return + + for message in messages: + if message.get("role") == "assistant": + tool_calls = message.get("tool_calls") + if isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict): + tool_call.pop("id", None) + elif message.get("role") == "tool": + message.pop("tool_call_id", None) + + def _tool_call_ids_are_unsupported(self) -> bool: + provider = self._infer_model_info_provider() + if provider in TOOL_CALL_ID_UNSUPPORTED_PROVIDERS: + return True + + model_names = [self._model_name_for_capabilities()] + if self._model_info is not None: + model_key = self._model_info.get("key") + if isinstance(model_key, str): + model_names.append(model_key) + + return any("gemini" in model_name.lower() for model_name in model_names) + def format_messages_for_responses( self, messages: list[Message] ) -> tuple[str | None, list[dict[str, Any]]]: diff --git a/tests/sdk/llm/test_llm.py b/tests/sdk/llm/test_llm.py index e2aa8d9d8a..11481bc7a5 100644 --- a/tests/sdk/llm/test_llm.py +++ b/tests/sdk/llm/test_llm.py @@ -10,7 +10,7 @@ from pydantic import SecretStr from openhands.sdk import ConversationStats, RegistryEvent -from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent +from openhands.sdk.llm import LLM, LLMResponse, Message, MessageToolCall, TextContent from openhands.sdk.llm.exceptions import LLMNoResponseError from openhands.sdk.llm.options.responses_options import select_responses_options from openhands.sdk.llm.utils.metrics import Metrics, TokenUsage @@ -32,6 +32,72 @@ def default_llm(): ) +def _tool_result_history() -> list[Message]: + return [ + Message( + role="assistant", + content=[], + tool_calls=[ + MessageToolCall( + id="call_123", + name="terminal", + arguments='{"command": "cat document.txt"}', + origin="completion", + ) + ], + ), + Message( + role="tool", + content=[TextContent(text="file contents")], + tool_call_id="call_123", + name="terminal", + ), + ] + + +def test_gemini_chat_format_omits_tool_call_ids(): + llm = LLM( + model="litellm_proxy/gemini-3.1-pro-preview", + api_key=SecretStr("test_key"), + usage_id="test-gemini-tool-call-ids", + ) + llm._model_info = {"litellm_provider": "vertex_ai"} + + formatted = llm.format_messages_for_llm(_tool_result_history()) + + assert "id" not in formatted[0]["tool_calls"][0] + assert "tool_call_id" not in formatted[1] + assert formatted[1]["name"] == "terminal" + + +def test_openai_chat_format_keeps_tool_call_ids(): + llm = LLM( + model="gpt-4o", + api_key=SecretStr("test_key"), + usage_id="test-openai-tool-call-ids", + ) + llm._model_info = {"litellm_provider": "openai"} + + formatted = llm.format_messages_for_llm(_tool_result_history()) + + assert formatted[0]["tool_calls"][0]["id"] == "call_123" + assert formatted[1]["tool_call_id"] == "call_123" + + +def test_non_gemini_vertex_chat_format_keeps_tool_call_ids(): + llm = LLM( + model="vertex_ai/claude-sonnet-4-5", + api_key=SecretStr("test_key"), + usage_id="test-vertex-tool-call-ids", + ) + llm._model_info = {"litellm_provider": "vertex_ai"} + + formatted = llm.format_messages_for_llm(_tool_result_history()) + + assert formatted[0]["tool_calls"][0]["id"] == "call_123" + assert formatted[1]["tool_call_id"] == "call_123" + + def test_llm_init_with_default_config(default_llm): """Test LLM initialization with default config using fixture.""" assert default_llm.model == "gpt-4o" From d0b37ec236de2c3ef966533fa9faa44d4415bd95 Mon Sep 17 00:00:00 2001 From: enyst Date: Mon, 25 May 2026 18:47:38 +0000 Subject: [PATCH 8/9] Revert Gemini tool call ID workaround Reverts e4fe2ba57e15fb3112ed20d113e626a51c3fbf26 to keep the v1.23.1 release PR focused on release-only changes. Co-authored-by: openhands --- openhands-sdk/openhands/sdk/llm/llm.py | 30 ------------ tests/sdk/llm/test_llm.py | 68 +------------------------- 2 files changed, 1 insertion(+), 97 deletions(-) diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py index 9a83db42e2..3bee201e0b 100644 --- a/openhands-sdk/openhands/sdk/llm/llm.py +++ b/openhands-sdk/openhands/sdk/llm/llm.py @@ -153,8 +153,6 @@ LLM_PROFILE_SCHEMA_VERSION: Final[int] = 1 -TOOL_CALL_ID_UNSUPPORTED_PROVIDERS: Final[frozenset[str]] = frozenset({"gemini"}) - class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin): """Language model interface for OpenHands agents. @@ -1911,37 +1909,9 @@ def format_messages_for_llm(self, messages: list[Message]) -> list[dict]: ) for message in messages ] - self._strip_unsupported_tool_call_ids(formatted_messages) return formatted_messages - def _strip_unsupported_tool_call_ids(self, messages: list[dict[str, Any]]) -> None: - if not self._tool_call_ids_are_unsupported(): - return - - for message in messages: - if message.get("role") == "assistant": - tool_calls = message.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict): - tool_call.pop("id", None) - elif message.get("role") == "tool": - message.pop("tool_call_id", None) - - def _tool_call_ids_are_unsupported(self) -> bool: - provider = self._infer_model_info_provider() - if provider in TOOL_CALL_ID_UNSUPPORTED_PROVIDERS: - return True - - model_names = [self._model_name_for_capabilities()] - if self._model_info is not None: - model_key = self._model_info.get("key") - if isinstance(model_key, str): - model_names.append(model_key) - - return any("gemini" in model_name.lower() for model_name in model_names) - def format_messages_for_responses( self, messages: list[Message] ) -> tuple[str | None, list[dict[str, Any]]]: diff --git a/tests/sdk/llm/test_llm.py b/tests/sdk/llm/test_llm.py index 11481bc7a5..e2aa8d9d8a 100644 --- a/tests/sdk/llm/test_llm.py +++ b/tests/sdk/llm/test_llm.py @@ -10,7 +10,7 @@ from pydantic import SecretStr from openhands.sdk import ConversationStats, RegistryEvent -from openhands.sdk.llm import LLM, LLMResponse, Message, MessageToolCall, TextContent +from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent from openhands.sdk.llm.exceptions import LLMNoResponseError from openhands.sdk.llm.options.responses_options import select_responses_options from openhands.sdk.llm.utils.metrics import Metrics, TokenUsage @@ -32,72 +32,6 @@ def default_llm(): ) -def _tool_result_history() -> list[Message]: - return [ - Message( - role="assistant", - content=[], - tool_calls=[ - MessageToolCall( - id="call_123", - name="terminal", - arguments='{"command": "cat document.txt"}', - origin="completion", - ) - ], - ), - Message( - role="tool", - content=[TextContent(text="file contents")], - tool_call_id="call_123", - name="terminal", - ), - ] - - -def test_gemini_chat_format_omits_tool_call_ids(): - llm = LLM( - model="litellm_proxy/gemini-3.1-pro-preview", - api_key=SecretStr("test_key"), - usage_id="test-gemini-tool-call-ids", - ) - llm._model_info = {"litellm_provider": "vertex_ai"} - - formatted = llm.format_messages_for_llm(_tool_result_history()) - - assert "id" not in formatted[0]["tool_calls"][0] - assert "tool_call_id" not in formatted[1] - assert formatted[1]["name"] == "terminal" - - -def test_openai_chat_format_keeps_tool_call_ids(): - llm = LLM( - model="gpt-4o", - api_key=SecretStr("test_key"), - usage_id="test-openai-tool-call-ids", - ) - llm._model_info = {"litellm_provider": "openai"} - - formatted = llm.format_messages_for_llm(_tool_result_history()) - - assert formatted[0]["tool_calls"][0]["id"] == "call_123" - assert formatted[1]["tool_call_id"] == "call_123" - - -def test_non_gemini_vertex_chat_format_keeps_tool_call_ids(): - llm = LLM( - model="vertex_ai/claude-sonnet-4-5", - api_key=SecretStr("test_key"), - usage_id="test-vertex-tool-call-ids", - ) - llm._model_info = {"litellm_provider": "vertex_ai"} - - formatted = llm.format_messages_for_llm(_tool_result_history()) - - assert formatted[0]["tool_calls"][0]["id"] == "call_123" - assert formatted[1]["tool_call_id"] == "call_123" - - def test_llm_init_with_default_config(default_llm): """Test LLM initialization with default config using fixture.""" assert default_llm.model == "gpt-4o" From b4909473ae3090f02d11acad5061a9f1e6477cca Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Mon, 25 May 2026 21:10:56 +0200 Subject: [PATCH 9/9] Change exclude-newer date in uv.lock Updated exclude-newer date for compatibility. --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index b51db58e3a..ecae9b34ad 100644 --- a/uv.lock +++ b/uv.lock @@ -8,7 +8,7 @@ resolution-markers = [ ] [options] -exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. +exclude-newer = "2026-05-12T18:17:37.961907255Z" exclude-newer-span = "P7D" [manifest]