fix(models): Drop stale thought signatures

its-amann · its-amann · commit c73038cb1281 · 2026-06-23T20:49:57.000+05:30
Keep only the newest thought_signature in request contents so that signatures from prior turns, which the model no longer uses, are not sent again. Fixes #3693
diff --git a/src/google/adk/models/google_llm.py b/src/google/adk/models/google_llm.py
@@ -63,6 +63,21 @@
 """
 
 
+def _remove_old_thought_signatures(contents: list[types.Content]) -> None:
+  """Keeps only the latest thought signature in Gemini request contents."""
+  latest_signature_seen = False
+  for content in reversed(contents):
+    if not content.parts:
+      continue
+    for part in reversed(content.parts):
+      if part.thought_signature is None:
+        continue
+      if latest_signature_seen:
+        part.thought_signature = None
+      else:
+        latest_signature_seen = True
+
+
 class _ResourceExhaustedError(ClientError):
   """Represents a resources exhausted error received from the Model."""
 
@@ -195,6 +210,7 @@ async def generate_content_async(
     """
     await self._preprocess_request(llm_request)
     self._maybe_append_user_content(llm_request)
+    _remove_old_thought_signatures(llm_request.contents)
 
     # Handle context caching if configured
     cache_metadata = None
diff --git a/tests/unittests/models/test_google_llm.py b/tests/unittests/models/test_google_llm.py
@@ -381,6 +381,53 @@ async def mock_coro():
     mock_client.aio.models.generate_content.assert_called_once()
 
 
+@pytest.mark.asyncio
+async def test_generate_content_async_keeps_only_latest_thought_signature(
+    gemini_llm, generate_content_response
+):
+  """Gemini requests keep only the newest thought signature."""
+
+  def _function_call_part(name, signature):
+    return Part(
+        function_call=types.FunctionCall(name=name, args={}),
+        thought_signature=signature,
+    )
+
+  old_part = _function_call_part("first_tool", b"old")
+  newer_part = _function_call_part("second_tool", b"newer")
+  latest_part = _function_call_part("third_tool", b"latest")
+  llm_request = LlmRequest(
+      model="gemini-2.5-flash",
+      contents=[
+          Content(role="model", parts=[old_part]),
+          Content(role="user", parts=[Part.from_text(text="tool result")]),
+          Content(role="model", parts=[newer_part, latest_part]),
+      ],
+  )
+
+  with mock.patch.object(gemini_llm, "api_client") as mock_client:
+
+    async def mock_coro():
+      return generate_content_response
+
+    mock_client.aio.models.generate_content.return_value = mock_coro()
+
+    responses = [
+        resp
+        async for resp in gemini_llm.generate_content_async(
+            llm_request, stream=False
+        )
+    ]
+
+  assert len(responses) == 1
+  request_contents = mock_client.aio.models.generate_content.call_args.kwargs[
+      "contents"
+  ]
+  assert request_contents[0].parts[0].thought_signature is None
+  assert request_contents[2].parts[0].thought_signature is None
+  assert request_contents[2].parts[1].thought_signature == b"latest"
+
+
 @pytest.mark.asyncio
 async def test_generate_content_async_stream(gemini_llm, llm_request):
   with mock.patch.object(gemini_llm, "api_client") as mock_client: