From 31eaa8b6be26b02ec6d7b1b8d365b50125f6e883 Mon Sep 17 00:00:00 2001
From: "logic.wu0" <605524858@qq.com>
Date: Fri, 26 Jun 2026 13:07:25 +0800
Subject: [PATCH] fix(kosong): omit reasoning_effort instead of sending null
 when thinking is off

OpenAILegacy.with_thinking("off") maps to a Python None, which was passed straight
to client.chat.completions.create(). The OpenAI SDK serializes an explicit None as
"reasoning_effort": null, which is invalid in the chat-completions schema: strict
validators reject it with HTTP 400 (driving a retry/rate-limit loop) and lenient
backends treat null as "reasoning on by default", so thinking is not actually disabled.

Normalize a None reasoning_effort to the SDK `omit` sentinel in generate() so the field
is dropped from the payload. Add a regression test asserting reasoning_effort is absent
from the request body when thinking is off.

Closes #2465
---
 .../contrib/chat_provider/openai_legacy.py    |  8 +++++++
 .../api_snapshot_tests/test_openai_legacy.py  | 21 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
index 2855fd6ab..fd8a682db 100644
--- a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
+++ b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
@@ -138,6 +138,14 @@ async def generate(
             if has_think_part:
                 reasoning_effort = "medium"
 
+        # `with_thinking("off")` resolves to `None`, but passing an explicit `None` makes the
+        # OpenAI SDK serialize `"reasoning_effort": null`. That is invalid in the chat-completions
+        # schema: strict validators reject it (HTTP 400 -> retry/rate-limit loop) and lenient
+        # backends treat it as "reasoning on by default". Use the `omit` sentinel so the field is
+        # dropped from the payload instead. See: https://github.com/MoonshotAI/kimi-cli/issues/2465
+        if reasoning_effort is None:
+            reasoning_effort = omit
+
         try:
             response = await self.client.chat.completions.create(
                 model=self.model,
diff --git a/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py b/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
index 78114ff9a..8a81de07f 100644
--- a/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
+++ b/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
@@ -342,6 +342,27 @@ async def test_openai_legacy_with_thinking():
         assert body["reasoning_effort"] == snapshot("high")
 
 
+async def test_openai_legacy_with_thinking_off_omits_reasoning_effort():
+    """Ensure `with_thinking("off")` omits `reasoning_effort` from the request body.
+
+    Sending `reasoning_effort: null` is rejected by strict OpenAI-compatible validators
+    (HTTP 400) and treated as "reasoning on" by lenient backends.
+    Reproduces: https://github.com/MoonshotAI/kimi-cli/issues/2465
+    """
+    with respx.mock(base_url="https://api.openai.com") as mock:
+        mock.post("/v1/chat/completions").mock(
+            return_value=Response(200, json=make_chat_completion_response())
+        )
+        provider = OpenAILegacy(model="gpt-4.1", api_key="test-key", stream=False).with_thinking(
+            "off"
+        )
+        stream = await provider.generate("", [], [Message(role="user", content="Hi")])
+        async for _ in stream:
+            pass
+        body = json.loads(mock.calls.last.request.content.decode())
+        assert "reasoning_effort" not in body
+
+
 async def test_openai_legacy_auto_reasoning_effort_when_history_has_think_part():
     """When reasoning_effort is not set but history contains ThinkPart and reasoning_key is
     configured, reasoning_effort should be auto-set to avoid server validation errors.