MoonshotAI · logicwu0 · Jun 26, 2026 · devin-ai-integration · Jun 26, 2026
diff --git a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
@@ -138,6 +138,14 @@ async def generate(
             if has_think_part:
                 reasoning_effort = "medium"
 
+        # `with_thinking("off")` resolves to `None`, but passing an explicit `None` makes the
+        # OpenAI SDK serialize `"reasoning_effort": null`. That is invalid in the chat-completions
+        # schema: strict validators reject it (HTTP 400 -> retry/rate-limit loop) and lenient
+        # backends treat it as "reasoning on by default". Use the `omit` sentinel so the field is
+        # dropped from the payload instead. See: https://github.com/MoonshotAI/kimi-cli/issues/2465
+        if reasoning_effort is None:
+            reasoning_effort = omit
+
         try:
             response = await self.client.chat.completions.create(
                 model=self.model,

diff --git a/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py b/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
@@ -342,6 +342,27 @@ async def test_openai_legacy_with_thinking():
         assert body["reasoning_effort"] == snapshot("high")
 
 
+async def test_openai_legacy_with_thinking_off_omits_reasoning_effort():
+    """Omit `reasoning_effort` from the request body when thinking is turned off.
+
+    `with_thinking("off")` must not send `reasoning_effort: null`: strict OpenAI-compatible
+    validators reject it (HTTP 400) and lenient backends treat it as "reasoning on".
+    Reproduces: https://github.com/MoonshotAI/kimi-cli/issues/2465
+    """
+    with respx.mock(base_url="https://api.openai.com") as mock:
+        mock.post("/v1/chat/completions").mock(
+            return_value=Response(200, json=make_chat_completion_response())
+        )
+        provider = OpenAILegacy(model="gpt-4.1", api_key="test-key", stream=False).with_thinking(
+            "off"
+        )
+        stream = await provider.generate("", [], [Message(role="user", content="Hi")])
+        async for _ in stream:  # drain the result; only the captured request is inspected
+            pass
+        body = json.loads(mock.calls.last.request.content.decode())  # last request's payload
+        assert "reasoning_effort" not in body
+
+
 async def test_openai_legacy_auto_reasoning_effort_when_history_has_think_part():
     """When reasoning_effort is not set but history contains ThinkPart and reasoning_key is
     configured, reasoning_effort should be auto-set to avoid server validation errors.