From 31eaa8b6be26b02ec6d7b1b8d365b50125f6e883 Mon Sep 17 00:00:00 2001
From: "logic.wu0" <605524858@qq.com>
Date: Fri, 26 Jun 2026 13:07:25 +0800
Subject: [PATCH] fix(kosong): omit reasoning_effort instead of sending null when thinking is off

OpenAILegacy.with_thinking("off") maps to a Python None, which was
passed straight to client.chat.completions.create(). The OpenAI SDK
serializes an explicit None as "reasoning_effort": null, which is
invalid in the chat-completions schema: strict validators reject it
with HTTP 400 (driving a retry/rate-limit loop) and lenient backends
treat null as "reasoning on by default", so thinking is not actually
disabled.

Normalize a None reasoning_effort to the SDK omit sentinel in
generate() so the field is dropped from the payload. Adds a snapshot
test asserting reasoning_effort is absent from the request body when
thinking is off.

Closes #2465
---
 .../contrib/chat_provider/openai_legacy.py    |  8 +++++++
 .../api_snapshot_tests/test_openai_legacy.py  | 21 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
index 2855fd6ab..fd8a682db 100644
--- a/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
+++ b/packages/kosong/src/kosong/contrib/chat_provider/openai_legacy.py
@@ -138,6 +138,14 @@ async def generate(
             if has_think_part:
                 reasoning_effort = "medium"
 
+        # `with_thinking("off")` resolves to `None`, but passing an explicit `None` makes the
+        # OpenAI SDK serialize `"reasoning_effort": null`. That is invalid in the chat-completions
+        # schema: strict validators reject it (HTTP 400 -> retry/rate-limit loop) and lenient
+        # backends treat it as "reasoning on by default". Use the `omit` sentinel so the field is
+        # dropped from the payload instead. See: https://github.com/MoonshotAI/kimi-cli/issues/2465
+        if reasoning_effort is None:
+            reasoning_effort = omit
+
         try:
             response = await self.client.chat.completions.create(
                 model=self.model,
diff --git a/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py b/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
index 78114ff9a..8a81de07f 100644
--- a/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
+++ b/packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
@@ -342,6 +342,27 @@ async def test_openai_legacy_with_thinking():
         assert body["reasoning_effort"] == snapshot("high")
 
 
+async def test_openai_legacy_with_thinking_off_omits_reasoning_effort():
+    """`with_thinking("off")` must omit reasoning_effort from the payload rather than send
+    `reasoning_effort: null`, which strict OpenAI-compatible validators reject (HTTP 400) and
+    lenient backends treat as "reasoning on".
+
+    Reproduces: https://github.com/MoonshotAI/kimi-cli/issues/2465
+    """
+    with respx.mock(base_url="https://api.openai.com") as mock:
+        mock.post("/v1/chat/completions").mock(
+            return_value=Response(200, json=make_chat_completion_response())
+        )
+        provider = OpenAILegacy(model="gpt-4.1", api_key="test-key", stream=False).with_thinking(
+            "off"
+        )
+        stream = await provider.generate("", [], [Message(role="user", content="Hi")])
+        async for _ in stream:
+            pass
+        body = json.loads(mock.calls.last.request.content.decode())
+        assert "reasoning_effort" not in body
+
+
 async def test_openai_legacy_auto_reasoning_effort_when_history_has_think_part():
     """When reasoning_effort is not set but history contains ThinkPart and reasoning_key
     is configured, reasoning_effort should be auto-set to avoid server validation errors.