Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ async def generate(
if has_think_part:
reasoning_effort = "medium"

# `with_thinking("off")` resolves to `None`, but passing an explicit `None` makes the
# OpenAI SDK serialize `"reasoning_effort": null`. That is invalid in the chat-completions
# schema: strict validators reject it (HTTP 400 -> retry/rate-limit loop) and lenient
# backends treat it as "reasoning on by default". Use the `omit` sentinel so the field is
# dropped from the payload instead. See: https://github.com/MoonshotAI/kimi-cli/issues/2465
if reasoning_effort is None:
reasoning_effort = omit
Comment on lines +146 to +147

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Kimi provider may have the same null reasoning_effort serialization issue

The Kimi provider's `with_thinking("off")` also sets `reasoning_effort=None` (kimi.py:199), and this value is passed via `**generation_kwargs` to `self.client.chat.completions.create()` at kimi.py:170-177. Unlike OpenAILegacy, there is no `None` → `omit` guard there, so `"reasoning_effort": null` will appear in the Kimi API request payload. This may be acceptable because the Kimi API endpoint handles it differently (and `extra_body.thinking.type: "disabled"` already signals the intent), but it is inconsistent with the fix applied here.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


try:
response = await self.client.chat.completions.create(
model=self.model,
Expand Down
21 changes: 21 additions & 0 deletions packages/kosong/tests/api_snapshot_tests/test_openai_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,27 @@ async def test_openai_legacy_with_thinking():
assert body["reasoning_effort"] == snapshot("high")


async def test_openai_legacy_with_thinking_off_omits_reasoning_effort():
    """Check that `with_thinking("off")` leaves `reasoning_effort` out of the request body.

    Sending `reasoning_effort: null` is rejected with HTTP 400 by strict OpenAI-compatible
    validators and interpreted as "reasoning on" by lenient backends, so the key must be
    absent from the serialized payload altogether.

    Regression test for https://github.com/MoonshotAI/kimi-cli/issues/2465
    """
    with respx.mock(base_url="https://api.openai.com") as router:
        # Stub the chat-completions endpoint with a canned successful response.
        completions_route = router.post("/v1/chat/completions")
        completions_route.mock(
            return_value=Response(200, json=make_chat_completion_response())
        )

        base_provider = OpenAILegacy(model="gpt-4.1", api_key="test-key", stream=False)
        provider = base_provider.with_thinking("off")

        history = [Message(role="user", content="Hi")]
        stream = await provider.generate("", [], history)
        # Drain the stream; only the outgoing request is inspected below.
        async for _chunk in stream:
            pass

        # Inspect the JSON payload of the most recent request captured by the router.
        raw_payload = router.calls.last.request.content.decode()
        body = json.loads(raw_payload)
        assert "reasoning_effort" not in body


async def test_openai_legacy_auto_reasoning_effort_when_history_has_think_part():
"""When reasoning_effort is not set but history contains ThinkPart and reasoning_key is
configured, reasoning_effort should be auto-set to avoid server validation errors.
Expand Down
Loading