From f6ec9b116838c3a0ef8d7e04fab0f2fe69cde40e Mon Sep 17 00:00:00 2001 From: Taimoor Date: Thu, 28 May 2026 09:27:07 +0500 Subject: [PATCH 1/2] fix(inference): preserve reasoning_content across multi-turn conversations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thinking-mode models (DeepSeek-R1, Qwen3, etc.) return a reasoning_content field that the API requires to be passed back verbatim on subsequent turns. Previously the field was discarded after the first response, causing every follow-up turn to fail with 400 "reasoning_content must be passed back". Two changes together fix the round-trip: 1. parse_native_response: when the response has reasoning_content and no tool calls, encode both content and reasoning_content as a JSON object in the returned text (matching the existing pattern for tool-call messages). The JSON is transparent to callers — they store it as the ChatMessage content unchanged. 2. convert_messages_for_native: detect the {"content":..,"reasoning_content":..} JSON envelope in stored assistant messages and unpack it into a NativeMessage with the reasoning_content field set. This ensures the field appears in the outbound API payload on the next turn. NativeMessage gains a new optional reasoning_content field with skip_serializing_if = "Option::is_none" so it is only emitted when present and never breaks vanilla providers that do not understand the field. Four unit tests cover: response preservation, plain-text pass-through, round-trip restoration in convert_messages_for_native, and no spurious field for non-thinking models. Fixes #2800 --- .../inference/provider/compatible.rs | 50 +++++++++++ .../inference/provider/compatible_tests.rs | 89 +++++++++++++++++++ .../inference/provider/compatible_types.rs | 5 ++ 3 files changed, 144 insertions(+) diff --git a/src/openhuman/inference/provider/compatible.rs b/src/openhuman/inference/provider/compatible.rs index f2789c296a..e235f02611 100644 --- a/src/openhuman/inference/provider/compatible.rs +++ b/src/openhuman/inference/provider/compatible.rs @@ -583,9 +583,34 @@ impl OpenAiCompatibleProvider { content, tool_call_id: None, tool_calls: Some(tool_calls), + reasoning_content: None, }; } } + + // Thinking-mode assistant messages are stored as JSON + // {"content": "…", "reasoning_content": "…"} so the + // reasoning content survives across turns and can be + // passed back to the API as required by DeepSeek-R1, + // Qwen3 thinking, and similar models. + if let Some(rc) = value + .get("reasoning_content") + .and_then(serde_json::Value::as_str) + .filter(|s| !s.is_empty()) + { + let content = value + .get("content") + .and_then(serde_json::Value::as_str) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + return NativeMessage { + role: "assistant".to_string(), + content, + tool_call_id: None, + tool_calls: None, + reasoning_content: Some(rc.to_string()), + }; + } } } @@ -608,6 +633,7 @@ impl OpenAiCompatibleProvider { content, tool_call_id, tool_calls: None, + reasoning_content: None, }; } } @@ -617,6 +643,7 @@ impl OpenAiCompatibleProvider { content: Some(message.content.clone()), tool_call_id: None, tool_calls: None, + reasoning_content: None, } }) .collect(); @@ -813,6 +840,29 @@ impl OpenAiCompatibleProvider { } } + // When the model returned reasoning_content (thinking mode) and there are + // no tool calls, encode both fields as JSON so the next conversation turn + // can pass reasoning_content back — required by DeepSeek-R1, Qwen3, and + // other thinking-mode models that return 400 if it is omitted. + if tool_calls.is_empty() { + let reasoning = message + .reasoning_content + .as_deref() + .filter(|s| !s.is_empty()); + if let Some(rc) = reasoning { + let json = serde_json::json!({ + "content": message.content.as_deref().unwrap_or(""), + "reasoning_content": rc, + }); + text = Some(json.to_string()); + log::debug!( + "[provider:{}] preserving reasoning_content ({} chars) for multi-turn replay", + provider_name, + rc.len(), + ); + } + } + Ok(ProviderChatResponse { text, tool_calls, diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs index 3e1b7dbff1..5b13262800 100644 --- a/src/openhuman/inference/provider/compatible_tests.rs +++ b/src/openhuman/inference/provider/compatible_tests.rs @@ -415,6 +415,7 @@ async fn streaming_chat_config_rejection_propagates_error_without_sentry_report( content: Some("hello".to_string()), tool_call_id: None, tool_calls: None, + reasoning_content: None, }], temperature: Some(0.7), stream: Some(true), @@ -1556,3 +1557,91 @@ fn enrich_404_message_adds_hint_when_no_fallback() { "must not add hint when fallback is enabled: {result_with_fallback}" ); } + +// ── Issue #2800: reasoning_content multi-turn replay ───────────────────────── + +#[test] +fn parse_native_response_preserves_reasoning_content_as_json() { + // When a thinking model returns both content and reasoning_content, the + // response text should be JSON so the next turn can pass reasoning_content + // back to the API. + let msg = ResponseMessage { + content: Some("The answer is 42.".to_string()), + reasoning_content: Some("Let me think step by step...".to_string()), + tool_calls: None, + function_call: None, + }; + let resp = OpenAiCompatibleProvider::parse_native_response( + wrap_message(msg), + "deepseek", + ) + .unwrap(); + + let text = resp.text.expect("should have text"); + let parsed: serde_json::Value = + serde_json::from_str(&text).expect("text should be valid JSON"); + assert_eq!(parsed["content"], "The answer is 42."); + assert_eq!(parsed["reasoning_content"], "Let me think step by step..."); + assert!(resp.tool_calls.is_empty()); +} + +#[test] +fn parse_native_response_no_reasoning_content_returns_plain_text() { + // Without reasoning_content the text should be the plain string, not JSON. + let msg = ResponseMessage { + content: Some("Hello world".to_string()), + reasoning_content: None, + tool_calls: None, + function_call: None, + }; + let resp = OpenAiCompatibleProvider::parse_native_response( + wrap_message(msg), + "openai", + ) + .unwrap(); + + assert_eq!(resp.text.as_deref(), Some("Hello world")); +} + +#[test] +fn convert_messages_for_native_restores_reasoning_content() { + // A stored assistant message with JSON-encoded reasoning_content must be + // expanded back into a NativeMessage with the reasoning_content field set, + // so the next API call carries it as required by the spec. + let stored_content = serde_json::json!({ + "content": "The answer is 42.", + "reasoning_content": "Let me think step by step...", + }) + .to_string(); + + let messages = vec![ + ChatMessage::user("What is the meaning of life?"), + ChatMessage::assistant(stored_content), + ]; + + let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages); + assert_eq!(native.len(), 2); + + let asst = &native[1]; + assert_eq!(asst.role, "assistant"); + assert_eq!(asst.content.as_deref(), Some("The answer is 42.")); + assert_eq!( + asst.reasoning_content.as_deref(), + Some("Let me think step by step...") + ); + assert!(asst.tool_calls.is_none()); +} + +#[test] +fn convert_messages_for_native_plain_text_has_no_reasoning_content() { + // A regular (non-thinking-model) assistant message must not gain a + // spurious reasoning_content field. + let messages = vec![ + ChatMessage::user("Hi"), + ChatMessage::assistant("Hello there!"), + ]; + + let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages); + let asst = &native[1]; + assert!(asst.reasoning_content.is_none()); +} diff --git a/src/openhuman/inference/provider/compatible_types.rs b/src/openhuman/inference/provider/compatible_types.rs index 25ceff338f..d7429f4dbd 100644 --- a/src/openhuman/inference/provider/compatible_types.rs +++ b/src/openhuman/inference/provider/compatible_types.rs @@ -77,6 +77,11 @@ pub(crate) struct NativeMessage { pub(crate) tool_call_id: Option, #[serde(skip_serializing_if = "Option::is_none")] pub(crate) tool_calls: Option>, + /// Thinking/reasoning content from models that support extended reasoning + /// (e.g. DeepSeek-R1, Qwen3 in thinking mode). Required by the API on + /// subsequent turns — must be passed back verbatim alongside `content`. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) reasoning_content: Option, } #[derive(Debug, Serialize)] From b129db888b798c76e6457650dab00866146ff6f1 Mon Sep 17 00:00:00 2001 From: Taimoor Date: Thu, 28 May 2026 11:36:39 +0500 Subject: [PATCH 2/2] style: fix cargo fmt violations in reasoning_content test code --- .../inference/provider/compatible_tests.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs index 5b13262800..99cb68b269 100644 --- a/src/openhuman/inference/provider/compatible_tests.rs +++ b/src/openhuman/inference/provider/compatible_tests.rs @@ -1571,15 +1571,11 @@ fn parse_native_response_preserves_reasoning_content_as_json() { tool_calls: None, function_call: None, }; - let resp = OpenAiCompatibleProvider::parse_native_response( - wrap_message(msg), - "deepseek", - ) - .unwrap(); + let resp = + OpenAiCompatibleProvider::parse_native_response(wrap_message(msg), "deepseek").unwrap(); let text = resp.text.expect("should have text"); - let parsed: serde_json::Value = - serde_json::from_str(&text).expect("text should be valid JSON"); + let parsed: serde_json::Value = serde_json::from_str(&text).expect("text should be valid JSON"); assert_eq!(parsed["content"], "The answer is 42."); assert_eq!(parsed["reasoning_content"], "Let me think step by step..."); assert!(resp.tool_calls.is_empty()); @@ -1594,11 +1590,8 @@ fn parse_native_response_no_reasoning_content_returns_plain_text() { tool_calls: None, function_call: None, }; - let resp = OpenAiCompatibleProvider::parse_native_response( - wrap_message(msg), - "openai", - ) - .unwrap(); + let resp = + OpenAiCompatibleProvider::parse_native_response(wrap_message(msg), "openai").unwrap(); assert_eq!(resp.text.as_deref(), Some("Hello world")); }