From f6ec9b116838c3a0ef8d7e04fab0f2fe69cde40e Mon Sep 17 00:00:00 2001
From: Taimoor <astikkosapparel009@gmail.com>
Date: Thu, 28 May 2026 09:27:07 +0500
Subject: [PATCH 1/2] fix(inference): preserve reasoning_content across
 multi-turn conversations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thinking-mode models (DeepSeek-R1, Qwen3, etc.) return a reasoning_content
field that the API requires to be passed back verbatim on subsequent turns.
Previously the field was discarded after the first response, causing every
follow-up turn to fail with 400 "reasoning_content must be passed back".

Two changes together fix the round-trip:

1. parse_native_response: when the response has reasoning_content and no
   tool calls, encode both content and reasoning_content as a JSON object
   in the returned text (matching the existing pattern for tool-call
   messages). Callers need no changes to persist it — they store the
   returned text as the ChatMessage content unchanged — but for such
   responses the text they receive is a JSON envelope, not plain prose.

2. convert_messages_for_native: detect the {"content":..,"reasoning_content":..}
   JSON envelope in stored assistant messages and unpack it into a
   NativeMessage with the reasoning_content field set. This ensures the
   field appears in the outbound API payload on the next turn.

NativeMessage gains a new optional reasoning_content field with
skip_serializing_if = "Option::is_none" so it is only emitted when present
and never breaks vanilla providers that do not understand the field.

Four unit tests cover: response preservation, plain-text pass-through,
round-trip restoration in convert_messages_for_native, and no spurious
field for non-thinking models.

Fixes #2800
---
 .../inference/provider/compatible.rs          | 50 +++++++++++
 .../inference/provider/compatible_tests.rs    | 89 +++++++++++++++++++
 .../inference/provider/compatible_types.rs    |  5 ++
 3 files changed, 144 insertions(+)

diff --git a/src/openhuman/inference/provider/compatible.rs b/src/openhuman/inference/provider/compatible.rs
index f2789c296a..e235f02611 100644
--- a/src/openhuman/inference/provider/compatible.rs
+++ b/src/openhuman/inference/provider/compatible.rs
@@ -583,9 +583,34 @@ impl OpenAiCompatibleProvider {
                                         content,
                                         tool_call_id: None,
                                         tool_calls: Some(tool_calls),
+                                        reasoning_content: None,
                                     };
                                 }
                             }
+
+                            // Thinking-mode assistant turns are stored as JSON
+                            // {"content": "…", "reasoning_content": "…"}. A non-empty
+                            // string reasoning_content is unpacked here so it can be
+                            // passed back to the API as required by DeepSeek-R1, Qwen3
+                            // thinking, etc.; empty or missing "content" becomes None.
+                            if let Some(rc) = value
+                                .get("reasoning_content")
+                                .and_then(serde_json::Value::as_str)
+                                .filter(|s| !s.is_empty())
+                            {
+                                let content = value
+                                    .get("content")
+                                    .and_then(serde_json::Value::as_str)
+                                    .filter(|s| !s.is_empty())
+                                    .map(ToString::to_string);
+                                return NativeMessage {
+                                    role: "assistant".to_string(),
+                                    content,
+                                    tool_call_id: None,
+                                    tool_calls: None,
+                                    reasoning_content: Some(rc.to_string()),
+                                };
+                            }
                         }
                     }
 
@@ -608,6 +633,7 @@ impl OpenAiCompatibleProvider {
                                 content,
                                 tool_call_id,
                                 tool_calls: None,
+                                reasoning_content: None,
                             };
                         }
                     }
@@ -617,6 +643,7 @@ impl OpenAiCompatibleProvider {
                         content: Some(message.content.clone()),
                         tool_call_id: None,
                         tool_calls: None,
+                        reasoning_content: None,
                     }
                 })
                 .collect();
@@ -813,6 +840,29 @@ impl OpenAiCompatibleProvider {
             }
         }
 
+        // When the model returned a non-empty reasoning_content (thinking mode)
+        // and there are no tool calls, replace `text` with a JSON envelope holding
+        // both fields so the next conversation turn can pass reasoning_content
+        // back — DeepSeek-R1, Qwen3, and similar models return 400 if it is omitted.
+        if tool_calls.is_empty() {
+            let reasoning = message
+                .reasoning_content
+                .as_deref()
+                .filter(|s| !s.is_empty());
+            if let Some(rc) = reasoning {
+                let json = serde_json::json!({
+                    "content": message.content.as_deref().unwrap_or(""),
+                    "reasoning_content": rc,
+                });
+                text = Some(json.to_string());
+                log::debug!(
+                    "[provider:{}] preserving reasoning_content ({} chars) for multi-turn replay",
+                    provider_name,
+                    rc.chars().count(), // chars, not bytes: `len()` counts UTF-8 bytes
+                );
+            }
+        }
+
         Ok(ProviderChatResponse {
             text,
             tool_calls,
diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs
index 3e1b7dbff1..5b13262800 100644
--- a/src/openhuman/inference/provider/compatible_tests.rs
+++ b/src/openhuman/inference/provider/compatible_tests.rs
@@ -415,6 +415,7 @@ async fn streaming_chat_config_rejection_propagates_error_without_sentry_report(
             content: Some("hello".to_string()),
             tool_call_id: None,
             tool_calls: None,
+            reasoning_content: None,
         }],
         temperature: Some(0.7),
         stream: Some(true),
@@ -1556,3 +1557,91 @@ fn enrich_404_message_adds_hint_when_no_fallback() {
         "must not add hint when fallback is enabled: {result_with_fallback}"
     );
 }
+
+// ── Issue #2800: reasoning_content multi-turn replay ─────────────────────────
+
+#[test]
+fn parse_native_response_preserves_reasoning_content_as_json() {
+    // A response with non-empty reasoning_content and no tool calls must come
+    // back with `text` set to a JSON envelope carrying both "content" and
+    // "reasoning_content", so a later turn can replay the reasoning to the API.
+    let msg = ResponseMessage {
+        content: Some("The answer is 42.".to_string()),
+        reasoning_content: Some("Let me think step by step...".to_string()),
+        tool_calls: None,
+        function_call: None,
+    };
+    let resp = OpenAiCompatibleProvider::parse_native_response(
+        wrap_message(msg),
+        "deepseek",
+    )
+    .unwrap();
+
+    let text = resp.text.expect("should have text");
+    let parsed: serde_json::Value =
+        serde_json::from_str(&text).expect("text should be valid JSON");
+    assert_eq!(parsed["content"], "The answer is 42.");
+    assert_eq!(parsed["reasoning_content"], "Let me think step by step...");
+    assert!(resp.tool_calls.is_empty());
+}
+
+#[test]
+fn parse_native_response_no_reasoning_content_returns_plain_text() {
+    // With reasoning_content absent, `text` must stay the raw content string (no JSON envelope).
+    let msg = ResponseMessage {
+        content: Some("Hello world".to_string()),
+        reasoning_content: None,
+        tool_calls: None,
+        function_call: None,
+    };
+    let resp = OpenAiCompatibleProvider::parse_native_response(
+        wrap_message(msg),
+        "openai",
+    )
+    .unwrap();
+
+    assert_eq!(resp.text.as_deref(), Some("Hello world"));
+}
+
+#[test]
+fn convert_messages_for_native_restores_reasoning_content() {
+    // An assistant ChatMessage whose content is the JSON envelope must be
+    // unpacked into a NativeMessage: `content` holds the inner text and
+    // `reasoning_content` is set, so the next request sends it back to the API.
+    let stored_content = serde_json::json!({
+        "content": "The answer is 42.",
+        "reasoning_content": "Let me think step by step...",
+    })
+    .to_string();
+
+    let messages = vec![
+        ChatMessage::user("What is the meaning of life?"),
+        ChatMessage::assistant(stored_content),
+    ];
+
+    let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages);
+    assert_eq!(native.len(), 2);
+
+    let asst = &native[1];
+    assert_eq!(asst.role, "assistant");
+    assert_eq!(asst.content.as_deref(), Some("The answer is 42."));
+    assert_eq!(
+        asst.reasoning_content.as_deref(),
+        Some("Let me think step by step...")
+    );
+    assert!(asst.tool_calls.is_none());
+}
+
+#[test]
+fn convert_messages_for_native_plain_text_has_no_reasoning_content() {
+    // Plain-text assistant content (not a JSON envelope) must convert with
+    // reasoning_content left as None.
+    let messages = vec![
+        ChatMessage::user("Hi"),
+        ChatMessage::assistant("Hello there!"),
+    ];
+
+    let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages);
+    let asst = &native[1];
+    assert!(asst.reasoning_content.is_none());
+}
diff --git a/src/openhuman/inference/provider/compatible_types.rs b/src/openhuman/inference/provider/compatible_types.rs
index 25ceff338f..d7429f4dbd 100644
--- a/src/openhuman/inference/provider/compatible_types.rs
+++ b/src/openhuman/inference/provider/compatible_types.rs
@@ -77,6 +77,11 @@ pub(crate) struct NativeMessage {
     pub(crate) tool_call_id: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub(crate) tool_calls: Option<Vec<ToolCall>>,
+    /// Thinking/reasoning content from models that support extended reasoning
+    /// (e.g. DeepSeek-R1, Qwen3 in thinking mode). Required by the API on
+    /// subsequent turns — must be passed back verbatim alongside `content`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub(crate) reasoning_content: Option<String>,
 }
 
 #[derive(Debug, Serialize)]

From b129db888b798c76e6457650dab00866146ff6f1 Mon Sep 17 00:00:00 2001
From: Taimoor <astikkosapparel009@gmail.com>
Date: Thu, 28 May 2026 11:36:39 +0500
Subject: [PATCH 2/2] style: fix cargo fmt violations in reasoning_content test
 code

---
 .../inference/provider/compatible_tests.rs      | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs
index 5b13262800..99cb68b269 100644
--- a/src/openhuman/inference/provider/compatible_tests.rs
+++ b/src/openhuman/inference/provider/compatible_tests.rs
@@ -1571,15 +1571,11 @@ fn parse_native_response_preserves_reasoning_content_as_json() {
         tool_calls: None,
         function_call: None,
     };
-    let resp = OpenAiCompatibleProvider::parse_native_response(
-        wrap_message(msg),
-        "deepseek",
-    )
-    .unwrap();
+    let resp =
+        OpenAiCompatibleProvider::parse_native_response(wrap_message(msg), "deepseek").unwrap();
 
     let text = resp.text.expect("should have text");
-    let parsed: serde_json::Value =
-        serde_json::from_str(&text).expect("text should be valid JSON");
+    let parsed: serde_json::Value = serde_json::from_str(&text).expect("text should be valid JSON");
     assert_eq!(parsed["content"], "The answer is 42.");
     assert_eq!(parsed["reasoning_content"], "Let me think step by step...");
     assert!(resp.tool_calls.is_empty());
@@ -1594,11 +1590,8 @@ fn parse_native_response_no_reasoning_content_returns_plain_text() {
         tool_calls: None,
         function_call: None,
     };
-    let resp = OpenAiCompatibleProvider::parse_native_response(
-        wrap_message(msg),
-        "openai",
-    )
-    .unwrap();
+    let resp =
+        OpenAiCompatibleProvider::parse_native_response(wrap_message(msg), "openai").unwrap();
 
     assert_eq!(resp.text.as_deref(), Some("Hello world"));
 }