tinyhumansai · staimoorulhassan · May 28, 2026 · May 28, 2026
@@ -583,9 +583,34 @@ impl OpenAiCompatibleProvider {
                                         content,
                                         tool_call_id: None,
                                         tool_calls: Some(tool_calls),
+                                        reasoning_content: None,
                                     };
                                 }
                             }
+
+                            // Thinking-mode assistant messages are stored as JSON
+                            // {"content": "…", "reasoning_content": "…"} so the
+                            // reasoning content survives across turns and can be
+                            // passed back to the API as required by DeepSeek-R1,
+                            // Qwen3 thinking, and similar models.
+                            if let Some(rc) = value
+                                .get("reasoning_content")
+                                .and_then(serde_json::Value::as_str)
+                                .filter(|s| !s.is_empty())
+                            {
+                                let content = value
+                                    .get("content")
+                                    .and_then(serde_json::Value::as_str)
+                                    .filter(|s| !s.is_empty())
+                                    .map(ToString::to_string);
+                                return NativeMessage {
+                                    role: "assistant".to_string(),
+                                    content,
+                                    tool_call_id: None,
+                                    tool_calls: None,
+                                    reasoning_content: Some(rc.to_string()),
+                                };
+                            }
                         }
                     }
 
@@ -608,6 +633,7 @@ impl OpenAiCompatibleProvider {
                                 content,
                                 tool_call_id,
                                 tool_calls: None,
+                                reasoning_content: None,
                             };
                         }
                     }
@@ -617,6 +643,7 @@ impl OpenAiCompatibleProvider {
                         content: Some(message.content.clone()),
                         tool_call_id: None,
                         tool_calls: None,
+                        reasoning_content: None,
                     }
                 })
                 .collect();
@@ -813,6 +840,29 @@ impl OpenAiCompatibleProvider {
             }
         }
 
+        // When the model returned reasoning_content (thinking mode) and there are
+        // no tool calls, encode both fields as JSON so the next conversation turn
+        // can pass reasoning_content back — required by DeepSeek-R1, Qwen3, and
+        // other thinking-mode models that return 400 if it is omitted.
+        if tool_calls.is_empty() {
+            let reasoning = message
+                .reasoning_content
+                .as_deref()
+                .filter(|s| !s.is_empty());
+            if let Some(rc) = reasoning {
+                let json = serde_json::json!({
+                    "content": message.content.as_deref().unwrap_or(""),
+                    "reasoning_content": rc,
+                });
+                text = Some(json.to_string());
+                log::debug!(
+                    "[provider:{}] preserving reasoning_content ({} chars) for multi-turn replay",
+                    provider_name,
+                    rc.chars().count(), // str::len() is bytes; the message reports chars
+                );
+            }
+        }
+
         Ok(ProviderChatResponse {
             text,
             tool_calls,

@@ -415,6 +415,7 @@ async fn streaming_chat_config_rejection_propagates_error_without_sentry_report(
             content: Some("hello".to_string()),
             tool_call_id: None,
             tool_calls: None,
+            reasoning_content: None,
         }],
         temperature: Some(0.7),
         stream: Some(true),
@@ -1556,3 +1557,84 @@ fn enrich_404_message_adds_hint_when_no_fallback() {
         "must not add hint when fallback is enabled: {result_with_fallback}"
     );
 }
+
+// ── Issue #2800: reasoning_content multi-turn replay ─────────────────────────
+
+#[test]
+fn parse_native_response_preserves_reasoning_content_as_json() {
+    // A thinking-mode reply carries an answer plus its reasoning; the response
+    // text must be a JSON object holding both so a later turn can replay them.
+    let answer = "The answer is 42.";
+    let thoughts = "Let me think step by step...";
+    let wrapped = wrap_message(ResponseMessage {
+        content: Some(answer.to_string()),
+        reasoning_content: Some(thoughts.to_string()),
+        tool_calls: None,
+        function_call: None,
+    });
+    let parsed = OpenAiCompatibleProvider::parse_native_response(wrapped, "deepseek").unwrap();
+
+    let body = parsed.text.expect("should have text");
+    let json: serde_json::Value = serde_json::from_str(&body).expect("text should be valid JSON");
+    assert_eq!(json["content"], answer);
+    assert_eq!(json["reasoning_content"], thoughts);
+    assert!(parsed.tool_calls.is_empty());
+}
+
+#[test]
+fn parse_native_response_no_reasoning_content_returns_plain_text() {
+    // A reply with no reasoning_content must pass through untouched: the text
+    // is the bare content string, never a JSON envelope.
+    let plain = wrap_message(ResponseMessage {
+        content: Some("Hello world".to_string()),
+        reasoning_content: None,
+        tool_calls: None,
+        function_call: None,
+    });
+    let parsed = OpenAiCompatibleProvider::parse_native_response(plain, "openai").unwrap();
+
+    assert_eq!(parsed.text.as_deref(), Some("Hello world"));
+}
+
+#[test]
+fn convert_messages_for_native_restores_reasoning_content() {
+    // History stores a thinking-mode assistant turn as a JSON envelope. The
+    // conversion must unpack it so the outgoing NativeMessage carries
+    // reasoning_content as a separate field next to the plain content.
+    let answer = "The answer is 42.";
+    let thoughts = "Let me think step by step...";
+    let envelope = serde_json::json!({
+        "content": answer,
+        "reasoning_content": thoughts,
+    });
+
+    let history = vec![
+        ChatMessage::user("What is the meaning of life?"),
+        ChatMessage::assistant(envelope.to_string()),
+    ];
+    let converted = OpenAiCompatibleProvider::convert_messages_for_native(&history);
+
+    // Both turns survive, and only the assistant turn is inspected below.
+    assert_eq!(converted.len(), 2);
+    let reply = &converted[1];
+    assert_eq!(reply.role, "assistant");
+
+    // The envelope is split back into its two parts; no tool calls appear.
+    assert_eq!(reply.content.as_deref(), Some(answer));
+    assert_eq!(reply.reasoning_content.as_deref(), Some(thoughts));
+    assert!(reply.tool_calls.is_none());
+}
+
+#[test]
+fn convert_messages_for_native_plain_text_has_no_reasoning_content() {
+    // An ordinary assistant reply is not a JSON envelope, so conversion must
+    // leave reasoning_content unset rather than inventing one.
+    let history = vec![
+        ChatMessage::user("Hi"),
+        ChatMessage::assistant("Hello there!"),
+    ];
+    let converted = OpenAiCompatibleProvider::convert_messages_for_native(&history);
+
+    let reply = &converted[1];
+    assert!(reply.reasoning_content.is_none());
+}
@@ -77,6 +77,11 @@ pub(crate) struct NativeMessage {
     pub(crate) tool_call_id: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub(crate) tool_calls: Option<Vec<ToolCall>>,
+    /// Thinking/reasoning content from models that support extended reasoning
+    /// (e.g. DeepSeek-R1, Qwen3 in thinking mode). Required by the API on
+    /// subsequent turns — must be passed back verbatim alongside `content`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub(crate) reasoning_content: Option<String>,
 }
 
 #[derive(Debug, Serialize)]