diff --git a/src/openhuman/inference/provider/compatible.rs b/src/openhuman/inference/provider/compatible.rs
index f2789c296a..e235f02611 100644
--- a/src/openhuman/inference/provider/compatible.rs
+++ b/src/openhuman/inference/provider/compatible.rs
@@ -583,9 +583,34 @@ impl OpenAiCompatibleProvider {
                                     content,
                                     tool_call_id: None,
                                     tool_calls: Some(tool_calls),
+                                    reasoning_content: None,
                                 };
                             }
                         }
+
+                        // Thinking-mode assistant messages are stored as JSON
+                        // {"content": "…", "reasoning_content": "…"} so the
+                        // reasoning content survives across turns and can be
+                        // passed back to the API as required by DeepSeek-R1,
+                        // Qwen3 thinking, and similar models.
+                        if let Some(rc) = value
+                            .get("reasoning_content")
+                            .and_then(serde_json::Value::as_str)
+                            .filter(|s| !s.is_empty())
+                        {
+                            let content = value
+                                .get("content")
+                                .and_then(serde_json::Value::as_str)
+                                .filter(|s| !s.is_empty())
+                                .map(ToString::to_string);
+                            return NativeMessage {
+                                role: "assistant".to_string(),
+                                content,
+                                tool_call_id: None,
+                                tool_calls: None,
+                                reasoning_content: Some(rc.to_string()),
+                            };
+                        }
                     }
                 }
 
@@ -608,6 +633,7 @@ impl OpenAiCompatibleProvider {
                             content,
                             tool_call_id,
                             tool_calls: None,
+                            reasoning_content: None,
                         };
                     }
                 }
@@ -617,6 +643,7 @@ impl OpenAiCompatibleProvider {
                     content: Some(message.content.clone()),
                     tool_call_id: None,
                     tool_calls: None,
+                    reasoning_content: None,
                 }
             })
             .collect();
@@ -813,6 +840,29 @@ impl OpenAiCompatibleProvider {
             }
         }
 
+        // When the model returned reasoning_content (thinking mode) and there are
+        // no tool calls, encode both fields as JSON so the next conversation turn
+        // can pass reasoning_content back — required by DeepSeek-R1, Qwen3, and
+        // other thinking-mode models that return 400 if it is omitted.
+        if tool_calls.is_empty() {
+            let reasoning = message
+                .reasoning_content
+                .as_deref()
+                .filter(|s| !s.is_empty());
+            if let Some(rc) = reasoning {
+                let json = serde_json::json!({
+                    "content": message.content.as_deref().unwrap_or(""),
+                    "reasoning_content": rc,
+                });
+                text = Some(json.to_string());
+                log::debug!(
+                    "[provider:{}] preserving reasoning_content ({} chars) for multi-turn replay",
+                    provider_name,
+                    rc.chars().count(),
+                );
+            }
+        }
+
         Ok(ProviderChatResponse {
             text,
             tool_calls,
diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs
index 3e1b7dbff1..99cb68b269 100644
--- a/src/openhuman/inference/provider/compatible_tests.rs
+++ b/src/openhuman/inference/provider/compatible_tests.rs
@@ -415,6 +415,7 @@ async fn streaming_chat_config_rejection_propagates_error_without_sentry_report(
             content: Some("hello".to_string()),
             tool_call_id: None,
             tool_calls: None,
+            reasoning_content: None,
         }],
         temperature: Some(0.7),
         stream: Some(true),
@@ -1556,3 +1557,84 @@ fn enrich_404_message_adds_hint_when_no_fallback() {
         "must not add hint when fallback is enabled: {result_with_fallback}"
     );
 }
+
+// ── Issue #2800: reasoning_content multi-turn replay ─────────────────────────
+
+#[test]
+fn parse_native_response_preserves_reasoning_content_as_json() {
+    // When a thinking model returns both content and reasoning_content, the
+    // response text should be JSON so the next turn can pass reasoning_content
+    // back to the API.
+    let msg = ResponseMessage {
+        content: Some("The answer is 42.".to_string()),
+        reasoning_content: Some("Let me think step by step...".to_string()),
+        tool_calls: None,
+        function_call: None,
+    };
+    let resp =
+        OpenAiCompatibleProvider::parse_native_response(wrap_message(msg), "deepseek").unwrap();
+
+    let text = resp.text.expect("should have text");
+    let parsed: serde_json::Value = serde_json::from_str(&text).expect("text should be valid JSON");
+    assert_eq!(parsed["content"], "The answer is 42.");
+    assert_eq!(parsed["reasoning_content"], "Let me think step by step...");
+    assert!(resp.tool_calls.is_empty());
+}
+
+#[test]
+fn parse_native_response_no_reasoning_content_returns_plain_text() {
+    // Without reasoning_content the text should be the plain string, not JSON.
+    let msg = ResponseMessage {
+        content: Some("Hello world".to_string()),
+        reasoning_content: None,
+        tool_calls: None,
+        function_call: None,
+    };
+    let resp =
+        OpenAiCompatibleProvider::parse_native_response(wrap_message(msg), "openai").unwrap();
+
+    assert_eq!(resp.text.as_deref(), Some("Hello world"));
+}
+
+#[test]
+fn convert_messages_for_native_restores_reasoning_content() {
+    // A stored assistant message with JSON-encoded reasoning_content must be
+    // expanded back into a NativeMessage with the reasoning_content field set,
+    // so the next API call carries it as required by the spec.
+    let stored_content = serde_json::json!({
+        "content": "The answer is 42.",
+        "reasoning_content": "Let me think step by step...",
+    })
+    .to_string();
+
+    let messages = vec![
+        ChatMessage::user("What is the meaning of life?"),
+        ChatMessage::assistant(stored_content),
+    ];
+
+    let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages);
+    assert_eq!(native.len(), 2);
+
+    let asst = &native[1];
+    assert_eq!(asst.role, "assistant");
+    assert_eq!(asst.content.as_deref(), Some("The answer is 42."));
+    assert_eq!(
+        asst.reasoning_content.as_deref(),
+        Some("Let me think step by step...")
+    );
+    assert!(asst.tool_calls.is_none());
+}
+
+#[test]
+fn convert_messages_for_native_plain_text_has_no_reasoning_content() {
+    // A regular (non-thinking-model) assistant message must not gain a
+    // spurious reasoning_content field.
+    let messages = vec![
+        ChatMessage::user("Hi"),
+        ChatMessage::assistant("Hello there!"),
+    ];
+
+    let native = OpenAiCompatibleProvider::convert_messages_for_native(&messages);
+    let asst = &native[1];
+    assert!(asst.reasoning_content.is_none());
+}
diff --git a/src/openhuman/inference/provider/compatible_types.rs b/src/openhuman/inference/provider/compatible_types.rs
index 25ceff338f..d7429f4dbd 100644
--- a/src/openhuman/inference/provider/compatible_types.rs
+++ b/src/openhuman/inference/provider/compatible_types.rs
@@ -77,6 +77,11 @@ pub(crate) struct NativeMessage {
     pub(crate) tool_call_id: Option,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub(crate) tool_calls: Option>,
+    /// Thinking/reasoning content from models that support extended reasoning
+    /// (e.g. DeepSeek-R1, Qwen3 in thinking mode). Required by the API on
+    /// subsequent turns — must be passed back verbatim alongside `content`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub(crate) reasoning_content: Option<String>,
 }
 
 #[derive(Debug, Serialize)]