From 65eb9bbee57fa7101ff8ee7565bd26d09279d2ec Mon Sep 17 00:00:00 2001
From: Ghost Scripter
Date: Thu, 28 May 2026 10:40:42 +0530
Subject: [PATCH 1/3] fix(inference): replay deepseek reasoning_content on tool-call turns (Sentry TAURI-RUST-4KB)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolves Sentry issue 5236 (TAURI-RUST-4KB):
https://sentry.tinyhumans.ai/organizations/tinyhumans/issues/5236/

DeepSeek's thinking mode returns `reasoning_content` alongside `tool_calls` and requires that reasoning to be replayed on the follow-up request. Our OpenAI-compatible provider dropped it: `ChatResponse`, the assistant history JSON, and the `NativeMessage` wire type had no carrier for `reasoning_content`, so the next request omitted it and DeepSeek returned:

    400 Bad Request: The `reasoning_content` in the thinking mode must be passed back to the API.

The agent loop (`run_chat_task`) then failed every multi-turn tool call against deepseek-reasoner (31 events since v0.56.0).

Fix: round-trip `reasoning_content` for tool-call assistant turns across all four layers —

- `ChatResponse.reasoning_content` (captured in `parse_native_response` and `chat_with_tools`, trimmed; empty -> None)
- `build_native_assistant_history` writes it into the assistant history JSON (omitted when empty)
- `convert_messages_for_native` lifts it back onto the wire message
- `NativeMessage.reasoning_content` serializes only when present

Because the field is `skip_serializing_if = "Option::is_none"` and only populated for reasoning models, non-reasoning providers see zero change on the wire.

Tests: provider capture (`parse_native_response_captures_reasoning_content`, plus blank -> None in `parse_native_response_blank_reasoning_is_none`), wire round-trip (`convert_preserves_reasoning_content_on_tool_call_turn`, `convert_omits_reasoning_content_when_absent`), and history-builder round-trip in `parse_tests`.
--- src/openhuman/agent/dispatcher_tests.rs | 7 ++ src/openhuman/agent/harness/bughunt_tests.rs | 6 ++ .../agent/harness/harness_gap_tests.rs | 7 ++ src/openhuman/agent/harness/parse.rs | 23 ++++- src/openhuman/agent/harness/parse_tests.rs | 17 +++- .../agent/harness/session/runtime_tests.rs | 4 + src/openhuman/agent/harness/session/tests.rs | 13 +++ .../agent/harness/session/turn_tests.rs | 11 +++ .../agent/harness/subagent_runner/ops.rs | 7 +- .../harness/subagent_runner/ops_tests.rs | 3 + src/openhuman/agent/harness/test_support.rs | 1 + .../agent/harness/test_support_test.rs | 2 + src/openhuman/agent/harness/tests.rs | 1 + src/openhuman/agent/harness/tool_loop.rs | 6 +- .../agent/harness/tool_loop_tests.rs | 22 +++++ src/openhuman/agent/tests.rs | 12 +++ src/openhuman/context/summarizer_tests.rs | 1 + .../inference/provider/compatible.rs | 40 ++++++++ .../inference/provider/compatible_tests.rs | 91 +++++++++++++++++++ .../inference/provider/compatible_types.rs | 8 ++ src/openhuman/inference/provider/traits.rs | 16 +++- .../inference/provider/traits_tests.rs | 2 + .../impl/agent/spawn_parallel_agents_test.rs | 3 + .../tools/impl/agent/spawn_worker_thread.rs | 1 + tests/agent_builder_public.rs | 1 + tests/agent_harness_public.rs | 1 + ...io_list_tools_stack_overflow_regression.rs | 2 + 27 files changed, 299 insertions(+), 9 deletions(-) diff --git a/src/openhuman/agent/dispatcher_tests.rs b/src/openhuman/agent/dispatcher_tests.rs index 34ada29d16..d5741520b4 100644 --- a/src/openhuman/agent/dispatcher_tests.rs +++ b/src/openhuman/agent/dispatcher_tests.rs @@ -10,6 +10,7 @@ fn xml_dispatcher_parses_tool_calls() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }; let dispatcher = XmlToolDispatcher; let (_, calls) = dispatcher.parse_response(&response); @@ -27,6 +28,7 @@ fn native_dispatcher_roundtrip() { arguments: "{\"path\":\"a.txt\"}".into(), }], usage: None, + reasoning_content: None, }; let dispatcher = NativeToolDispatcher; let (_, calls) = 
dispatcher.parse_response(&response); @@ -57,6 +59,7 @@ fn native_dispatcher_falls_back_to_xml_tool_calls() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }; let dispatcher = NativeToolDispatcher; let (text, calls) = dispatcher.parse_response(&response); @@ -74,6 +77,7 @@ fn native_dispatcher_falls_back_to_invoke_tag() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }; let dispatcher = NativeToolDispatcher; let (text, calls) = dispatcher.parse_response(&response); @@ -128,6 +132,7 @@ fn pformat_dispatcher_parses_tool_call_tag() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }; let (text, calls) = dispatcher.parse_response(&response); assert_eq!(text, "Let me check the weather."); @@ -157,6 +162,7 @@ fn pformat_dispatcher_falls_back_to_json_in_tag() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }; let (text, calls) = dispatcher.parse_response(&response); assert_eq!(text, "Running it now."); @@ -179,6 +185,7 @@ fn pformat_dispatcher_handles_multiple_tags() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }; let (_text, calls) = dispatcher.parse_response(&response); assert_eq!(calls.len(), 2); diff --git a/src/openhuman/agent/harness/bughunt_tests.rs b/src/openhuman/agent/harness/bughunt_tests.rs index dae9204d5d..9b4afc6ab3 100644 --- a/src/openhuman/agent/harness/bughunt_tests.rs +++ b/src/openhuman/agent/harness/bughunt_tests.rs @@ -78,6 +78,7 @@ async fn native_tool_call_decodes_json_encoded_arguments_string() { arguments: "{\"city\":\"Berlin\",\"n\":3}".to_string(), }], usage: None, + reasoning_content: None, }); let (tool, captured) = ArgsCapturingTool::new("captured", "captured-ok"); @@ -139,6 +140,7 @@ async fn documents_silent_drop_of_non_json_arguments_string() { arguments: "world".to_string(), }], usage: None, + reasoning_content: None, }); let (tool, captured) = ArgsCapturingTool::new("captured", "captured-ok"); @@ -194,6 +196,7 @@ async fn 
parallel_tool_calls_in_single_iteration_all_execute() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }); let (a, a_calls) = ArgsCapturingTool::new("tool_a", "tool_a-ok"); @@ -289,6 +292,7 @@ async fn markdown_fenced_tool_call_block_is_parsed() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }); let (a, a_calls) = ArgsCapturingTool::new("tool_a", "tool_a-ok"); @@ -342,6 +346,7 @@ async fn native_tool_calls_take_precedence_over_xml_in_text() { arguments: "{\"src\":\"native\"}".into(), }], usage: None, + reasoning_content: None, }); let (a, a_calls) = ArgsCapturingTool::new("tool_a", "tool_a-ok"); @@ -453,6 +458,7 @@ async fn empty_response_with_no_tool_calls_terminates_with_empty_text() { text: Some(String::new()), tool_calls: vec![], usage: None, + reasoning_content: None, }); let tools: Vec> = vec![]; diff --git a/src/openhuman/agent/harness/harness_gap_tests.rs b/src/openhuman/agent/harness/harness_gap_tests.rs index c6277b30f8..bac0c74e46 100644 --- a/src/openhuman/agent/harness/harness_gap_tests.rs +++ b/src/openhuman/agent/harness/harness_gap_tests.rs @@ -123,11 +123,13 @@ async fn full_turn_cycle_user_llm_tool_result_final() { text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("The tool said: echo-out".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), }; @@ -188,6 +190,7 @@ async fn max_iterations_exceeded_downcasts_to_typed_agent_error() { text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, })]), }; let mut history = vec![ChatMessage::user("loop me")]; @@ -254,11 +257,13 @@ async fn visible_tool_names_rejects_tool_outside_whitelist() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("corrected response".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), }; @@ 
-313,11 +318,13 @@ async fn visible_tool_names_allows_tool_inside_whitelist() { text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("heard echo-out".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), }; diff --git a/src/openhuman/agent/harness/parse.rs b/src/openhuman/agent/harness/parse.rs index 9daf8b39cc..b1265c2670 100644 --- a/src/openhuman/agent/harness/parse.rs +++ b/src/openhuman/agent/harness/parse.rs @@ -597,7 +597,17 @@ pub(crate) fn parse_structured_tool_calls(tool_calls: &[ToolCall]) -> Vec String { +/// +/// `reasoning_content` carries the model's thinking output (when the provider +/// surfaced it). It is persisted so the next request can replay it: DeepSeek's +/// thinking mode rejects an `assistant` turn that carries `tool_calls` if its +/// `reasoning_content` is not passed back (Sentry TAURI-RUST-4KB). Omitted from +/// the JSON when empty, so non-reasoning models are unaffected. 
+pub(crate) fn build_native_assistant_history( + text: &str, + reasoning_content: Option<&str>, + tool_calls: &[ToolCall], +) -> String { let calls_json: Vec = tool_calls .iter() .map(|tc| { @@ -615,11 +625,16 @@ pub(crate) fn build_native_assistant_history(text: &str, tool_calls: &[ToolCall] serde_json::Value::String(text.trim().to_string()) }; - serde_json::json!({ + let mut entry = serde_json::json!({ "content": content, "tool_calls": calls_json, - }) - .to_string() + }); + + if let Some(reasoning) = reasoning_content.map(str::trim).filter(|r| !r.is_empty()) { + entry["reasoning_content"] = serde_json::Value::String(reasoning.to_string()); + } + + entry.to_string() } pub(crate) fn build_assistant_history_with_tool_calls( diff --git a/src/openhuman/agent/harness/parse_tests.rs b/src/openhuman/agent/harness/parse_tests.rs index c694b4fd69..fca17e808b 100644 --- a/src/openhuman/agent/harness/parse_tests.rs +++ b/src/openhuman/agent/harness/parse_tests.rs @@ -267,10 +267,25 @@ fn structured_tool_call_and_history_helpers_round_trip_expected_shapes() { assert_eq!(parsed.len(), 1); assert_eq!(parsed[0].arguments, serde_json::json!({ "value": "hello" })); - let native = build_native_assistant_history("done", &tool_calls); + let native = build_native_assistant_history("done", None, &tool_calls); let native_json: serde_json::Value = serde_json::from_str(&native).expect("valid json"); assert_eq!(native_json["content"], "done"); assert_eq!(native_json["tool_calls"][0]["id"], "call-1"); + // No reasoning supplied -> field omitted entirely (non-reasoning models + // must not gain a spurious `reasoning_content` key). + assert!(native_json.get("reasoning_content").is_none()); + + // DeepSeek thinking mode: reasoning must round-trip onto the tool-call + // turn (Sentry TAURI-RUST-4KB). 
+ let native_reasoning = + build_native_assistant_history("done", Some(" step-by-step thoughts "), &tool_calls); + let reasoning_json: serde_json::Value = + serde_json::from_str(&native_reasoning).expect("valid json"); + assert_eq!(reasoning_json["reasoning_content"], "step-by-step thoughts"); + // Whitespace-only reasoning is treated as absent. + let native_blank = build_native_assistant_history("done", Some(" "), &tool_calls); + let blank_json: serde_json::Value = serde_json::from_str(&native_blank).expect("valid json"); + assert!(blank_json.get("reasoning_content").is_none()); let xml_history = build_assistant_history_with_tool_calls("", &tool_calls); assert!(xml_history.contains("")); diff --git a/src/openhuman/agent/harness/session/runtime_tests.rs b/src/openhuman/agent/harness/session/runtime_tests.rs index d114d39068..3ebd503785 100644 --- a/src/openhuman/agent/harness/session/runtime_tests.rs +++ b/src/openhuman/agent/harness/session/runtime_tests.rs @@ -38,6 +38,7 @@ impl Provider for StaticProvider { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }) }) } @@ -126,6 +127,7 @@ fn sanitizers_and_tool_call_helpers_cover_fallback_paths() { text: Some(String::new()), tool_calls: vec![], usage: None, + reasoning_content: None, }; let persisted = Agent::persisted_tool_calls_for_history(&response, &calls, 2); assert_eq!(persisted[0].id, "parsed-3-1"); @@ -212,6 +214,7 @@ async fn run_single_publishes_completed_and_error_events() { text: Some("ok".into()), tool_calls: vec![], usage: Some(UsageInfo::default()), + reasoning_content: None, }))), }); let mut ok_agent = make_agent(ok_provider); @@ -312,6 +315,7 @@ fn helper_paths_cover_no_overlap_native_calls_and_truncation() { text: Some(String::new()), tool_calls: native_calls.clone(), usage: None, + reasoning_content: None, }; let persisted = Agent::persisted_tool_calls_for_history(&response, &[], 0); assert_eq!(persisted.len(), 1); diff --git 
a/src/openhuman/agent/harness/session/tests.rs b/src/openhuman/agent/harness/session/tests.rs index 26908925b3..e570f45923 100644 --- a/src/openhuman/agent/harness/session/tests.rs +++ b/src/openhuman/agent/harness/session/tests.rs @@ -43,6 +43,7 @@ impl Provider for MockProvider { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }); } Ok(guard.remove(0)) @@ -99,6 +100,7 @@ impl Provider for RecordingProvider { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }); } Ok(guard.remove(0)) @@ -264,6 +266,7 @@ async fn turn_without_tools_returns_text() { text: Some("hello".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }]), }); @@ -303,11 +306,13 @@ async fn turn_with_native_dispatcher_handles_tool_results_variant() { arguments: "{}".into(), }], usage: None, + reasoning_content: None, }, crate::openhuman::inference::provider::ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }, ]), }); @@ -351,11 +356,13 @@ async fn turn_with_native_dispatcher_persists_fallback_tool_calls() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }, crate::openhuman::inference::provider::ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }, ]), }); @@ -442,16 +449,19 @@ async fn turn_dispatches_spawn_subagent_through_full_path() { .to_string(), }], usage: None, + reasoning_content: None, }, crate::openhuman::inference::provider::ChatResponse { text: Some("X is Y".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }, crate::openhuman::inference::provider::ChatResponse { text: Some("Based on the research, X is Y.".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }, ]), }); @@ -530,16 +540,19 @@ async fn system_prompt_and_model_are_byte_stable_across_turns() { text: Some("first".into()), tool_calls: vec![], usage: None, + reasoning_content: None, 
}, crate::openhuman::inference::provider::ChatResponse { text: Some("second".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }, crate::openhuman::inference::provider::ChatResponse { text: Some("third".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }, ]), captures: Mutex::new(Vec::new()), diff --git a/src/openhuman/agent/harness/session/turn_tests.rs b/src/openhuman/agent/harness/session/turn_tests.rs index 1bb405e630..82821b85d2 100644 --- a/src/openhuman/agent/harness/session/turn_tests.rs +++ b/src/openhuman/agent/harness/session/turn_tests.rs @@ -43,6 +43,7 @@ impl Provider for DummyProvider { text: Some("unused".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }) } } @@ -756,11 +757,13 @@ async fn turn_runs_full_tool_cycle_with_context_and_hooks() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("final answer".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), requests: AsyncMutex::new(Vec::new()), @@ -843,6 +846,7 @@ async fn turn_uses_cached_transcript_prefix_on_first_iteration() { text: Some("cached-final".into()), tool_calls: vec![], usage: None, + reasoning_content: None, })]), requests: AsyncMutex::new(Vec::new()), }); @@ -889,6 +893,7 @@ async fn turn_emits_checkpoint_when_max_tool_iterations_are_exceeded() { text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some( @@ -896,6 +901,7 @@ async fn turn_emits_checkpoint_when_max_tool_iterations_are_exceeded() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), requests: AsyncMutex::new(Vec::new()), @@ -952,6 +958,7 @@ async fn turn_errors_on_empty_provider_response() { text: Some(String::new()), tool_calls: vec![], usage: None, + reasoning_content: None, })]), requests: AsyncMutex::new(Vec::new()), }); @@ -988,11 +995,13 @@ async fn 
turn_checkpoint_falls_back_to_deterministic_summary_when_model_summary_ text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some(String::new()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), requests: AsyncMutex::new(Vec::new()), @@ -1037,6 +1046,7 @@ async fn turn_checkpoint_usage_is_folded_into_transcript_accounting() { text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), // Checkpoint call — reports usage that must be accounted for. Ok(ChatResponse { @@ -1049,6 +1059,7 @@ async fn turn_checkpoint_usage_is_folded_into_transcript_accounting() { charged_amount_usd: 0.05, ..UsageInfo::default() }), + reasoning_content: None, }), ]), requests: AsyncMutex::new(Vec::new()), diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs index 9b4d7aa4d2..9f0aa6bd05 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops.rs @@ -1526,8 +1526,11 @@ async fn run_inner_loop( if force_text_mode { history.push(ChatMessage::assistant(response_text.clone())); } else { - let assistant_history_content = - super::super::parse::build_native_assistant_history(&response_text, &native_calls); + let assistant_history_content = super::super::parse::build_native_assistant_history( + &response_text, + resp.reasoning_content.as_deref(), + &native_calls, + ); history.push(ChatMessage::assistant(assistant_history_content)); } diff --git a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs index cdef0a455b..41c352680f 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs @@ -231,6 +231,7 @@ impl Provider for ScriptedProvider { text: Some(String::new()), 
tool_calls: vec![], usage: None, + reasoning_content: None, }); } Ok(q.remove(0)) @@ -246,6 +247,7 @@ fn text_response(text: &str) -> ChatResponse { text: Some(text.into()), tool_calls: vec![], usage: None, + reasoning_content: None, } } @@ -258,6 +260,7 @@ fn tool_response(name: &str, args: &str) -> ChatResponse { arguments: args.into(), }], usage: None, + reasoning_content: None, } } diff --git a/src/openhuman/agent/harness/test_support.rs b/src/openhuman/agent/harness/test_support.rs index e97d453d5b..b4bcaaf504 100644 --- a/src/openhuman/agent/harness/test_support.rs +++ b/src/openhuman/agent/harness/test_support.rs @@ -302,6 +302,7 @@ impl Provider for KeywordScriptedProvider { text: text.clone(), tool_calls: tool_calls.clone(), usage: None, + reasoning_content: None, }; state.turns.push(ProviderTurn { diff --git a/src/openhuman/agent/harness/test_support_test.rs b/src/openhuman/agent/harness/test_support_test.rs index 556f31e46a..28477e507e 100644 --- a/src/openhuman/agent/harness/test_support_test.rs +++ b/src/openhuman/agent/harness/test_support_test.rs @@ -38,6 +38,7 @@ async fn keyword_provider_records_forced_then_fallback_turns() { text: Some("forced reply".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }); let messages = vec![ChatMessage::user("nothing should match here")]; @@ -942,6 +943,7 @@ async fn run_tool_call_loop_returns_max_iterations_error() { text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }); let (echo_tool, _) = RecordingTool::echo("echo"); diff --git a/src/openhuman/agent/harness/tests.rs b/src/openhuman/agent/harness/tests.rs index f26ae5b2d2..7f8b7202a1 100644 --- a/src/openhuman/agent/harness/tests.rs +++ b/src/openhuman/agent/harness/tests.rs @@ -95,6 +95,7 @@ impl Provider for VisionProvider { text: Some("vision-ok".to_string()), tool_calls: Vec::new(), usage: None, + reasoning_content: None, }) } } diff --git 
a/src/openhuman/agent/harness/tool_loop.rs b/src/openhuman/agent/harness/tool_loop.rs index 7f1100295a..26e243bc34 100644 --- a/src/openhuman/agent/harness/tool_loop.rs +++ b/src/openhuman/agent/harness/tool_loop.rs @@ -583,7 +583,11 @@ pub(crate) async fn run_tool_call_loop( let assistant_history_content = if resp.tool_calls.is_empty() { response_text.clone() } else { - build_native_assistant_history(&response_text, &resp.tool_calls) + build_native_assistant_history( + &response_text, + resp.reasoning_content.as_deref(), + &resp.tool_calls, + ) }; let native_calls = resp.tool_calls; diff --git a/src/openhuman/agent/harness/tool_loop_tests.rs b/src/openhuman/agent/harness/tool_loop_tests.rs index 9cb1e98521..fd4a63ab83 100644 --- a/src/openhuman/agent/harness/tool_loop_tests.rs +++ b/src/openhuman/agent/harness/tool_loop_tests.rs @@ -190,11 +190,13 @@ async fn run_tool_call_loop_intercepts_oversized_tool_results_via_summarizer() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), native_tools: false, @@ -288,6 +290,7 @@ async fn run_tool_call_loop_streams_final_text_chunks() { text: Some("word ".repeat(30)), tool_calls: vec![], usage: None, + reasoning_content: None, })]), native_tools: false, vision: false, @@ -335,11 +338,13 @@ async fn run_tool_call_loop_blocks_cli_rpc_only_tools_in_prompt_mode() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), native_tools: false, @@ -391,11 +396,13 @@ async fn run_tool_call_loop_persists_native_tool_results_as_tool_messages() { arguments: "{}".into(), }], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), native_tools: true, @@ -442,11 +449,13 @@ async fn 
run_tool_call_loop_reports_unknown_tool_and_uses_default_max_iterations text: Some("{\"name\":\"missing\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), native_tools: false, @@ -497,11 +506,13 @@ async fn run_tool_call_loop_formats_tool_error_paths() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), native_tools: false, @@ -577,6 +588,7 @@ async fn run_tool_call_loop_propagates_provider_errors_and_max_iteration_failure text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, })]), native_tools: false, vision: false, @@ -643,11 +655,13 @@ async fn run_tool_call_loop_aborts_when_stop_hook_returns_stop() { text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("{\"name\":\"echo\",\"arguments\":{}}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), native_tools: false, @@ -706,6 +720,7 @@ async fn run_tool_call_loop_runs_unchanged_when_no_stop_hooks_installed() { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, })]), native_tools: false, vision: false, @@ -772,12 +787,14 @@ async fn run_tool_call_loop_applies_per_tool_max_result_size_cap() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }), // Round 2: stop. 
Ok(ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), native_tools: false, @@ -842,6 +859,7 @@ async fn run_tool_call_loop_halts_on_repeated_identical_failure() { ), tool_calls: vec![], usage: None, + reasoning_content: None, })); } let provider = ScriptedProvider { @@ -905,6 +923,7 @@ async fn run_tool_call_loop_halts_when_no_progress() { )), tool_calls: vec![], usage: None, + reasoning_content: None, })); } let provider = ScriptedProvider { @@ -1131,6 +1150,7 @@ async fn run_tool_call_loop_dedups_duplicate_tool_names_before_provider_call() { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, })]), // Native tool-calling on: only when the provider supports native // tools does `run_tool_call_loop` populate `ChatRequest.tools`. @@ -1257,11 +1277,13 @@ async fn auto_approved_external_effect_tool_runs_through_loop_without_parking() )), tool_calls: vec![], usage: None, + reasoning_content: None, }), Ok(ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }), ]), native_tools: false, diff --git a/src/openhuman/agent/tests.rs b/src/openhuman/agent/tests.rs index c83570fb35..d4febcc31f 100644 --- a/src/openhuman/agent/tests.rs +++ b/src/openhuman/agent/tests.rs @@ -94,6 +94,7 @@ impl Provider for ScriptedProvider { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }); } Ok(guard.remove(0)) @@ -324,6 +325,7 @@ fn tool_response(calls: Vec) -> ChatResponse { text: Some(String::new()), tool_calls: calls, usage: None, + reasoning_content: None, } } @@ -333,6 +335,7 @@ fn text_response(text: &str) -> ChatResponse { text: Some(text.into()), tool_calls: vec![], usage: None, + reasoning_content: None, } } @@ -344,6 +347,7 @@ fn xml_tool_response(name: &str, args: &str) -> ChatResponse { )), tool_calls: vec![], usage: None, + reasoning_content: None, } } @@ -741,6 +745,7 @@ async fn 
turn_errors_on_empty_text_response() { text: Some(String::new()), tool_calls: vec![], usage: None, + reasoning_content: None, }])); let (mut agent, _tmp) = build_agent_with(provider, vec![], Box::new(NativeToolDispatcher)); @@ -761,6 +766,7 @@ async fn turn_errors_on_none_text_response() { text: None, tool_calls: vec![], usage: None, + reasoning_content: None, }])); let (mut agent, _tmp) = build_agent_with(provider, vec![], Box::new(NativeToolDispatcher)); @@ -790,6 +796,7 @@ async fn turn_preserves_text_alongside_tool_calls() { arguments: r#"{"message": "hi"}"#.into(), }], usage: None, + reasoning_content: None, }, text_response("Here are the results"), ])); @@ -871,6 +878,7 @@ async fn e2e_native_loop_executes_text_fallback_tool_calls_and_persists_history( ), tool_calls: vec![], usage: None, + reasoning_content: None, }, text_response("Completed via tool"), ])); @@ -1081,6 +1089,7 @@ async fn native_dispatcher_handles_stringified_arguments() { arguments: r#"{"message": "hello"}"#.into(), }], usage: None, + reasoning_content: None, }; let (_, calls) = dispatcher.parse_response(&response); @@ -1107,6 +1116,7 @@ fn xml_dispatcher_handles_nested_json() { ), tool_calls: vec![], usage: None, + reasoning_content: None, }; let dispatcher = XmlToolDispatcher; @@ -1125,6 +1135,7 @@ fn xml_dispatcher_handles_empty_tool_call_tag() { text: Some("\n\nSome text".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }; let dispatcher = XmlToolDispatcher; @@ -1139,6 +1150,7 @@ fn xml_dispatcher_handles_unclosed_tool_call() { text: Some("Before\n\n{\"name\": \"shell\"}".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }; let dispatcher = XmlToolDispatcher; diff --git a/src/openhuman/context/summarizer_tests.rs b/src/openhuman/context/summarizer_tests.rs index bc81a343c6..413ad9b4a1 100644 --- a/src/openhuman/context/summarizer_tests.rs +++ b/src/openhuman/context/summarizer_tests.rs @@ -84,6 +84,7 @@ impl Provider for StubProvider { text: 
Some(self.reply.clone()), tool_calls: vec![], usage: None, + reasoning_content: None, }) } } diff --git a/src/openhuman/inference/provider/compatible.rs b/src/openhuman/inference/provider/compatible.rs index f2789c296a..6f825b7219 100644 --- a/src/openhuman/inference/provider/compatible.rs +++ b/src/openhuman/inference/provider/compatible.rs @@ -578,11 +578,24 @@ impl OpenAiCompatibleProvider { .and_then(serde_json::Value::as_str) .map(ToString::to_string); + // Replay the assistant's reasoning so + // DeepSeek thinking mode accepts the + // tool-call turn on the follow-up request + // (Sentry TAURI-RUST-4KB). Written by + // `build_native_assistant_history`; absent + // for non-reasoning models. + let reasoning_content = value + .get("reasoning_content") + .and_then(serde_json::Value::as_str) + .filter(|s| !s.trim().is_empty()) + .map(ToString::to_string); + return NativeMessage { role: "assistant".to_string(), content, tool_call_id: None, tool_calls: Some(tool_calls), + reasoning_content, }; } } @@ -608,6 +621,7 @@ impl OpenAiCompatibleProvider { content, tool_call_id, tool_calls: None, + reasoning_content: None, }; } } @@ -617,6 +631,7 @@ impl OpenAiCompatibleProvider { content: Some(message.content.clone()), tool_call_id: None, tool_calls: None, + reasoning_content: None, } }) .collect(); @@ -769,6 +784,16 @@ impl OpenAiCompatibleProvider { .ok_or_else(|| anyhow::anyhow!("No choices in response from {}", provider_name))?; let mut text = message.effective_content_optional(); + // Preserve the raw reasoning so the agent loop can replay it on the + // follow-up request. DeepSeek's thinking mode rejects an + // `assistant` turn that carries `tool_calls` if its + // `reasoning_content` is not passed back (Sentry TAURI-RUST-4KB). 
+ let reasoning_content = message + .reasoning_content + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); let mut tool_calls = message .tool_calls .unwrap_or_default() @@ -817,6 +842,7 @@ impl OpenAiCompatibleProvider { text, tool_calls, usage, + reasoning_content, }) } @@ -1676,6 +1702,7 @@ impl Provider for OpenAiCompatibleProvider { text: Some(text), tool_calls: vec![], usage: None, + reasoning_content: None, }); } }; @@ -1694,6 +1721,15 @@ impl Provider for OpenAiCompatibleProvider { .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?; let text = choice.message.effective_content_optional(); + // See `parse_native_response`: replay reasoning on the follow-up + // request so DeepSeek thinking mode accepts the tool-call turn. + let reasoning_content = choice + .message + .reasoning_content + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); let tool_calls = choice .message .tool_calls @@ -1715,6 +1751,7 @@ impl Provider for OpenAiCompatibleProvider { text, tool_calls, usage, + reasoning_content, }) } @@ -1849,6 +1886,7 @@ impl Provider for OpenAiCompatibleProvider { text: Some(text), tool_calls: vec![], usage: None, + reasoning_content: None, }) .map_err(|responses_err| { let fb = super::format_anyhow_chain(&responses_err); @@ -1878,6 +1916,7 @@ impl Provider for OpenAiCompatibleProvider { text: Some(text), tool_calls: vec![], usage: None, + reasoning_content: None, }); } @@ -1889,6 +1928,7 @@ impl Provider for OpenAiCompatibleProvider { text: Some(text), tool_calls: vec![], usage: None, + reasoning_content: None, }) .map_err(|responses_err| { let fb = super::format_anyhow_chain(&responses_err); diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs index 3e1b7dbff1..65b83ac9b1 100644 --- a/src/openhuman/inference/provider/compatible_tests.rs +++ b/src/openhuman/inference/provider/compatible_tests.rs @@ 
-415,6 +415,7 @@ async fn streaming_chat_config_rejection_propagates_error_without_sentry_report( content: Some("hello".to_string()), tool_call_id: None, tool_calls: None, + reasoning_content: None, }], temperature: Some(0.7), stream: Some(true), @@ -657,6 +658,49 @@ fn parse_native_response_preserves_tool_call_id() { assert_eq!(parsed.tool_calls[0].name, "shell"); } +/// DeepSeek thinking mode emits the chain-of-thought in `reasoning_content` +/// alongside the tool call. `parse_native_response` must surface it so the +/// agent loop can replay it on the follow-up request (Sentry TAURI-RUST-4KB). +#[test] +fn parse_native_response_captures_reasoning_content() { + let message = ResponseMessage { + content: None, + tool_calls: Some(vec![ToolCall { + id: Some("call_r".to_string()), + kind: Some("function".to_string()), + function: Some(Function { + name: Some("shell".to_string()), + arguments: Some(serde_json::Value::String("{}".to_string())), + }), + }]), + function_call: None, + reasoning_content: Some(" weighing the options ".to_string()), + }; + + let parsed = + OpenAiCompatibleProvider::parse_native_response(wrap_message(message), "deepseek").unwrap(); + assert_eq!( + parsed.reasoning_content.as_deref(), + Some("weighing the options") + ); +} + +/// Whitespace-only / empty reasoning is normalised to `None` so it never +/// produces a spurious `reasoning_content` key on the wire. 
+#[test] +fn parse_native_response_blank_reasoning_is_none() { + let message = ResponseMessage { + content: Some("hello".to_string()), + tool_calls: None, + function_call: None, + reasoning_content: Some(" ".to_string()), + }; + + let parsed = + OpenAiCompatibleProvider::parse_native_response(wrap_message(message), "deepseek").unwrap(); + assert!(parsed.reasoning_content.is_none()); +} + #[test] fn convert_messages_for_native_maps_tool_result_payload() { // A `tool` result must be opened by a preceding `assistant(tool_calls)`, @@ -897,6 +941,53 @@ fn tool_invariants_drop_orphan_but_keep_following_cycle() { assert_eq!(converted[1].tool_call_id.as_deref(), Some("call_b")); } +/// DeepSeek thinking mode (Sentry TAURI-RUST-4KB): an `assistant` turn that +/// carries `tool_calls` must replay its `reasoning_content` on the follow-up +/// request, otherwise DeepSeek returns +/// `400 The reasoning_content in the thinking mode must be passed back to the +/// API.` The history JSON written by `build_native_assistant_history` carries +/// `reasoning_content`; `convert_messages_for_native` must lift it back onto +/// the wire message. +#[test] +fn convert_preserves_reasoning_content_on_tool_call_turn() { + let input = vec![ChatMessage::assistant( + r#"{"content":null,"reasoning_content":"let me think about this","tool_calls":[{"id":"call_x","name":"shell","arguments":"{}"}]}"#, + )]; + + let converted = OpenAiCompatibleProvider::convert_messages_for_native(&input); + + assert_eq!(converted.len(), 1); + assert_eq!( + converted[0].reasoning_content.as_deref(), + Some("let me think about this") + ); + + // The wire payload must actually carry the field for DeepSeek to accept it. 
+ let wire = serde_json::to_value(&converted[0]).unwrap(); + assert_eq!(wire["reasoning_content"], "let me think about this"); +} + +/// Assistant tool-call turns from non-reasoning models carry no +/// `reasoning_content`; it must never appear on the wire for them (most +/// OpenAI-compatible providers don't recognise the field). +#[test] +fn convert_omits_reasoning_content_when_absent() { + let input = vec![ChatMessage::assistant( + r#"{"content":"sure","tool_calls":[{"id":"call_y","name":"shell","arguments":"{}"}]}"#, + )]; + + let converted = OpenAiCompatibleProvider::convert_messages_for_native(&input); + + assert_eq!(converted.len(), 1); + assert!(converted[0].reasoning_content.is_none()); + + let wire = serde_json::to_value(&converted[0]).unwrap(); + assert!( + wire.get("reasoning_content").is_none(), + "reasoning_content must be omitted from the wire when absent" + ); +} + #[test] fn chat_message_identity_metadata_is_not_provider_wire_payload() { let message = ChatMessage { diff --git a/src/openhuman/inference/provider/compatible_types.rs b/src/openhuman/inference/provider/compatible_types.rs index 25ceff338f..d421f28a78 100644 --- a/src/openhuman/inference/provider/compatible_types.rs +++ b/src/openhuman/inference/provider/compatible_types.rs @@ -77,6 +77,14 @@ pub(crate) struct NativeMessage { pub(crate) tool_call_id: Option<String>, #[serde(skip_serializing_if = "Option::is_none")] pub(crate) tool_calls: Option<Vec<ToolCall>>, + /// Reasoning/thinking output for an `assistant` turn, replayed back to + /// the provider on follow-up requests. DeepSeek's thinking mode rejects + /// a tool-call turn whose `reasoning_content` is not passed back + /// (Sentry TAURI-RUST-4KB). Only emitted for reasoning models that + /// produced it (most providers leave this `None`, so it never appears + /// on the wire for them). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) reasoning_content: Option<String>, } #[derive(Debug, Serialize)] diff --git a/src/openhuman/inference/provider/traits.rs b/src/openhuman/inference/provider/traits.rs index e453a11a2c..1d28456e15 100644 --- a/src/openhuman/inference/provider/traits.rs +++ b/src/openhuman/inference/provider/traits.rs @@ -81,7 +81,7 @@ pub struct UsageInfo { } /// An LLM response that may contain text, tool calls, or both. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct ChatResponse { /// Text content of the response (may be empty if only tool calls). pub text: Option<String>, @@ -89,6 +89,17 @@ pub struct ChatResponse { pub tool_calls: Vec<ToolCall>, /// Token usage info from the provider (if available). pub usage: Option<UsageInfo>, + /// Reasoning/thinking output emitted alongside this turn (DeepSeek, + /// Qwen3, GLM-4, … return it in a separate `reasoning_content` field). + /// + /// Preserved here so the agent loop can round-trip it back into the + /// assistant history entry for the *next* request. DeepSeek's thinking + /// mode **requires** the `reasoning_content` of an assistant turn that + /// carries `tool_calls` to be replayed verbatim on the follow-up call + /// (otherwise: `400 The reasoning_content in the thinking mode must be + /// passed back to the API.` — Sentry TAURI-RUST-4KB). `None` for + /// providers/models that don't emit reasoning. 
+ pub reasoning_content: Option<String>, } impl ChatResponse { @@ -439,6 +450,7 @@ pub trait Provider: Send + Sync { text: Some(text), tool_calls: Vec::new(), usage: None, + reasoning_content: None, }); } } @@ -457,6 +469,7 @@ pub trait Provider: Send + Sync { text: Some(text), tool_calls: Vec::new(), usage: None, + reasoning_content: None, }) } @@ -491,6 +504,7 @@ pub trait Provider: Send + Sync { text: Some(text), tool_calls: Vec::new(), usage: None, + reasoning_content: None, }) } diff --git a/src/openhuman/inference/provider/traits_tests.rs b/src/openhuman/inference/provider/traits_tests.rs index ac62a7b2ea..266be1c9ef 100644 --- a/src/openhuman/inference/provider/traits_tests.rs +++ b/src/openhuman/inference/provider/traits_tests.rs @@ -44,6 +44,7 @@ fn chat_response_helpers() { text: None, tool_calls: vec![], usage: None, + reasoning_content: None, }; assert!(!empty.has_tool_calls()); assert_eq!(empty.text_or_empty(), ""); @@ -56,6 +57,7 @@ fn chat_response_helpers() { arguments: "{}".into(), }], usage: None, + reasoning_content: None, }; assert!(with_tools.has_tool_calls()); assert_eq!(with_tools.text_or_empty(), "Let me check"); diff --git a/src/openhuman/tools/impl/agent/spawn_parallel_agents_test.rs b/src/openhuman/tools/impl/agent/spawn_parallel_agents_test.rs index c8767b5096..3e030e9c1d 100644 --- a/src/openhuman/tools/impl/agent/spawn_parallel_agents_test.rs +++ b/src/openhuman/tools/impl/agent/spawn_parallel_agents_test.rs @@ -113,6 +113,7 @@ impl Provider for NoopProvider { text: Some("ok".into()), tool_calls: Vec::new(), usage: None, + reasoning_content: None, }) } } @@ -479,6 +480,7 @@ fn text_response(text: impl Into<String>) -> ChatResponse { text: Some(text.into()), tool_calls: Vec::new(), usage: None, + reasoning_content: None, } } @@ -491,6 +493,7 @@ fn tool_response(name: &str, arguments: serde_json::Value) -> ChatResponse { arguments: arguments.to_string(), }], usage: None, + reasoning_content: None, } } diff --git 
a/src/openhuman/tools/impl/agent/spawn_worker_thread.rs b/src/openhuman/tools/impl/agent/spawn_worker_thread.rs index 4c6f5e9668..3e691fab26 100644 --- a/src/openhuman/tools/impl/agent/spawn_worker_thread.rs +++ b/src/openhuman/tools/impl/agent/spawn_worker_thread.rs @@ -310,6 +310,7 @@ mod tests { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }) } fn supports_native_tools(&self) -> bool { diff --git a/tests/agent_builder_public.rs b/tests/agent_builder_public.rs index 46433459b5..36536c1468 100644 --- a/tests/agent_builder_public.rs +++ b/tests/agent_builder_public.rs @@ -33,6 +33,7 @@ impl Provider for StubProvider { text: Some("ok".into()), tool_calls: Vec::new(), usage: None, + reasoning_content: None, }) } } diff --git a/tests/agent_harness_public.rs b/tests/agent_harness_public.rs index 8193d021c2..ce0c88158f 100644 --- a/tests/agent_harness_public.rs +++ b/tests/agent_harness_public.rs @@ -40,6 +40,7 @@ impl Provider for StubProvider { text: Some("ok".into()), tool_calls: Vec::new(), usage: None, + reasoning_content: None, }) } } diff --git a/tests/composio_list_tools_stack_overflow_regression.rs b/tests/composio_list_tools_stack_overflow_regression.rs index ce995519d5..9f60131517 100644 --- a/tests/composio_list_tools_stack_overflow_regression.rs +++ b/tests/composio_list_tools_stack_overflow_regression.rs @@ -220,12 +220,14 @@ impl Provider for StubProvider { arguments: json!({ "toolkits": ["gmail"] }).to_string(), }], usage: None, + reasoning_content: None, }) } else { Ok(ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }) } } From bdf5a2f546d1f112a13e6a7ab735729210f7544a Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 28 May 2026 11:26:58 +0530 Subject: [PATCH 2/3] test(inference): add reasoning_content to calendar_grounding_e2e ChatResponse initializers The new `ChatResponse.reasoning_content` field was added to every `src/` initializer but the 
`tests/calendar_grounding_e2e.rs` integration test was missed, so the test build failed to compile (error[E0063]: missing field `reasoning_content`). That broke the Rust Core Tests + Quality, Rust Core Coverage, and Linux Rust integration-suite checks on this PR. Set the field to None at both mock-provider initializers; `cargo test --no-run` now compiles all test targets cleanly. --- tests/calendar_grounding_e2e.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/calendar_grounding_e2e.rs b/tests/calendar_grounding_e2e.rs index 20df3900ab..ff9d36ccab 100644 --- a/tests/calendar_grounding_e2e.rs +++ b/tests/calendar_grounding_e2e.rs @@ -55,6 +55,7 @@ impl Provider for MockCalendarProvider { .to_string(), }], usage: None, + reasoning_content: None, }) } else { // End the loop @@ -62,6 +63,7 @@ impl Provider for MockCalendarProvider { text: Some("You have no events this week.".into()), tool_calls: vec![], usage: None, + reasoning_content: None, }) } } From aab7b59d8939ea9c85e796d6d9c93f7f6199e415 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Fri, 29 May 2026 00:46:56 +0530 Subject: [PATCH 3/3] fix(inference): rename duplicate test to resolve compile error Both the PR and main added parse_native_response_captures_reasoning_content testing different code paths. Rename the second one (non-streaming API response path) to avoid the duplicate symbol compile error. 
--- src/openhuman/inference/provider/compatible_tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs index c23172478b..c1f2c24503 100644 --- a/src/openhuman/inference/provider/compatible_tests.rs +++ b/src/openhuman/inference/provider/compatible_tests.rs @@ -1651,9 +1651,9 @@ fn enrich_404_message_adds_hint_when_no_fallback() { // ── reasoning_content round-trip tests (issue #2800 / Sentry TAURI-RUST-4WC) ─ /// `parse_native_response` must capture `reasoning_content` from a non-streaming -/// response and surface it on `ChatResponse`. +/// `ApiChatResponse` and surface it on `ChatResponse`. #[test] -fn parse_native_response_captures_reasoning_content() { +fn parse_native_response_captures_reasoning_content_from_api_response() { let api_resp = ApiChatResponse { choices: vec![Choice { message: ResponseMessage {