From 500c7df1ab4e18da551f5185fcd19af6d11eff66 Mon Sep 17 00:00:00 2001 From: Craig Tollifson Date: Sun, 31 May 2026 21:39:32 -0700 Subject: [PATCH 1/2] server: treat DSML start as implicit when thinking is unclosed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the model emits a DSML tool-call block without closing its thinking block first, the previous behavior silently discarded the tool call and returned finish=stop, leaving agent clients with no actionable output and a stalled session. The model is almost certainly intending a real tool call in this case — it just dropped the closer, which happens more frequently at longer context depths. Treat the DSML start position as an implicit thinking boundary: the pre-DSML text becomes reasoning content and the tool call is parsed normally. When no DSML is present either (pure unclosed thinking), preserve the existing behavior of returning the output as reasoning only. Both cases now log a DS4_LOG_WARNING so the condition is visible in server output and --trace sessions. Refs https://github.com/antirez/ds4/issues/167 --- ds4_server.c | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/ds4_server.c b/ds4_server.c index f5c96e885..de367ea78 100644 --- a/ds4_server.c +++ b/ds4_server.c @@ -566,6 +566,7 @@ static void tool_memory_attach_to_messages(server *s, chat_msgs *msgs, tool_replay_stats *stats); static bool tool_memory_has_id(server *s, const char *id); static void kv_cache_restore_tool_memory_for_messages(server *s, const chat_msgs *msgs); +static void server_log(ds4_log_type type, const char *fmt, ...); typedef struct { char **v; @@ -4443,16 +4444,42 @@ static bool parse_generated_message_ex(const char *text, bool require_thinking_c * or an explanation of the protocol. 
Treating it as a real tool call * duplicates it into both reasoning and structured tool_calls, and can make * clients execute something the assistant had not actually emitted as its - * post-thinking action. */ + * post-thinking action. + * + * Exception: if the closing think tag is absent but a DSML tool-call block is present, + * the model almost certainly intended a real tool call and simply forgot to + * close the thinking stanza. In that case we treat the DSML start as an + * implicit boundary rather than silently discarding the call. + * The pre-DSML text becomes reasoning; the DSML block is parsed normally. */ if (require_thinking_closed) { const char *think_end = find_last_substr(text, ""); if (!think_end) { - /* Model did not close thinking, ignore any DSML in reasoning */ - fprintf(stderr, "ds4-server: thinking not closed, ignoring DSML in reasoning\n"); - split_reasoning_content(text, strlen(text), content_out, reasoning_out); - return true; + const char *dsml_start = find_any_tool_start(text); + if (dsml_start) { + /* Implicit close: treat DSML start as the thinking boundary. */ + server_log(DS4_LOG_WARNING, + "ds4-server: thinking not closed before tool call; " + "treating DSML start as implicit "); + tool_search = dsml_start; + /* Expose reasoning as everything up to the implicit boundary. */ + split_reasoning_content(text, (size_t)(dsml_start - text), + content_out, reasoning_out); + /* content_out holds any non-reasoning prefix; we only need + * reasoning here — clear the content placeholder so the + * tool-call path below can fill it in correctly. */ + free(*content_out); + *content_out = NULL; + } else { + /* No DSML at all: entire output is unclosed reasoning. 
*/ + server_log(DS4_LOG_WARNING, + "ds4-server: thinking not closed and no tool call found; " + "returning output as reasoning only"); + split_reasoning_content(text, strlen(text), content_out, reasoning_out); + return true; + } + } else { + tool_search = think_end + 8; } - tool_search = think_end + 8; } const char *start = strstr(tool_search, "\n\n" DS4_TOOL_CALLS_START); From 389848594adb4f9cb12f28adab7ba0ea7f408a23 Mon Sep 17 00:00:00 2001 From: Craig Tollifson Date: Tue, 2 Jun 2026 19:35:49 -0700 Subject: [PATCH 2/2] server: suppress DSML from streaming output on implicit think close When the closing think tag is absent but a DSML tool block is present, the three live streaming paths (OpenAI, Anthropic, Responses) were emitting the raw DSML markers as reasoning content because thinking.inside remained true for the entire generation. Apply the same implicit-close logic already in parse_generated_message_ex to each streaming state machine: on final update, if there is no closing think tag but DSML is present, clip the reasoning limit before the DSML start and transition to TEXT mode instead of SUPPRESS. The TEXT state then handles the tool block normally. For the Responses path, set reasoning_closed_naturally so the reasoning item is marked completed rather than incomplete. --- ds4_server.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/ds4_server.c b/ds4_server.c index de367ea78..5e4af4237 100644 --- a/ds4_server.c +++ b/ds4_server.c @@ -5890,9 +5890,13 @@ static bool openai_sse_stream_update(int fd, server *s, const request *r, const } const char *close = strstr(raw + st->emit_pos, ""); + const char *dsml_implicit = (!close && final && r->has_tools) + ? 
find_any_tool_start(raw + st->emit_pos) : NULL; size_t limit; if (close) { limit = (size_t)(close - raw); + } else if (dsml_implicit) { + limit = (size_t)(dsml_implicit - raw); } else if (final) { limit = raw_len; } else { @@ -5912,6 +5916,9 @@ static bool openai_sse_stream_update(int fd, server *s, const request *r, const if (close) { st->emit_pos = (size_t)(close - raw) + strlen(""); st->mode = OPENAI_STREAM_TEXT; + } else if (dsml_implicit) { + st->emit_pos = (size_t)(dsml_implicit - raw); + st->mode = OPENAI_STREAM_TEXT; } else if (final) { st->mode = OPENAI_STREAM_SUPPRESS; return true; @@ -6587,9 +6594,13 @@ static bool responses_sse_stream_update(int fd, const request *r, } const char *close = strstr(raw + st->emit_pos, ""); + const char *dsml_implicit = (!close && final && r->has_tools) + ? find_any_tool_start(raw + st->emit_pos) : NULL; size_t limit; if (close) { limit = (size_t)(close - raw); + } else if (dsml_implicit) { + limit = (size_t)(dsml_implicit - raw); } else if (final) { limit = raw_len; } else { @@ -6622,6 +6633,10 @@ static bool responses_sse_stream_update(int fd, const request *r, st->emit_pos = (size_t)(close - raw) + strlen(""); st->mode = RESP_STREAM_TEXT; st->reasoning_closed_naturally = true; + } else if (dsml_implicit) { + st->emit_pos = (size_t)(dsml_implicit - raw); + st->mode = RESP_STREAM_TEXT; + st->reasoning_closed_naturally = true; } else if (final) { st->mode = RESP_STREAM_SUPPRESS; return true; @@ -7464,9 +7479,13 @@ static bool anthropic_sse_stream_update(int fd, server *s, const request *r, con } const char *close = strstr(raw + st->emit_pos, ""); + const char *dsml_implicit = (!close && final && r->has_tools) + ? 
find_any_tool_start(raw + st->emit_pos) : NULL; size_t limit; if (close) { limit = (size_t)(close - raw); + } else if (dsml_implicit) { + limit = (size_t)(dsml_implicit - raw); } else if (final) { limit = raw_len; } else { @@ -7484,11 +7503,14 @@ static bool anthropic_sse_stream_update(int fd, server *s, const request *r, con st->emit_pos = limit; } - if (close || final) { + if (close || dsml_implicit || final) { if (!anthropic_sse_close_block_live(fd, id, st)) return false; if (close) { st->emit_pos = (size_t)(close - raw) + strlen(""); st->mode = ANTH_STREAM_TEXT; + } else if (dsml_implicit) { + st->emit_pos = (size_t)(dsml_implicit - raw); + st->mode = ANTH_STREAM_TEXT; } else { st->mode = ANTH_STREAM_SUPPRESS; return true;