diff --git a/ds4_server.c b/ds4_server.c index f5c96e885..5e4af4237 100644 --- a/ds4_server.c +++ b/ds4_server.c @@ -566,6 +566,7 @@ static void tool_memory_attach_to_messages(server *s, chat_msgs *msgs, tool_replay_stats *stats); static bool tool_memory_has_id(server *s, const char *id); static void kv_cache_restore_tool_memory_for_messages(server *s, const chat_msgs *msgs); +static void server_log(ds4_log_type type, const char *fmt, ...); typedef struct { char **v; @@ -4443,16 +4444,42 @@ static bool parse_generated_message_ex(const char *text, bool require_thinking_c * or an explanation of the protocol. Treating it as a real tool call * duplicates it into both reasoning and structured tool_calls, and can make * clients execute something the assistant had not actually emitted as its - * post-thinking action. */ + * post-thinking action. + * + * Exception: if is absent but a DSML tool-call block is present, + * the model almost certainly intended a real tool call and simply forgot to + * close the thinking stanza. In that case we treat the DSML start as an + * implicit boundary rather than silently discarding the call. + * The pre-DSML text becomes reasoning; the DSML block is parsed normally. */ if (require_thinking_closed) { const char *think_end = find_last_substr(text, ""); if (!think_end) { - /* Model did not close thinking, ignore any DSML in reasoning */ - fprintf(stderr, "ds4-server: thinking not closed, ignoring DSML in reasoning\n"); - split_reasoning_content(text, strlen(text), content_out, reasoning_out); - return true; + const char *dsml_start = find_any_tool_start(text); + if (dsml_start) { + /* Implicit close: treat DSML start as the thinking boundary. */ + server_log(DS4_LOG_WARNING, + "ds4-server: thinking not closed before tool call; " + "treating DSML start as implicit "); + tool_search = dsml_start; + /* Expose reasoning as everything up to the implicit boundary. */ + split_reasoning_content(text, (size_t)(dsml_start - text), + content_out, reasoning_out); + /* content_out holds any non-reasoning prefix; we only need + * reasoning here — clear the content placeholder so the + * tool-call path below can fill it in correctly. */ + free(*content_out); + *content_out = NULL; + } else { + /* No DSML at all: entire output is unclosed reasoning. */ + server_log(DS4_LOG_WARNING, + "ds4-server: thinking not closed and no tool call found; " + "returning output as reasoning only"); + split_reasoning_content(text, strlen(text), content_out, reasoning_out); + return true; + } + } else { + tool_search = think_end + 8; } - tool_search = think_end + 8; } const char *start = strstr(tool_search, "\n\n" DS4_TOOL_CALLS_START); @@ -5863,9 +5890,13 @@ static bool openai_sse_stream_update(int fd, server *s, const request *r, const } const char *close = strstr(raw + st->emit_pos, ""); + const char *dsml_implicit = (!close && final && r->has_tools) + ? find_any_tool_start(raw + st->emit_pos) : NULL; size_t limit; if (close) { limit = (size_t)(close - raw); + } else if (dsml_implicit) { + limit = (size_t)(dsml_implicit - raw); } else if (final) { limit = raw_len; } else { @@ -5885,6 +5916,9 @@ static bool openai_sse_stream_update(int fd, server *s, const request *r, const if (close) { st->emit_pos = (size_t)(close - raw) + strlen(""); st->mode = OPENAI_STREAM_TEXT; + } else if (dsml_implicit) { + st->emit_pos = (size_t)(dsml_implicit - raw); + st->mode = OPENAI_STREAM_TEXT; } else if (final) { st->mode = OPENAI_STREAM_SUPPRESS; return true; @@ -6560,9 +6594,13 @@ static bool responses_sse_stream_update(int fd, const request *r, } const char *close = strstr(raw + st->emit_pos, ""); + const char *dsml_implicit = (!close && final && r->has_tools) + ? find_any_tool_start(raw + st->emit_pos) : NULL; size_t limit; if (close) { limit = (size_t)(close - raw); + } else if (dsml_implicit) { + limit = (size_t)(dsml_implicit - raw); } else if (final) { limit = raw_len; } else { @@ -6595,6 +6633,10 @@ static bool responses_sse_stream_update(int fd, const request *r, st->emit_pos = (size_t)(close - raw) + strlen(""); st->mode = RESP_STREAM_TEXT; st->reasoning_closed_naturally = true; + } else if (dsml_implicit) { + st->emit_pos = (size_t)(dsml_implicit - raw); + st->mode = RESP_STREAM_TEXT; + st->reasoning_closed_naturally = true; } else if (final) { st->mode = RESP_STREAM_SUPPRESS; return true; @@ -7437,9 +7479,13 @@ static bool anthropic_sse_stream_update(int fd, server *s, const request *r, con } const char *close = strstr(raw + st->emit_pos, ""); + const char *dsml_implicit = (!close && final && r->has_tools) + ? find_any_tool_start(raw + st->emit_pos) : NULL; size_t limit; if (close) { limit = (size_t)(close - raw); + } else if (dsml_implicit) { + limit = (size_t)(dsml_implicit - raw); } else if (final) { limit = raw_len; } else { @@ -7457,11 +7503,14 @@ static bool anthropic_sse_stream_update(int fd, server *s, const request *r, con st->emit_pos = limit; } - if (close || final) { + if (close || dsml_implicit || final) { if (!anthropic_sse_close_block_live(fd, id, st)) return false; if (close) { st->emit_pos = (size_t)(close - raw) + strlen(""); st->mode = ANTH_STREAM_TEXT; + } else if (dsml_implicit) { + st->emit_pos = (size_t)(dsml_implicit - raw); + st->mode = ANTH_STREAM_TEXT; } else { st->mode = ANTH_STREAM_SUPPRESS; return true;