Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 56 additions & 7 deletions ds4_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,7 @@ static void tool_memory_attach_to_messages(server *s, chat_msgs *msgs,
tool_replay_stats *stats);
static bool tool_memory_has_id(server *s, const char *id);
static void kv_cache_restore_tool_memory_for_messages(server *s, const chat_msgs *msgs);
static void server_log(ds4_log_type type, const char *fmt, ...);

typedef struct {
char **v;
Expand Down Expand Up @@ -4443,16 +4444,42 @@ static bool parse_generated_message_ex(const char *text, bool require_thinking_c
* or an explanation of the protocol. Treating it as a real tool call
* duplicates it into both reasoning and structured tool_calls, and can make
* clients execute something the assistant had not actually emitted as its
* post-thinking action. */
* post-thinking action.
*
* Exception: if </think> is absent but a DSML tool-call block is present,
* the model almost certainly intended a real tool call and simply omitted
* the closing </think> tag. In that case we treat the DSML start as an
* implicit </think> boundary rather than silently discarding the call.
* The pre-DSML text becomes reasoning; the DSML block is parsed normally. */
if (require_thinking_closed) {
const char *think_end = find_last_substr(text, "</think>");
if (!think_end) {
/* Model did not close thinking; ignore any DSML in reasoning. */
fprintf(stderr, "ds4-server: thinking not closed, ignoring DSML in reasoning\n");
split_reasoning_content(text, strlen(text), content_out, reasoning_out);
return true;
const char *dsml_start = find_any_tool_start(text);
if (dsml_start) {
/* Implicit close: treat DSML start as the thinking boundary. */
server_log(DS4_LOG_WARNING,
"ds4-server: thinking not closed before tool call; "
"treating DSML start as implicit </think>");
tool_search = dsml_start;
/* Expose reasoning as everything up to the implicit boundary. */
split_reasoning_content(text, (size_t)(dsml_start - text),
content_out, reasoning_out);
/* content_out holds any non-reasoning prefix; we only need
* reasoning here — clear the content placeholder so the
* tool-call path below can fill it in correctly. */
free(*content_out);
*content_out = NULL;
} else {
/* No DSML at all: entire output is unclosed reasoning. */
server_log(DS4_LOG_WARNING,
"ds4-server: thinking not closed and no tool call found; "
"returning output as reasoning only");
split_reasoning_content(text, strlen(text), content_out, reasoning_out);
return true;
}
} else {
tool_search = think_end + 8;
}
tool_search = think_end + 8;
}

const char *start = strstr(tool_search, "\n\n" DS4_TOOL_CALLS_START);
Expand Down Expand Up @@ -5863,9 +5890,13 @@ static bool openai_sse_stream_update(int fd, server *s, const request *r, const
}

const char *close = strstr(raw + st->emit_pos, "</think>");
const char *dsml_implicit = (!close && final && r->has_tools)
? find_any_tool_start(raw + st->emit_pos) : NULL;
size_t limit;
if (close) {
limit = (size_t)(close - raw);
} else if (dsml_implicit) {
limit = (size_t)(dsml_implicit - raw);
} else if (final) {
limit = raw_len;
} else {
Expand All @@ -5885,6 +5916,9 @@ static bool openai_sse_stream_update(int fd, server *s, const request *r, const
if (close) {
st->emit_pos = (size_t)(close - raw) + strlen("</think>");
st->mode = OPENAI_STREAM_TEXT;
} else if (dsml_implicit) {
st->emit_pos = (size_t)(dsml_implicit - raw);
st->mode = OPENAI_STREAM_TEXT;
} else if (final) {
st->mode = OPENAI_STREAM_SUPPRESS;
return true;
Expand Down Expand Up @@ -6560,9 +6594,13 @@ static bool responses_sse_stream_update(int fd, const request *r,
}

const char *close = strstr(raw + st->emit_pos, "</think>");
const char *dsml_implicit = (!close && final && r->has_tools)
? find_any_tool_start(raw + st->emit_pos) : NULL;
size_t limit;
if (close) {
limit = (size_t)(close - raw);
} else if (dsml_implicit) {
limit = (size_t)(dsml_implicit - raw);
} else if (final) {
limit = raw_len;
} else {
Expand Down Expand Up @@ -6595,6 +6633,10 @@ static bool responses_sse_stream_update(int fd, const request *r,
st->emit_pos = (size_t)(close - raw) + strlen("</think>");
st->mode = RESP_STREAM_TEXT;
st->reasoning_closed_naturally = true;
} else if (dsml_implicit) {
st->emit_pos = (size_t)(dsml_implicit - raw);
st->mode = RESP_STREAM_TEXT;
st->reasoning_closed_naturally = true;
} else if (final) {
st->mode = RESP_STREAM_SUPPRESS;
return true;
Expand Down Expand Up @@ -7437,9 +7479,13 @@ static bool anthropic_sse_stream_update(int fd, server *s, const request *r, con
}

const char *close = strstr(raw + st->emit_pos, "</think>");
const char *dsml_implicit = (!close && final && r->has_tools)
? find_any_tool_start(raw + st->emit_pos) : NULL;
size_t limit;
if (close) {
limit = (size_t)(close - raw);
} else if (dsml_implicit) {
limit = (size_t)(dsml_implicit - raw);
} else if (final) {
limit = raw_len;
} else {
Expand All @@ -7457,11 +7503,14 @@ static bool anthropic_sse_stream_update(int fd, server *s, const request *r, con
st->emit_pos = limit;
}

if (close || final) {
if (close || dsml_implicit || final) {
if (!anthropic_sse_close_block_live(fd, id, st)) return false;
if (close) {
st->emit_pos = (size_t)(close - raw) + strlen("</think>");
st->mode = ANTH_STREAM_TEXT;
} else if (dsml_implicit) {
st->emit_pos = (size_t)(dsml_implicit - raw);
st->mode = ANTH_STREAM_TEXT;
} else {
st->mode = ANTH_STREAM_SUPPRESS;
return true;
Expand Down