From 6ca70f74a42eeb8a176910d962ad1984e80ea407 Mon Sep 17 00:00:00 2001 From: vp Date: Fri, 6 Mar 2026 18:32:59 +0300 Subject: [PATCH 1/2] fix: normalize reasoning delta spacing --- .../llm/provider/openai/llm_openai.py | 13 ++-- .../openai/openresponses_streaming.py | 30 ++++++--- .../provider/openai/responses_streaming.py | 10 ++- src/fast_agent/utils/reasoning_chunk_join.py | 65 +++++++++++++++++++ .../fast_agent/test_reasoning_chunk_join.py | 25 +++++++ 5 files changed, 127 insertions(+), 16 deletions(-) create mode 100644 src/fast_agent/utils/reasoning_chunk_join.py create mode 100644 tests/unit/fast_agent/test_reasoning_chunk_join.py diff --git a/src/fast_agent/llm/provider/openai/llm_openai.py b/src/fast_agent/llm/provider/openai/llm_openai.py index 3bac0e922..18166bab0 100644 --- a/src/fast_agent/llm/provider/openai/llm_openai.py +++ b/src/fast_agent/llm/provider/openai/llm_openai.py @@ -52,6 +52,7 @@ from fast_agent.llm.usage_tracking import TurnUsage from fast_agent.mcp.helpers.content_helpers import get_text from fast_agent.types import LlmStopReason, PromptMessageExtended +from fast_agent.utils.reasoning_chunk_join import normalize_reasoning_delta _logger = get_logger(__name__) @@ -235,17 +236,21 @@ def _handle_reasoning_delta( if not reasoning_text: return reasoning_active + normalized_text = normalize_reasoning_delta("".join(reasoning_segments), reasoning_text) + if not normalized_text: + return reasoning_active + if reasoning_mode == "tags": if not reasoning_active: reasoning_active = True - self._notify_stream_listeners(StreamChunk(text=reasoning_text, is_reasoning=True)) - reasoning_segments.append(reasoning_text) + self._notify_stream_listeners(StreamChunk(text=normalized_text, is_reasoning=True)) + reasoning_segments.append(normalized_text) return reasoning_active if reasoning_mode in {"stream", "reasoning_content", "gpt_oss"}: # Emit reasoning as-is - self._notify_stream_listeners(StreamChunk(text=reasoning_text, is_reasoning=True)) - reasoning_segments.append(reasoning_text) + self._notify_stream_listeners(StreamChunk(text=normalized_text, is_reasoning=True)) + reasoning_segments.append(normalized_text) return reasoning_active return reasoning_active diff --git a/src/fast_agent/llm/provider/openai/openresponses_streaming.py b/src/fast_agent/llm/provider/openai/openresponses_streaming.py index 18a38c50f..34a70a73b 100644 --- a/src/fast_agent/llm/provider/openai/openresponses_streaming.py +++ b/src/fast_agent/llm/provider/openai/openresponses_streaming.py @@ -11,6 +11,7 @@ from fast_agent.llm.provider.openai.streaming_utils import finalize_stream_response from fast_agent.llm.provider.openai.tool_notifications import OpenAIToolNotificationMixin from fast_agent.llm.stream_types import StreamChunk +from fast_agent.utils.reasoning_chunk_join import normalize_reasoning_delta if TYPE_CHECKING: from openai.types.responses import ( @@ -201,11 +202,16 @@ async def _process_stream( part_type = getattr(part, "type", None) part_text = getattr(part, "text", None) if part_type in {"reasoning", "reasoning_text"} and part_text: - reasoning_segments.append(part_text) + normalized_delta = normalize_reasoning_delta( + "".join(reasoning_segments), part_text + ) + if not normalized_delta: + continue + reasoning_segments.append(normalized_delta) self._notify_stream_listeners( - StreamChunk(text=part_text, is_reasoning=True) + StreamChunk(text=normalized_delta, is_reasoning=True) ) - reasoning_chars += len(part_text) + reasoning_chars += len(normalized_delta) await self._emit_streaming_progress( model=f"{model} (reasoning)", new_total=reasoning_chars, @@ -218,11 +224,14 @@ async def _process_stream( "response.reasoning_summary.delta", }: if delta: - reasoning_segments.append(delta) + normalized_delta = normalize_reasoning_delta("".join(reasoning_segments), delta) + if not normalized_delta: + continue + reasoning_segments.append(normalized_delta) self._notify_stream_listeners( - StreamChunk(text=delta, is_reasoning=True) + StreamChunk(text=normalized_delta, is_reasoning=True) ) - reasoning_chars += len(delta) + reasoning_chars += len(normalized_delta) await self._emit_streaming_progress( model=f"{model} (summary)", new_total=reasoning_chars, @@ -235,11 +244,14 @@ async def _process_stream( "response.reasoning_text.delta", }: if delta: - reasoning_segments.append(delta) + normalized_delta = normalize_reasoning_delta("".join(reasoning_segments), delta) + if not normalized_delta: + continue + reasoning_segments.append(normalized_delta) self._notify_stream_listeners( - StreamChunk(text=delta, is_reasoning=True) + StreamChunk(text=normalized_delta, is_reasoning=True) ) - reasoning_chars += len(delta) + reasoning_chars += len(normalized_delta) await self._emit_streaming_progress( model=f"{model} (reasoning)", new_total=reasoning_chars, diff --git a/src/fast_agent/llm/provider/openai/responses_streaming.py b/src/fast_agent/llm/provider/openai/responses_streaming.py index 79caf358a..52e68b690 100644 --- a/src/fast_agent/llm/provider/openai/responses_streaming.py +++ b/src/fast_agent/llm/provider/openai/responses_streaming.py @@ -15,6 +15,7 @@ from fast_agent.llm.provider.openai.streaming_utils import finalize_stream_response from fast_agent.llm.provider.openai.tool_notifications import OpenAIToolNotificationMixin from fast_agent.llm.stream_types import StreamChunk +from fast_agent.utils.reasoning_chunk_join import normalize_reasoning_delta _logger = get_logger(__name__) @@ -156,11 +157,14 @@ def _close_tool(index: int, tool_use_id: str | None) -> None: }: delta = getattr(event, "delta", None) if delta: - reasoning_segments.append(delta) + normalized_delta = normalize_reasoning_delta("".join(reasoning_segments), delta) + if not normalized_delta: + continue + reasoning_segments.append(normalized_delta) self._notify_stream_listeners( - StreamChunk(text=delta, is_reasoning=True) + StreamChunk(text=normalized_delta, is_reasoning=True) ) - reasoning_chars += len(delta) + reasoning_chars += len(normalized_delta) await self._emit_streaming_progress( model=f"{model} (summary)", new_total=reasoning_chars, diff --git a/src/fast_agent/utils/reasoning_chunk_join.py b/src/fast_agent/utils/reasoning_chunk_join.py new file mode 100644 index 000000000..81ebde9f7 --- /dev/null +++ b/src/fast_agent/utils/reasoning_chunk_join.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Sequence + + +_SENTENCE_PUNCTUATION = ".!?;:" +_MARKDOWN_OR_QUOTE_PREFIXES = "\"'`*_[" +_CLOSING_DELIMITERS = ")]}\"'" + + +def _needs_reasoning_separator(existing: str, incoming: str) -> bool: + if not existing or not incoming: + return False + + prev = existing[-1] + nxt = incoming[0] + + if prev.isspace() or nxt.isspace(): + return False + + if prev.islower() and nxt.isupper(): + return True + + if prev.isdigit() and nxt.isupper(): + return True + + if prev in _SENTENCE_PUNCTUATION and (nxt.isupper() or nxt in _MARKDOWN_OR_QUOTE_PREFIXES): + return True + + if prev in _CLOSING_DELIMITERS and nxt.isupper(): + return True + + if nxt in _MARKDOWN_OR_QUOTE_PREFIXES and (prev.isalnum() or prev in _SENTENCE_PUNCTUATION): + return True + + return False + + +def append_reasoning_chunk(existing: str, incoming: str) -> str: + if not existing: + return incoming + if not incoming: + return existing + if _needs_reasoning_separator(existing, incoming): + return f"{existing} {incoming}" + return existing + incoming + + +def join_reasoning_chunks(chunks: Sequence[str]) -> str: + combined = "" + for chunk in chunks: + if not chunk: + continue + combined = append_reasoning_chunk(combined, chunk) + return combined + + +def normalize_reasoning_delta(existing: str, incoming: str) -> str: + if not incoming: + return "" + combined = append_reasoning_chunk(existing, incoming) + return combined[len(existing) :] diff --git a/tests/unit/fast_agent/test_reasoning_chunk_join.py b/tests/unit/fast_agent/test_reasoning_chunk_join.py new file mode 100644 index 000000000..5b10c1110 --- /dev/null +++ b/tests/unit/fast_agent/test_reasoning_chunk_join.py @@ -0,0 +1,25 @@ +from fast_agent.utils.reasoning_chunk_join import normalize_reasoning_delta + + +def test_normalize_reasoning_delta_inserts_space_after_sentence_break() -> None: + emitted = "" + parts = [ + "approach.", + "Specifying session retrieval format", + "Selecting session retrieval method", + ] + + for part in parts: + delta = normalize_reasoning_delta(emitted, part) + emitted += delta + + assert emitted == "approach. Specifying session retrieval format Selecting session retrieval method" + + +def test_normalize_reasoning_delta_preserves_tool_style_names() -> None: + emitted = "" + for part in ["voice.", "fetch", "(id=session_id, mode='transcript')"]: + delta = normalize_reasoning_delta(emitted, part) + emitted += delta + + assert emitted == "voice.fetch(id=session_id, mode='transcript')" From b96c70d7ea5e27689cfab6f8e2c32c3ab2250f75 Mon Sep 17 00:00:00 2001 From: vp Date: Fri, 6 Mar 2026 19:20:37 +0300 Subject: [PATCH 2/2] fix: keep reasoning delta normalization minimal --- .../llm/provider/openai/llm_openai.py | 3 +- .../openai/openresponses_streaming.py | 21 ++++-- .../provider/openai/responses_streaming.py | 7 +- src/fast_agent/utils/reasoning_chunk_join.py | 72 +++++-------------- .../fast_agent/test_reasoning_chunk_join.py | 25 +++++-- 5 files changed, 64 insertions(+), 64 deletions(-) diff --git a/src/fast_agent/llm/provider/openai/llm_openai.py b/src/fast_agent/llm/provider/openai/llm_openai.py index 18166bab0..99d2729a6 100644 --- a/src/fast_agent/llm/provider/openai/llm_openai.py +++ b/src/fast_agent/llm/provider/openai/llm_openai.py @@ -236,7 +236,8 @@ def _handle_reasoning_delta( if not reasoning_text: return reasoning_active - normalized_text = normalize_reasoning_delta("".join(reasoning_segments), reasoning_text) + last_char = reasoning_segments[-1][-1] if reasoning_segments and reasoning_segments[-1] else None + normalized_text = normalize_reasoning_delta(last_char, reasoning_text) if not normalized_text: return reasoning_active diff --git a/src/fast_agent/llm/provider/openai/openresponses_streaming.py b/src/fast_agent/llm/provider/openai/openresponses_streaming.py index 34a70a73b..89bf8ff07 100644 --- a/src/fast_agent/llm/provider/openai/openresponses_streaming.py +++ b/src/fast_agent/llm/provider/openai/openresponses_streaming.py @@ -202,9 +202,12 @@ async def _process_stream( part_type = getattr(part, "type", None) part_text = getattr(part, "text", None) if part_type in {"reasoning", "reasoning_text"} and part_text: - normalized_delta = normalize_reasoning_delta( - "".join(reasoning_segments), part_text + last_char = ( + reasoning_segments[-1][-1] + if reasoning_segments and reasoning_segments[-1] + else None ) + normalized_delta = normalize_reasoning_delta(last_char, part_text) if not normalized_delta: continue reasoning_segments.append(normalized_delta) @@ -224,7 +227,12 @@ async def _process_stream( "response.reasoning_summary.delta", }: if delta: - normalized_delta = normalize_reasoning_delta("".join(reasoning_segments), delta) + last_char = ( + reasoning_segments[-1][-1] + if reasoning_segments and reasoning_segments[-1] + else None + ) + normalized_delta = normalize_reasoning_delta(last_char, delta) if not normalized_delta: continue reasoning_segments.append(normalized_delta) @@ -244,7 +252,12 @@ async def _process_stream( "response.reasoning_text.delta", }: if delta: - normalized_delta = normalize_reasoning_delta("".join(reasoning_segments), delta) + last_char = ( + reasoning_segments[-1][-1] + if reasoning_segments and reasoning_segments[-1] + else None + ) + normalized_delta = normalize_reasoning_delta(last_char, delta) if not normalized_delta: continue reasoning_segments.append(normalized_delta) diff --git a/src/fast_agent/llm/provider/openai/responses_streaming.py b/src/fast_agent/llm/provider/openai/responses_streaming.py index 52e68b690..60a8fe6be 100644 --- a/src/fast_agent/llm/provider/openai/responses_streaming.py +++ b/src/fast_agent/llm/provider/openai/responses_streaming.py @@ -157,7 +157,12 @@ def _close_tool(index: int, tool_use_id: str | None) -> None: }: delta = getattr(event, "delta", None) if delta: - normalized_delta = normalize_reasoning_delta("".join(reasoning_segments), delta) + last_char = ( + reasoning_segments[-1][-1] + if reasoning_segments and reasoning_segments[-1] + else None + ) + normalized_delta = normalize_reasoning_delta(last_char, delta) if not normalized_delta: continue reasoning_segments.append(normalized_delta) diff --git a/src/fast_agent/utils/reasoning_chunk_join.py b/src/fast_agent/utils/reasoning_chunk_join.py index 81ebde9f7..06cef712a 100644 --- a/src/fast_agent/utils/reasoning_chunk_join.py +++ b/src/fast_agent/utils/reasoning_chunk_join.py @@ -1,65 +1,31 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Sequence - - _SENTENCE_PUNCTUATION = ".!?;:" -_MARKDOWN_OR_QUOTE_PREFIXES = "\"'`*_[" -_CLOSING_DELIMITERS = ")]}\"'" +_MARKDOWN_PREFIXES = "\"`*[" -def _needs_reasoning_separator(existing: str, incoming: str) -> bool: - if not existing or not incoming: +def _looks_like_sentence_chunk(incoming: str) -> bool: + if not incoming: return False - - prev = existing[-1] - nxt = incoming[0] - - if prev.isspace() or nxt.isspace(): + if " " not in incoming: return False + first = incoming[0] + return first.isupper() or first in _MARKDOWN_PREFIXES - if prev.islower() and nxt.isupper(): - return True - - if prev.isdigit() and nxt.isupper(): - return True - - if prev in _SENTENCE_PUNCTUATION and (nxt.isupper() or nxt in _MARKDOWN_OR_QUOTE_PREFIXES): - return True - - if prev in _CLOSING_DELIMITERS and nxt.isupper(): - return True - if nxt in _MARKDOWN_OR_QUOTE_PREFIXES and (prev.isalnum() or prev in _SENTENCE_PUNCTUATION): - return True +def normalize_reasoning_delta(last_char: str | None, incoming: str) -> str: + """Normalize one reasoning delta without rebuilding the full accumulated text. - return False - - -def append_reasoning_chunk(existing: str, incoming: str) -> str: - if not existing: - return incoming - if not incoming: - return existing - if _needs_reasoning_separator(existing, incoming): - return f"{existing} {incoming}" - return existing + incoming - - -def join_reasoning_chunks(chunks: Sequence[str]) -> str: - combined = "" - for chunk in chunks: - if not chunk: - continue - combined = append_reasoning_chunk(combined, chunk) - return combined - - -def normalize_reasoning_delta(existing: str, incoming: str) -> str: + Keep the Codex-style append-only flow, but patch the specific broken case where + providers split natural-language reasoning into sentence chunks without a + separating space, e.g. "approach." + "Specifying session retrieval format". + """ if not incoming: return "" - combined = append_reasoning_chunk(existing, incoming) - return combined[len(existing) :] + if not last_char or last_char.isspace() or incoming[0].isspace(): + return incoming + if last_char in _SENTENCE_PUNCTUATION and _looks_like_sentence_chunk(incoming): + return f" {incoming}" + if last_char.islower() and _looks_like_sentence_chunk(incoming): + return f" {incoming}" + return incoming diff --git a/tests/unit/fast_agent/test_reasoning_chunk_join.py b/tests/unit/fast_agent/test_reasoning_chunk_join.py index 5b10c1110..7ccced228 100644 --- a/tests/unit/fast_agent/test_reasoning_chunk_join.py +++ b/tests/unit/fast_agent/test_reasoning_chunk_join.py @@ -2,6 +2,7 @@ def test_normalize_reasoning_delta_inserts_space_after_sentence_break() -> None: + last_char = None emitted = "" parts = [ "approach.", @@ -10,16 +11,30 @@ def test_normalize_reasoning_delta_inserts_space_after_sentence_break() -> None: ] for part in parts: - delta = normalize_reasoning_delta(emitted, part) + delta = normalize_reasoning_delta(last_char, part) emitted += delta + last_char = emitted[-1] if emitted else None assert emitted == "approach. Specifying session retrieval format Selecting session retrieval method" -def test_normalize_reasoning_delta_preserves_tool_style_names() -> None: +def test_normalize_reasoning_delta_preserves_contractions() -> None: + last_char = None emitted = "" - for part in ["voice.", "fetch", "(id=session_id, mode='transcript')"]: - delta = normalize_reasoning_delta(emitted, part) + for part in ["don", "'t do that"]: + delta = normalize_reasoning_delta(last_char, part) emitted += delta + last_char = emitted[-1] if emitted else None - assert emitted == "voice.fetch(id=session_id, mode='transcript')" + assert emitted == "don't do that" + + +def test_normalize_reasoning_delta_preserves_identifier_fragments() -> None: + last_char = None + emitted = "" + for part in ["session", "_id is required"]: + delta = normalize_reasoning_delta(last_char, part) + emitted += delta + last_char = emitted[-1] if emitted else None + + assert emitted == "session_id is required"