MoonshotAI · wbxl2000 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/.changeset/fix-web-streaming-render-jank.md b/.changeset/fix-web-streaming-render-jank.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": patch
+---
+
+Keep the web chat responsive during long streaming replies by isolating live token text from the rest of the UI state, so it no longer stalls the main thread.
diff --git a/apps/kimi-web/src/api/daemon/eventReducer.ts b/apps/kimi-web/src/api/daemon/eventReducer.ts
@@ -84,7 +84,13 @@ export function createInitialState(): KimiClientState {
 function cloneState(s: KimiClientState): KimiClientState {
   return {
     ...s,
-    sessions: [...s.sessions],
+    // Reuse the `sessions` array reference when an event does not touch it.
+    // Every session-mutating case below already builds its own array via
+    // `[...]` / `.map` / `.filter`, so sharing the reference is safe — and it
+    // keeps `rawState.sessions` stable for events that don't change sessions,
+    // so the sidebar computeds (sessionsForView / workspaceGroups /
+    // mergedWorkspaces) are not dirtied by unrelated events.
+    sessions: s.sessions,
     messagesBySession: { ...s.messagesBySession },
     approvalsBySession: { ...s.approvalsBySession },
     planReviewByToolCallId: { ...s.planReviewByToolCallId },

diff --git a/apps/kimi-web/src/components/chat/ChatPane.vue b/apps/kimi-web/src/components/chat/ChatPane.vue
@@ -5,6 +5,7 @@ import { useI18n } from 'vue-i18n';
 import type { ChatTurn, ApprovalBlock, FilePreviewRequest, ToolMedia } from '../../types';
 import ToolCall from './ToolCall.vue';
 import Markdown from './Markdown.vue';
+import StreamingBlocks from './StreamingBlocks.vue';
 import ThinkingBlock from './ThinkingBlock.vue';
 import ActivityNotice from './ActivityNotice.vue';
 import AgentCard from './AgentCard.vue';
@@ -23,6 +24,7 @@ import {
   turnFinalText,
   turnToMarkdown,
 } from '../chatTurnRendering';
+import { streamingBySession, type StreamingBlock } from '../../composables/client/streamingStore';
 
 const { t } = useI18n();
 
@@ -44,6 +46,12 @@ onUnmounted(() => {
 const props = withDefaults(
   defineProps<{
     turns: ChatTurn[];
+    /**
+     * The session these turns belong to. Used by the streaming renderer to look
+     * up the live text in the streaming store. Optional so SideChatPanel (which
+     * renders a subagent transcript, not a streaming session) can omit it.
+     */
+    sessionId?: string;
     approvals?: { approvalId: string; block: ApprovalBlock; agentName?: string }[];
     /**
      * Bubble chat layout: render each turn as a chat bubble (user = right-aligned
@@ -198,7 +206,7 @@ const emit = defineEmits<{
   openMedia: [media: ToolMedia];
   copyConversationCopied: [];
   /** Show a thinking block's full text in the right-side panel. */
-  openThinking: [target: { turnId: string; blockIndex: number }];
+  openThinking: [target: { turnId: string; blockIndex: number; live?: boolean }];
   /** Show a compaction divider's summary text in the right-side panel. */
   openCompaction: [target: { turnId: string }];
   /** Show a subagent's full detail in the right-side panel. */
@@ -293,14 +301,37 @@ function confirmEditMessage(turn: ChatTurn): void {
 const copiedConversation = ref(false);
 let copiedConversationTimer: ReturnType<typeof setTimeout> | null = null;
 
+/** Streaming-store blocks for this pane's session; [] when not running or no session id. */
+function liveBlocksForStreaming(): StreamingBlock[] {
+  const sid = props.running ? props.sessionId : undefined;
+  return sid ? (streamingBySession[sid]?.blocks ?? []) : [];
+}
+
+/**
+ * Returns `turn` unchanged when there are no live blocks; otherwise a shallow
+ * copy with every non-empty live text/thinking block appended, for copy/serialize
+ * mid-stream (live text is not in `turn.blocks` while streaming).
+ */
+function withLiveBlocks(turn: ChatTurn, liveBlocks: StreamingBlock[]): ChatTurn {
+  if (liveBlocks.length === 0) return turn;
+  const merged = turn.blocks ? [...turn.blocks] : turnBlocks(turn);
+  for (const { kind, text } of liveBlocks) {
+    if (!text) continue; // empty live blocks contribute nothing
+    if (kind === 'text') merged.push({ kind: 'text', text });
+    else if (kind === 'thinking') merged.push({ kind: 'thinking', thinking: text });
+  }
+  return { ...turn, blocks: merged };
+}
+
 /** Convert the entire conversation to Markdown and copy to clipboard. */
 function copyConversation(): void {
   if (props.turns.length === 0) return;
+  const liveBlocks = liveBlocksForStreaming();
   const lines: string[] = [];
   for (const turn of props.turns) {
     if (turn.role === 'compaction') continue; // dividers don't copy
+    const t = turn.id === streamingTurnId.value ? withLiveBlocks(turn, liveBlocks) : turn;
     const roleLabel = turn.role === 'user' ? 'User' : 'Assistant';
-    const content = turnToMarkdown(turn);
+    const content = turnToMarkdown(t);
     if (content.trim()) {
       lines.push(`**${roleLabel}**\n\n${content}`);
     }
@@ -329,8 +360,9 @@ function assistantRunEndingAt(index: number): ChatTurn[] {
 }
 
 function assistantRunFinalText(index: number): string {
+  const liveBlocks = liveBlocksForStreaming();
   return assistantRunEndingAt(index)
-    .map((t) => turnFinalText(t))
+    .map((t) => turnFinalText(t.id === streamingTurnId.value ? withLiveBlocks(t, liveBlocks) : t))
     .filter(Boolean)
     .join('\n\n');
 }
@@ -537,6 +569,14 @@ function isStreamingRenderBlock(turn: ChatTurn, block: { sourceIndex: number }):
           <AgentGroup v-else-if="blk.kind === 'agentGroup'" :members="blk.members" @open="emit('openAgent', { turnId: turn.id, blockIndex: blk.sourceIndex, memberId: $event })" />
           <ToolCall v-else-if="blk.kind === 'tool'" :tool="blk.tool" :mobile="childBubble" :tool-diff-panel="toolDiffPanel" @open-media="emit('openMedia', $event)" @open-file="emit('openFile', $event)" @open-tool-diff="emit('openToolDiff', $event)" />
         </template>
+        <StreamingBlocks
+          v-if="sessionId && turn.id === streamingTurnId"
+          :session-id="sessionId"
+          :turn-id="turn.id"
+          :mobile="childBubble"
+          @open-file="(target) => emit('openFile', target)"
+          @open-thinking="emit('openThinking', $event)"
+        />
         <div v-if="turn.id !== streamingTurnId && isAssistantRunEnd(ti) && (assistantRunFinalText(ti).trim().length > 0 || turn.durationMs !== undefined)" class="a-msg-ft">
           <span v-if="turn.durationMs !== undefined" class="a-duration" :title="`${turn.durationMs} ms`">{{ formatDuration(turn.durationMs) }}</span>
           <button
@@ -679,6 +719,14 @@ function isStreamingRenderBlock(turn: ChatTurn, block: { sourceIndex: number }):
               <AgentGroup v-else-if="blk.kind === 'agentGroup'" :members="blk.members" @open="emit('openAgent', { turnId: turn.id, blockIndex: blk.sourceIndex, memberId: $event })" />
               <ToolCall v-else-if="blk.kind === 'tool'" :tool="blk.tool" :tool-diff-panel="toolDiffPanel" @open-media="emit('openMedia', $event)" @open-file="emit('openFile', $event)" @open-tool-diff="emit('openToolDiff', $event)" />
             </template>
+            <StreamingBlocks
+              v-if="sessionId && turn.id === streamingTurnId"
+              :session-id="sessionId"
+              :turn-id="turn.id"
+              :mobile="childBubble"
+              @open-file="(target) => emit('openFile', target)"
+              @open-thinking="emit('openThinking', $event)"
+            />
           </template>
         </div>
 

diff --git a/apps/kimi-web/src/components/chat/ConversationPane.vue b/apps/kimi-web/src/components/chat/ConversationPane.vue
@@ -105,7 +105,7 @@ const emit = defineEmits<{
   selectModel: [modelId: string];
   openFile: [target: FilePreviewRequest];
   openMedia: [media: ToolMedia];
-  openThinking: [target: { turnId: string; blockIndex: number }];
+  openThinking: [target: { turnId: string; blockIndex: number; live?: boolean }];
   openCompaction: [target: { turnId: string }];
   openAgent: [target: { turnId: string; blockIndex: number; memberId: string }];
   openToolDiff: [id: string];
@@ -1008,6 +1008,7 @@ defineExpose({ loadComposerForEdit });
               ref="chatPaneRef"
               :key="fileReloadKey ?? 'no-session'"
               :turns="turns"
+              :session-id="sessionId"
               :approvals="approvals"
               :bubble="bubble"
               :mobile="mobile"

diff --git a/apps/kimi-web/src/components/chat/Markdown.vue b/apps/kimi-web/src/components/chat/Markdown.vue
@@ -70,12 +70,15 @@ const renderPlan = computed(() => {
 // Code blocks follow the app colour scheme (shiki re-renders on flip).
 const isDark = useIsDark();
 
-// markstream's chat mode can batch nodes and defer offscreen nodes. Batching is
-// safe for settled history, but viewport deferral can leave individual code
-// blocks blank in our internal chat scroller when visibility events are missed
-// during a session/theme switch. Keep batching for history, but always mount the
-// actual nodes so every code block has at least its plain fallback immediately.
-const allowBatchRender = computed(() => !props.streaming);
+// markstream's chat mode batches node mounting across frames (frame-budget
+// scheduling) and can defer offscreen nodes. Viewport deferral can leave
+// individual code blocks blank in our internal chat scroller when visibility
+// events are missed, so it stays disabled below (`deferNodesUntilVisible:
+// false`). Batching itself only spreads mounting by a frame or two and is
+// exactly the scenario streaming needs, so it stays on for both live and
+// settled content (the `loading: false` code-block prop already removes the
+// skeleton, so a not-yet-mounted block simply appears a frame later).
+const allowBatchRender = computed(() => true);
 
 // ---------------------------------------------------------------------------
 // Local image resolution — rewrite the SOURCE TEXT before markstream sees it.

diff --git a/apps/kimi-web/src/components/chat/StreamingBlocks.vue b/apps/kimi-web/src/components/chat/StreamingBlocks.vue
@@ -0,0 +1,51 @@
+<!-- apps/kimi-web/src/components/chat/StreamingBlocks.vue -->
+<!--
+  Renders the live (still-streaming) text/thinking blocks of the active
+  assistant message. It subscribes to the fine-grained streaming store, so
+  each `assistantDelta` re-renders this component (plus any other store reader,
+  e.g. an open live-thinking panel) rather than App, sidebar and the turn list.
+
+  Mounted by ChatPane only for the turn that is currently streaming; unmounts
+  when the turn settles (the committed content in `messagesBySession` takes
+  over).
+-->
+<script setup lang="ts">
+import { computed } from 'vue';
+import Markdown from './Markdown.vue';
+import ThinkingBlock from './ThinkingBlock.vue';
+import { streamingBySession } from '../../composables/client/streamingStore';
+import type { FilePreviewRequest } from '../../types';
+
+const props = withDefaults(
+  defineProps<{
+    sessionId: string;
+    turnId: string;
+    mobile?: boolean;
+  }>(),
+  { mobile: false },
+);
+
+const emit = defineEmits<{
+  openFile: [target: FilePreviewRequest];
+  openThinking: [target: { turnId: string; blockIndex: number; live?: boolean }];
+}>();
+
+// Live blocks for this session, or [] when the store has no entry for it.
+// Reading through the reactive store is what subscribes this component to deltas.
+const blocks = computed(() => streamingBySession[props.sessionId]?.blocks ?? []);
+</script>
+
+<template>
+  <template v-for="blk in blocks" :key="`stream-${blk.kind}-${blk.contentIndex}`">
+    <ThinkingBlock
+      v-if="blk.kind === 'thinking'"
+      :text="blk.text"
+      :mobile="mobile"
+      :streaming="true"
+      @open="emit('openThinking', { turnId, blockIndex: blk.contentIndex, live: true })"
+    />
+    <div v-else-if="blk.kind === 'text' && blk.text" class="msg">
+      <Markdown :text="blk.text" :streaming="true" :open-file="(target) => emit('openFile', target)" />
+    </div>
+  </template>
+</template>
diff --git a/apps/kimi-web/src/composables/client/streamingStore.ts b/apps/kimi-web/src/composables/client/streamingStore.ts
@@ -0,0 +1,71 @@
+// apps/kimi-web/src/composables/client/streamingStore.ts
+//
+// Fine-grained streaming-text store, kept OUTSIDE `rawState` on purpose.
+//
+// `assistantDelta` is the only genuinely high-frequency event (dozens to
+// hundreds per second). Routing it through the immutable reducer + the coarse
+// `rawState` graph makes every delta re-render the whole App and recompute the
+// sidebar computeds (see the main-thread-jank investigation). Instead, deltas
+// append here and only the single `StreamingBlocks` component subscribed to a
+// session re-renders.
+//
+// Lifecycle: deltas append; `messageUpdated` (authoritative full content) and
+// turn-end (`sessionStatusChanged` idle/aborted) clear the entry so the
+// committed content in `messagesBySession` takes over without duplication.
+
+import { reactive } from 'vue';
+
+export interface StreamingBlock {
+  contentIndex: number; // content index passed with the delta that created this block
+  kind: 'text' | 'thinking'; // which delta field fed it: `text` or `thinking`
+  text: string; // concatenation of every chunk appended to this block so far
+}
+
+export interface StreamingState {
+  /** id of the assistant message currently being streamed. */
+  messageId: string;
+  /** Ordered live text/thinking blocks (always trailing in the message). */
+  blocks: StreamingBlock[];
+}
+
+/**
+ * Per-session live streaming state. A session has at most one in-flight
+ * assistant message (its trailing one), so a single entry per session suffices.
+ */
+export const streamingBySession = reactive<Record<string, StreamingState>>({});
+
+/**
+ * Append one `assistantDelta` to the streaming store. O(1): either mutates the
+ * trailing block's text in place (same contentIndex and kind) or pushes a new
+ * block. A delta for a different `messageId` replaces the session's entry.
+ * Never touches `rawState`, so no heavy computed (`turns`, sidebar) is dirtied.
+ */
+export function appendStreamingDelta(
+  sessionId: string,
+  messageId: string,
+  contentIndex: number,
+  delta: { text?: string; thinking?: string },
+): void {
+  const kind: 'text' | 'thinking' = delta.text !== undefined ? 'text' : 'thinking';
+  const chunk = delta.text ?? delta.thinking ?? '';
+  const state = streamingBySession[sessionId];
+  if (!state || state.messageId !== messageId) {
+    // New assistant message: install the entry with its first block in ONE
+    // reactive write — pushing onto the just-assigned raw object skips the proxy.
+    const blocks: StreamingBlock[] = chunk ? [{ contentIndex, kind, text: chunk }] : [];
+    streamingBySession[sessionId] = { messageId, blocks };
+    return;
+  }
+  if (chunk.length === 0) return;
+  const last = state.blocks.at(-1);
+  if (last && last.contentIndex === contentIndex && last.kind === kind) {
+    last.text += chunk;
+  } else {
+    state.blocks.push({ contentIndex, kind, text: chunk });
+  }
+}
+
+/** Remove the session's live entry from the reactive store, if present (used on commit or turn end). */
+export function clearStreaming(sessionId: string): void {
+  delete streamingBySession[sessionId];
+}
diff --git a/apps/kimi-web/src/composables/useDetailPanel.ts b/apps/kimi-web/src/composables/useDetailPanel.ts
@@ -8,6 +8,7 @@ import type { useKimiWebClient } from './useKimiWebClient';
 import { buildEditDiffLines, extractEditPath, findToolCallById } from '../lib/toolDiff';
 import { toolLabel } from '../lib/toolMeta';
 import { clampPanelWidth, panelMaxWidth, useViewportWidth } from './useViewportWidth';
+import { streamingBySession } from './client/streamingStore';
 
 type KimiWebClient = ReturnType<typeof useKimiWebClient>;
 
@@ -64,21 +65,47 @@ export function useDetailPanel({
   // ---------------------------------------------------------------------------
   // Thinking panel
   // ---------------------------------------------------------------------------
-  const thinkingTarget = ref<{ turnId: string; blockIndex: number } | null>(null);
+  const thinkingTarget = ref<{ turnId: string; blockIndex: number; live?: boolean } | null>(null);
 
   const thinkingPanelText = computed<string | null>(() => {
     const target = thinkingTarget.value;
     if (!target) return null;
+    // A live (still-streaming) thinking block is not in `client.turns` — its
+    // text lives in the streaming store. Read it there so the panel shows the
+    // growing text while the reply is still streaming (reactive: updates on
+    // each delta).
+    if (target.live) {
+      const sid = client.activeSessionId.value;
+      const live = streamingBySession[sid]?.blocks.find(
+        (b) => b.kind === 'thinking' && b.contentIndex === target.blockIndex,
+      );
+      if (live?.text) return live.text;
+      // The store is cleared at every `messageUpdated` (tool slot / step end /
+      // turn end) so the committed content takes over in the chat. Without a
+      // fallback the panel would close at that boundary, so fall back to the
+      // committed copy in the turn. Live blocks are the trailing ones of the
+      // in-flight message, so the block that was just committed is the turn's
+      // LAST thinking block — picking the first would show an earlier step's
+      // reasoning whenever the turn already contains one.
+      const turn = client.turns.value.find((tn) => tn.id === target.turnId);
+      const committed = turn?.blocks?.filter((b) => b.kind === 'thinking').at(-1);
+      return committed?.kind === 'thinking' ? committed.thinking : null;
+    }
     const turn = client.turns.value.find((tn) => tn.id === target.turnId);
     const blk = turn?.blocks?.[target.blockIndex];
     return blk?.kind === 'thinking' ? blk.thinking : null;
   });
 
   const thinkingVisible = computed(() => thinkingPanelText.value !== null);
 
-  function openThinkingPanel(target: { turnId: string; blockIndex: number }): void {
+  function openThinkingPanel(target: { turnId: string; blockIndex: number; live?: boolean }): void {
     const current = thinkingTarget.value;
-    if (current && current.turnId === target.turnId && current.blockIndex === target.blockIndex) {
+    if (
+      current &&
+      current.turnId === target.turnId &&
+      current.blockIndex === target.blockIndex &&
+      current.live === target.live
+    ) {
       thinkingTarget.value = null;
       if (detailTarget.value === 'thinking') detailTarget.value = null;
       return;