-
Notifications
You must be signed in to change notification settings - Fork 355
perf(web): isolate streaming text to fix main-thread jank #1111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a297987
ab17799
b791a35
119eba8
318e752
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| "@moonshot-ai/kimi-code": patch | ||
| --- | ||
|
|
||
| Keep the web chat responsive during long streaming replies by isolating live token text from the rest of the UI state, so it no longer stalls the main thread. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| <!-- apps/kimi-web/src/components/chat/StreamingBlocks.vue --> | ||
| <!-- | ||
| Renders the live (still-streaming) text/thinking blocks of the active | ||
| assistant message. This is the ONLY component that re-renders on each | ||
| `assistantDelta`: it subscribes to the fine-grained streaming store, so the | ||
| rest of the app (App, sidebar, the turn list) does not move on every token. | ||
|
|
||
| Mounted by ChatPane only for the turn that is currently streaming; unmounts | ||
| when the turn settles (the committed content in `messagesBySession` takes | ||
| over). | ||
| --> | ||
| <script setup lang="ts"> | ||
| import { computed } from 'vue'; | ||
| import Markdown from './Markdown.vue'; | ||
| import ThinkingBlock from './ThinkingBlock.vue'; | ||
| import { streamingBySession } from '../../composables/client/streamingStore'; | ||
| import type { FilePreviewRequest } from '../../types'; | ||
|
|
||
/** Inputs: the session whose live blocks are shown, the id of the streaming turn, and a mobile flag. */
interface StreamingBlocksProps {
  sessionId: string;
  turnId: string;
  mobile?: boolean;
}

// `mobile` is optional for callers and defaults to false.
const props = withDefaults(defineProps<StreamingBlocksProps>(), { mobile: false });

/** Payload of `openThinking`; `live` marks a block that is still streaming. */
type ThinkingTarget = { turnId: string; blockIndex: number; live?: boolean };

const emit = defineEmits<{
  openFile: [target: FilePreviewRequest];
  openThinking: [target: ThinkingTarget];
}>();

// Reads this session's entry from the streaming store, so the computed is
// re-evaluated when that entry changes. Falls back to an empty list when the
// session has no live entry.
const blocks = computed(() => {
  const live = streamingBySession[props.sessionId];
  return live?.blocks ?? [];
});
| </script> | ||
|
|
||
| <template> | ||
| <template v-for="blk in blocks" :key="`stream-${blk.kind}-${blk.contentIndex}`"> | ||
| <ThinkingBlock | ||
| v-if="blk.kind === 'thinking'" | ||
| :text="blk.text" | ||
| :mobile="mobile" | ||
| :streaming="true" | ||
| @open="emit('openThinking', { turnId, blockIndex: blk.contentIndex, live: true })" | ||
| /> | ||
| <div v-else-if="blk.kind === 'text' && blk.text" class="msg"> | ||
| <Markdown :text="blk.text" :streaming="true" :open-file="(target) => emit('openFile', target)" /> | ||
| </div> | ||
| </template> | ||
| </template> | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| // apps/kimi-web/src/composables/client/streamingStore.ts | ||
| // | ||
| // Fine-grained streaming-text store, kept OUTSIDE `rawState` on purpose. | ||
| // | ||
| // `assistantDelta` is the only genuinely high-frequency event (dozens to | ||
| // hundreds per second). Routing it through the immutable reducer + the coarse | ||
| // `rawState` graph makes every delta re-render the whole App and recompute the | ||
| // sidebar computeds (see the main-thread-jank investigation). Instead, deltas | ||
| // append here and only the single `StreamingBlocks` component subscribed to a | ||
| // session re-renders. | ||
| // | ||
| // Lifecycle: deltas append; `messageUpdated` (authoritative full content) and | ||
| // turn-end (`sessionStatusChanged` idle/aborted) clear the entry so the | ||
| // committed content in `messagesBySession` takes over without duplication. | ||
|
|
||
| import { reactive } from 'vue'; | ||
|
|
||
/** One live block of the assistant message that is currently streaming. */
export interface StreamingBlock {
  /** Content index carried by the delta that created this block. */
  contentIndex: number;
  /** Whether the block holds thinking text or visible reply text. */
  kind: 'thinking' | 'text';
  /** Text accumulated so far from the deltas appended to this block. */
  text: string;
}
|
|
||
/** Live streaming entry held for one session. */
export interface StreamingState {
  /** Id of the assistant message whose deltas are being collected. */
  messageId: string;
  /** Live text/thinking blocks, in the order they were first appended. */
  blocks: Array<StreamingBlock>;
}
|
|
||
// Seed object for the store; typed here so `reactive` infers the record shape.
const initialStreamingEntries: Record<string, StreamingState> = {};

/**
 * Reactive map from session id to that session's live streaming entry.
 *
 * One entry per session is enough: a session streams at most one assistant
 * message at a time (its trailing one).
 */
export const streamingBySession = reactive(initialStreamingEntries);
|
|
||
/**
 * Append one `assistantDelta` chunk to the session's live streaming entry.
 *
 * Constant-time per call: the chunk is either concatenated onto the trailing
 * block (same `contentIndex` and same kind) or pushed as a new trailing block.
 * The only state written is `streamingBySession`.
 *
 * @param sessionId - Session whose live entry receives the chunk.
 * @param messageId - Assistant message the delta belongs to. When it differs
 *   from the stored entry's `messageId` (or no entry exists), the session's
 *   entry is replaced by a fresh, empty one before the chunk is applied.
 * @param contentIndex - Content index the delta targets.
 * @param delta - Carries `text` or `thinking`. Non-empty `text` wins; an empty
 *   or missing `text` falls through to `thinking`. A delta with no non-empty
 *   payload appends nothing.
 */
export function appendStreamingDelta(
  sessionId: string,
  messageId: string,
  contentIndex: number,
  delta: { text?: string; thinking?: string },
): void {
  let state = streamingBySession[sessionId];
  // A different message id starts a fresh entry for the session; blocks that
  // belonged to the previous message are discarded from the live store.
  if (!state || state.messageId !== messageId) {
    state = streamingBySession[sessionId] = { messageId, blocks: [] };
  }

  // Pick the payload by content rather than by key presence: a delta shaped
  // `{ text: '', thinking: '…' }` must still deliver its thinking chunk
  // instead of being classified as empty text and dropped.
  const text = delta.text ?? '';
  const hasText = text.length > 0;
  const kind: 'text' | 'thinking' = hasText ? 'text' : 'thinking';
  const chunk = hasText ? text : (delta.thinking ?? '');
  if (chunk.length === 0) return;

  // Only the trailing block is considered for in-place growth.
  const last = state.blocks[state.blocks.length - 1];
  if (last && last.contentIndex === contentIndex && last.kind === kind) {
    last.text += chunk;
  } else {
    state.blocks.push({ contentIndex, kind, text: chunk });
  }
}
|
|
||
/**
 * Remove a session's live streaming entry from the store.
 *
 * Intended for the moment the message is committed or the turn ends. Calling
 * it for a session with no entry is a no-op.
 */
export function clearStreaming(sessionId: string): void {
  Reflect.deleteProperty(streamingBySession, sessionId);
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ import type { useKimiWebClient } from './useKimiWebClient'; | |
| import { buildEditDiffLines, extractEditPath, findToolCallById } from '../lib/toolDiff'; | ||
| import { toolLabel } from '../lib/toolMeta'; | ||
| import { clampPanelWidth, panelMaxWidth, useViewportWidth } from './useViewportWidth'; | ||
| import { streamingBySession } from './client/streamingStore'; | ||
|
|
||
| type KimiWebClient = ReturnType<typeof useKimiWebClient>; | ||
|
|
||
|
|
@@ -64,21 +65,47 @@ export function useDetailPanel({ | |
| // --------------------------------------------------------------------------- | ||
| // Thinking panel | ||
| // --------------------------------------------------------------------------- | ||
| const thinkingTarget = ref<{ turnId: string; blockIndex: number } | null>(null); | ||
| const thinkingTarget = ref<{ turnId: string; blockIndex: number; live?: boolean } | null>(null); | ||
|
|
||
| const thinkingPanelText = computed<string | null>(() => { | ||
| const target = thinkingTarget.value; | ||
| if (!target) return null; | ||
| // A live (still-streaming) thinking block is not in `client.turns` — its | ||
| // text lives in the streaming store. Read it there so the panel shows the | ||
| // growing text while the reply is still streaming (reactive: updates on | ||
| // each delta). | ||
| if (target.live) { | ||
| const sid = client.activeSessionId.value; | ||
| const live = streamingBySession[sid]?.blocks.find( | ||
| (b) => b.kind === 'thinking' && b.contentIndex === target.blockIndex, | ||
| ); | ||
| if (live?.text) return live.text; | ||
| // The store is cleared at every `messageUpdated` (tool slot / step end / | ||
| // turn end) so the committed content takes over in the chat. The last | ||
| // deltas and that clear land in the same tick and coalesce, so without a | ||
| // fallback the panel would close *before* rendering the final chunk. | ||
| // Fall back to the committed thinking block in the turn — it already | ||
| // holds the full text — so the panel keeps showing the complete content | ||
| // through the boundary instead of flickering closed. | ||
| const turn = client.turns.value.find((tn) => tn.id === target.turnId); | ||
| const committed = turn?.blocks?.find((b) => b.kind === 'thinking'); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
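When a user opens a later live thinking block, such as a turn that already had an earlier thinking segment before a tool and then starts thinking again, this fallback's `turn?.blocks?.find((b) => b.kind === 'thinking')` returns the first committed thinking block of the turn rather than the one at `target.blockIndex`, so once the streaming store is cleared the panel can show the earlier segment's text instead of the block the user opened. Useful? React with 👍 / 👎. |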
||
| return committed?.kind === 'thinking' ? committed.thinking : null; | ||
| } | ||
| const turn = client.turns.value.find((tn) => tn.id === target.turnId); | ||
| const blk = turn?.blocks?.[target.blockIndex]; | ||
| return blk?.kind === 'thinking' ? blk.thinking : null; | ||
| }); | ||
|
|
||
| const thinkingVisible = computed(() => thinkingPanelText.value !== null); | ||
|
|
||
| function openThinkingPanel(target: { turnId: string; blockIndex: number }): void { | ||
| function openThinkingPanel(target: { turnId: string; blockIndex: number; live?: boolean }): void { | ||
| const current = thinkingTarget.value; | ||
| if (current && current.turnId === target.turnId && current.blockIndex === target.blockIndex) { | ||
| if ( | ||
| current && | ||
| current.turnId === target.turnId && | ||
| current.blockIndex === target.blockIndex && | ||
| current.live === target.live | ||
| ) { | ||
| thinkingTarget.value = null; | ||
| if (detailTarget.value === 'thinking') detailTarget.value = null; | ||
| return; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a user refreshes or reconnects mid-reply, `seedInFlight` puts the already-generated `assistantText`/`thinkingText` into `turn.blocks`, and later deltas for the same `contentIndex` render here as a separate Markdown/ThinkingBlock appended after the seeded block. Markdown constructs spanning the snapshot boundary, such as an open code fence or list, are parsed as two documents and render incorrectly until `messageUpdated` commits. Seed the streaming store with the snapshot block or render same-index live text through the existing block instead of a separate Markdown instance.
Useful? React with 👍 / 👎.