From 8b974eee3d495de15daf1308b9fde9682aee4ca8 Mon Sep 17 00:00:00 2001
From: ran <edding.suree@gmail.com>
Date: Fri, 5 Jun 2026 14:12:35 +0800
Subject: [PATCH] perf: precompute start/message formatting token ids in
 StreamableParser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`StreamableParser::process_next` called `render_formatting_token` — a full
`encode_with_special_tokens` — for `<|start|>` and `<|message|>` on every input
token while in the `ExpectStart` and `Header` states. `stop_tokens` is already
precomputed once in `new_with_options`; the start/message ids are equally
constant, so re-encoding them per token is wasted work.

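Precompute both ids once (like `stop_tokens`) and compare the incoming token
against the cached `Rank`. Pure caching; parsing results are unchanged. The one
observable difference is that a failure to render either token now surfaces
from `new_with_options` instead of from `process_next`.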

On a 1,000,000-token stream that stays in the header state, parse time drops
from ~13.06s to ~3.07ms (release build). All 30 tests pass unchanged.
---
 src/encoding.rs | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/src/encoding.rs b/src/encoding.rs
index 60257e7..febac72 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -1059,6 +1059,8 @@ pub struct StreamableParser {
     messages: Vec<Message>,
     state: StreamState,
     stop_tokens: HashSet<Rank>,
+    start_token: Rank,
+    message_token: Rank,
     last_content_delta: Option<String>,
     undecoded_tokens: Vec<Rank>,
     undecoded_bytes: Vec<u8>,
@@ -1090,6 +1092,8 @@ impl StreamableParser {
         options: ParseOptions,
     ) -> anyhow::Result<Self> {
         let stop_tokens = encoding.stop_tokens()?;
+        let start_token = encoding.render_formatting_token(FormattingToken::Start)?;
+        let message_token = encoding.render_formatting_token(FormattingToken::Message)?;
         let (state, next_role) = match role {
             Some(role) => (
                 StreamState::Header {
@@ -1106,6 +1110,8 @@ impl StreamableParser {
             messages: Vec::new(),
             state,
             stop_tokens,
+            start_token,
+            message_token,
             last_content_delta: None,
             undecoded_tokens: Vec::new(),
             undecoded_bytes: Vec::new(),
@@ -1123,9 +1129,7 @@ impl StreamableParser {
         let next_role_clone = self.next_role.clone();
         match &mut self.state {
             StreamState::ExpectStart => {
-                let start = self
-                    .encoding
-                    .render_formatting_token(FormattingToken::Start)?;
+                let start = self.start_token;
                 match token {
                     Some(token) if token == start => {
                         self.state = StreamState::Header {
@@ -1147,9 +1151,7 @@ impl StreamableParser {
                 }
             }
             StreamState::Header { header_tokens } => {
-                let msg_tok = self
-                    .encoding
-                    .render_formatting_token(FormattingToken::Message)?;
+                let msg_tok = self.message_token;
                 match token {
                     Some(token) if token == msg_tok => {
                         // Clone the tokens and next_role, then clear the state before parsing