From 8b974eee3d495de15daf1308b9fde9682aee4ca8 Mon Sep 17 00:00:00 2001 From: ran Date: Fri, 5 Jun 2026 14:12:35 +0800 Subject: [PATCH] perf: precompute start/message formatting token ids in StreamableParser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `StreamableParser::process_next` called `render_formatting_token` — a full `encode_with_special_tokens` — for `<|start|>` and `<|message|>` on every input token while in the `ExpectStart` and `Header` states. `stop_tokens` is already precomputed once in `new_with_options`; the start/message ids are equally constant, so re-encoding them per token is wasted work. Precompute both ids once (like `stop_tokens`) and compare the incoming token against the cached `Rank`. Pure caching; parsing results are unchanged. The only observable difference is that an error from rendering either token is now returned by `new_with_options` instead of by `process_next`. On a 1,000,000-token stream that stays in the header state, parse time drops from ~13.06s to ~3.07ms (release build). All 30 tests pass unchanged. --- src/encoding.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/encoding.rs b/src/encoding.rs index 60257e7..febac72 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -1059,6 +1059,8 @@ pub struct StreamableParser { messages: Vec, state: StreamState, stop_tokens: HashSet, + start_token: Rank, + message_token: Rank, last_content_delta: Option, undecoded_tokens: Vec, undecoded_bytes: Vec, @@ -1090,6 +1092,8 @@ impl StreamableParser { options: ParseOptions, ) -> anyhow::Result { let stop_tokens = encoding.stop_tokens()?; + let start_token = encoding.render_formatting_token(FormattingToken::Start)?; + let message_token = encoding.render_formatting_token(FormattingToken::Message)?; let (state, next_role) = match role { Some(role) => ( StreamState::Header { @@ -1106,6 +1110,8 @@ impl StreamableParser { messages: Vec::new(), state, stop_tokens, + start_token, + message_token, last_content_delta: None, undecoded_tokens: Vec::new(), undecoded_bytes: Vec::new(), @@ -1123,9 +1129,7 @@ impl 
StreamableParser { let next_role_clone = self.next_role.clone(); match &mut self.state { StreamState::ExpectStart => { - let start = self - .encoding - .render_formatting_token(FormattingToken::Start)?; + let start = self.start_token; match token { Some(token) if token == start => { self.state = StreamState::Header { @@ -1147,9 +1151,7 @@ impl StreamableParser { } } StreamState::Header { header_tokens } => { - let msg_tok = self - .encoding - .render_formatting_token(FormattingToken::Message)?; + let msg_tok = self.message_token; match token { Some(token) if token == msg_tok => { // Clone the tokens and next_role, then clear the state before parsing