From f76b2b046b370d933cf16adc48dff46157e18cfc Mon Sep 17 00:00:00 2001
From: YosefHayim
Date: Sun, 18 Jan 2026 10:38:48 +0200
Subject: [PATCH] Add token limit handling to prevent 8k token overflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem:
- The GitHub Models API enforces an 8k token limit on the entire request
- Large git diffs can exceed this limit, causing API failures
- Users hit these failures when staging large changes

Solution:
- Added token estimation using a character-based heuristic (1 token ≈ 4 chars)
- Implemented truncation logic that preserves UTF-8 boundaries
- Added content prioritization when the estimate exceeds the limit

Implementation Details:
- estimateTokens(): approximates the token count of any text content
- truncateToTokenLimit(): safely truncates text and appends an ellipsis indicator
- Modified GenerateCommitMessage() to:
  * Estimate tokens for the prompt templates + changes + examples
  * Reserve tokens for the templates (plus a buffer)
  * Prioritize examples (up to 20% of the remaining budget) when present
  * Truncate the changes to fit the remaining budget
  * Display a warning when truncation occurs

Benefits:
- Prevents API failures from token overflow
- Preserves as much content as the budget allows
- Warns the user clearly when truncation occurs
- No external dependencies; follows the existing code style
- Gracefully handles both changes-only and changes+examples scenarios
---
 internal/llm/client.go | 108 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 105 insertions(+), 3 deletions(-)

diff --git a/internal/llm/client.go b/internal/llm/client.go
index 9ae04c3..911f4e2 100644
--- a/internal/llm/client.go
+++ b/internal/llm/client.go
@@ -18,6 +18,47 @@ import (
 //go:embed commitmsg.prompt.yml
 var commitmsgPromptYAML []byte
 
+const (
+	maxTokens     = 8000
+	tokensPerChar = 0.25 // 1 token ≈ 4 characters
+)
+
+// estimateTokens approximates the number of tokens in text using a simple character-based heuristic.
+func estimateTokens(text string) int {
+	return int(float64(len(text)) * tokensPerChar)
+}
+
+// truncateToTokenLimit truncates text to fit within the specified token limit.
+// Preserves UTF-8 boundaries and adds an ellipsis when truncation occurs.
+func truncateToTokenLimit(text string, maxTokens int) string {
+	if estimateTokens(text) <= maxTokens {
+		return text
+	}
+
+	// Convert maxTokens to an approximate character limit
+	maxChars := int(float64(maxTokens) / tokensPerChar)
+
+	// Ensure we don't exceed the string length
+	if maxChars >= len(text) {
+		return text
+	}
+
+	// Find a safe UTF-8 boundary near the limit
+	runes := []rune(text)
+	targetLen := maxChars
+	if targetLen > len(runes) {
+		targetLen = len(runes)
+	}
+
+	// Reserve space for the ellipsis if we're truncating
+	if targetLen > 3 {
+		targetLen -= 3
+	}
+
+	truncated := string(runes[:targetLen]) + "..."
+	return truncated
+}
+
 // PromptConfig represents the structure of the prompt configuration file.
 type PromptConfig struct {
 	Name string `yaml:"name"`
@@ -103,17 +144,78 @@ func (c *Client) GenerateCommitMessage(
 	selectedModel := model
 	selectedLanguage := language
 
+	// Estimate tokens and truncate if necessary to stay under 8k limit
+	truncatedChanges := changesSummary
+	truncatedExamples := examples
+
+	// Estimate tokens for the prompt template (without placeholders)
+	promptTemplateTokens := 0
+	for _, msg := range promptConfig.Messages {
+		content := msg.Content
+		content = strings.ReplaceAll(content, "{{changes}}", "")
+		content = strings.ReplaceAll(content, "{{language}}", selectedLanguage)
+		content = strings.ReplaceAll(content, "{{examples}}", "")
+		promptTemplateTokens += estimateTokens(content)
+	}
+
+	// Estimate tokens for changes and examples
+	changesTokens := estimateTokens(changesSummary)
+	examplesTokens := 0
+	if examples != "" {
+		examplesTokens = estimateTokens(createExamplesString(examples))
+	}
+
+	totalTokens := promptTemplateTokens + changesTokens + examplesTokens
+
+	if totalTokens > maxTokens {
+		fmt.Println(" Warning: Content truncated to fit token limit")
+
+		// Reserve tokens for prompt templates (add some buffer)
+		reservedTokens := promptTemplateTokens + 500 // buffer for template processing
+
+		remainingTokens := maxTokens - reservedTokens
+		if remainingTokens < 0 {
+			remainingTokens = 0
+		}
+
+		// Prioritize examples if present, otherwise use all remaining for changes
+		if examplesTokens > 0 {
+			// Reserve some tokens for examples (up to 20% of remaining)
+			examplesReserved := int(float64(remainingTokens) * 0.2)
+			if examplesReserved > examplesTokens {
+				examplesReserved = examplesTokens
+			}
+
+			remainingTokens -= examplesReserved
+
+			// Truncate examples if needed
+			if examplesReserved < examplesTokens {
+				truncatedExamples = truncateToTokenLimit(examples, examplesReserved)
+			}
+
+			// Truncate changes with remaining tokens
+			if remainingTokens < changesTokens {
+				truncatedChanges = truncateToTokenLimit(changesSummary, remainingTokens)
+			}
+		} else {
+			// No examples, use all remaining tokens for changes
+			if remainingTokens < changesTokens {
+				truncatedChanges = truncateToTokenLimit(changesSummary, remainingTokens)
+			}
+		}
+	}
+
 	// Build messages from the prompt config, replacing template variables
 	messages := make([]Message, len(promptConfig.Messages))
 	for i, msg := range promptConfig.Messages {
 		content := msg.Content
 		// Replace the template variables
-		content = strings.ReplaceAll(content, "{{changes}}", changesSummary)
+		content = strings.ReplaceAll(content, "{{changes}}", truncatedChanges)
 		content = strings.ReplaceAll(content, "{{language}}", selectedLanguage)
 
-		if examples != "" && strings.Contains(content, "{{examples}}") {
+		if truncatedExamples != "" && strings.Contains(content, "{{examples}}") {
 			// If examples are provided, replace the {{examples}} placeholder
-			content = strings.ReplaceAll(content, "{{examples}}", createExamplesString(examples))
+			content = strings.ReplaceAll(content, "{{examples}}", createExamplesString(truncatedExamples))
 		} else {
 			// If no examples are provided, remove the {{examples}} placeholder
 			content = strings.ReplaceAll(content, "{{examples}}", "")
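
Note for reviewers: the truncation behavior is easy to sanity-check with a
small test next to client.go. The sketch below is illustrative and not part
of the patch; it assumes the helpers above live in package llm, and the
sample inputs and test name are made up:

package llm

import (
	"strings"
	"testing"
	"unicode/utf8"
)

func TestTruncateToTokenLimit(t *testing.T) {
	// Input under the limit passes through unchanged.
	short := "tiny diff"
	if got := truncateToTokenLimit(short, maxTokens); got != short {
		t.Errorf("short input changed: %q", got)
	}

	// ~17k bytes of ASCII estimates to ~4.2k tokens; truncating to a
	// 100-token budget should land at or under it and mark the cut.
	long := strings.Repeat("sample diff line\n", 1000)
	got := truncateToTokenLimit(long, 100)
	if estimateTokens(got) > 100 {
		t.Errorf("estimates %d tokens, want <= 100", estimateTokens(got))
	}
	if !strings.HasSuffix(got, "...") {
		t.Errorf("missing ellipsis indicator")
	}

	// Rune-based slicing must never split a multi-byte UTF-8 sequence.
	multibyte := strings.Repeat("héllo wörld ", 500)
	if !utf8.ValidString(truncateToTokenLimit(multibyte, 100)) {
		t.Errorf("truncation produced invalid UTF-8")
	}
}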