From 2bddad6326ddfa6f632f2be5b6d471c40264a504 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 2 Jun 2026 02:37:51 +0000
Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Replace=20Regex=20with?=
 =?UTF-8?q?=20manual=20string=20parsing=20in=20HintExtractor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

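Replaces the Regex-based prefix stripping, camelCase splitting, whitespace normalization, and ID detection in HintExtractor with manual string parsing (a prefix lookup, one StringBuilder pass for word splitting and whitespace normalization, and one pass for the ID check) to improve execution speed and limit string allocations.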

Co-authored-by: himattm <6266621+himattm@users.noreply.github.com>
---
 .jules/bolt.md                                |  3 +
 .../kotlin/halogen/engine/HintExtractor.kt    | 81 ++++++++++++++-----
 2 files changed, 62 insertions(+), 22 deletions(-)
 create mode 100644 .jules/bolt.md

diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 0000000..1d5f072
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,3 @@
+## 2024-05-24 - Manual Parsing Over Regex in Hot Paths
+**Learning:** In Kotlin Multiplatform projects (especially on JVM/WasmJs), string manipulation pipelines that chain several `Regex` operations (`Regex.replace`, `Regex.matches`) introduce significant performance overhead even when the patterns are precompiled: every call pays regex execution costs (plus recompilation if the `Regex` is constructed inline), and each `replace` step allocates an intermediate string.
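+**Action:** When working in hot paths like frequent parsers, replace multiple simple regexes (e.g., camelCase splits, whitespace normalization, fixed prefix checking) with a single manual character iteration loop using a `StringBuilder` to eliminate regex state machine overhead and minimize memory allocations. Verify behavior parity when doing so: for example, `Char.isWhitespace()` and the regex class `\s` do not necessarily match the same set of Unicode characters (such as the non-breaking space) on every platform.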
diff --git a/halogen-engine/src/commonMain/kotlin/halogen/engine/HintExtractor.kt b/halogen-engine/src/commonMain/kotlin/halogen/engine/HintExtractor.kt
index da90f01..ed4aa39 100644
--- a/halogen-engine/src/commonMain/kotlin/halogen/engine/HintExtractor.kt
+++ b/halogen-engine/src/commonMain/kotlin/halogen/engine/HintExtractor.kt
@@ -10,42 +10,79 @@ package halogen.engine
  */
 internal object HintExtractor {
 
-    private val PREFIX_PATTERN = Regex("""^(?:/r/|/category/|/topic/|/|#)""")
-    private val CAMEL_SPLIT = Regex("""(?<=[a-z])(?=[A-Z])""")
-    private val ID_PATTERN = Regex("""^[0-9a-f]{8,}$""", RegexOption.IGNORE_CASE)
-    private val NUMERIC_ONLY = Regex("""^\d+$""")
-    private val WHITESPACE_PATTERN = Regex("""\s+""")
+    private val PREFIXES = arrayOf("/r/", "/category/", "/topic/", "/", "#")
 
     fun extract(key: String): String? {
         if (key.isBlank()) return null
 
         // Strip common prefixes
-        var cleaned = PREFIX_PATTERN.replace(key.trim(), "")
+        var start = 0
+        val trimmedKey = key.trim()
+        for (prefix in PREFIXES) {
+            if (trimmedKey.startsWith(prefix)) {
+                start = prefix.length
+                break
+            }
+        }
 
-        // Remove leading/trailing slashes
-        cleaned = cleaned.trim('/')
+        var cleaned = trimmedKey.substring(start).trim('/')
 
         // Take the last meaningful segment if it looks like a path
-        if ('/' in cleaned) {
-            cleaned = cleaned.substringAfterLast('/')
+        val lastSlash = cleaned.lastIndexOf('/')
+        if (lastSlash != -1) {
+            cleaned = cleaned.substring(lastSlash + 1)
         }
 
-        // Split camelCase
-        cleaned = CAMEL_SPLIT.replace(cleaned, " ")
+        // Single pass for camelCase, snake_case, kebab-case, and whitespace
+        val sb = StringBuilder(cleaned.length + 5)
+        var lastWasSpace = true
+        var prevChar: Char? = null
+
+        for (i in cleaned.indices) {
+            val c = cleaned[i]
+            if (c == '_' || c == '-' || c.isWhitespace()) {
+                if (!lastWasSpace) {
+                    sb.append(' ')
+                    lastWasSpace = true
+                }
+            } else {
+                // Camel case detection
+                if (prevChar != null && prevChar in 'a'..'z' && c in 'A'..'Z') {
+                    if (!lastWasSpace) {
+                        sb.append(' ')
+                    }
+                }
+                sb.append(c)
+                lastWasSpace = false
+            }
+            prevChar = c
+        }
 
-        // Split snake_case and kebab-case
-        cleaned = cleaned.replace('_', ' ').replace('-', ' ')
+        val result = sb.toString().trim()
+        if (result.isBlank()) return null
 
-        // Normalize whitespace
-        cleaned = cleaned.trim().replace(WHITESPACE_PATTERN, " ")
+        // Reject things that look like IDs (numeric only or 8+ hex chars)
+        var hexCount = 0
+        var isNumericOnly = true
+        var hasNonHex = false
+        var charCount = 0
+        for (i in result.indices) {
+            val c = result[i]
+            if (c.isWhitespace()) continue
+            charCount++
+            if (c !in '0'..'9') isNumericOnly = false
+            if (c in '0'..'9' || c in 'A'..'F' || c in 'a'..'f') {
+                hexCount++
+            } else {
+                hasNonHex = true
+            }
+        }
 
-        if (cleaned.isBlank()) return null
+        if (charCount == 0) return null
 
-        // Reject things that look like IDs
-        val noSpaces = cleaned.replace(" ", "")
-        if (ID_PATTERN.matches(noSpaces)) return null
-        if (NUMERIC_ONLY.matches(noSpaces)) return null
+        if (isNumericOnly) return null
+        if (!hasNonHex && hexCount >= 8) return null
 
-        return cleaned.lowercase()
+        return result.lowercase()
     }
 }

From 6c4fe9ac2735435e4054ed0ceed25e182c81cf47 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 2 Jun 2026 02:57:23 +0000
Subject: [PATCH 2/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Replace=20Regex=20with?=
 =?UTF-8?q?=20manual=20string=20parsing=20in=20HintExtractor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces multiple Regex string manipulations with a single manual character iteration loop in HintExtractor to drastically improve execution speed and limit string allocations.

Co-authored-by: himattm <6266621+himattm@users.noreply.github.com>