From 2bddad6326ddfa6f632f2be5b6d471c40264a504 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 2 Jun 2026 02:37:51 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Replace=20Regex=20with?= =?UTF-8?q?=20manual=20string=20parsing=20in=20HintExtractor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces multiple Regex string manipulations with a single manual character iteration loop in HintExtractor to drastically improve execution speed and limit string allocations. Co-authored-by: himattm <6266621+himattm@users.noreply.github.com> --- .jules/bolt.md | 3 + .../kotlin/halogen/engine/HintExtractor.kt | 81 ++++++++++++++----- 2 files changed, 62 insertions(+), 22 deletions(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..1d5f072 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2024-05-24 - Manual Parsing Over Regex in Hot Paths +**Learning:** In Kotlin Multiplatform projects (especially on JVM/WasmJs), string manipulation pipelines using sequential `Regex` objects (`Regex.replace`, `Regex.matches`) introduce significant performance overhead, even when the patterns are precompiled once, because every step pays regex matching costs and allocates an intermediate string. +**Action:** When working in hot paths like frequent parsers, replace multiple simple regexes (e.g., camelCase splits, whitespace normalization, fixed prefix checking) with a single manual character iteration loop using a `StringBuilder` to eliminate regex state machine overhead and minimize memory allocations. 
diff --git a/halogen-engine/src/commonMain/kotlin/halogen/engine/HintExtractor.kt b/halogen-engine/src/commonMain/kotlin/halogen/engine/HintExtractor.kt index da90f01..ed4aa39 100644 --- a/halogen-engine/src/commonMain/kotlin/halogen/engine/HintExtractor.kt +++ b/halogen-engine/src/commonMain/kotlin/halogen/engine/HintExtractor.kt @@ -10,42 +10,79 @@ package halogen.engine */ internal object HintExtractor { - private val PREFIX_PATTERN = Regex("""^(?:/r/|/category/|/topic/|/|#)""") - private val CAMEL_SPLIT = Regex("""(?<=[a-z])(?=[A-Z])""") - private val ID_PATTERN = Regex("""^[0-9a-f]{8,}$""", RegexOption.IGNORE_CASE) - private val NUMERIC_ONLY = Regex("""^\d+$""") - private val WHITESPACE_PATTERN = Regex("""\s+""") + private val PREFIXES = arrayOf("/r/", "/category/", "/topic/", "/", "#") fun extract(key: String): String? { if (key.isBlank()) return null // Strip common prefixes - var cleaned = PREFIX_PATTERN.replace(key.trim(), "") + var start = 0 + val trimmedKey = key.trim() + for (prefix in PREFIXES) { + if (trimmedKey.startsWith(prefix)) { + start = prefix.length + break + } + } - // Remove leading/trailing slashes - cleaned = cleaned.trim('/') + var cleaned = trimmedKey.substring(start).trim('/') // Take the last meaningful segment if it looks like a path - if ('/' in cleaned) { - cleaned = cleaned.substringAfterLast('/') + val lastSlash = cleaned.lastIndexOf('/') + if (lastSlash != -1) { + cleaned = cleaned.substring(lastSlash + 1) } - // Split camelCase - cleaned = CAMEL_SPLIT.replace(cleaned, " ") + // Single pass for camelCase, snake_case, kebab-case, and whitespace + val sb = StringBuilder(cleaned.length + 5) + var lastWasSpace = true + var prevChar: Char? 
= null + + for (i in cleaned.indices) { + val c = cleaned[i] + if (c == '_' || c == '-' || c.isWhitespace()) { + if (!lastWasSpace) { + sb.append(' ') + lastWasSpace = true + } + } else { + // Camel case detection + if (prevChar != null && prevChar in 'a'..'z' && c in 'A'..'Z') { + if (!lastWasSpace) { + sb.append(' ') + } + } + sb.append(c) + lastWasSpace = false + } + prevChar = c + } - // Split snake_case and kebab-case - cleaned = cleaned.replace('_', ' ').replace('-', ' ') + val result = sb.toString().trim() + if (result.isBlank()) return null - // Normalize whitespace - cleaned = cleaned.trim().replace(WHITESPACE_PATTERN, " ") + // Reject things that look like IDs (numeric only or 8+ hex chars) + var hexCount = 0 + var isNumericOnly = true + var hasNonHex = false + var charCount = 0 + for (i in result.indices) { + val c = result[i] + if (c.isWhitespace()) continue + charCount++ + if (c !in '0'..'9') isNumericOnly = false + if (c in '0'..'9' || c in 'A'..'F' || c in 'a'..'f') { + hexCount++ + } else { + hasNonHex = true + } + } - if (cleaned.isBlank()) return null + if (charCount == 0) return null - // Reject things that look like IDs - val noSpaces = cleaned.replace(" ", "") - if (ID_PATTERN.matches(noSpaces)) return null - if (NUMERIC_ONLY.matches(noSpaces)) return null + if (isNumericOnly) return null + if (!hasNonHex && hexCount >= 8) return null - return cleaned.lowercase() + return result.lowercase() } } From 6c4fe9ac2735435e4054ed0ceed25e182c81cf47 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 2 Jun 2026 02:57:23 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Replace=20Regex=20with?= =?UTF-8?q?=20manual=20string=20parsing=20in=20HintExtractor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces multiple Regex string manipulations with a single manual character iteration loop in HintExtractor to drastically improve 
execution speed and limit string allocations. Co-authored-by: himattm <6266621+himattm@users.noreply.github.com>