/**
 * Derives a short, human-readable hint from a route-like key.
 *
 * Processing steps:
 *  1. Trim the key and drop one leading prefix: `/r/`, `/category/`, `/topic/`, `/` or `#`.
 *  2. Drop surrounding slashes and keep only the last path segment.
 *  3. Split camelCase, snake_case and kebab-case into words separated by single spaces.
 *  4. Reject results that look like identifiers rather than words.
 *
 * The key is scanned by index instead of being passed through a chain of [Regex]
 * replacements: the change note accompanying this implementation records that the
 * regex chain was too costly on hot paths.
 *
 * @param key raw key such as `/r/kotlinLang`, `/category/home_decor/` or `#user-profile`.
 * @return the lowercased hint, or `null` when the key is blank, contains no usable
 *   segment, or the segment looks like an ID (digits only, or 8+ hex digits).
 */
fun extract(key: String): String? {
    val trimmed = key.trim()
    if (trimmed.isEmpty()) return null

    // Skip the recognised prefix, then any slashes that directly follow it.
    var start = hintPrefixLength(trimmed)
    while (start < trimmed.length && trimmed[start] == '/') start++

    // Ignore trailing slashes so "/a/b/" resolves to "b" rather than to an empty segment.
    var end = trimmed.length - 1
    while (end >= start && trimmed[end] == '/') end--
    if (start > end) return null

    // Keep only the last path segment inside the [start, end] window.
    val lastSlash = trimmed.lastIndexOf('/', end)
    if (lastSlash >= start) start = lastSlash + 1

    val words = splitIntoWords(trimmed, start, end)
    if (words.isEmpty() || looksLikeId(words)) return null
    return words.lowercase()
}

/** Prefixes stripped from the front of a key; order matters because "/" is a prefix of the longer entries. */
private val HINT_PREFIXES = listOf("/r/", "/category/", "/topic/", "/", "#")

/** Returns the length of the first entry of [HINT_PREFIXES] that [text] starts with, or 0 if none matches. */
private fun hintPrefixLength(text: String): Int =
    HINT_PREFIXES.firstOrNull { text.startsWith(it) }?.length ?: 0

/**
 * Copies `text[start..end]` into space-separated words: `_`, `-` and whitespace become
 * a single space (runs are collapsed, leading ones dropped) and a space is inserted at
 * every lowercase-to-uppercase ASCII boundary.
 */
private fun splitIntoWords(text: String, start: Int, end: Int): String {
    // Small head-room for the spaces inserted at camelCase boundaries.
    val out = StringBuilder(end - start + 10)
    // Starts as ' ' so that leading separators are dropped instead of emitted.
    var previous = ' '
    for (i in start..end) {
        val c = text[i]
        if (c == '_' || c == '-' || c.isWhitespace()) {
            if (previous != ' ') {
                out.append(' ')
                previous = ' '
            }
        } else {
            if (previous in 'a'..'z' && c in 'A'..'Z') out.append(' ')
            out.append(c)
            previous = c
        }
    }
    // At most one trailing space can remain (from a trailing separator).
    return out.toString().trim()
}

/**
 * Returns true when [words], ignoring spaces, consists only of ASCII digits, or of
 * at least 8 characters that are all ASCII hex digits.
 */
private fun looksLikeId(words: String): Boolean {
    var total = 0
    var digits = 0
    var hex = 0
    for (c in words) {
        if (c == ' ') continue
        total++
        when (c) {
            in '0'..'9' -> {
                digits++
                hex++
            }
            in 'a'..'f', in 'A'..'F' -> hex++
        }
    }
    if (total == 0) return false
    return digits == total || (total >= 8 && hex == total)
}