/**
 * Turns a route-like key (for example `/r/kotlinLang` or `#home_decor`) into a
 * lowercase, space-separated hint, or `null` when nothing meaningful remains.
 *
 * The implementation deliberately avoids regular expressions and works with
 * plain character scans, because extraction is treated as a hot path.
 */
internal object HintExtractor {

    /**
     * Prefixes removed from the start of a key. They are tested in order and only
     * the first match is stripped, so the longer forms must precede the bare "/".
     */
    private val STRIPPED_PREFIXES = arrayOf("/r/", "/category/", "/topic/", "/", "#")

    /** Minimum count of hex digits at which an all-hex hint is rejected as an ID. */
    private const val MIN_HEX_ID_LENGTH = 8

    /**
     * Extracts a hint from [key].
     *
     * Steps, in order:
     * 1. trim surrounding whitespace and strip one known prefix;
     * 2. drop leading/trailing slashes and keep only the last path segment;
     * 3. split camelCase, snake_case, kebab-case and whitespace into single-spaced,
     *    lowercased words;
     * 4. reject results that are empty or look like an ID (digits only, or eight
     *    or more hex digits once spaces are ignored).
     *
     * @param key raw key such as a path, a hashtag, or an identifier.
     * @return the normalized hint, or `null` if [key] is blank, reduces to nothing,
     *   or looks like an ID.
     */
    fun extract(key: String): String? {
        if (key.isBlank()) return null

        val segment = stripKnownPrefix(key.trim())
            .trim { it == '/' }
            .substringAfterLast('/')

        val words = splitIntoWords(segment)
        return if (words.isEmpty() || looksLikeId(words)) null else words
    }

    /** Removes the first entry of [STRIPPED_PREFIXES] that [text] starts with, if any. */
    private fun stripKnownPrefix(text: String): String =
        STRIPPED_PREFIXES.firstOrNull { text.startsWith(it) }
            ?.let { text.substring(it.length) }
            ?: text

    /**
     * Lowercases [segment] and separates its words with single spaces.
     *
     * `_`, `-` and whitespace act as separators; a lower-to-upper case transition
     * also starts a new word. Case and whitespace tests use the stdlib `Char`
     * predicates, so they are not limited to ASCII letters.
     */
    private fun splitIntoWords(segment: String): String = buildString(segment.length + 4) {
        // A separator is only written once a following word character shows up,
        // so the result never starts or ends with a space and never doubles one.
        var pendingSpace = false
        var previous = ' ' // not a lowercase letter, so the first char never splits
        for (c in segment) {
            if (c == '_' || c == '-' || c.isWhitespace()) {
                pendingSpace = isNotEmpty()
            } else {
                val camelBoundary = c.isUpperCase() && previous.isLowerCase()
                if (pendingSpace || camelBoundary) append(' ')
                append(c.lowercaseChar())
                pendingSpace = false
            }
            previous = c
        }
    }

    /**
     * Returns `true` when [words] (already lowercased), ignoring spaces, consists
     * only of ASCII digits, or of at least [MIN_HEX_ID_LENGTH] hex digits.
     */
    private fun looksLikeId(words: String): Boolean {
        var significant = 0
        var allDigits = true
        for (c in words) {
            if (c == ' ') continue
            significant++
            if (c !in '0'..'9') {
                // A character that is neither a digit nor a-f rules out both ID shapes.
                if (c !in 'a'..'f') return false
                allDigits = false
            }
        }
        return significant > 0 && (allDigits || significant >= MIN_HEX_ID_LENGTH)
    }
}