/**
 * Derives a short, lowercase, space-separated hint from a raw key such as a
 * route, path, hashtag, or identifier.
 */
internal object HintExtractor {

    /**
     * Prefixes removed from the start of a key. They are tried in order and the
     * first match wins, so the more specific prefixes must precede the bare "/".
     */
    private val STRIPPED_PREFIXES = arrayOf("/r/", "/category/", "/topic/", "/", "#")

    /** A hint made only of hex digits is treated as an opaque ID once it has this many characters. */
    private const val MIN_HEX_ID_LENGTH = 8

    /**
     * Extracts a hint from [key].
     *
     * Processing steps:
     * 1. Trim surrounding whitespace and strip one known prefix (see [STRIPPED_PREFIXES]).
     * 2. Drop leading/trailing slashes and keep only the last path segment.
     * 3. Split the segment into words on `_`, `-`, whitespace, and lower-to-upper
     *    camelCase boundaries; join the words with single spaces and lowercase them.
     *
     * Examples: `"/r/kotlinDev"` -> `"kotlin dev"`, `"/topic/a/b/user_profile/"` -> `"user profile"`.
     *
     * The normalization is done in a single pass over the characters instead of a
     * chain of regex replacements, because this function is used on hot paths.
     *
     * @param key the raw key to derive a hint from.
     * @return the normalized hint, or `null` when [key] is blank, when nothing but
     *   separators remains, or when the remaining characters look like an ID
     *   (digits only, or hex digits only with at least [MIN_HEX_ID_LENGTH] characters).
     */
    fun extract(key: String): String? {
        if (key.isBlank()) return null

        val segment = stripKnownPrefix(key.trim())
            .trim('/')
            .substringAfterLast('/') // returns the whole string when there is no '/'
        if (segment.isEmpty()) return null

        val hint = StringBuilder(segment.length * 2)
        // Set when one or more separator characters were skipped since the last emitted
        // character; the space is only written once another word character follows, so
        // the builder never carries a leading or trailing space.
        var separatorPending = false
        var digitsOnly = true
        var hexOnly = true
        var emittedChars = 0 // word characters written, i.e. hint length without spaces

        for (index in segment.indices) {
            val ch = segment[index]

            if (ch == '_' || ch == '-' || ch.isWhitespace()) {
                separatorPending = true
                continue
            }

            // camelCase boundary: an uppercase character directly after a lowercase one.
            val startsCamelWord = index > 0 && ch.isUpperCase() && segment[index - 1].isLowerCase()
            if (hint.isNotEmpty() && (separatorPending || startsCamelWord)) {
                hint.append(' ')
            }
            separatorPending = false

            val lowered = ch.lowercaseChar()
            hint.append(lowered)
            emittedChars++

            if (lowered !in '0'..'9') {
                digitsOnly = false
                if (lowered !in 'a'..'f') hexOnly = false
            }
        }

        // Only separators were present.
        if (hint.isEmpty()) return null

        // Reject things that look like IDs rather than words.
        if (digitsOnly) return null
        if (hexOnly && emittedChars >= MIN_HEX_ID_LENGTH) return null

        return hint.toString()
    }

    /** Returns [text] without the first entry of [STRIPPED_PREFIXES] it starts with, or [text] unchanged. */
    private fun stripKnownPrefix(text: String): String {
        for (prefix in STRIPPED_PREFIXES) {
            if (text.startsWith(prefix)) return text.substring(prefix.length)
        }
        return text
    }
}