Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2024-11-20 - [Optimizing Regex in Kotlin Hot Paths]
**Learning:** In Kotlin Multiplatform hot paths, chaining sequential `Regex` operations (e.g. prefix stripping, then camelCase splitting, then whitespace normalization, then ID matching) generates significant overhead: every pass re-scans the input, pays for matcher (state machine) setup, state tracking, and backtracking, and allocates an intermediate `String`.
**Action:** In hot paths, prefer replacing chained regex parsing with a single manual pass over the string using bounded start/end indices and a `StringBuilder`. Pre-size the builder to the maximum expected output length and minimize intermediate object allocations to measurably increase execution speed.
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,84 @@ package halogen.engine
*/
internal object HintExtractor {

private val PREFIX_PATTERN = Regex("""^(?:/r/|/category/|/topic/|/|#)""")
private val CAMEL_SPLIT = Regex("""(?<=[a-z])(?=[A-Z])""")
private val ID_PATTERN = Regex("""^[0-9a-f]{8,}$""", RegexOption.IGNORE_CASE)
private val NUMERIC_ONLY = Regex("""^\d+$""")
private val WHITESPACE_PATTERN = Regex("""\s+""")

fun extract(key: String): String? {
if (key.isBlank()) return null

val trimmed = key.trim()
var startIdx = 0

// Strip common prefixes
var cleaned = PREFIX_PATTERN.replace(key.trim(), "")
if (trimmed.startsWith("/r/")) startIdx = 3
else if (trimmed.startsWith("/category/")) startIdx = 10
else if (trimmed.startsWith("/topic/")) startIdx = 7
else if (trimmed.startsWith("/")) startIdx = 1
else if (trimmed.startsWith("#")) startIdx = 1

// Trim leading slashes if any
while (startIdx < trimmed.length && trimmed[startIdx] == '/') {
startIdx++
}

// Trim trailing slashes
var endIdx = trimmed.length - 1
while (endIdx >= startIdx && trimmed[endIdx] == '/') {
endIdx--
}

// Remove leading/trailing slashes
cleaned = cleaned.trim('/')
if (startIdx > endIdx) return null

// Take the last meaningful segment if it looks like a path
if ('/' in cleaned) {
cleaned = cleaned.substringAfterLast('/')
val lastSlash = trimmed.lastIndexOf('/', endIdx)
if (lastSlash >= startIdx) {
startIdx = lastSlash + 1
}

// Split camelCase
cleaned = CAMEL_SPLIT.replace(cleaned, " ")
// Process characters for camelCase, snake_case, kebab-case, and normalize whitespace
val sb = StringBuilder(endIdx - startIdx + 10)
var prevChar = ' '

// Split snake_case and kebab-case
cleaned = cleaned.replace('_', ' ').replace('-', ' ')
for (i in startIdx..endIdx) {
val c = trimmed[i]

// Normalize whitespace
cleaned = cleaned.trim().replace(WHITESPACE_PATTERN, " ")
val isSeparator = c == '_' || c == '-' || c.isWhitespace()

if (cleaned.isBlank()) return null
if (isSeparator) {
if (prevChar != ' ') {
sb.append(' ')
prevChar = ' '
}
} else {
// Split camelCase
if (prevChar in 'a'..'z' && c in 'A'..'Z') {
sb.append(' ')
}
sb.append(c)
prevChar = c
}
}

val cleaned = sb.toString().trim()
if (cleaned.isEmpty()) return null

// Reject things that look like IDs
val noSpaces = cleaned.replace(" ", "")
if (ID_PATTERN.matches(noSpaces)) return null
if (NUMERIC_ONLY.matches(noSpaces)) return null
var hexChars = 0
var digitChars = 0
var totalChars = 0

for (i in 0 until cleaned.length) {
val c = cleaned[i]
if (c == ' ') continue
totalChars++
if (c in '0'..'9') {
digitChars++
hexChars++
} else if (c in 'a'..'f' || c in 'A'..'F') {
hexChars++
}
}

if (totalChars >= 8 && hexChars == totalChars) return null
if (totalChars > 0 && digitChars == totalChars) return null

return cleaned.lowercase()
}
Expand Down
Loading