Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2024-06-25 - Replace regex with character iteration in hot path
**Learning:** Replacing lookaround-based and formatting regexes with a single manual character-iteration loop that appends to a `StringBuilder` significantly reduces regex-matching overhead, intermediate object allocation, and backtracking delays in string-processing hot paths.
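**Action:** Always consider converting complex, heavily used `Regex` replacements into manual string-parsing functions when performance is a priority and the processing logic reduces to straightforward conditions such as prefix checks, whitespace handling, and character-case tests. Before swapping one in, verify that the manual version matches the regex's behavior (for example, ASCII-only classes such as `[a-z]` versus Unicode-aware `Char.isLowerCase()`).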
Original file line number Diff line number Diff line change
Expand Up @@ -10,42 +10,81 @@ package halogen.engine
*/
internal object HintExtractor {

private val PREFIX_PATTERN = Regex("""^(?:/r/|/category/|/topic/|/|#)""")
private val CAMEL_SPLIT = Regex("""(?<=[a-z])(?=[A-Z])""")
private val ID_PATTERN = Regex("""^[0-9a-f]{8,}$""", RegexOption.IGNORE_CASE)
private val NUMERIC_ONLY = Regex("""^\d+$""")
private val WHITESPACE_PATTERN = Regex("""\s+""")

fun extract(key: String): String? {
if (key.isBlank()) return null

// Strip common prefixes
var cleaned = PREFIX_PATTERN.replace(key.trim(), "")
var cleaned = key.trim()
if (cleaned.startsWith("/r/")) {
cleaned = cleaned.substring(3)
} else if (cleaned.startsWith("/category/")) {
cleaned = cleaned.substring(10)
} else if (cleaned.startsWith("/topic/")) {
cleaned = cleaned.substring(7)
} else if (cleaned.startsWith("/")) {
cleaned = cleaned.substring(1)
} else if (cleaned.startsWith("#")) {
cleaned = cleaned.substring(1)
}

// Remove leading/trailing slashes
cleaned = cleaned.trim('/')
var start = 0
var end = cleaned.length - 1
while (start <= end && cleaned[start] == '/') start++
while (end >= start && cleaned[end] == '/') end--
if (start > end) return null
cleaned = cleaned.substring(start, end + 1)

// Take the last meaningful segment if it looks like a path
if ('/' in cleaned) {
cleaned = cleaned.substringAfterLast('/')
val lastSlashIndex = cleaned.lastIndexOf('/')
if (lastSlashIndex >= 0) {
cleaned = cleaned.substring(lastSlashIndex + 1)
}

// Split camelCase
cleaned = CAMEL_SPLIT.replace(cleaned, " ")
val sb = StringBuilder(cleaned.length + 4) // Some extra space for added spaces
var lastWasSpace = true

// Split snake_case and kebab-case
cleaned = cleaned.replace('_', ' ').replace('-', ' ')
for (i in cleaned.indices) {
val c = cleaned[i]
if (c == '_' || c == '-' || c.isWhitespace()) {
if (!lastWasSpace) {
sb.append(' ')
lastWasSpace = true
}
} else {
if (i > 0 && c.isUpperCase() && cleaned[i - 1].isLowerCase()) {
if (!lastWasSpace) {
sb.append(' ')
}
}
sb.append(c.lowercaseChar())
lastWasSpace = false
}
}

// Normalize whitespace
cleaned = cleaned.trim().replace(WHITESPACE_PATTERN, " ")
var result = sb.toString()
if (result.endsWith(" ")) {
result = result.substring(0, result.length - 1)
}

if (cleaned.isBlank()) return null
if (result.isBlank()) return null

// Reject things that look like IDs
val noSpaces = cleaned.replace(" ", "")
if (ID_PATTERN.matches(noSpaces)) return null
if (NUMERIC_ONLY.matches(noSpaces)) return null
var noSpacesLen = 0
var allNumeric = true
var allHex = true
for (i in result.indices) {
val c = result[i]
if (c != ' ') {
noSpacesLen++
if (c !in '0'..'9') allNumeric = false
if (c !in '0'..'9' && c !in 'a'..'f') allHex = false
}
}

if (noSpacesLen > 0 && allNumeric) return null
if (noSpacesLen >= 8 && allHex) return null

return cleaned.lowercase()
return result
}
}
Loading