Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2024-05-24 - Manual Parsing Over Regex in Hot Paths
**Learning:** In Kotlin Multiplatform projects (especially on JVM/WasmJs), string manipulation pipelines that chain several `Regex` operations (`Regex.replace`, `Regex.matches`) introduce significant performance overhead: each pass pays regex compilation/matching costs and allocates an intermediate string.
**Action:** When working in hot paths like frequent parsers, replace multiple simple regexes (e.g., camelCase splits, whitespace normalization, fixed prefix checking) with a single manual character iteration loop using a `StringBuilder` to eliminate regex state machine overhead and minimize memory allocations.
Original file line number Diff line number Diff line change
Expand Up @@ -10,42 +10,79 @@ package halogen.engine
*/
internal object HintExtractor {

    // Checked in order, so the specific prefixes must come before the bare "/" fallback.
    private val PREFIXES = arrayOf("/r/", "/category/", "/topic/", "/", "#")

    /**
     * Derives a lowercase, space-separated hint from [key].
     *
     * Processing steps:
     * 1. Trim the key and drop the first matching entry of [PREFIXES].
     * 2. Strip leading/trailing slashes and keep only the text after the last `/`.
     * 3. Split camelCase, snake_case, kebab-case and whitespace runs into single-space
     *    separated words.
     * 4. Reject results that look like identifiers (digits only, or 8+ hex characters
     *    once spaces are ignored).
     *
     * @param key raw key, e.g. a route or tag such as `/r/kotlinLang` or `#home-decor`.
     * @return the lowercase hint, or `null` when [key] is blank, nothing is left after
     *   cleaning, or the remainder looks like an ID.
     */
    fun extract(key: String): String? {
        if (key.isBlank()) return null

        val trimmedKey = key.trim()
        val prefixLength = PREFIXES.firstOrNull { trimmedKey.startsWith(it) }?.length ?: 0

        // substringAfterLast returns the whole string when there is no '/' left.
        val segment = trimmedKey.substring(prefixLength).trim('/').substringAfterLast('/')

        val words = splitWords(segment)
        if (words.isEmpty() || looksLikeId(words)) return null

        return words.lowercase()
    }

    /**
     * Splits [segment] into words joined by single spaces in one pass.
     *
     * `_`, `-` and whitespace act as separators; a lowercase ASCII letter followed by an
     * uppercase ASCII letter marks a camelCase boundary. Separators are emitted lazily
     * (only before the next word character), so the result never has leading, trailing
     * or repeated spaces.
     */
    private fun splitWords(segment: String): String {
        val sb = StringBuilder(segment.length + 5)
        var pendingSeparator = false
        var previous = ' ' // sentinel: not a lowercase letter, so no boundary at index 0

        for (c in segment) {
            if (c == '_' || c == '-' || c.isWhitespace()) {
                // Only remember the separator if a word has already been written.
                pendingSeparator = sb.isNotEmpty()
            } else {
                val camelBoundary = previous in 'a'..'z' && c in 'A'..'Z'
                if (pendingSeparator || camelBoundary) sb.append(' ')
                pendingSeparator = false
                sb.append(c)
            }
            previous = c
        }
        return sb.toString()
    }

    /**
     * Returns `true` when [words], ignoring spaces, consists solely of ASCII digits or is
     * a run of at least 8 hexadecimal characters (case-insensitive).
     */
    private fun looksLikeId(words: String): Boolean {
        var significant = 0
        var allDigits = true
        var allHex = true

        for (c in words) {
            if (c == ' ') continue
            significant++
            val isDigit = c in '0'..'9'
            if (!isDigit) {
                allDigits = false
                if (c !in 'a'..'f' && c !in 'A'..'F') allHex = false
            }
        }
        return allDigits || (allHex && significant >= 8)
    }
}
Loading