Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
## 2025-02-18 - Manual String Builder for Extractors
**Learning:** Sequential Regex modifications in hot path extractors (like `HintExtractor.kt`) cause massive overhead due to state machine initialization, capture groups, and temporary object creation.
**Action:** Replace sequential `Regex.replace` and `Regex.matches` with a single-pass loop over the character array using a `StringBuilder` pre-sized to `cleaned.length + n`. Utilize early exits with `substring` over regex prefix removals. This reduced execution time by roughly 80%.

## 2025-02-18 - Gradle Test OOM via memory threshold
**Learning:** Running `:test` sequentially inside tight environment limits via Gradle wrapper sometimes throws OOM or kills tasks randomly.
**Action:** Use `-Pkotlin.compiler.execution.strategy=in-process` alongside standard Gradle wrapper tasks when testing JVM limits to run compile in process and not spin up external daemons that steal process memory.
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,89 @@ package halogen.engine
*/
internal object HintExtractor {

private val PREFIX_PATTERN = Regex("""^(?:/r/|/category/|/topic/|/|#)""")
private val CAMEL_SPLIT = Regex("""(?<=[a-z])(?=[A-Z])""")
private val ID_PATTERN = Regex("""^[0-9a-f]{8,}$""", RegexOption.IGNORE_CASE)
private val NUMERIC_ONLY = Regex("""^\d+$""")
private val WHITESPACE_PATTERN = Regex("""\s+""")
private val PREFIXES = arrayOf("/r/", "/category/", "/topic/", "/", "#")

private fun stripPrefix(key: String): String {
for (prefix in PREFIXES) {
if (key.startsWith(prefix)) {
return key.substring(prefix.length)
}
}
return key
}

private fun isHexId(str: String): Boolean {
if (str.length < 8) return false
for (i in 0 until str.length) {
val c = str[i]
if (!(c in '0'..'9' || c in 'a'..'f' || c in 'A'..'F')) return false
}
return true
}

private fun isNumeric(str: String): Boolean {
if (str.isEmpty()) return false
for (i in 0 until str.length) {
if (str[i] !in '0'..'9') return false
}
return true
}

fun extract(key: String): String? {
if (key.isBlank()) return null

// Strip common prefixes
var cleaned = PREFIX_PATTERN.replace(key.trim(), "")
var cleaned = stripPrefix(key.trim())

// Remove leading/trailing slashes
cleaned = cleaned.trim('/')
var startIdx = 0
while (startIdx < cleaned.length && cleaned[startIdx] == '/') {
startIdx++
}
var endIdx = cleaned.length - 1
while (endIdx >= startIdx && cleaned[endIdx] == '/') {
endIdx--
}
if (startIdx > endIdx) return null
cleaned = cleaned.substring(startIdx, endIdx + 1)

// Take the last meaningful segment if it looks like a path
if ('/' in cleaned) {
cleaned = cleaned.substringAfterLast('/')
val lastSlash = cleaned.lastIndexOf('/')
if (lastSlash != -1) {
cleaned = cleaned.substring(lastSlash + 1)
}

// Split camelCase
cleaned = CAMEL_SPLIT.replace(cleaned, " ")
val sb = StringBuilder(cleaned.length + 5)
var lastChar = ' '
for (i in 0 until cleaned.length) {
val c = cleaned[i]
val isUpper = c in 'A'..'Z'
val isSymbolOrWhitespace = c == '_' || c == '-' || c.isWhitespace()

// Split snake_case and kebab-case
cleaned = cleaned.replace('_', ' ').replace('-', ' ')
if (isUpper && lastChar in 'a'..'z') {
sb.append(' ')
}

// Normalize whitespace
cleaned = cleaned.trim().replace(WHITESPACE_PATTERN, " ")
if (isSymbolOrWhitespace) {
if (lastChar != ' ') {
sb.append(' ')
lastChar = ' '
}
} else {
sb.append(c)
lastChar = c
}
}

cleaned = sb.toString().trim()

if (cleaned.isBlank()) return null

// Reject things that look like IDs
val noSpaces = cleaned.replace(" ", "")
if (ID_PATTERN.matches(noSpaces)) return null
if (NUMERIC_ONLY.matches(noSpaces)) return null
val noSpaces = StringBuilder(cleaned.length)
for (i in 0 until cleaned.length) {
if (cleaned[i] != ' ') noSpaces.append(cleaned[i])
}
val noSpacesStr = noSpaces.toString()

if (isHexId(noSpacesStr)) return null
if (isNumeric(noSpacesStr)) return null

return cleaned.lowercase()
}
Expand Down
Loading