Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 31 additions & 25 deletions cli/integrations/job_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,31 @@
requests = None


# ⚡ Bolt Optimization: Pre-compile regex patterns once at module level so the
# extraction functions do not rebuild the pattern lists (or repeat the `re` cache lookup) on every call.
# Salary patterns, tried in list order by ``_extract_salary_from_text``.
# For patterns with a capture group the caller uses group 1; otherwise it uses
# the whole match.
#
# The range separator is written as ``(?:[-–]+|to)`` rather than the character
# class ``[-–to]+``: the class matched any mix of the characters "-", "–", "t"
# and "o" (e.g. "ot"), not the word "to".
_SALARY_PATTERNS = [
    # Dollar amount or range with an optional "k" suffix on either side:
    #   $100,000 / $100k / $100k - $150k / $90,000 to $120,000
    # The "k" must be optional *here*: a separate "$...k" pattern placed after
    # a plain "$..." pattern is never reached, and "$100k" would be cut to "$100".
    re.compile(r"\$[\d,]+k?(?:\s*(?:[-–]+|to)\s*\$[\d,]+k?)?", re.IGNORECASE),
    # Bare "k" range without a currency sign: 100k - 150k
    re.compile(r"[\d,]+k\s*(?:[-–]+|to)\s*[\d,]+k", re.IGNORECASE),
    # Labelled amount: "Salary: $X" (captures from "$" up to "<", ">" or newline)
    re.compile(r"(?:salary|pay|compensation)[:\s]*(\$[^<>\n]+)", re.IGNORECASE),
    # Period-labelled amount: "per year: $X" (same capture rule as above)
    re.compile(r"(?:per|/)\s*(?:year|annum)[:\s]*(\$[^<>\n]+)", re.IGNORECASE),
]

# Case-insensitive employment-type keywords, compiled once at import time.
# Each pattern exposes the matched keyword as capture group 1; the list order
# is the order in which the patterns are tried.
_JOB_TYPE_PATTERNS = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"\b(full[- ]?time|part[- ]?time|contract|freelance|intern|temporary)\b",
        r"\b(permanent|fixed[- ]?term)\b",
    )
]

# Case-insensitive seniority keywords, compiled once at import time.
# Each pattern exposes the matched keyword as capture group 1; the list order
# is the order in which the patterns are tried.
_EXPERIENCE_LEVEL_PATTERNS = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"\b(entry[- ]?level|junior|mid[- ]?level|senior|staff|principal|lead)\b",
        r"\b(associate|vice[- ]?president|director|executive)\b",
    )
]


@dataclass
class JobDetails:
"""Structured job posting data."""
Expand Down Expand Up @@ -555,17 +580,8 @@ def _extract_salary_from_text(self, text: str) -> Optional[str]:
Returns:
Salary string or None
"""
# Common salary patterns
patterns = [
r"\$[\d,]+(?:\s*[-–to]+\s*\$[\d,]+)?", # $100k - $150k
r"\$[\d,]+k(?:\s*[-–to]+\s*\$[\d,]+k)?", # $100k - $150k
r"[\d,]+k(?:\s*[-–to]+\s*[\d,]+k)", # 100k - 150k
r"(?:salary|pay|compensation)[:\s]*(\$[^<>\n]+)", # Salary: $X
r"(?:per|/)\s*(?:year|annum)[:\s]*(\$[^<>\n]+)", # per year: $X
]

for pattern in patterns:
match = re.search(pattern, text, re.IGNORECASE)
for pattern in _SALARY_PATTERNS:
match = pattern.search(text)
if match:
salary = match.group(0) if match.lastindex is None else match.group(1)
# Clean up the salary string
Expand Down Expand Up @@ -805,13 +821,8 @@ def _extract_job_type(self, html: str) -> Optional[str]:
Returns:
Job type string or None
"""
patterns = [
r"\b(full[- ]?time|part[- ]?time|contract|freelance|intern|temporary)\b",
r"\b(permanent|fixed[- ]?term)\b",
]

for pattern in patterns:
match = re.search(pattern, html, re.IGNORECASE)
for pattern in _JOB_TYPE_PATTERNS:
match = pattern.search(html)
if match:
return match.group(1).lower().replace("-", "-")

Expand All @@ -827,13 +838,8 @@ def _extract_experience_level(self, html: str) -> Optional[str]:
Returns:
Experience level string or None
"""
patterns = [
r"\b(entry[- ]?level|junior|mid[- ]?level|senior|staff|principal|lead)\b",
r"\b(associate|vice[- ]?president|director|executive)\b",
]

for pattern in patterns:
match = re.search(pattern, html, re.IGNORECASE)
for pattern in _EXPERIENCE_LEVEL_PATTERNS:
match = pattern.search(html)
if match:
return match.group(1).lower().replace("-", "-")

Expand Down
Loading