diff --git a/cli/utils/keyword_density.py b/cli/utils/keyword_density.py index 7a0df7b..acde1d4 100644 --- a/cli/utils/keyword_density.py +++ b/cli/utils/keyword_density.py @@ -37,6 +37,18 @@ console = Console() +# Pre-compiled regex patterns for extracting job details to optimize performance +_TITLE_PATTERNS = [ + re.compile(r"(?:job title|position|title):\s*([^\n]+)", re.IGNORECASE | re.MULTILINE), + re.compile(r"^([^\n]+)\s*[-|]\s*[^|]+$", re.IGNORECASE | re.MULTILINE), + re.compile(r"#\s*([^\n]+)", re.IGNORECASE | re.MULTILINE), +] + +_COMPANY_PATTERNS = [ + re.compile(r"(?:company|organization):\s*([^\n]+)", re.IGNORECASE), + re.compile(r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", re.IGNORECASE), +] + @dataclass class KeywordInfo: @@ -208,26 +220,15 @@ def _extract_job_details(self, job_description: str) -> Tuple[str, str]: company = "" # Try to extract job title (common patterns) - title_patterns = [ - r"(?:job title|position|title):\s*([^\n]+)", - r"^([^\n]+)\s*[-|]\s*[^|]+$", - r"#\s*([^\n]+)", # Markdown headers often have job title - ] - - for pattern in title_patterns: - match = re.search(pattern, job_description, re.IGNORECASE | re.MULTILINE) + for pattern in _TITLE_PATTERNS: + match = pattern.search(job_description) if match: job_title = match.group(1).strip() break # Try to extract company name - company_patterns = [ - r"(?:company|organization):\s*([^\n]+)", - r"(?:at|from)\s+([A-Z][^\n]+?)(?:\s+[-\u2014]|\s+$)", - ] - - for pattern in company_patterns: - match = re.search(pattern, job_description, re.IGNORECASE) + for pattern in _COMPANY_PATTERNS: + match = pattern.search(job_description) if match: company = match.group(1).strip() break