|
| 1 | +"""Regex-based tag extractor — zero LLM dependency.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import re |
| 6 | + |
_I = re.IGNORECASE

# Common tech/domain terms to extract as tags.
#
# Each entry is (tag, compiled pattern). English keywords are matched as whole
# words (\b on both sides). Korean keywords keep only the leading \b: Hangul
# syllables count as word characters, and Korean attaches particles and
# suffixes directly to the noun, so a trailing \b would reject ordinary text
# such as "배포를" or "프론트엔드".
_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    ("api", re.compile(r"\bAPI\b|\bREST\b|\bGraphQL\b|\bgRPC\b", _I)),
    (
        "database",
        re.compile(r"\b(?:DB|database|SQL|PostgreSQL|SQLite|MySQL|MongoDB)\b", _I),
    ),
    ("deploy", re.compile(r"\b(?:deploy|CI/CD|release)\b|\b(?:배포|릴리즈)", _I)),
    ("test", re.compile(r"\b(?:test|QA|unittest|pytest)\b|\b(?:테스트|검증)", _I)),
    (
        "security",
        re.compile(r"\b(?:security|auth|OWASP)\b|\b(?:보안|인증|취약점)", _I),
    ),
    (
        "performance",
        re.compile(r"\b(?:performance|latency|throughput)\b|\b(?:성능|최적화)", _I),
    ),
    ("bug", re.compile(r"\b(?:bug|error|fix)\b|\b(?:버그|오류|에러|수정)", _I)),
    ("frontend", re.compile(r"\b(?:frontend|React|Vue|UI|CSS|HTML)\b|\b프론트", _I)),
    (
        "backend",
        re.compile(r"\b(?:backend|server|FastAPI|Django)\b|\b(?:백엔드|서버)", _I),
    ),
    (
        "infra",
        re.compile(r"\b(?:infra|Docker|K8s|Kubernetes|AWS|GCP)\b|\b인프라", _I),
    ),
    ("ai", re.compile(r"\b(?:AI|ML|LLM|GPT|Claude|embedding)\b|\b벡터", _I)),
    # The open-ended "문서" prefix also covers "문서화".
    ("docs", re.compile(r"\b(?:doc|README|documentation)\b|\b문서", _I)),
    (
        "refactor",
        re.compile(r"\b(?:refactor|cleanup)\b|\b(?:리팩토링|개선|정리)", _I),
    ),
    (
        "design",
        re.compile(r"\b(?:design|architecture)\b|\b(?:설계|아키텍처|구조)", _I),
    ),
    (
        "monitoring",
        # "로그" (log) must not fire on "로그인"/"로그아웃" (login/logout).
        re.compile(
            r"\b(?:monitoring|logging|alert)\b|\b(?:모니터링|로그(?!인|아웃)|메트릭)",
            _I,
        ),
    ),
]
| 27 | + |
| 28 | + |
class RegexTagExtractor:
    """Extract tags from text using regex patterns. Zero dependencies.

    Holds an ordered list of ``(tag, compiled pattern)`` pairs: the
    module-level defaults followed by any caller-supplied extras.
    """

    __slots__ = ("_patterns",)

    def __init__(
        self,
        extra_patterns: list[tuple[str, re.Pattern[str]]] | None = None,
    ) -> None:
        """Create an extractor.

        Args:
            extra_patterns: Optional additional ``(tag, pattern)`` pairs,
                appended after the defaults. A tag name may repeat an
                existing one to give that tag more trigger patterns.
        """
        # Copy so that extending this instance never mutates the shared defaults.
        self._patterns = [*_PATTERNS]
        if extra_patterns:
            self._patterns.extend(extra_patterns)

    def extract(self, text: str) -> list[str]:
        """Return the tags whose pattern matches ``text``.

        Tags are listed in pattern order and each tag appears at most once,
        even when several patterns share the same tag name.

        Args:
            text: The text to scan.

        Returns:
            The matching tag names; an empty list when nothing matches.
        """
        tags: list[str] = []
        seen: set[str] = set()
        for tag, pattern in self._patterns:
            # A tag that already fired needs no further searching.
            if tag in seen:
                continue
            if pattern.search(text):
                seen.add(tag)
                tags.append(tag)
        return tags
0 commit comments