Skip to content

Commit 3322c69

Browse files
SonAIengine and claude
committed
fix: PhraseExtractor 검색 노이즈 해결 — phrase 필터링 + 최적화
## 문제

PhraseExtractor 활성화 시 Correctness 0.856→0.704 (-18%).
phrase 노드가 검색 결과에 passage 대신 끼어들어 context 품질 저하.

## 수정

1. search.py: 최종 결과에서 _phrase 태그 노드 필터링
   - PPR 단계에서는 bridge로 정상 동작 유지
   - 결과 반환 시에만 passage 노드 우선
2. phrase_extractor.py: max_phrases 10→5, content 비움 (FTS 방지)
3. _is_meaningful() 강화: 1글자/숫자만 phrase 필터링

## 결과

- Correctness: 0.704→0.849 (+21% 복구)
- Retrieval: 36.8ms→16.5ms (56% 개선)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0ec89ed commit 3322c69

6 files changed

Lines changed: 31 additions & 6 deletions

File tree

466 Bytes
Binary file not shown.
Binary file not shown.

src/synaptic/extensions/phrase_extractor.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,17 @@ def _normalize_phrase(phrase: str) -> str:
7777
def _is_meaningful(phrase: str) -> bool:
7878
"""Phrase가 의미 있는지 검사한다.
7979
80-
stop word만으로 구성된 구문은 제외.
80+
제외 조건:
81+
- stop word만으로 구성된 구문
82+
- 숫자만으로 구성된 구문 (연도 제외 — 연도는 별도 regex에서 처리)
83+
- 1글자 phrase
8184
"""
85+
stripped = phrase.strip()
86+
if len(stripped) < 2:
87+
return False
88+
# 숫자만으로 구성 (연도는 _RE_YEAR에서 이미 처리하므로 여기선 제외 가능)
89+
if stripped.isdigit():
90+
return False
8291
words = phrase.lower().split()
8392
non_stop = [w for w in words if w not in _STOP_WORDS]
8493
return len(non_stop) > 0
@@ -108,7 +117,7 @@ def __init__(
108117
self,
109118
*,
110119
min_phrase_length: int = 2,
111-
max_phrases_per_node: int = 10,
120+
max_phrases_per_node: int = 5,
112121
) -> None:
113122
"""PhraseExtractor를 초기화한다.
114123
@@ -174,7 +183,7 @@ async def extract_and_link(
174183
# graph.add가 아닌 store를 직접 사용)
175184
phrase_node = await graph._store.add_node(
176185
title=phrase,
177-
content=f"Phrase extracted from: {title}",
186+
content="", # minimal content to avoid FTS noise
178187
kind=NodeKind.ENTITY,
179188
tags=["_phrase"],
180189
)

src/synaptic/search.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,10 +206,24 @@ async def search(
206206
# Sort by resonance descending
207207
activated.sort(key=lambda a: a.resonance, reverse=True)
208208

209+
# Filter out internal phrase nodes (_phrase tag) from final results.
210+
# Phrase nodes serve as PPR bridge nodes but should not appear in
211+
# user-facing search results — they carry no passage content.
212+
final: list[ActivatedNode] = []
213+
fallback: list[ActivatedNode] = []
214+
for a in activated:
215+
if "_phrase" in (a.node.tags or []):
216+
fallback.append(a) # keep as last resort
217+
else:
218+
final.append(a)
219+
# If filtering removed too many, pad back with phrase nodes
220+
if len(final) < limit and fallback:
221+
final.extend(fallback[: limit - len(final)])
222+
209223
elapsed_ms = (time() - start) * 1000
210224
return SearchResult(
211225
query=query,
212-
nodes=activated[:limit],
226+
nodes=final[:limit],
213227
total_candidates=total_candidates,
214228
search_time_ms=elapsed_ms,
215229
stages_used=stages_used,
Binary file not shown.

tests/benchmark/test_e2e_qa.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,9 +312,10 @@ async def test_hotpotqa_e2e(self) -> None:
312312
if len(query_ids) > 24:
313313
query_ids = random.sample(query_ids, 24)
314314

315-
# 1. 그래프 구축 (Auto-Ontology)
316-
print("\n[Phase 1] 그래프 구축...")
315+
# 1. 그래프 구축 (Auto-Ontology + PhraseExtractor)
316+
print("\n[Phase 1] 그래프 구축 (PhraseExtractor 활성화)...")
317317
from synaptic.extensions.classifier_rules import RuleBasedClassifier
318+
from synaptic.extensions.phrase_extractor import PhraseExtractor
318319
from synaptic.extensions.relation_detector import RuleBasedRelationDetector
319320

320321
backend = MemoryBackend()
@@ -324,6 +325,7 @@ async def test_hotpotqa_e2e(self) -> None:
324325
backend,
325326
classifier=RuleBasedClassifier(),
326327
relation_detector=detector,
328+
phrase_extractor=PhraseExtractor(max_phrases_per_node=5),
327329
)
328330

329331
id_map: dict[str, str] = {}

0 commit comments

Comments (0)