@@ -77,8 +77,17 @@ def _normalize_phrase(phrase: str) -> str:
7777def _is_meaningful (phrase : str ) -> bool :
7878 """Phrase가 의미 있는지 검사한다.
7979
80- stop word만으로 구성된 구문은 제외.
80+ 제외 조건:
81+ - stop word만으로 구성된 구문
82+ - 숫자만으로 구성된 구문 (연도 제외 — 연도는 별도 regex에서 처리)
83+ - 1글자 phrase
8184 """
85+ stripped = phrase .strip ()
86+ if len (stripped ) < 2 :
87+ return False
88+ # 숫자만으로 구성 (연도는 _RE_YEAR에서 이미 처리하므로 여기선 제외 가능)
89+ if stripped .isdigit ():
90+ return False
8291 words = phrase .lower ().split ()
8392 non_stop = [w for w in words if w not in _STOP_WORDS ]
8493 return len (non_stop ) > 0
@@ -108,7 +117,7 @@ def __init__(
108117 self ,
109118 * ,
110119 min_phrase_length : int = 2 ,
111- max_phrases_per_node : int = 10 ,
120+ max_phrases_per_node : int = 5 ,
112121 ) -> None :
113122 """PhraseExtractor를 초기화한다.
114123
@@ -174,7 +183,7 @@ async def extract_and_link(
174183 # graph.add가 아닌 store를 직접 사용)
175184 phrase_node = await graph ._store .add_node (
176185 title = phrase ,
177- content = f"Phrase extracted from: { title } " ,
186+ content = "" , # minimal content to avoid FTS noise
178187 kind = NodeKind .ENTITY ,
179188 tags = ["_phrase" ],
180189 )
[next file in diff; path not captured]
@@ -206,10 +206,24 @@ async def search(
206206 # Sort by resonance descending
207207 activated .sort (key = lambda a : a .resonance , reverse = True )
208208
209+ # Filter out internal phrase nodes (_phrase tag) from final results.
210+ # Phrase nodes serve as PPR bridge nodes but should not appear in
211+ # user-facing search results — they carry no passage content.
212+ final : list [ActivatedNode ] = []
213+ fallback : list [ActivatedNode ] = []
214+ for a in activated :
215+ if "_phrase" in (a .node .tags or []):
216+ fallback .append (a ) # keep as last resort
217+ else :
218+ final .append (a )
219+ # If filtering removed too many, pad back with phrase nodes
220+ if len (final ) < limit and fallback :
221+ final .extend (fallback [: limit - len (final )])
222+
209223 elapsed_ms = (time () - start ) * 1000
210224 return SearchResult (
211225 query = query ,
212- nodes = activated [:limit ],
226+ nodes = final [:limit ],
213227 total_candidates = total_candidates ,
214228 search_time_ms = elapsed_ms ,
215229 stages_used = stages_used ,
[next file in diff; path not captured]
@@ -312,9 +312,10 @@ async def test_hotpotqa_e2e(self) -> None:
312312 if len (query_ids ) > 24 :
313313 query_ids = random .sample (query_ids , 24 )
314314
315- # 1. 그래프 구축 (Auto-Ontology)
316- print ("\n [Phase 1] 그래프 구축..." )
315+ # 1. 그래프 구축 (Auto-Ontology + PhraseExtractor )
316+ print ("\n [Phase 1] 그래프 구축 (PhraseExtractor 활성화) ..." )
317317 from synaptic .extensions .classifier_rules import RuleBasedClassifier
318+ from synaptic .extensions .phrase_extractor import PhraseExtractor
318319 from synaptic .extensions .relation_detector import RuleBasedRelationDetector
319320
320321 backend = MemoryBackend ()
@@ -324,6 +325,7 @@ async def test_hotpotqa_e2e(self) -> None:
324325 backend ,
325326 classifier = RuleBasedClassifier (),
326327 relation_detector = detector ,
328+ phrase_extractor = PhraseExtractor (max_phrases_per_node = 5 ),
327329 )
328330
329331 id_map : dict [str , str ] = {}
You can’t perform that action at this time.
0 commit comments