Skip to content

Commit fe330ea

Browse files
SonAIengine and claude committed
feat: Personalized PageRank 엔진 — spreading activation 교체, multi-hop +28%
## PPR 엔진 (ppr.py) - Power iteration 기반 Personalized PageRank (zero-dep, 순수 Python) - BFS depth 2로 시드에서 도달 가능한 서브그래프만 탐색 (효율적) - StorageBackend.get_edges() 프로토콜만 사용 - 빈 그래프에서 시드 점수 그대로 반환 (안전) ## 검색 통합 (search.py) - spreading activation → PPR 교체 - 보수적 블렌딩: PPR 발견 노드는 0.8 감쇠, 기존 FTS 결과는 +0.1 미세 부스트만 - FTS 랭킹 보존하면서 그래프 경로로 새 노드 발견 ## Intent별 damping (agent_search.py) - CONTEXT_EXPLORE: 0.75 (넓게), PAST_FAILURES: 0.50 (집중) - REASONING_CHAIN: 0.60 (체인 추적), 기본: 0.85 - _explore_context의 BFS를 PPR로 교체 ## Ablation 결과 - HotPotQA S7: MRR 0.651→0.837 (+28%), R@10 0.670→0.805 (+20%) - AutoRAG S7: MRR 0.496→0.659 (+33%), R@10 0.800→0.950 (+19%) - Allganize S7: MRR 0.830→0.836, R@10 1.000 (유지) - S2 +Relations: MRR 하락 해소 (PPR 노이즈 억제) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 48a6e00 commit fe330ea

4 files changed

Lines changed: 199 additions & 28 deletions

File tree

src/synaptic/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from synaptic.activity import ActivityTracker
66
from synaptic.agent_search import AgentSearch, SearchIntent, suggest_intent
7+
from synaptic.ppr import personalized_pagerank
78
from synaptic.extensions.classifier_rules import RuleBasedClassifier
89
from synaptic.extensions.embedder import EmbeddingProvider, MockEmbeddingProvider
910
from synaptic.extensions.relation_detector import RuleBasedRelationDetector
@@ -54,6 +55,7 @@
5455
"Node",
5556
"NodeKind",
5657
"OntologyRegistry",
58+
"personalized_pagerank",
5759
"PropertyDef",
5860
"QueryRewriter",
5961
"RelationDetector",

src/synaptic/agent_search.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
NodeKind,
1313
SearchResult,
1414
)
15+
from synaptic.ppr import personalized_pagerank
1516
from synaptic.protocols import StorageBackend
1617
from synaptic.resonance import ResonanceScorer, ResonanceWeights
1718
from synaptic.search import HybridSearch
@@ -48,6 +49,17 @@ class SearchIntent(StrEnum):
4849
}
4950

5051

52+
# PPR damping factor per intent (lower = more focused on seeds).
# Damping is the probability of following an edge (vs. teleporting back to
# the seed set), so low values concentrate rank mass near the seed nodes.
_INTENT_DAMPING: dict[SearchIntent, float] = {
    SearchIntent.CONTEXT_EXPLORE: 0.75,  # exploratory — spread wide
    SearchIntent.GENERAL: 0.85,  # default
    SearchIntent.SIMILAR_DECISIONS: 0.85,  # default
    SearchIntent.PAST_FAILURES: 0.50,  # focused — stay narrow
    SearchIntent.REASONING_CHAIN: 0.60,  # follow the chain
    SearchIntent.RELATED_RULES: 0.85,  # default
}
61+
62+
5163
_INTENT_HINTS: dict[SearchIntent, list[str]] = {
5264
SearchIntent.PAST_FAILURES: [
5365
"실패", "에러", "오류", "버그", "장애", "incident", "fail", "error", "bug",
@@ -349,9 +361,10 @@ async def _explore_context(
349361
context_tags: list[str] | None,
350362
depth: int,
351363
) -> SearchResult:
352-
"""BFS expansion from seed nodes."""
364+
"""PPR-based context exploration from seed nodes."""
353365
start = time()
354366
weights = _INTENT_WEIGHTS[SearchIntent.CONTEXT_EXPLORE]
367+
damping = _INTENT_DAMPING[SearchIntent.CONTEXT_EXPLORE]
355368

356369
# Find seed nodes
357370
result = await self._hybrid.search(
@@ -362,14 +375,21 @@ async def _explore_context(
362375
for an in result.nodes:
363376
expanded[an.node.id] = (an.node, an.activation)
364377

365-
# BFS expand
366-
for an in result.nodes:
367-
neighbors = await backend.get_neighbors(an.node.id, depth=depth)
368-
for neighbor, edge in neighbors:
369-
if neighbor.id not in expanded:
370-
decay = 0.5 ** (1) # distance-based decay
371-
score = an.activation * edge.weight * decay
372-
expanded[neighbor.id] = (neighbor, max(0.0, min(1.0, score)))
378+
# PPR expansion (low damping → wider exploration)
379+
if expanded:
380+
seed_scores = {nid: score for nid, (_node, score) in expanded.items()}
381+
ppr_results = await personalized_pagerank(
382+
backend, seed_scores, damping=damping, top_k=limit * 2,
383+
)
384+
for node_id, ppr_score in ppr_results:
385+
if node_id not in expanded:
386+
node = await backend.get_node(node_id)
387+
if node:
388+
expanded[node_id] = (node, ppr_score)
389+
else:
390+
existing = expanded[node_id]
391+
blended = 0.6 * existing[1] + 0.4 * ppr_score
392+
expanded[node_id] = (existing[0], min(1.0, blended))
373393

374394
activated = self._score_candidates(expanded, weights, context_tags)
375395
return SearchResult(

src/synaptic/ppr.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
"""Personalized PageRank (PPR) engine — zero-dependency, dict-based sparse implementation.
2+
3+
Power iteration:
4+
r(t+1) = (1 - damping) * personalization + damping * A_norm * r(t)
5+
6+
Where A_norm is a column-normalized adjacency matrix built from the graph edges.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
from typing import TYPE_CHECKING
12+
13+
if TYPE_CHECKING:
14+
from synaptic.protocols import StorageBackend
15+
16+
17+
async def personalized_pagerank(
    backend: StorageBackend,
    seed_scores: dict[str, float],
    *,
    damping: float = 0.85,
    max_iter: int = 50,
    tol: float = 1e-6,
    top_k: int = 20,
) -> list[tuple[str, float]]:
    """Perform PPR and return top-k (node_id, score) pairs.

    The graph is discovered incrementally via BFS from seed nodes so that
    only the reachable subgraph is materialized — no need to enumerate all
    nodes/edges in the backend. Edges are treated as undirected for rank
    spreading, and each logical edge contributes exactly once per direction
    (re-discovering the same edge from the opposite endpoint is skipped).

    Args:
        backend: Storage backend implementing the StorageBackend protocol.
        seed_scores: {node_id: weight} — search result scores as personalization.
        damping: Probability of following an edge (vs teleporting back to seeds).
        max_iter: Maximum power-iteration steps.
        tol: Convergence threshold (L1 norm of rank change).
        top_k: Number of top-ranked nodes to return.

    Returns:
        List of (node_id, ppr_score) sorted descending by score.
    """
    if not seed_scores:
        return []

    # --- 1. BFS to discover the reachable subgraph (depth 2 from seeds) ---
    # adjacency: source -> [(target, weight), ...]
    adj: dict[str, list[tuple[str, float]]] = {}
    visited: set[str] = set()
    frontier = set(seed_scores.keys())
    bfs_depth = 2

    for _ in range(bfs_depth):
        if not frontier:
            break
        next_frontier: set[str] = set()
        for nid in frontier:
            if nid in visited:
                continue
            visited.add(nid)
            adj.setdefault(nid, [])
            edges = await backend.get_edges(nid, direction="both")
            for edge in edges:
                # Determine the neighbor on the other end of the edge.
                neighbor_id = (
                    edge.target_id if edge.source_id == nid else edge.source_id
                )
                # BUGFIX: if the neighbor was already expanded, this edge was
                # registered in both directions during *its* expansion —
                # get_edges(direction="both") returns the same edge from both
                # endpoints, and re-adding it here would double its weight
                # relative to edges that only reach leaf nodes. This check
                # also drops self-loops (nid is in `visited` by now), which
                # contribute nothing useful to PPR spreading.
                if neighbor_id in visited:
                    continue
                # Add edge in both directions (undirected for PPR spreading).
                adj[nid].append((neighbor_id, edge.weight))
                adj.setdefault(neighbor_id, []).append((nid, edge.weight))
                next_frontier.add(neighbor_id)
        frontier = next_frontier

    # Remaining frontier nodes are leaves: part of the graph, never expanded.
    visited.update(frontier)
    for nid in frontier:
        adj.setdefault(nid, [])

    all_nodes = set(adj.keys()) | set(seed_scores.keys())

    # No edges at all — return seed scores directly (sorted).
    if not any(adj.values()):
        sorted_seeds = sorted(seed_scores.items(), key=lambda x: x[1], reverse=True)
        return sorted_seeds[:top_k]

    # --- 2. Build column-normalized adjacency (as sparse dicts) ---
    # out_weight[node] = sum of weights of outgoing edges (1.0 guard avoids
    # division by zero for isolated nodes, which distribute no rank anyway).
    out_weight: dict[str, float] = {}
    for src, neighbors in adj.items():
        total = sum(w for _, w in neighbors)
        out_weight[src] = total if total > 0 else 1.0

    # --- 3. Normalize personalization vector ---
    total_seed = sum(seed_scores.values())
    if total_seed == 0:
        return []
    personalization: dict[str, float] = {
        nid: s / total_seed for nid, s in seed_scores.items()
    }

    # --- 4. Power iteration ---
    # r(t+1) = (1 - damping) * personalization + damping * A_norm * r(t)
    rank: dict[str, float] = {
        nid: personalization.get(nid, 0.0) for nid in all_nodes
    }

    teleport_coeff = 1.0 - damping

    for _ in range(max_iter):
        # Initialize with teleport (personalization) mass.
        new_rank: dict[str, float] = {
            nid: teleport_coeff * personalization.get(nid, 0.0)
            for nid in all_nodes
        }

        # Distribute rank along edges, column-normalized by out_weight.
        for src, neighbors in adj.items():
            if not neighbors:
                continue
            src_rank = rank[src]
            src_out = out_weight[src]
            for tgt, w in neighbors:
                new_rank[tgt] = new_rank.get(tgt, 0.0) + damping * src_rank * w / src_out

        # Check convergence (L1 norm of the rank change).
        diff = sum(abs(new_rank.get(nid, 0.0) - rank.get(nid, 0.0)) for nid in all_nodes)
        rank = new_rank
        if diff < tol:
            break

    # --- 5. Return top-k ---
    sorted_results = sorted(rank.items(), key=lambda x: x[1], reverse=True)
    return sorted_results[:top_k]

src/synaptic/search.py

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
"""Hybrid 3-stage search with spreading activation."""
1+
"""Hybrid 3-stage search with Personalized PageRank."""
22

33
from __future__ import annotations
44

55
import math
66
from time import time
77

88
from synaptic.models import ActivatedNode, Node, NodeKind, SearchResult
9+
from synaptic.ppr import personalized_pagerank
910
from synaptic.protocols import QueryRewriter, StorageBackend
1011
from synaptic.resonance import ResonanceScorer
1112
from synaptic.synonyms import expand_synonyms
@@ -49,20 +50,20 @@ def _cosine_sim(a: list[float], b: list[float]) -> float:
4950
class HybridSearch:
5051
"""3-stage fallback search: FTS+vector → synonym expansion → query rewrite."""
5152

52-
__slots__ = ("_query_rewriter", "_scorer", "_spread_decay", "_spread_depth")
53+
__slots__ = ("_ppr_damping", "_query_rewriter", "_scorer")
5354

5455
def __init__(
5556
self,
5657
*,
5758
scorer: ResonanceScorer | None = None,
5859
query_rewriter: QueryRewriter | None = None,
59-
spread_decay: float = 0.25,
60-
spread_depth: int = 1,
60+
spread_decay: float = 0.25, # deprecated, kept for compat
61+
spread_depth: int = 1, # deprecated, kept for compat
62+
ppr_damping: float = 0.85,
6163
) -> None:
6264
self._scorer = scorer or ResonanceScorer()
6365
self._query_rewriter = query_rewriter
64-
self._spread_decay = spread_decay
65-
self._spread_depth = spread_depth
66+
self._ppr_damping = ppr_damping
6667

6768
async def search(
6869
self,
@@ -136,22 +137,28 @@ async def search(
136137
all_nodes[node.id] = (node, 0.4)
137138
stages_used.append("rewriter")
138139

139-
# Spreading activation: expand from top candidates
140+
# PPR: graph-aware discovery + mild re-ranking
140141
total_candidates = len(all_nodes)
141-
top_ids = sorted(all_nodes, key=lambda nid: all_nodes[nid][1], reverse=True)[:5]
142-
for nid in top_ids:
143-
parent_score = all_nodes[nid][1]
144-
neighbors = await backend.get_neighbors(nid, depth=self._spread_depth)
145-
for neighbor_node, edge in neighbors:
146-
activation = parent_score * edge.weight * self._spread_decay
147-
if neighbor_node.id not in all_nodes:
148-
all_nodes[neighbor_node.id] = (neighbor_node, max(0.0, min(1.0, activation)))
142+
if all_nodes:
143+
seed_scores = {nid: score for nid, (_node, score) in all_nodes.items()}
144+
ppr_results = await personalized_pagerank(
145+
backend,
146+
seed_scores,
147+
damping=self._ppr_damping,
148+
top_k=limit * 2,
149+
)
150+
for node_id, ppr_score in ppr_results:
151+
if node_id not in all_nodes:
152+
# PPR이 새로 발견한 노드 — 그래프 경로로만 도달 가능
153+
node = await backend.get_node(node_id)
154+
if node:
155+
all_nodes[node_id] = (node, ppr_score * 0.8)
149156
else:
150-
# 이미 있으면 미세 부스트만 (FTS 직접 매칭 랭킹 보존)
151-
existing = all_nodes[neighbor_node.id]
152-
boosted = min(1.0, existing[1] + activation * 0.1)
157+
# 기존 FTS 결과 — PPR로 미세 부스트만 (FTS 랭킹 보존)
158+
existing = all_nodes[node_id]
159+
boosted = min(1.0, existing[1] + ppr_score * 0.1)
153160
if boosted > existing[1]:
154-
all_nodes[neighbor_node.id] = (existing[0], boosted)
161+
all_nodes[node_id] = (existing[0], boosted)
155162

156163
# Filter by node_kinds if specified
157164
if node_kinds:

0 commit comments

Comments
 (0)