Skip to content

Commit 329b5e8

Browse files
SonAIengine and claude committed
feat: v0.3.0 — 프로토콜 구현체 + LRU 캐시 + JSON export + 노드 병합
extensions/ 모듈 (P4): - RegexTagExtractor: 15개 기술 도메인 패턴 (한/영), 커스텀 패턴 확장 가능 - LLMQueryRewriter: LLM 기반 쿼리 재작성 (economy tier), StaticQueryRewriter(테스트용) - MockEmbeddingProvider: 결정적 해시 기반 (테스트용) - OpenAIEmbeddingProvider: OpenAI/vLLM/Ollama 호환 임베딩 (httpx) LRU 캐시 (P5): - NodeCache: OrderedDict 기반 LRU, maxsize 제한, hit_rate 통계 - SynapticGraph.get()에서 자동 캐시 → add/remove/reinforce/decay 시 invalidate JSON Export + 노드 병합 (P6): - export_json(): nodes + edges JSON 직렬화 - merge(source, target): 콘텐츠/태그/통계/엣지 합치고 source 삭제 - find_duplicates(): SequenceMatcher로 유사 제목 탐지 (같은 kind만) 121 unit tests + 13 integration tests = 134 total Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4748aa9 commit 329b5e8

11 files changed

Lines changed: 696 additions & 11 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "synaptic-memory"
7-
version = "0.2.0"
7+
version = "0.3.0"
88
description = "Brain-inspired knowledge graph: spreading activation, Hebbian learning, memory consolidation."
99
license = "MIT"
1010
requires-python = ">=3.12"

src/synaptic/cache.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""LRU cache layer for frequently accessed nodes."""
2+
3+
from __future__ import annotations
4+
5+
from collections import OrderedDict
6+
7+
from synaptic.models import Node
8+
9+
10+
class NodeCache:
    """Bounded LRU cache for nodes. Thread-safe is NOT guaranteed — async-only.

    Usage:
        cache = NodeCache(maxsize=256)
        cache.put(node)
        node = cache.get("node_id")  # None if miss
        cache.invalidate("node_id")
    """

    __slots__ = ("_cache", "_hits", "_maxsize", "_misses")

    def __init__(self, maxsize: int = 256) -> None:
        self._maxsize = maxsize
        # Insertion order doubles as recency order: the stalest entry sits first.
        self._cache: OrderedDict[str, Node] = OrderedDict()
        self._hits = 0
        self._misses = 0

    def get(self, node_id: str) -> Node | None:
        """Return the cached node (promoted to most-recent), or None on a miss."""
        try:
            node = self._cache[node_id]
        except KeyError:
            self._misses += 1
            return None
        self._cache.move_to_end(node_id)
        self._hits += 1
        return node

    def put(self, node: Node) -> None:
        """Insert or refresh a node, evicting the least-recently-used on overflow."""
        entries = self._cache
        if node.id in entries:
            entries.move_to_end(node.id)
        entries[node.id] = node
        while len(entries) > self._maxsize:
            entries.popitem(last=False)  # drop the oldest (least recent) entry

    def invalidate(self, node_id: str) -> None:
        """Forget a single node; a no-op when the id is absent."""
        self._cache.pop(node_id, None)

    def clear(self) -> None:
        """Drop every entry and reset the hit/miss counters."""
        self._cache.clear()
        self._hits = 0
        self._misses = 0

    @property
    def size(self) -> int:
        """Current number of cached nodes."""
        return len(self._cache)

    @property
    def hit_rate(self) -> float:
        """Fraction of lookups served from cache; 0.0 before any lookup."""
        attempts = self._hits + self._misses
        if attempts == 0:
            return 0.0
        return self._hits / attempts

    def stats(self) -> dict[str, int | float]:
        """Snapshot of cache metrics for observability."""
        return {
            "size": self.size,
            "maxsize": self._maxsize,
            "hits": self._hits,
            "misses": self._misses,
            "hit_rate": round(self.hit_rate, 3),
        }

src/synaptic/exporter.py

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
"""Markdown exporter for knowledge graph."""
1+
"""Exporters for knowledge graph — Markdown and JSON."""
22

33
from __future__ import annotations
44

5-
from synaptic.models import Node
5+
import json
6+
7+
from synaptic.models import Edge, Node
68
from synaptic.protocols import StorageBackend
79

810

@@ -56,3 +58,63 @@ async def export(
5658
lines.append(f"\n{node.content}\n")
5759

5860
return "\n".join(lines)
61+
62+
63+
class JSONExporter:
    """Serializes a subgraph — nodes plus their internal edges — as JSON."""

    __slots__ = ()

    async def export(
        self,
        backend: StorageBackend,
        *,
        node_ids: list[str] | None = None,
    ) -> str:
        """Export the selected nodes (or up to 500 listed ones) as a JSON string.

        Missing ids are silently skipped; only edges whose both endpoints are
        inside the exported set are included.
        """
        # Resolve the node set: explicit ids, or a bounded listing.
        if node_ids is None:
            nodes = await backend.list_nodes(limit=500)
        else:
            fetched = [await backend.get_node(nid) for nid in node_ids]
            nodes = [n for n in fetched if n is not None]

        # Keep an edge only when its target is also part of the export.
        included = {n.id for n in nodes}
        edges = []
        for n in nodes:
            outgoing = await backend.get_edges(n.id, direction="outgoing")
            edges.extend(e for e in outgoing if e.target_id in included)

        payload = {
            "nodes": [
                {
                    "id": n.id,
                    "kind": str(n.kind),
                    "title": n.title,
                    "content": n.content,
                    "tags": n.tags,
                    "level": str(n.level),
                    "vitality": n.vitality,
                    "access_count": n.access_count,
                    "success_count": n.success_count,
                    "failure_count": n.failure_count,
                    "source": n.source,
                }
                for n in nodes
            ],
            "edges": [
                {
                    "id": e.id,
                    "source_id": e.source_id,
                    "target_id": e.target_id,
                    "kind": str(e.kind),
                    "weight": e.weight,
                }
                for e in edges
            ],
        }
        return json.dumps(payload, ensure_ascii=False, indent=2)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Synaptic Memory extensions — protocol implementations."""
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""Embedding providers — generate vector embeddings for nodes."""
2+
3+
from __future__ import annotations
4+
5+
from typing import Protocol
6+
7+
8+
class EmbeddingProvider(Protocol):
    """Generate embedding vectors from text.

    Structural (duck-typed) interface: any object exposing these two async
    methods satisfies it — no inheritance required.
    """

    # Embed a single text into one vector of floats.
    async def embed(self, text: str) -> list[float]: ...
    # Embed a batch of texts, one vector per input text.
    async def embed_batch(self, texts: list[str]) -> list[list[float]]: ...
13+
14+
15+
class MockEmbeddingProvider:
    """Mock embedding provider for testing. Returns deterministic vectors.

    Uses a cryptographic digest rather than built-in ``hash()``: ``hash(str)``
    is salted per process (PYTHONHASHSEED), so the old implementation was only
    deterministic within a single run. It also produced all-zero components
    beyond dim 4, since only 32 bits of hash were available; the digest is now
    extended so any ``dim`` yields fully populated vectors.
    """

    __slots__ = ("_dim",)

    def __init__(self, dim: int = 4) -> None:
        # Dimensionality of every produced vector.
        self._dim = dim

    async def embed(self, text: str) -> list[float]:
        """Return a deterministic vector in [0, 1]^dim derived from *text*."""
        import hashlib  # noqa: PLC0415 — keep module import-light, like httpx below

        # Derive at least `dim` bytes deterministically; chain the digest when
        # the requested dimensionality exceeds a single sha256 output.
        material = hashlib.sha256(text.encode("utf-8")).digest()
        while len(material) < self._dim:
            material += hashlib.sha256(material).digest()
        return [material[i] / 255.0 for i in range(self._dim)]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed each text in order (sequentially; the mock has no batching)."""
        return [await self.embed(t) for t in texts]
30+
31+
32+
class OpenAIEmbeddingProvider:
    """OpenAI-compatible embedding provider (works with OpenAI, vLLM, Ollama).

    Usage:
        provider = OpenAIEmbeddingProvider(
            api_base="https://api.openai.com/v1",
            api_key="sk-...",
            model="text-embedding-3-small",
        )
    """

    __slots__ = ("_api_base", "_api_key", "_model", "_timeout")

    def __init__(
        self,
        api_base: str = "https://api.openai.com/v1",
        api_key: str = "",
        model: str = "text-embedding-3-small",
        *,
        timeout: float = 60.0,
    ) -> None:
        # Trailing slash stripped so the "/embeddings" join below stays canonical.
        self._api_base = api_base.rstrip("/")
        self._api_key = api_key
        self._model = model
        # Request timeout in seconds (was hard-coded to 60; now configurable).
        self._timeout = timeout

    async def embed(self, text: str) -> list[float]:
        """Embed a single text (delegates to the batch endpoint)."""
        results = await self.embed_batch([text])
        return results[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """POST *texts* to the /embeddings endpoint; vectors in input order.

        Raises httpx.HTTPStatusError on non-2xx responses.
        """
        if not texts:
            # Avoid a pointless HTTP round-trip for an empty batch.
            return []

        import httpx  # type: ignore[import-untyped] # noqa: PLC0415

        url = f"{self._api_base}/embeddings"
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"

        payload = {"model": self._model, "input": texts}

        async with httpx.AsyncClient(timeout=self._timeout) as client:
            resp = await client.post(url, headers=headers, json=payload)
            resp.raise_for_status()
            data = resp.json()

        # The API may return items out of order; restore input order via "index".
        return [item["embedding"] for item in sorted(data["data"], key=lambda x: x["index"])]
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""LLM-based query rewriter — expands search queries via language model."""
2+
3+
from __future__ import annotations
4+
5+
from typing import Protocol
6+
7+
8+
class LLMChatFn(Protocol):
    """Minimal LLM chat interface for query rewriting.

    An async callable taking keyword-only ``system`` / ``user`` prompts and a
    ``max_tokens`` budget, returning the model's text completion.
    """

    async def __call__(self, *, system: str, user: str, max_tokens: int) -> str: ...
12+
13+
14+
class LLMQueryRewriter:
    """Rewrites search queries using an LLM for better recall.

    Generates 2-3 alternative phrasings of the query.
    Uses economy-tier models (e.g. Haiku) for cost efficiency.
    """

    __slots__ = ("_chat_fn",)

    def __init__(self, chat_fn: LLMChatFn) -> None:
        self._chat_fn = chat_fn

    async def rewrite(self, query: str) -> list[str]:
        """Rewrite query into at most 3 alternative forms.

        Returns [] for blank queries and on any LLM failure (best-effort).
        """
        import re  # noqa: PLC0415

        if not query.strip():
            return []

        system = (
            "You are a search query expander. Given a search query, "
            "generate 2-3 alternative phrasings that could match relevant documents. "
            "Include both Korean and English variants if applicable. "
            "Return one query per line, nothing else."
        )
        try:
            response = await self._chat_fn(
                system=system,
                user=f"Query: {query}",
                max_tokens=256,
            )
        except Exception:
            # Query expansion is best-effort: any LLM error degrades to a no-op.
            return []

        # Strip leading list markers ("-", "•", "*", "1.", "2)") only. The old
        # lstrip("- ·•0123456789.") also ate digits belonging to the query
        # itself (e.g. "2024 plans" -> " plans") and could leave stray spaces.
        cleaned = [
            re.sub(r"^\s*(?:[-·•*]+|\d+[.)])\s*", "", line).strip()
            for line in response.strip().splitlines()
        ]
        return [line for line in cleaned if line and line != query][:3]
49+
50+
51+
class StaticQueryRewriter:
    """Test double for query rewriting: maps a query to canned expansions."""

    __slots__ = ("_expansions",)

    def __init__(self, expansions: dict[str, list[str]] | None = None) -> None:
        # No mapping supplied means "no expansions for any query".
        self._expansions = expansions if expansions else {}

    async def rewrite(self, query: str) -> list[str]:
        """Return the preconfigured expansions for *query*, or []."""
        try:
            return self._expansions[query]
        except KeyError:
            return []
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""Regex-based tag extractor — zero LLM dependency."""
2+
3+
from __future__ import annotations
4+
5+
import re
6+
7+
_I = re.IGNORECASE

# Common tech/domain terms to extract as tags
_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    ("api", re.compile(r"\bAPI\b|\bREST\b|\bGraphQL\b|\bgRPC\b", _I)),
    ("database", re.compile(r"\b(?:DB|database|SQL|PostgreSQL|SQLite|MySQL|MongoDB)\b", _I)),
    ("deploy", re.compile(r"\b(?:deploy|배포|CI/CD|릴리즈|release)\b", _I)),
    ("test", re.compile(r"\b(?:test|테스트|검증|QA|unittest|pytest)\b", _I)),
    ("security", re.compile(r"\b(?:security|보안|auth|인증|취약점|OWASP)\b", _I)),
    ("performance", re.compile(r"\b(?:performance|성능|latency|throughput|최적화)\b", _I)),
    ("bug", re.compile(r"\b(?:bug|버그|오류|에러|error|fix|수정)\b", _I)),
    ("frontend", re.compile(r"\b(?:frontend|프론트|React|Vue|UI|CSS|HTML)\b", _I)),
    ("backend", re.compile(r"\b(?:backend|백엔드|서버|server|FastAPI|Django)\b", _I)),
    ("infra", re.compile(r"\b(?:infra|인프라|Docker|K8s|Kubernetes|AWS|GCP)\b", _I)),
    ("ai", re.compile(r"\b(?:AI|ML|LLM|GPT|Claude|embedding|벡터)\b", _I)),
    ("docs", re.compile(r"\b(?:doc|문서|README|documentation|문서화)\b", _I)),
    ("refactor", re.compile(r"\b(?:refactor|리팩토링|개선|cleanup|정리)\b", _I)),
    ("design", re.compile(r"\b(?:design|설계|architecture|아키텍처|구조)\b", _I)),
    ("monitoring", re.compile(r"\b(?:monitoring|모니터링|로그|logging|메트릭|alert)\b", _I)),
]


class RegexTagExtractor:
    """Extract tags from text using regex patterns. Zero dependencies."""

    __slots__ = ("_patterns",)

    def __init__(
        self,
        extra_patterns: list[tuple[str, re.Pattern[str]]] | None = None,
    ) -> None:
        # Built-in patterns come first; caller-supplied ones are appended after.
        patterns = list(_PATTERNS)
        if extra_patterns is not None:
            patterns += extra_patterns
        self._patterns = patterns

    def extract(self, text: str) -> list[str]:
        """Return the tag of every pattern that matches *text*, in pattern order."""
        return [tag for tag, pattern in self._patterns if pattern.search(text)]

0 commit comments

Comments
 (0)