diff --git a/src/memory/core.py b/src/memory/core.py
index 8fd032b..48fa35a 100644
--- a/src/memory/core.py
+++ b/src/memory/core.py
@@ -107,6 +107,18 @@ def _create_embedding_provider(self) -> EmbeddingProvider:
                 model=self.config.embedding.model,
                 base_url=self.config.embedding.base_url or "http://localhost:11434",
             )
+        elif provider == "llama":
+            from memory.embeddings.llama import LlamaEmbedding
+            return LlamaEmbedding(
+                model=self.config.embedding.model,
+                base_url=self.config.embedding.base_url or "http://localhost:11435",
+            )
+        elif provider == "llama-nomic":
+            from memory.embeddings.llama_nomic import LlamaNomicEmbedding
+            return LlamaNomicEmbedding(
+                model=self.config.embedding.model,
+                base_url=self.config.embedding.base_url or "http://localhost:11435",
+            )
         elif provider == "openai":
             from memory.embeddings.openai_embed import OpenAIEmbedding
             return OpenAIEmbedding(
diff --git a/src/memory/embeddings/__init__.py b/src/memory/embeddings/__init__.py
index fa53c6d..e2f871e 100644
--- a/src/memory/embeddings/__init__.py
+++ b/src/memory/embeddings/__init__.py
@@ -1,9 +1,13 @@
 from memory.embeddings.base import EmbeddingProvider
 from memory.embeddings.ollama import OllamaEmbedding
 from memory.embeddings.openai_embed import OpenAIEmbedding
+from memory.embeddings.llama import LlamaEmbedding
+from memory.embeddings.llama_nomic import LlamaNomicEmbedding
 
 __all__ = [
     "EmbeddingProvider",
     "OllamaEmbedding",
     "OpenAIEmbedding",
+    "LlamaEmbedding",
+    "LlamaNomicEmbedding",
 ]
diff --git a/src/memory/embeddings/base.py b/src/memory/embeddings/base.py
index 2768723..1ecf3f0 100644
--- a/src/memory/embeddings/base.py
+++ b/src/memory/embeddings/base.py
@@ -6,5 +6,9 @@ class EmbeddingProvider(ABC):
     def embed(self, text: str) -> list[float]:
         ...
 
+    def search(self, text: str) -> list[float]:
+        # Query-time embedding. Defaults to embed() so existing providers keep working.
+        return self.embed(text)
+
     def embed_batch(self, texts: list[str]) -> list[list[float]]:
         return [self.embed(t) for t in texts]
diff --git a/src/memory/embeddings/llama.py b/src/memory/embeddings/llama.py
new file mode 100644
index 0000000..1c53952
--- /dev/null
+++ b/src/memory/embeddings/llama.py
@@ -0,0 +1,26 @@
+import httpx
+
+from memory.embeddings.base import EmbeddingProvider
+
+
+class LlamaEmbedding(EmbeddingProvider):
+    """Embedding provider backed by an HTTP ``/embeddings`` endpoint."""
+
+    def __init__(self, model: str = "text-embedder",
+                 base_url: str = "http://localhost:11435"):
+        self.model = model
+        self.base_url = base_url
+
+    def embed(self, text: str) -> list[float]:
+        """Return the embedding vector for *text*."""
+        resp = httpx.post(
+            f"{self.base_url}/embeddings",
+            json={"model": self.model, "content": text},
+            timeout=30.0,
+        )
+        resp.raise_for_status()
+        # Response shape: [{"embedding": [[...floats...]]}] — first vector of first result.
+        return resp.json()[0]["embedding"][0]
+
+    # Queries and documents are embedded identically for this provider.
+    search = embed
diff --git a/src/memory/embeddings/llama_nomic.py b/src/memory/embeddings/llama_nomic.py
new file mode 100644
index 0000000..123cac6
--- /dev/null
+++ b/src/memory/embeddings/llama_nomic.py
@@ -0,0 +1,15 @@
+from memory.embeddings.llama import LlamaEmbedding
+
+
+class LlamaNomicEmbedding(LlamaEmbedding):
+    """LlamaEmbedding for nomic-style models, which expect a task prefix
+    ("search_document: " / "search_query: ") on the input text.
+    """
+
+    def embed(self, text: str) -> list[float]:
+        """Embed *text* as a document to be indexed."""
+        return super().embed('search_document: ' + text)
+
+    def search(self, text: str) -> list[float]:
+        """Embed *text* as a search query."""
+        return super().search('search_query: ' + text)
diff --git a/src/memory/embeddings/ollama.py b/src/memory/embeddings/ollama.py
index 13af8fe..1ccec52 100644
--- a/src/memory/embeddings/ollama.py
+++ b/src/memory/embeddings/ollama.py
@@ -37,3 +37,5 @@ def embed(self, text: str) -> list[float]:
         )
         resp.raise_for_status()
         return resp.json()["embedding"]
+
+    search = embed
diff --git a/src/memory/embeddings/openai_embed.py b/src/memory/embeddings/openai_embed.py
index 501d031..0ed7d58 100644
--- a/src/memory/embeddings/openai_embed.py
+++ b/src/memory/embeddings/openai_embed.py
@@ -17,3 +17,5 @@ def embed(self, text: str) -> list[float]:
         )
         resp.raise_for_status()
         return resp.json()["data"][0]["embedding"]
+
+    search = embed
diff --git a/src/memory/search.py b/src/memory/search.py
index 3d8cb10..53af742 100644
--- a/src/memory/search.py
+++ b/src/memory/search.py
@@ -99,7 +99,7 @@ def tiered_search(
 
     # FTS results are sparse — fall back to hybrid (embed + vector search + merge)
     try:
-        query_vec = embedding_provider.embed(query)
+        query_vec = embedding_provider.search(query)
         vec_results = db.vector_search(
             query_vec, limit=limit * 2, project=project, source=source
         )
@@ -144,7 +144,7 @@ def hybrid_search(
             r["score"] = r["score"] / max_score if max_score > 0 else 0.0
         return fts_results[:limit]
 
-    query_vec = embedding_provider.embed(query)
+    query_vec = embedding_provider.search(query)
     vec_results = db.vector_search(
         query_vec, limit=limit * 2, project=project, source=source
     )