Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
__pycache__/
*.pyc
.venv/
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# MemoMind

MemoMind is a local-first starter kit for building a multimodal personal knowledge agent. It provides a minimal set of Python modules for ingesting content, representing memories, and querying vector stores.

## Quick start

```bash
python -m venv .venv
source .venv/bin/activate
pip install -e .
```

## Project layout

- `src/memomind` - core package
- `docs` - specifications and architecture notes

## Storage options

- `InMemoryVectorStore` for prototyping.
- `SqliteVectorStore` for local persistence.
14 changes: 14 additions & 0 deletions docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# MemoMind Architecture (Draft)

## Components
- `core.agent`: orchestrates ingestion, retrieval, and LLM calls.
- `core.memory`: manages short/long-term memory stores.
- `storage.vector_store`: in-memory similarity search.
- `storage.sqlite_store`: SQLite-backed vector persistence.
- `llm`: pluggable LLM and embedding clients.

## Data Flow
1. Raw input -> `models.Document`
2. Document -> embeddings via `llm.embeddings`
3. Embeddings -> `storage.vector_store` or `storage.sqlite_store`
4. Retrieval results -> `core.agent` response
12 changes: 12 additions & 0 deletions docs/SPEC.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MemoMind Specification (Draft)

## Goals
- Collect multimodal data (text, image, audio, structured) into a personal knowledge base.
- Provide short/long-term memory with retrieval for Q&A.
- Run locally with optional pluggable LLM backends.

## MVP Scope
- Text ingestion pipeline.
- Memory data models (document, memory, message).
- In-memory or SQLite-backed vector search interface.
- Simple agent orchestrator for storing and retrieving memories.
23 changes: 23 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[project]
name = "memomind"
version = "0.1.0"
description = "Local-first multimodal memory agent starter"
readme = "README.md"
requires-python = ">=3.10"
license = { text = "MIT" }
authors = [
{ name = "Project-Test Contributors" }
]

[project.optional-dependencies]
dev = ["pytest"]

[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.packages.find]
where = ["src"]
9 changes: 9 additions & 0 deletions src/memomind/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""MemoMind core package."""

from memomind.core.agent import MemoAgent
from memomind.core.memory import MemoryStore
from memomind.models.document import Document
from memomind.models.memory import Memory
from memomind.models.message import Message

__all__ = ["Document", "Memory", "Message", "MemoryStore", "MemoAgent"]
5 changes: 5 additions & 0 deletions src/memomind/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Configuration helpers."""

from memomind.config.settings import Settings

__all__ = ["Settings"]
12 changes: 12 additions & 0 deletions src/memomind/config/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from __future__ import annotations

from dataclasses import dataclass


@dataclass(frozen=True)
class Settings:
    """Immutable bundle of runtime options for MemoMind.

    Every field carries a default, so ``Settings()`` is a complete
    configuration. The dataclass is frozen: attributes cannot be reassigned
    after construction.
    """

    # Name of the embedding model (presumably consumed by the embedding
    # client - confirm against callers).
    embedding_model: str = "local-embedding"
    # Name of the LLM (presumably consumed by the LLM client - confirm).
    llm_model: str = "local-llm"
    # Message-count budget for conversational context.
    # NOTE(review): the exact consumer is not visible here - confirm.
    max_context_messages: int = 12
6 changes: 6 additions & 0 deletions src/memomind/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Core orchestration modules."""

from memomind.core.agent import MemoAgent
from memomind.core.memory import MemoryStore

__all__ = ["MemoAgent", "MemoryStore"]
51 changes: 51 additions & 0 deletions src/memomind/core/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import List, Optional

from memomind.config.settings import Settings
from memomind.core.knowledge import KnowledgeBase
from memomind.core.memory import MemoryStore
from memomind.llm.client import LlmClient
from memomind.llm.embeddings import EmbeddingClient
from memomind.models.document import Document
from memomind.models.memory import Memory


@dataclass
class MemoAgent:
    """Coordinate document ingestion, memory storage, and retrieval.

    Every collaborator is a dataclass field built by its default factory, so
    ``MemoAgent()`` is usable as-is and any collaborator can be swapped by
    passing a replacement instance to the constructor.
    """

    settings: Settings = field(default_factory=Settings)
    knowledge_base: KnowledgeBase = field(default_factory=KnowledgeBase)
    memory_store: MemoryStore = field(default_factory=MemoryStore)
    llm_client: LlmClient = field(default_factory=LlmClient)
    embedding_client: EmbeddingClient = field(default_factory=EmbeddingClient)

    def ingest(self, document: Document) -> None:
        """Embed ``document.content`` and add an enriched copy to the knowledge base.

        ``Document`` is frozen, so the caller's instance is not modified; a
        copy with ``embedding`` set is stored instead.
        """
        # Function-scope import: the module header only pulls in
        # ``dataclass`` and ``field`` from ``dataclasses``.
        from dataclasses import replace

        embedding = self.embedding_client.embed(document.content)
        # ``replace`` carries over every field of the document, so fields
        # added to ``Document`` later are not silently dropped here.
        self.knowledge_base.add(replace(document, embedding=embedding))

    def remember(self, memory_id: str, memory: Memory) -> None:
        """Embed ``memory.summary`` and store ``memory`` under ``memory_id``."""
        embedding = self.embedding_client.embed(memory.summary)
        self.memory_store.add_memory(memory_id, memory, embedding=embedding)

    def recall(self, query: str, limit: int = 5) -> List[Memory]:
        """Return the memories the store finds for ``query``.

        The query text is embedded and handed to ``memory_store.search``
        together with ``limit``; the results are materialised into a list.
        """
        embedding = self.embedding_client.embed(query)
        return list(self.memory_store.search(embedding, limit=limit))

    def answer(self, query: str) -> Optional[str]:
        """Answer ``query`` from recalled memories.

        Returns ``None`` when ``recall`` (called with its default limit)
        finds nothing; otherwise returns whatever ``llm_client.complete``
        produces for a prompt built from the memory summaries and the query.
        """
        memories = self.recall(query)
        if not memories:
            return None
        # One summary per line forms the context section of the prompt.
        context = "\n".join(memory.summary for memory in memories)
        prompt = f"Use the following memories to answer the question:\n{context}\nQuestion: {query}"
        return self.llm_client.complete(prompt)
19 changes: 19 additions & 0 deletions src/memomind/core/knowledge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import List

from memomind.models.document import Document


@dataclass
class KnowledgeBase:
    """Ordered collection of the documents that have been ingested."""

    # Documents in the order they were added; starts out empty.
    documents: List[Document] = field(default_factory=list)

    def add(self, document: Document) -> None:
        """Append ``document`` to the end of ``documents``."""
        self.documents.append(document)

    def list_sources(self) -> List[str]:
        """Return the ``source`` of each stored document, in insertion order."""
        sources: List[str] = []
        for stored in self.documents:
            sources.append(stored.source)
        return sources
31 changes: 31 additions & 0 deletions src/memomind/core/memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Dict, Iterable, List, Optional

from memomind.models.memory import Memory
from memomind.storage.base import VectorStore
from memomind.storage.vector_store import InMemoryVectorStore


@dataclass
class MemoryStore:
    """Keep memories by id and index their embeddings in a vector store."""

    # Backend used for similarity search; an in-memory store by default.
    vector_store: VectorStore = field(default_factory=InMemoryVectorStore)
    # Memory objects keyed by the id they were added under.
    memories: Dict[str, Memory] = field(default_factory=dict)

    def add_memory(self, memory_id: str, memory: Memory, embedding: Optional[List[float]] = None) -> None:
        """Store ``memory`` under ``memory_id``, replacing any previous entry.

        When ``embedding`` is given it is upserted into the vector store with
        the memory summary as payload. When it is ``None`` the vector store is
        left untouched, so the memory is reachable through ``get_memory`` only.
        """
        self.memories[memory_id] = memory
        if embedding is not None:
            self.vector_store.upsert(memory_id, embedding, payload={"summary": memory.summary})

    def get_memory(self, memory_id: str) -> Optional[Memory]:
        """Return the memory stored under ``memory_id``, or ``None`` if absent."""
        return self.memories.get(memory_id)

    def search(self, embedding: List[float], limit: int = 5) -> Iterable[Memory]:
        """Lazily yield the memories whose ids the vector store returns.

        Ids reported by the vector store that have no entry in ``memories``
        are skipped, so fewer than ``limit`` items may be produced.
        """
        results = self.vector_store.search(embedding, limit=limit)
        for memory_id, _score in results:
            memory = self.memories.get(memory_id)
            # Test for a missing key explicitly rather than via truthiness, so
            # a stored object that happens to be falsy is still returned.
            if memory is not None:
                yield memory
6 changes: 6 additions & 0 deletions src/memomind/llm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""LLM integrations."""

from memomind.llm.client import LlmClient
from memomind.llm.embeddings import EmbeddingClient

__all__ = ["LlmClient", "EmbeddingClient"]
13 changes: 13 additions & 0 deletions src/memomind/llm/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class LlmClient:
    """Stand-in LLM client that echoes the prompt instead of calling a model."""

    # Model name reported in the stubbed response.
    model: str = "local-llm"

    def complete(self, prompt: str) -> str:
        """Return ``prompt`` prefixed with a marker naming the configured model."""
        banner = f"[stubbed response from {self.model}]"
        return f"{banner} {prompt}"
16 changes: 16 additions & 0 deletions src/memomind/llm/embeddings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import List


@dataclass
class EmbeddingClient:
    """Stand-in embedding client that derives a one-element vector from text."""

    # Model name; not read by ``embed``.
    model: str = "local-embedding"

    def embed(self, text: str) -> List[float]:
        """Return a single-component embedding for ``text``.

        Empty text maps to ``[0.0]``. Otherwise the UTF-8 byte values are
        summed, reduced modulo 997 and scaled by 1/997, giving one float in
        the half-open range [0, 1).
        """
        if text:
            byte_total = sum(bytearray(text, "utf-8"))
            bucket = float(byte_total % 997)
            return [bucket / 997.0]
        return [0.0]
7 changes: 7 additions & 0 deletions src/memomind/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Data models for MemoMind."""

from memomind.models.document import Document
from memomind.models.memory import Memory
from memomind.models.message import Message

__all__ = ["Document", "Memory", "Message"]
17 changes: 17 additions & 0 deletions src/memomind/models/document.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional


def _utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same kind of value as ``datetime.utcnow()`` without calling
    it, because ``utcnow`` is deprecated as of Python 3.12.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    # Strip tzinfo so the default stays naive, like the value it replaces.
    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass(frozen=True)
class Document:
    """Raw content ingested into the system.

    Instances are frozen; to attach an embedding, build a new ``Document``
    rather than mutating an existing one.
    """

    # The raw text of the document.
    content: str
    # Caller-supplied label for where the content came from.
    source: str
    # Creation timestamp; defaults to the current time as naive UTC.
    created_at: datetime = field(default_factory=_utc_now)
    # Free-form extra attributes; a fresh dict per instance.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Free-form labels; a fresh list per instance.
    tags: List[str] = field(default_factory=list)
    # Embedding vector, or None when none has been attached.
    embedding: Optional[List[float]] = None
17 changes: 17 additions & 0 deletions src/memomind/models/memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, Optional


def _utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same kind of value as ``datetime.utcnow()`` without calling
    it, because ``utcnow`` is deprecated as of Python 3.12.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    # Strip tzinfo so the default stays naive, like the value it replaces.
    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass(frozen=True)
class Memory:
    """Normalized memory representation stored for retrieval."""

    # Short text describing the memory.
    summary: str
    # Caller-defined category label.
    # NOTE(review): the set of valid values is not visible here - confirm.
    memory_type: str
    # Creation timestamp; defaults to the current time as naive UTC.
    created_at: datetime = field(default_factory=_utc_now)
    # Importance weight; defaults to 0.0.
    # NOTE(review): expected range is not visible here - confirm.
    importance: float = 0.0
    # Free-form extra attributes; a fresh dict per instance.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Id of an associated document, or None when there is none.
    related_document_id: Optional[str] = None
15 changes: 15 additions & 0 deletions src/memomind/models/message.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict


def _utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Yields the same kind of value as ``datetime.utcnow()`` without calling
    it, because ``utcnow`` is deprecated as of Python 3.12.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    # Strip tzinfo so the default stays naive, like the value it replaces.
    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass(frozen=True)
class Message:
    """Conversation message for working memory context."""

    # Speaker label for the message.
    # NOTE(review): the allowed role values are not visible here - confirm.
    role: str
    # Text of the message.
    content: str
    # Creation timestamp; defaults to the current time as naive UTC.
    created_at: datetime = field(default_factory=_utc_now)
    # String-to-string extra attributes; a fresh dict per instance.
    metadata: Dict[str, str] = field(default_factory=dict)
7 changes: 7 additions & 0 deletions src/memomind/storage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Storage backends."""

from memomind.storage.base import VectorStore
from memomind.storage.sqlite_store import SqliteVectorStore
from memomind.storage.vector_store import InMemoryVectorStore

__all__ = ["InMemoryVectorStore", "SqliteVectorStore", "VectorStore"]
13 changes: 13 additions & 0 deletions src/memomind/storage/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations

from typing import Dict, List, Protocol, Tuple


class VectorStore(Protocol):
    """Structural interface that vector storage backends implement.

    Any class providing ``upsert`` and ``search`` with these signatures
    satisfies the protocol; no inheritance is required.
    """

    def upsert(
        self,
        item_id: str,
        vector: List[float],
        payload: Dict[str, str],
    ) -> None:
        """Store ``vector`` and ``payload`` under ``item_id``.

        NOTE(review): the name suggests an existing ``item_id`` is
        overwritten - confirm against the concrete backends.
        """
        ...

    def search(
        self,
        vector: List[float],
        limit: int = 5,
    ) -> List[Tuple[str, float]]:
        """Return ``(item_id, score)`` pairs for ``vector``.

        ``limit`` is intended to cap the number of pairs returned; ordering
        and score semantics are left to the backend.
        """
        ...
20 changes: 20 additions & 0 deletions src/memomind/storage/serialization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

import json
from typing import Any, Dict, List


def dumps_vector(vector: List[float]) -> str:
    """Encode ``vector`` as a JSON array string."""
    encoded = json.dumps(vector)
    return encoded


def loads_vector(value: str) -> List[float]:
    """Decode a JSON array string into a list of floats.

    Each decoded element is passed through ``float``, so JSON integers come
    back as floats.
    """
    return list(map(float, json.loads(value)))


def dumps_payload(payload: Dict[str, str]) -> str:
    """Encode ``payload`` as a JSON object string."""
    encoded = json.dumps(payload)
    return encoded


def loads_payload(value: str) -> Dict[str, str]:
    """Decode a JSON object string into a string-to-string dict.

    Both keys and values are passed through ``str``, so non-string JSON
    values come back as their Python string form (e.g. ``1`` -> ``"1"``).
    """
    decoded = json.loads(value)
    payload: Dict[str, str] = {}
    for key, val in decoded.items():
        payload[str(key)] = str(val)
    return payload
Loading