graph-memory
diff --git a/SPEC.md b/SPEC.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/architecture.md b/docs/architecture.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/concepts-docs-indexing.md b/docs/concepts-docs-indexing.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/configuration.md b/docs/configuration.md
Lines changed: 8 additions & 8 deletions
diff --git a/docs/docker.md b/docs/docker.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/embeddings.md b/docs/embeddings.md
Lines changed: 28 additions & 19 deletions
diff --git a/docs/indexer.md b/docs/indexer.md
Lines changed: 2 additions & 2 deletions
diff --git a/docs/overview.md b/docs/overview.md
Lines changed: 2 additions & 2 deletions
diff --git a/graph-memory.yaml.example b/graph-memory.yaml.example
Lines changed: 29 additions & 20 deletions
diff --git a/site/blog/2026-03-23-getting-started-5-minutes.md b/site/blog/2026-03-23-getting-started-5-minutes.md
Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ See [docs/api-mcp.md](docs/api-mcp.md) for schemas and [docs/mcp-tools-guide.md]
 ## Key features
 
 - **Hybrid search**: BM25 + vector cosine, fused via RRF, BFS graph expansion — [docs/search.md](docs/search.md)
-- **Embeddings**: local ONNX (Xenova/bge-m3 default) or remote HTTP proxy — [docs/embeddings.md](docs/embeddings.md)
+- **Embeddings**: local ONNX (Xenova/jina-embeddings-v2-small-en default) or remote HTTP proxy — [docs/embeddings.md](docs/embeddings.md)
 - **File mirror**: `.notes/`, `.tasks/`, `.skills/` markdown files with reverse import — [docs/file-mirror.md](docs/file-mirror.md)
 - **Cross-graph links**: phantom proxy nodes connecting any graph to any graph — [docs/graphs-overview.md](docs/graphs-overview.md)
 - **Auth**: password login (JWT cookies) + API keys (Bearer) — [docs/authentication.md](docs/authentication.md)
 
@@ -21,7 +21,7 @@ graph TD
 
     subgraph Embed["Embedding Layer"]
         ONNX["ONNX Runtime"]
-        Models["bge-m3 / jina-code"]
+        Models["jina-small / jina-code"]
     end
 
     Indexer --> Embed
 
@@ -76,7 +76,7 @@ This means you can search for code examples by the symbols they define, or by se
 
 ### Step 4: Embed everything
 
-Each chunk is embedded into a vector using the configured model (default: `Xenova/bge-m3`). The embedding captures the **semantic meaning** of `title + content`, enabling similarity-based search.
+Each chunk is embedded into a vector using the configured model (default: `Xenova/jina-embeddings-v2-small-en`). The embedding captures the **semantic meaning** of `title + content`, enabling similarity-based search.
 
 Root nodes additionally get a `fileEmbedding` — embedded from `file path + h1 title` — used for file-level search ("find docs about authentication").
 
 
@@ -2,7 +2,7 @@
 
 ## Zero-config mode
 
-No config file needed. Just run `graphmemory serve` in your project directory — the current directory becomes the project with sensible defaults (BGE-M3 q8 model, all graphs enabled).
+No config file needed. Just run `graphmemory serve` in your project directory — the current directory becomes the project with sensible defaults (jina-small q8 model, all graphs enabled).
 
 ## Config file
 
@@ -60,8 +60,8 @@ server:
     search: 120
     auth: 10
   model:
-    name: "Xenova/bge-m3"
-    pooling: "cls"
+    name: "Xenova/jina-embeddings-v2-small-en"
+    pooling: "mean"
     normalize: true
     dtype: "q8"
     queryPrefix: ""
@@ -100,7 +100,7 @@ projects:
       name: "Project Bot"
       email: "bot@example.com"
     model:
-      name: "Xenova/bge-m3"
+      name: "Xenova/jina-embeddings-v2-small-en"
     embedding:
       maxChars: 24000
     access:
@@ -111,7 +111,7 @@ projects:
         include: "**/*.md"
         exclude: "**/drafts/**"
         model:
-          name: "Xenova/bge-m3"
+          name: "Xenova/bge-m3"    # override: use multilingual model for docs
           pooling: "cls"
           normalize: true
         access:
@@ -140,7 +140,7 @@ workspaces:
     access:
       alice: rw
     model:
-      name: "Xenova/bge-m3"
+      name: "Xenova/jina-embeddings-v2-small-en"
     embedding:
       maxChars: 24000
 ```
@@ -195,8 +195,8 @@ graphs.code.model → project.codeModel → server.codeModel → code defaults
 
 | Field | Type | Default (general / code) | Description |
 |-------|------|---------|-------------|
-| `name` | string | `Xenova/bge-m3` / `jinaai/jina-embeddings-v2-base-code` | HuggingFace model ID |
-| `pooling` | string | `cls` / `mean` | Pooling strategy: `mean` or `cls` |
+| `name` | string | `Xenova/jina-embeddings-v2-small-en` / `jinaai/jina-embeddings-v2-base-code` | HuggingFace model ID |
+| `pooling` | string | `mean` / `mean` | Pooling strategy: `mean` or `cls` |
 | `normalize` | boolean | `true` | L2-normalize output vectors |
 | `dtype` | string | `q8` | Quantization: `fp32`, `fp16`, `q8`, `q4` |
 | `queryPrefix` | string | `""` | Prefix prepended to search queries |
 
@@ -76,7 +76,7 @@ docker compose up -d
 
 ### Model cache
 
-The default embedding model (`Xenova/bge-m3`, ~560 MB) downloads on first startup. Use a **named volume** so the model persists across container restarts.
+The default embedding model (`Xenova/jina-embeddings-v2-small-en`, ~33 MB) downloads on first startup. Use a **named volume** so the model persists across container restarts.
 
 ## Config for Docker
 
 
@@ -6,12 +6,12 @@ The embedding system converts text into high-dimensional vectors for semantic se
 
 ## Default models
 
-**Xenova/bge-m3** — the default embedding model (docs, knowledge, tasks, skills, files):
-- 1024 dimensions
-- Multilingual (100+ languages)
+**Xenova/jina-embeddings-v2-small-en** — the default embedding model (docs, knowledge, tasks, skills, files):
+- 512 dimensions
+- English, 33M parameters (4 transformer layers)
 - 8K token context
-- ~560 MB download size
-- Pooling: `cls`
+- ~33 MB download size (q8)
+- Pooling: `mean`
 - Normalization: L2-normalized (cosine similarity = dot product)
 
 **jinaai/jina-embeddings-v2-base-code** — the default code graph model:
@@ -21,7 +21,7 @@ The embedding system converts text into high-dimensional vectors for semantic se
 - Pooling: `mean`
 - Normalization: L2-normalized
 
-The code graph uses a separate model inheritance chain (`codeModel`) so it can use a code-optimized model by default while other graphs use BGE-M3.
+The code graph uses a separate model inheritance chain (`codeModel`) so it can use a code-optimized model by default while other graphs use jina-small.
 
 ## Model registry
 
@@ -77,8 +77,8 @@ graph.model → project.codeModel → server.codeModel  → code defaults   (cod
 
 | Field | Type | Default | Description |
 |-------|------|---------|-------------|
-| `name` | string | `Xenova/bge-m3` | HuggingFace model ID |
-| `pooling` | string | `cls` | Pooling strategy: `mean` or `cls` |
+| `name` | string | `Xenova/jina-embeddings-v2-small-en` | HuggingFace model ID |
+| `pooling` | string | `mean` | Pooling strategy: `mean` or `cls` |
 | `normalize` | boolean | `true` | L2-normalize output vectors |
 | `dtype` | string | `q8` | Quantization: `fp32`, `fp16`, `q8`, `q4` |
 | `queryPrefix` | string | `""` | Prefix prepended to search queries |
@@ -103,7 +103,16 @@ graph.embedding → project.embedding → server.embedding → defaults
 
 ## Model examples
 
-### BGE-M3 (default, recommended)
+### jina-embeddings-v2-small-en (default)
+
+```yaml
+model:
+  name: "Xenova/jina-embeddings-v2-small-en"
+  pooling: "mean"
+  normalize: true
+```
+
+### BGE-M3 (multilingual, larger)
 
 ```yaml
 model:
@@ -117,7 +126,7 @@ model:
 ```yaml
 model:
   name: "Xenova/bge-base-en-v1.5"
-  pooling: "cls"
+  pooling: "mean"
   normalize: true
   queryPrefix: "Represent this sentence for searching relevant passages: "
 ```
@@ -127,7 +136,7 @@ model:
 ```yaml
 model:
   name: "Xenova/bge-small-en-v1.5"
-  pooling: "cls"
+  pooling: "mean"
   normalize: true
   queryPrefix: "Represent this sentence for searching relevant passages: "
 ```
@@ -156,10 +165,10 @@ model:
 
 ```yaml
 model:
-  name: "Xenova/bge-m3"
-  pooling: "cls"
+  name: "Xenova/jina-embeddings-v2-small-en"
+  pooling: "mean"
   normalize: true
-  dtype: "q8"      # fp32, fp16, q8, q4
+  dtype: "q4"      # fp32, fp16, q8, q4
 ```
 
 ## Remote embedding
@@ -222,7 +231,7 @@ server:
 { "embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]] }
 ```
 
-The `model` parameter selects which embedding model to use: `"default"` (general, BGE-M3) or `"code"` (code-optimized, jina-code). Both models are loaded when the embedding API is enabled.
+The `model` parameter selects which embedding model to use: `"default"` (general, jina-small) or `"code"` (code-optimized, jina-code). Both models are loaded when the embedding API is enabled.
 
 ### Embedding API configuration
 
@@ -271,19 +280,19 @@ projects:
   my-app:
     projectDir: "/path/to/my-app"
     model:
-      name: "Xenova/bge-m3"               # default for most graphs
+      name: "Xenova/bge-m3"               # multilingual model for most graphs
       pooling: "cls"
       normalize: true
     graphs:
       files:
         model:
-          name: "Xenova/bge-small-en-v1.5" # smaller model for file paths
-          pooling: "cls"
+          name: "Xenova/jina-embeddings-v2-small-en" # lighter model for file paths
+          pooling: "mean"
           normalize: true
       code:
         model:
           name: "Xenova/bge-base-en-v1.5"  # different model for code
-          pooling: "cls"
+          pooling: "mean"
           normalize: true
 ```
 
 
@@ -25,8 +25,8 @@ flowchart TD
 During initial indexing, the three queues run **sequentially by phase** rather than concurrently. This ensures only one embedding model is loaded at a time, reducing peak memory:
 
 ```
-Phase 1: docs   → scan(docs)  + drain(docs)   — triggers bge-m3 lazy load
-Phase 2: files  → scan(files) + drain(files)  — reuses bge-m3 (already loaded)
+Phase 1: docs   → scan(docs)  + drain(docs)   — triggers jina-small lazy load
+Phase 2: files  → scan(files) + drain(files)  — reuses jina-small (already loaded)
 Phase 3: code   → scan(code)  + drain(code)   — triggers jina-code lazy load
 Finalize:  rebuildDirectoryStats, resolvePendingLinks, scanMirrorDirs (K/T/S)
 ```
 
@@ -10,7 +10,7 @@
 - **Stores knowledge** (facts, notes, decisions) in a dedicated knowledge graph with typed relations, file attachments, and cross-graph links
 - **Tracks tasks** with kanban workflow, priorities, due dates, estimates, assignees, and cross-graph links
 - **Manages skills** (reusable recipes/procedures) with steps, triggers, usage tracking, and cross-graph links
-- **Embeds every node** locally using `Xenova/bge-m3` by default (no external API calls); supports per-graph models with configurable pooling, normalization, dtype, and prefixes
+- **Embeds every node** locally using `Xenova/jina-embeddings-v2-small-en` by default (no external API calls); supports per-graph models with configurable pooling, normalization, dtype, and prefixes
 - **Answers search queries** via hybrid search (BM25 keyword + vector cosine similarity) with BFS graph expansion
 - **Watches for file changes** and re-indexes incrementally in real time
 
@@ -45,7 +45,7 @@
 ## Requirements
 
 - **Node.js** >= 22
-- The default embedding model (`Xenova/bge-m3`, ~560 MB) downloads on first startup
+- The default embedding model (`Xenova/jina-embeddings-v2-small-en`, ~33 MB) downloads on first startup
 
 ## Repository
 
 
@@ -77,8 +77,8 @@ server:
   # Default model config (fallback for all graphs except code).
   # Taken as a whole object from the first level that defines it (no field-by-field merge).
   # model:
-  #   name: "Xenova/bge-m3"                     # HuggingFace model ID (default: Xenova/bge-m3)
-  #   pooling: "cls"                            # Pooling strategy: "mean" or "cls"
+  #   name: "Xenova/jina-embeddings-v2-small-en" # HuggingFace model ID (default)
+  #   pooling: "mean"                           # Pooling strategy: "mean" or "cls"
   #   normalize: true                           # L2-normalize output vectors
   #   dtype: "q8"                               # Quantization: fp32, fp16, q8, q4 (default: q8)
   #   queryPrefix: ""                           # Prefix prepended to search queries
@@ -130,8 +130,8 @@ projects:
 
     # Per-project model config (overrides server.model — taken as whole object)
     # model:
-    #   name: "Xenova/bge-m3"
-    #   pooling: "cls"
+    #   name: "Xenova/jina-embeddings-v2-small-en"
+    #   pooling: "mean"
     #   normalize: true
 
     # Per-project code model (overrides server.codeModel — separate chain for code graph)
@@ -164,7 +164,7 @@ projects:
     #     include: "**/*.md"                     # Glob for markdown files (default: "**/*.md")
     #     exclude: "**/changelog/**"              # Additional exclude (merged with project + server)
     #     model:                                 # Full model config (no merge with parent)
-    #       name: "Xenova/bge-m3"
+    #       name: "Xenova/bge-m3"                # override: use multilingual model for docs
     #       pooling: "cls"
     #       normalize: true
     #     embedding:                             # Embedding config (field-by-field merge with parent)
@@ -224,8 +224,8 @@ projects:
 #     # exclude: "**/vendor/**"                 # Additional exclude (merged with server default)
 #     # Workspace-level model (overrides server.model for shared graphs)
 #     # model:
-#     #   name: "Xenova/bge-m3"
-#     #   pooling: "cls"
+#     #   name: "Xenova/jina-embeddings-v2-small-en"
+#     #   pooling: "mean"
 #     #   normalize: true
 #     # Workspace-level embedding (overrides server.embedding for shared graphs)
 #     # embedding:
@@ -235,7 +235,7 @@ projects:
 #     #   knowledge:
 #     #     enabled: true
 #     #     model:
-#     #       name: "Xenova/bge-m3"
+#     #       name: "Xenova/jina-embeddings-v2-small-en"
 #     #     embedding:
 #     #       maxChars: 16000
 
@@ -245,9 +245,17 @@ projects:
 # Below are examples of how to configure different embedding models.
 # Copy the relevant `model:` block into server, project, or graphs section.
 #
-# ── Default: BGE-M3 (recommended) ─────────────────────────────────────────
-# Best general-purpose model. 1024 dimensions, multilingual (100+ languages),
-# 8K token context. Works out of the box with default settings.
+# ── Default: jina-embeddings-v2-small-en ──────────────────────────────────
+# Lightweight English model. 512 dimensions, 33M parameters, 8K token context.
+# ~33 MB download (q8). Works out of the box with default settings.
+#
+#   model:
+#     name: "Xenova/jina-embeddings-v2-small-en"
+#     pooling: "mean"
+#     normalize: true
+#
+# ── BGE-M3 (multilingual) ────────────────────────────────────────────────
+# Best multilingual model. 1024 dimensions, 100+ languages, 8K context, ~560 MB.
 #
 #   model:
 #     name: "Xenova/bge-m3"
@@ -260,7 +268,7 @@ projects:
 #
 #   model:
 #     name: "Xenova/bge-base-en-v1.5"
-#     pooling: "cls"
+#     pooling: "mean"
 #     normalize: true
 #     queryPrefix: "Represent this sentence for searching relevant passages: "
 #
@@ -269,7 +277,7 @@ projects:
 #
 #   model:
 #     name: "Xenova/bge-small-en-v1.5"
-#     pooling: "cls"
+#     pooling: "mean"
 #     normalize: true
 #     queryPrefix: "Represent this sentence for searching relevant passages: "
 #
@@ -298,20 +306,21 @@ projects:
 # Options: fp32, fp16, q8 (default), q4
 #
 #   model:
-#     name: "Xenova/bge-m3"
-#     pooling: "cls"
+#     name: "Xenova/jina-embeddings-v2-small-en"
+#     pooling: "mean"
 #     normalize: true
-#     dtype: "q8"
+#     dtype: "q4"
 #
 # ── Mixed config: different models per graph ──────────────────────────────
 # The code graph defaults to jinaai/jina-embeddings-v2-base-code via `codeModel`.
-# Other graphs default to Xenova/bge-m3 via `model`. You can override per-graph:
+# Other graphs default to Xenova/jina-embeddings-v2-small-en via `model`.
+# You can override per-graph:
 #
 # projects:
 #   my-app:
 #     projectDir: "/path/to/my-app"
 #     model:
-#       name: "Xenova/bge-m3"                     # default for docs, knowledge, tasks, skills, files
+#       name: "Xenova/bge-m3"                     # override: multilingual model for most graphs
 #       pooling: "cls"
 #       normalize: true
 #     codeModel:
@@ -321,6 +330,6 @@ projects:
 #     graphs:
 #       files:
 #         model:
-#           name: "Xenova/bge-small-en-v1.5"      # smaller model for file paths
-#           pooling: "cls"
+#           name: "Xenova/jina-embeddings-v2-small-en" # lighter model for file paths
+#           pooling: "mean"
 #           normalize: true
@@ -23,7 +23,7 @@ No config file needed. Graph Memory uses your current directory as the project.
 You'll see output like:
 
 ```
-INFO  Registered model (lazy)         model="Xenova/bge-m3"
+INFO  Registered model (lazy)         model="Xenova/jina-embeddings-v2-small-en"
 INFO  Starting indexing phase         phase="1/3 docs"
 INFO  Starting indexing phase         phase="2/3 files"
 INFO  Starting indexing phase         phase="3/3 code"