From 176b4a0e66c133cf8fc655848a30b0f1a096d11a Mon Sep 17 00:00:00 2001 From: anirudh5harma Date: Sun, 28 Jun 2026 02:33:51 +0530 Subject: [PATCH] docs: clarify self-hosted local embedding limits --- apps/docs/docs.json | 1 + apps/docs/self-hosting/configuration.mdx | 6 ++ apps/docs/self-hosting/embedding-models.mdx | 52 +++++++++++++++ apps/docs/self-hosting/quickstart.mdx | 4 ++ packages/docs-test/package.json | 3 +- packages/docs-test/run.ts | 17 ++++- .../self-hosting/embedding-models-docs.ts | 65 +++++++++++++++++++ 7 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 apps/docs/self-hosting/embedding-models.mdx create mode 100644 packages/docs-test/tests/self-hosting/embedding-models-docs.ts diff --git a/apps/docs/docs.json b/apps/docs/docs.json index 92a32d9a0..95a6ec207 100644 --- a/apps/docs/docs.json +++ b/apps/docs/docs.json @@ -77,6 +77,7 @@ "self-hosting/overview", "self-hosting/quickstart", "self-hosting/configuration", + "self-hosting/embedding-models", "self-hosting/local-vs-enterprise" ] }, diff --git a/apps/docs/self-hosting/configuration.mdx b/apps/docs/self-hosting/configuration.mdx index 49580e6dd..2ffedccfe 100644 --- a/apps/docs/self-hosting/configuration.mdx +++ b/apps/docs/self-hosting/configuration.mdx @@ -61,6 +61,12 @@ OPENAI_MODEL=gpt-oss:20b Nothing to configure. Uploaded files (PDFs, images) are stored on local disk inside `$SUPERMEMORY_DATA_DIR` and served by the server at `/files/:key`. +## Local embedding model + +The self-hosted server computes dense embeddings locally. The current release does not expose a supported embedding-model selector; changing your LLM provider settings changes summaries, extraction, and chunking, but not the dense embedding model used for semantic search. + +For multilingual or non-English deployments, read [Local Embeddings](/self-hosting/embedding-models) before large backfills. The variables below tune embedding throughput and memory behavior only. 
+ ## Embedding performance Local embeddings are prewarmed at startup with conservative defaults — one worker, minimal CPU footprint. Turn these up if you're ingesting heavily and prefer throughput over headroom: diff --git a/apps/docs/self-hosting/embedding-models.mdx b/apps/docs/self-hosting/embedding-models.mdx new file mode 100644 index 000000000..d1db76d9c --- /dev/null +++ b/apps/docs/self-hosting/embedding-models.mdx @@ -0,0 +1,52 @@ +--- +title: "Local Embeddings" +sidebarTitle: "Local Embeddings" +description: "How self-hosted local embeddings affect multilingual search." +icon: "languages" +--- + +Self-hosted Supermemory computes dense embeddings locally. These embeddings are separate from the LLM provider you configure for summaries, extraction, and chunking. + +Changing `OPENAI_MODEL`, `OPENAI_BASE_URL`, `ANTHROPIC_API_KEY`, or another LLM provider setting does **not** change the local embedding model used for semantic search. + +<Note> +Current self-hosted release binaries do not expose a supported embedding-model selector. The `SUPERMEMORY_LOCAL_EMBEDDING_*` variables documented on the configuration page tune worker performance only; they do not change the embedding model or vector dimensions. +</Note> + +## Why this matters + +Semantic search compares the query embedding with stored document and memory embeddings. If the embedding model is not trained for the language in your content, ingestion can still finish successfully while dense recall returns weak, wrong, or empty results. + +This is most visible for multilingual or non-English deployments: exact keyword matches may still work through the lexical side of hybrid search, but paraphrased natural-language queries can fail because the dense vector space is not reliable for that language. + +## Production fix shape + +A production-grade multilingual fix needs more than a model-name environment variable: + +- A multilingual default embedding profile for fresh self-hosted installs. 
+- Durable profile metadata for model ID, dimensions, pooling, normalization, token limits, and model-family text formatting. +- Model-scoped vector storage and search so embeddings from different models are never compared. +- A reindex command for switching profiles, because stored vectors must be re-embedded with the new model before they are searchable under it. +- Upgrade behavior that keeps existing stores searchable until an operator explicitly reindexes. +- Release validation across multiple language families and scripts, not only one or two reported languages. + +The current recommendation for the default multilingual dense model is **BGE-M3** because it is designed for multilingual retrieval, supports more than 100 languages, supports long inputs, and uses 1024-dimensional dense vectors. That 1024-dimensional output is also why the database/index layer must change together with the model loader. + +## What to do today + +For English-only self-hosted deployments, the current local embedding path can still be suitable. + +For production multilingual self-hosting, track [GitHub issue #1104](https://github.com/supermemoryai/supermemory/issues/1104) and avoid large backfills that you expect to re-embed later. If you already have a multilingual corpus, keep the canonical content unchanged and plan for a reindex once a release includes model-scoped multilingual embedding profiles. + +Avoid these workarounds in production: + +- Translating memories to English at write time, because it changes the canonical user data. +- Storing bilingual duplicates, because it increases storage and pollutes extraction/search. +- Replacing model cache files under a different model name, because it hides model identity and can be overwritten by release updates. +- Truncating model output dimensions to fit the old vector schema, because it treats a schema constraint as a retrieval-quality decision. 
+ +## Related configuration + +Use [Embedding performance](/self-hosting/configuration#embedding-performance) to tune worker count, batch size, WASM threads, and prewarm behavior. These settings affect throughput and memory use only. + +Use [Memory limits and ingestion queue](/self-hosting/configuration#memory-limits--ingestion-queue) to control how much additional memory background ingestion can consume. diff --git a/apps/docs/self-hosting/quickstart.mdx b/apps/docs/self-hosting/quickstart.mdx index 713ea98be..392d6d491 100644 --- a/apps/docs/self-hosting/quickstart.mdx +++ b/apps/docs/self-hosting/quickstart.mdx @@ -130,6 +130,10 @@ curl http://localhost:6767/v3/search \ That's it. Everything in the [Memory API](/quickstart) — documents, memories, user profiles, spaces, filtering — works identically against your local server. +<Note> +Self-hosted embeddings are local and separate from your configured LLM provider. For multilingual or non-English production deployments, review [Local Embeddings](/self-hosting/embedding-models) before backfilling a large corpus. 
+</Note> + ## Where things live By default, all state lives in a single directory you can back up or move: diff --git a/packages/docs-test/package.json b/packages/docs-test/package.json index 1c1b66745..6aa0db28f 100644 --- a/packages/docs-test/package.json +++ b/packages/docs-test/package.json @@ -10,7 +10,8 @@ "test:integrations": "bun run run.ts integrations", "test:quickstart": "bun run run.ts quickstart", "test:sdk": "bun run run.ts sdk", - "test:search": "bun run run.ts search" + "test:search": "bun run run.ts search", + "test:self-hosting": "bun run run.ts self-hosting" }, "dependencies": { "@ai-sdk/anthropic": "^3.0.15", diff --git a/packages/docs-test/run.ts b/packages/docs-test/run.ts index be9a588c0..4817790f5 100644 --- a/packages/docs-test/run.ts +++ b/packages/docs-test/run.ts @@ -1,6 +1,6 @@ #!/usr/bin/env bun -import { spawn } from "child_process" -import path from "path" +import { spawn } from "node:child_process" +import path from "node:path" const args = process.argv.slice(2) const filter = args[0] // e.g., "typescript", "python", "integrations", or specific file @@ -55,6 +55,15 @@ function getTests(): TestFile[] { }) } + const selfHostingTests = ["embedding-models-docs"] + for (const t of selfHostingTests) { + tests.push({ + name: `self-hosting/${t}`, + path: path.join(TESTS_DIR, "self-hosting", `${t}.ts`), + type: "ts", + }) + } + return tests } @@ -95,7 +104,9 @@ async function main() { if (tests.length === 0) { console.log("No tests matched the filter:", filter) console.log("\nAvailable tests:") - getTests().forEach((t) => console.log(` - ${t.name} (${t.type})`)) + for (const t of getTests()) { + console.log(` - ${t.name} (${t.type})`) + } process.exit(1) } diff --git a/packages/docs-test/tests/self-hosting/embedding-models-docs.ts b/packages/docs-test/tests/self-hosting/embedding-models-docs.ts new file mode 100644 index 000000000..b536c1d5a --- /dev/null +++ b/packages/docs-test/tests/self-hosting/embedding-models-docs.ts @@ -0,0 +1,65 @@ +import 
{ readFileSync } from "node:fs" +import path from "node:path" + +const repoRoot = path.resolve(import.meta.dir, "../../../..") + +function readRepoFile(relativePath: string) { + return readFileSync(path.join(repoRoot, relativePath), "utf8") +} + +function assert(condition: unknown, message: string) { + if (!condition) { + throw new Error(message) + } +} + +const docsJson = JSON.parse(readRepoFile("apps/docs/docs.json")) +const embeddingDocs = readRepoFile( + "apps/docs/self-hosting/embedding-models.mdx", +) +const configDocs = readRepoFile("apps/docs/self-hosting/configuration.mdx") +const quickstartDocs = readRepoFile("apps/docs/self-hosting/quickstart.mdx") + +const selfHostingPages = docsJson.navigation.tabs[0].anchors[1].pages[1].pages + +assert( + selfHostingPages.includes("self-hosting/embedding-models"), + "Self-hosting navigation should include the local embeddings page", +) + +assert( + embeddingDocs.includes("GitHub issue #1104") && + embeddingDocs.includes( + "https://github.com/supermemoryai/supermemory/issues/1104", + ), + "Local embeddings docs should link to the upstream multilingual issue", +) + +assert( + embeddingDocs.includes("BGE-M3") && + embeddingDocs.includes("more than 100 languages") && + embeddingDocs.includes("1024-dimensional"), + "Local embeddings docs should explain the recommended multilingual model and dimension constraint", +) + +assert( + embeddingDocs.includes("do not expose a supported embedding-model selector"), + "Local embeddings docs should state that current releases do not support model selection", +) + +assert( + configDocs.includes("[Local Embeddings](/self-hosting/embedding-models)"), + "Configuration docs should link to local embedding guidance", +) + +assert( + quickstartDocs.includes("[Local Embeddings](/self-hosting/embedding-models)"), + "Quickstart should direct multilingual users to local embedding guidance", +) + +assert( + !configDocs.includes("SUPERMEMORY_LOCAL_EMBEDDING_MODEL"), + "Configuration docs must not 
document an unsupported embedding model env var as live config", +) + +console.log("Self-hosting local embedding docs checks passed")