From 9922825747ec746c45913f4227d47070ed75773c Mon Sep 17 00:00:00 2001 From: Navid Shad Date: Thu, 21 May 2026 21:05:58 +0300 Subject: [PATCH 1/2] refactor(bundle-suggestion): dedicated per-page RPC, off the translation path The bundle-name suggestion was piggy-backed on getDetailedTranslation, so it ran on every word lookup (incl. anonymous users). Move it to its own call: - New phrase_bundle RPC getBundleSuggestionForPage (user_access): matches an existing bundle by normalised source URL, else asks the model for a short name from the page title. Best-effort; never blocks saving. - Remove pageTitle/pageUrl from translateWithContext + getDetailedTranslation and suggested_bundle_name from the translation schema/prompt. Co-Authored-By: Claude Opus 4.7 (1M context) --- server/src/modules/phrase_bundle/functions.ts | 106 ++++++++++++++++++ server/src/modules/translation/functions.ts | 4 - server/src/modules/translation/schema.ts | 6 - server/src/modules/translation/service.ts | 13 +-- server/src/modules/translation/types.ts | 2 - 5 files changed, 108 insertions(+), 23 deletions(-) diff --git a/server/src/modules/phrase_bundle/functions.ts b/server/src/modules/phrase_bundle/functions.ts index 7ffa84b..ca53806 100644 --- a/server/src/modules/phrase_bundle/functions.ts +++ b/server/src/modules/phrase_bundle/functions.ts @@ -8,6 +8,9 @@ import { // Import the PhraseSchema type from the database module import { PhraseSchema } from "./db"; import { normaliseSourceUrl } from "../translation/url-normalise"; +import { openRouter } from "../../utils/openrouter"; +import { TRANSLATION_MODELS } from "../../utils/openrouter-models"; +import { z } from "zod"; interface RemoveBundleParams { _id: string; @@ -283,9 +286,112 @@ const updatePhrase = defineFunction({ }, }); +interface BundleSuggestionParams { + refId: string; + pageTitle?: string; + pageUrl?: string; +} + +const BundleNameSchema = z.object({ + bundle_name: z + .string() + .describe( + "A short, clean bundle name derived 
from the page title, generalised so multiple episodes/chapters/articles from the same source group together (e.g. 'Stranger Things S2E5 — Netflix' -> 'Stranger Things S2')." + ), +}); + +/** + * Suggest which bundle the save modal should default to for a given page+user. + * Called once per page (first time the word detail opens) for logged-in users. + * + * - If the user already saved a phrase from this page (matched by normalised + * source URL), returns that bundle and no AI call is made. + * - Otherwise asks the model for a short bundle name derived from the title. + */ +const getBundleSuggestionForPage = defineFunction({ + name: "getBundleSuggestionForPage", + permissionTypes: ["user_access"], + callback: async ({ + refId, + pageTitle, + pageUrl, + }: BundleSuggestionParams): Promise<{ + matchedBundle: { _id: string; title: string } | null; + suggestedName: string | null; + }> => { + const phraseCollection = getCollection(DATABASE, PHRASE_COLLECTION); + const phraseBundleCollection = getCollection( + DATABASE, + BUNDLE_COLLECTION + ); + + // 1. Existing bundle from this same page (matched by normalised URL). + const sourceUrl = normaliseSourceUrl(pageUrl || ""); + if (sourceUrl) { + const phrases = await phraseCollection.find( + { refId, sourceUrl }, + { _id: 1 } + ); + const phraseIds = (phrases || []).map((p: any) => p._id); + + if (phraseIds.length) { + const bundle = await phraseBundleCollection.findOne( + { refId, phrases: { $in: phraseIds } }, + {}, + { sort: { _id: -1 } } + ); + if (bundle) { + return { + matchedBundle: { _id: String(bundle._id), title: bundle.title }, + suggestedName: null, + }; + } + } + } + + // 2. No existing bundle: ask the model for a short, generalised name. 
+ if (!pageTitle || !pageTitle.trim()) { + return { matchedBundle: null, suggestedName: null }; + } + + try { + const result = await openRouter.createStructuredOutputWithZod<{ + bundle_name: string; + }>({ + options: { + models: TRANSLATION_MODELS, + messages: [ + { + role: "system", + content: + "You name vocabulary bundles. Given a web page title, produce a short, clean bundle name that generalises across multiple episodes/chapters/articles from the same source. Drop site suffixes, episode numbers and noise.", + }, + { role: "user", content: `Page title: "${pageTitle}"` }, + ], + temperature: 0, + max_tokens: 60, + }, + zodSchema: BundleNameSchema, + schemaName: "bundle_name", + strict: true, + }); + + return { + matchedBundle: null, + suggestedName: result.bundle_name?.trim() || null, + }; + } catch (error: unknown) { + console.error("Bundle suggestion error:", error); + // Naming is best-effort; never block the save flow. + return { matchedBundle: null, suggestedName: null }; + } + }, +}); + module.exports.functions = [ removeBundle, removePhrase, createPhrase, updatePhrase, + getBundleSuggestionForPage, ]; diff --git a/server/src/modules/translation/functions.ts b/server/src/modules/translation/functions.ts index 6c4d1bf..cbec93f 100644 --- a/server/src/modules/translation/functions.ts +++ b/server/src/modules/translation/functions.ts @@ -22,8 +22,6 @@ const translateWithContext = defineFunction({ translationType = "simple", sourceLanguage = "", targetLanguage = "", - pageTitle, - pageUrl, } = params; // normalize the source language @@ -38,8 +36,6 @@ const translateWithContext = defineFunction({ context, sourceLanguage, targetLanguage, - pageTitle, - pageUrl, }); } diff --git a/server/src/modules/translation/schema.ts b/server/src/modules/translation/schema.ts index 5c0ba2d..d4b8868 100644 --- a/server/src/modules/translation/schema.ts +++ b/server/src/modules/translation/schema.ts @@ -96,12 +96,6 @@ export const LanguageLearningDataSchema = z.object({ 
linguistic_data: LinguisticDataSchema.describe( "Linguistic analysis data in target language" ), - suggested_bundle_name: z - .string() - .optional() - .describe( - "A short, clean bundle name derived from the page title, generalised so multiple episodes/chapters/articles from the same source group together (e.g. 'Stranger Things S2E5 — Netflix' -> 'Stranger Things S2'). Only set when a page title is provided." - ), }); // Type inference from Zod schemas diff --git a/server/src/modules/translation/service.ts b/server/src/modules/translation/service.ts index 9de87f3..0d1a454 100644 --- a/server/src/modules/translation/service.ts +++ b/server/src/modules/translation/service.ts @@ -72,8 +72,6 @@ export async function getDetailedTranslation({ context, sourceLanguage = "en", targetLanguage, - pageTitle, - pageUrl, }: TranslateWithContextParams): Promise { // Create prompt for OpenRouter const systemPrompt = ` @@ -85,19 +83,12 @@ export async function getDetailedTranslation({ Chunks: inside the user's selection ("phrase"), find the reusable language patterns worth learning (collocations, phrasal verbs, idioms, discourse markers). Rules: at most one chunk per 5-8 words of the selection, hard ceiling of 2 chunks. Each chunk's "text" must appear verbatim inside the selection. For each chunk, also provide: "transliteration" (how to pronounce that chunk, source language, in the ${targetLanguage} alphabet) and "definition" (a short, self-contained explanation of that chunk's meaning and usage, 1-2 sentences, in ${targetLanguage}). - Return an empty "chunks" array when the selection is under ~5 words, or when the selection is written in a different language than the target learning language. - ${ - pageTitle - ? `Bundle name: also produce "suggested_bundle_name" - a short, clean name derived from the page title that generalises across multiple episodes/chapters/articles from the same source (e.g. 
"Stranger Things S2E5 — Netflix" -> "Stranger Things S2").` - : `Do not set "suggested_bundle_name".` - }`; + Return an empty "chunks" array when the selection is under ~5 words, or when the selection is written in a different language than the target learning language.`; const userPrompt = ` Translate from ${sourceLanguage} to ${targetLanguage}: Phrase: "${phrase}" - Accuracy context: "${context}"${ - pageTitle ? `\n Page title: "${pageTitle}"` : "" - }${pageUrl ? `\n Page URL: "${pageUrl}"` : ""}`; + Accuracy context: "${context}"`; try { // Use the Zod schema directly with the OpenRouter service diff --git a/server/src/modules/translation/types.ts b/server/src/modules/translation/types.ts index 27d3173..cec3035 100644 --- a/server/src/modules/translation/types.ts +++ b/server/src/modules/translation/types.ts @@ -15,8 +15,6 @@ export interface TranslateWithContextParams { sourceLanguage?: string; targetLanguage?: string; translationType?: "simple" | "detailed"; - pageTitle?: string; - pageUrl?: string; } export interface TranslationAdviceMessage { From 50af64013bade30c36597e1bb1c96a5e0e0f1713 Mon Sep 17 00:00:00 2001 From: Navid Shad Date: Thu, 21 May 2026 23:49:14 +0300 Subject: [PATCH 2/2] fix(phrase,translation): chunk subdoc schema, match-by-phrase, token headroom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix Mongoose reserved-key collision: chunks were defined with a bare `type` field, so the array was treated as [String] and rejected objects (a Mongoose CastError on save; not E11000, which is MongoDB's unrelated duplicate-key error). Use a dedicated chunk sub-schema with the verbose `{ type }` form. - getBundleSuggestionForPage: if the suggested name already matches a bundle, return it as matchedBundle so the client preselects it instead of recreating. 
- createPhrase dedup now matches by phrase + type (+ owner), not translation — the AI returns a different translation each call, which caused duplicate docs and made an already-saved phrase look unsaved. 
- Raise max_tokens (detailed translation 700->2000, advisor 400->800): the richer per-chunk definition + transliteration overflowed and truncated the JSON ("unterminated string") in token-heavy target languages like Persian. Co-Authored-By: Claude Opus 4.7 (1M context) --- server/src/modules/phrase_bundle/db.ts | 24 ++++++++------ server/src/modules/phrase_bundle/functions.ts | 31 ++++++++++++++----- server/src/modules/translation/service.ts | 7 +++-- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/server/src/modules/phrase_bundle/db.ts b/server/src/modules/phrase_bundle/db.ts index 0b54c31..29dcb00 100644 --- a/server/src/modules/phrase_bundle/db.ts +++ b/server/src/modules/phrase_bundle/db.ts @@ -73,6 +73,20 @@ interface PhraseBundleSchema { phrases: string[]; } +// Sub-schema for a confirmed chunk. The field literally named `type` collides +// with Mongoose's reserved type key, so each field uses the verbose `{ type }` +// descriptor form (and _id is disabled since chunks are plain value objects). 
+const chunkSchema = new Schema( + { + text: { type: String }, + type: { type: String }, + definition: { type: String }, + transliteration: { type: String }, + confidence: { type: Number }, + }, + { _id: false } +); + const phraseSchema = new Schema( { phrase: { type: String }, @@ -92,15 +106,7 @@ const phraseSchema = new Schema( }, linguistic_data: Schema.Types.Mixed, chunks: { - type: [ - { - text: String, - type: String, - definition: String, - transliteration: String, - confidence: Number, - }, - ], + type: [chunkSchema], default: [], }, sourceUrl: String, diff --git a/server/src/modules/phrase_bundle/functions.ts b/server/src/modules/phrase_bundle/functions.ts index ca53806..97b2ee2 100644 --- a/server/src/modules/phrase_bundle/functions.ts +++ b/server/src/modules/phrase_bundle/functions.ts @@ -168,12 +168,13 @@ const createPhrase = defineFunction({ throw new Error("One or more bundles not found or access denied"); } - // Check if phrase already exists (considering type) + // Check if phrase already exists. Match by phrase + type (+ owner) only: + // the translation can vary between AI calls, so including it would create + // duplicate phrase docs for the same saved text. const existingPhrase = await phraseCollection.findOne({ refId: refId, phrase: phrase.trim(), - translation: translation.trim(), - type: type, // Include type in the search + type: type, }); let phraseId: string; @@ -376,10 +377,26 @@ const getBundleSuggestionForPage = defineFunction({ strict: true, }); - return { - matchedBundle: null, - suggestedName: result.bundle_name?.trim() || null, - }; + const name = result.bundle_name?.trim(); + if (!name) return { matchedBundle: null, suggestedName: null }; + + // If a bundle with this exact name already exists, return it as a match + // so the client preselects it instead of trying to re-create it. 
+ const existingByName = await phraseBundleCollection.findOne({ + refId, + title: name, + }); + if (existingByName) { + return { + matchedBundle: { + _id: String(existingByName._id), + title: existingByName.title, + }, + suggestedName: null, + }; + } + + return { matchedBundle: null, suggestedName: name }; } catch (error: unknown) { console.error("Bundle suggestion error:", error); // Naming is best-effort; never block the save flow. diff --git a/server/src/modules/translation/service.ts b/server/src/modules/translation/service.ts index 0d1a454..88e5d51 100644 --- a/server/src/modules/translation/service.ts +++ b/server/src/modules/translation/service.ts @@ -107,7 +107,10 @@ export async function getDetailedTranslation({ }, ], temperature: 0, - max_tokens: 700, + // Richer schema now includes per-chunk definition + transliteration + // (token-heavy in target languages like Persian); 700 truncated the + // JSON ("unterminated string"), so allow more headroom. + max_tokens: 2000, }, zodSchema: LanguageLearningDataSchema, // Pass the Zod schema directly schemaName: "language_learning_data", @@ -170,7 +173,7 @@ export async function getTranslationAdvice({ { role: "user", content: message }, ], temperature: 0, - max_tokens: 400, + max_tokens: 800, }, zodSchema: TranslationAdviceSchema, schemaName: "translation_advice",