Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions server/src/modules/phrase_bundle/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ interface PhraseBundleSchema {
phrases: string[];
}

// Field map for one confirmed chunk. Because a chunk has a property that is
// itself called `type` (Mongoose's reserved descriptor key), every path is
// declared in the explicit `{ type: ... }` form rather than the shorthand.
const chunkFieldDefinitions = {
  text: { type: String },
  type: { type: String },
  definition: { type: String },
  transliteration: { type: String },
  confidence: { type: Number },
};

// Chunks are plain value objects embedded in a phrase, so `_id` is disabled.
const chunkSchema = new Schema(chunkFieldDefinitions, { _id: false });

const phraseSchema = new Schema<PhraseSchema>(
{
phrase: { type: String },
Expand All @@ -92,15 +106,7 @@ const phraseSchema = new Schema<PhraseSchema>(
},
linguistic_data: Schema.Types.Mixed,
chunks: {
type: [
{
text: String,
type: String,
definition: String,
transliteration: String,
confidence: Number,
},
],
type: [chunkSchema],
default: [],
},
sourceUrl: String,
Expand Down
129 changes: 126 additions & 3 deletions server/src/modules/phrase_bundle/functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import {
// Import the PhraseSchema type from the database module
import { PhraseSchema } from "./db";
import { normaliseSourceUrl } from "../translation/url-normalise";
import { openRouter } from "../../utils/openrouter";
import { TRANSLATION_MODELS } from "../../utils/openrouter-models";
import { z } from "zod";

interface RemoveBundleParams {
_id: string;
Expand Down Expand Up @@ -165,12 +168,13 @@ const createPhrase = defineFunction({
throw new Error("One or more bundles not found or access denied");
}

// Check if phrase already exists (considering type)
// Check if phrase already exists. Match by phrase + type (+ owner) only:
// the translation can vary between AI calls, so including it would create
// duplicate phrase docs for the same saved text.
const existingPhrase = await phraseCollection.findOne({
refId: refId,
phrase: phrase.trim(),
translation: translation.trim(),
type: type, // Include type in the search
type: type,
});

let phraseId: string;
Expand Down Expand Up @@ -283,9 +287,128 @@ const updatePhrase = defineFunction({
},
});

/** Input accepted by the `getBundleSuggestionForPage` callback. */
interface BundleSuggestionParams {
// Owner reference; every phrase and bundle lookup is filtered by it.
refId: string;
// Title of the page being read; it is embedded in the prompt sent to the
// naming model. When it is missing or blank no name is suggested.
pageTitle?: string;
// URL of the page being read; it is passed through `normaliseSourceUrl` and
// compared with the `sourceUrl` stored on previously saved phrases.
pageUrl?: string;
}

/**
 * Zod schema for the bundle-naming structured output: an object with a single
 * `bundle_name` string. It is handed to
 * `openRouter.createStructuredOutputWithZod` as `zodSchema`; the `describe`
 * text documents the field and carries a worked example (presumably surfaced
 * to the model by that helper — confirm in the openrouter utility).
 */
const BundleNameSchema = z.object({
  bundle_name: z
    .string()
    .describe(
      "A short, clean bundle name derived from the page title, generalised so multiple episodes/chapters/articles from the same source group together (e.g. 'Stranger Things S2E5 — Netflix' -> 'Stranger Things S2')."
    ),
});

/** Longest page title (in UTF-16 code units) that is forwarded to the model. */
const MAX_PAGE_TITLE_LENGTH = 300;

/**
 * Suggest which bundle the save modal should default to for a given page+user.
 * Intended to be called once per page (first time the word detail opens) for
 * logged-in users.
 *
 * - If the user already saved a phrase from this page (matched by normalised
 *   source URL), the newest bundle (highest `_id`) containing one of those
 *   phrases is returned and no AI call is made.
 * - Otherwise the model is asked for a short bundle name derived from the
 *   title. If the user already owns a bundle with exactly that title it is
 *   returned as a match instead of a new name.
 *
 * @param refId     Owner reference used to scope every query.
 * @param pageTitle Page title supplied by the client (untrusted). It is
 *                  whitespace-normalised and capped at
 *                  `MAX_PAGE_TITLE_LENGTH` before being sent to the model.
 * @param pageUrl   Page URL supplied by the client; normalised before lookup.
 * @returns `matchedBundle` when an existing bundle should be preselected,
 *          `suggestedName` when a new bundle name is proposed, or both `null`
 *          when there is nothing to suggest (no usable title, empty model
 *          answer, or the naming step failed).
 *
 * Errors from the URL-based lookup (step 1) propagate to the caller; errors
 * from the naming step (step 2) are logged and swallowed on purpose.
 */
const getBundleSuggestionForPage = defineFunction({
  name: "getBundleSuggestionForPage",
  permissionTypes: ["user_access"],
  callback: async ({
    refId,
    pageTitle,
    pageUrl,
  }: BundleSuggestionParams): Promise<{
    matchedBundle: { _id: string; title: string } | null;
    suggestedName: string | null;
  }> => {
    const phraseCollection = getCollection<any>(DATABASE, PHRASE_COLLECTION);
    const phraseBundleCollection = getCollection<any>(
      DATABASE,
      BUNDLE_COLLECTION
    );

    // 1. Existing bundle from this same page (matched by normalised URL).
    const sourceUrl = normaliseSourceUrl(pageUrl || "");
    if (sourceUrl) {
      const phrases = await phraseCollection.find(
        { refId, sourceUrl },
        { _id: 1 }
      );
      const phraseIds = (phrases || []).map((p: any) => p._id);

      if (phraseIds.length) {
        // Newest bundle first, so the most recently created one wins.
        const bundle = await phraseBundleCollection.findOne(
          { refId, phrases: { $in: phraseIds } },
          {},
          { sort: { _id: -1 } }
        );
        if (bundle) {
          return {
            matchedBundle: { _id: String(bundle._id), title: bundle.title },
            suggestedName: null,
          };
        }
      }
    }

    // 2. No existing bundle: ask the model for a short, generalised name.
    // The title comes from an arbitrary web page via the client, so guard the
    // runtime type, collapse whitespace/newlines and bound its length before
    // it becomes part of a prompt.
    const title =
      typeof pageTitle === "string"
        ? pageTitle
            .replace(/\s+/g, " ")
            .trim()
            .slice(0, MAX_PAGE_TITLE_LENGTH)
            .trimEnd()
        : "";
    if (!title) {
      return { matchedBundle: null, suggestedName: null };
    }

    try {
      const result = await openRouter.createStructuredOutputWithZod<{
        bundle_name: string;
      }>({
        options: {
          models: TRANSLATION_MODELS,
          messages: [
            {
              role: "system",
              content:
                "You name vocabulary bundles. Given a web page title, produce a short, clean bundle name that generalises across multiple episodes/chapters/articles from the same source. Drop site suffixes, episode numbers and noise.",
            },
            // JSON.stringify yields the same `"..."` wrapping as before for
            // ordinary titles, but escapes embedded quotes and backslashes so
            // the title cannot break out of its quoted span.
            { role: "user", content: `Page title: ${JSON.stringify(title)}` },
          ],
          temperature: 0,
          max_tokens: 60,
        },
        zodSchema: BundleNameSchema,
        schemaName: "bundle_name",
        strict: true,
      });

      const name = result.bundle_name?.trim();
      if (!name) return { matchedBundle: null, suggestedName: null };

      // If a bundle with this exact name already exists, return it as a match
      // so the client preselects it instead of trying to re-create it.
      const existingByName = await phraseBundleCollection.findOne({
        refId,
        title: name,
      });
      if (existingByName) {
        return {
          matchedBundle: {
            _id: String(existingByName._id),
            title: existingByName.title,
          },
          suggestedName: null,
        };
      }

      return { matchedBundle: null, suggestedName: name };
    } catch (error: unknown) {
      console.error("Bundle suggestion error:", error);
      // Naming is best-effort; never block the save flow.
      return { matchedBundle: null, suggestedName: null };
    }
  },
});

// Function definitions this module publishes through the `functions`
// property of its CommonJS exports.
const phraseBundleFunctions = [
  removeBundle,
  removePhrase,
  createPhrase,
  updatePhrase,
  getBundleSuggestionForPage,
];

module.exports.functions = phraseBundleFunctions;
4 changes: 0 additions & 4 deletions server/src/modules/translation/functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ const translateWithContext = defineFunction({
translationType = "simple",
sourceLanguage = "",
targetLanguage = "",
pageTitle,
pageUrl,
} = params;

// normalize the source language
Expand All @@ -38,8 +36,6 @@ const translateWithContext = defineFunction({
context,
sourceLanguage,
targetLanguage,
pageTitle,
pageUrl,
});
}

Expand Down
6 changes: 0 additions & 6 deletions server/src/modules/translation/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,6 @@ export const LanguageLearningDataSchema = z.object({
linguistic_data: LinguisticDataSchema.describe(
"Linguistic analysis data in target language"
),
suggested_bundle_name: z
.string()
.optional()
.describe(
"A short, clean bundle name derived from the page title, generalised so multiple episodes/chapters/articles from the same source group together (e.g. 'Stranger Things S2E5 β€” Netflix' -> 'Stranger Things S2'). Only set when a page title is provided."
),
});

// Type inference from Zod schemas
Expand Down
20 changes: 7 additions & 13 deletions server/src/modules/translation/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ export async function getDetailedTranslation({
context,
sourceLanguage = "en",
targetLanguage,
pageTitle,
pageUrl,
}: TranslateWithContextParams): Promise<DetailedPhraseDataType> {
// Create prompt for OpenRouter
const systemPrompt = `
Expand All @@ -85,19 +83,12 @@ export async function getDetailedTranslation({

Chunks: inside the user's selection ("phrase"), find the reusable language patterns worth learning (collocations, phrasal verbs, idioms, discourse markers).
Rules: at most one chunk per 5-8 words of the selection, hard ceiling of 2 chunks. Each chunk's "text" must appear verbatim inside the selection. For each chunk, also provide: "transliteration" (how to pronounce that chunk, source language, in the ${targetLanguage} alphabet) and "definition" (a short, self-contained explanation of that chunk's meaning and usage, 1-2 sentences, in ${targetLanguage}).
Return an empty "chunks" array when the selection is under ~5 words, or when the selection is written in a different language than the target learning language.
${
pageTitle
? `Bundle name: also produce "suggested_bundle_name" - a short, clean name derived from the page title that generalises across multiple episodes/chapters/articles from the same source (e.g. "Stranger Things S2E5 β€” Netflix" -> "Stranger Things S2").`
: `Do not set "suggested_bundle_name".`
}`;
Return an empty "chunks" array when the selection is under ~5 words, or when the selection is written in a different language than the target learning language.`;

const userPrompt = `
Translate from ${sourceLanguage} to ${targetLanguage}:
Phrase: "${phrase}"
Accuracy context: "${context}"${
pageTitle ? `\n Page title: "${pageTitle}"` : ""
}${pageUrl ? `\n Page URL: "${pageUrl}"` : ""}`;
Accuracy context: "${context}"`;

try {
// Use the Zod schema directly with the OpenRouter service
Expand All @@ -116,7 +107,10 @@ export async function getDetailedTranslation({
},
],
temperature: 0,
max_tokens: 700,
// Richer schema now includes per-chunk definition + transliteration
// (token-heavy in target languages like Persian); 700 truncated the
// JSON ("unterminated string"), so allow more headroom.
max_tokens: 2000,
},
zodSchema: LanguageLearningDataSchema, // Pass the Zod schema directly
schemaName: "language_learning_data",
Expand Down Expand Up @@ -179,7 +173,7 @@ export async function getTranslationAdvice({
{ role: "user", content: message },
],
temperature: 0,
max_tokens: 400,
max_tokens: 800,
},
zodSchema: TranslationAdviceSchema,
schemaName: "translation_advice",
Expand Down
2 changes: 0 additions & 2 deletions server/src/modules/translation/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ export interface TranslateWithContextParams {
sourceLanguage?: string;
targetLanguage?: string;
translationType?: "simple" | "detailed";
pageTitle?: string;
pageUrl?: string;
}

export interface TranslationAdviceMessage {
Expand Down
Loading