diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..3a0d6806 --- /dev/null +++ b/.env.example @@ -0,0 +1,10 @@ +# === Translation (auto-translate for blog content) === +# DeepSeek Chat API key for translation +# Get one at https://platform.deepseek.com/api_keys +# Accepts either DEEPSEEK_APIKEY or DEEPSEEK_API_KEY. +# Uses the deepseek-chat model for high-quality translations. +# If unset, translation falls back to identity (no-op). +DEEPSEEK_APIKEY= + +# Translation cache directory (default: data/translations) +# TRANSLATE_CACHE_DIR=data/translations diff --git a/.gitignore b/.gitignore index a1c32781..6da02a9d 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,6 @@ next-env.d.ts # AGENTS.md is the canonical agent instructions file. # CLAUDE.md is a symlink → AGENTS.md (tracked by git, don't ignore). + +# Translation cache (auto-generated) +data/translations/ diff --git a/__tests__/lib/translate.shared.test.ts b/__tests__/lib/translate.shared.test.ts new file mode 100644 index 00000000..6a9d87b7 --- /dev/null +++ b/__tests__/lib/translate.shared.test.ts @@ -0,0 +1,60 @@ +import { + normalizeLocale, + shouldTranslate, + localeToLabel, + getTranslatableLocales, +} from '../../lib/translate.shared' + +describe('translate.shared', () => { + describe('normalizeLocale', () => { + it('maps zh to zh-CN', () => { + expect(normalizeLocale('zh')).toBe('zh-CN') + }) + + it('collapses zh-HK and zh-TW to zh-TW', () => { + expect(normalizeLocale('zh-HK')).toBe('zh-TW') + expect(normalizeLocale('zh-TW')).toBe('zh-TW') + }) + + it('strips the region from non-Chinese locales', () => { + expect(normalizeLocale('en-US')).toBe('en') + expect(normalizeLocale('de-DE')).toBe('de') + expect(normalizeLocale('fr')).toBe('fr') + }) + }) + + describe('shouldTranslate', () => { + it('returns false for every Chinese variant', () => { + for (const locale of ['zh', 'zh-CN', 'zh-TW', 'zh-HK', 'zh-Hans', 'zh-Hans-CN']) { + expect(shouldTranslate(locale)).toBe(false) + } 
+ }) + + it('returns true for non-Chinese locales', () => { + for (const locale of ['en', 'en-US', 'ja', 'ko', 'fr', 'de-DE']) { + expect(shouldTranslate(locale)).toBe(true) + } + }) + }) + + describe('localeToLabel', () => { + it('returns the native label for known locales', () => { + expect(localeToLabel('en')).toBe('English') + expect(localeToLabel('ja')).toBe('日本語') + expect(localeToLabel('zh-TW')).toBe('繁體中文') + }) + + it('falls back to the raw code for unknown locales', () => { + expect(localeToLabel('xx')).toBe('xx') + }) + }) + + describe('getTranslatableLocales', () => { + it('lists only locales that should be translated', () => { + const locales = getTranslatableLocales() + expect(locales).toContain('en') + expect(locales).not.toContain('zh') + expect(locales.every((locale) => shouldTranslate(locale))).toBe(true) + }) + }) +}) diff --git a/__tests__/lib/translate.test.ts b/__tests__/lib/translate.test.ts new file mode 100644 index 00000000..273839c5 --- /dev/null +++ b/__tests__/lib/translate.test.ts @@ -0,0 +1,93 @@ +import fs from 'node:fs' +import os from 'node:os' +import path from 'node:path' + +// Point the engine at an isolated temp cache dir before importing it, so the +// module's lazily-memoised cacheDir never touches the real data/translations. 
+const CACHE_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'translate-test-')) +process.env.TRANSLATE_CACHE_DIR = CACHE_DIR + +import { translateText } from '../../lib/translate' + +function mockDeepSeek(content: string): jest.Mock { + return jest.fn().mockResolvedValue({ + ok: true, + json: async () => ({ choices: [{ message: { content } }] }), + }) +} + +describe('translateText', () => { + const originalFetch = global.fetch + const originalKey = process.env.DEEPSEEK_APIKEY + const originalKeyAlt = process.env.DEEPSEEK_API_KEY + + beforeEach(() => { + jest.spyOn(console, 'warn').mockImplementation(() => {}) + }) + + afterEach(() => { + global.fetch = originalFetch + jest.restoreAllMocks() + }) + + afterAll(() => { + fs.rmSync(CACHE_DIR, { recursive: true, force: true }) + restoreEnv('DEEPSEEK_APIKEY', originalKey) + restoreEnv('DEEPSEEK_API_KEY', originalKeyAlt) + }) + + it('returns blank text immediately without calling the API', async () => { + const fetchMock = jest.fn() + global.fetch = fetchMock as unknown as typeof fetch + + const result = await translateText(' ', 'en') + + expect(result.translatedText).toBe(' ') + expect(fetchMock).not.toHaveBeenCalled() + }) + + it('calls DeepSeek and caches the result on a cache miss', async () => { + process.env.DEEPSEEK_APIKEY = 'test-key' + const fetchMock = mockDeepSeek('Hello world') + global.fetch = fetchMock as unknown as typeof fetch + + const result = await translateText('你好世界-miss', 'en') + + expect(result.translatedText).toBe('Hello world') + expect(fetchMock).toHaveBeenCalledTimes(1) + }) + + it('serves a cached translation without calling the API again', async () => { + process.env.DEEPSEEK_APIKEY = 'test-key' + + global.fetch = mockDeepSeek('Cached translation') as unknown as typeof fetch + await translateText('你好世界-hit', 'en') // miss → writes cache + + const secondCall = jest.fn() + global.fetch = secondCall as unknown as typeof fetch + const result = await translateText('你好世界-hit', 'en') // hit → no fetch + 
+ expect(result.translatedText).toBe('Cached translation') + expect(secondCall).not.toHaveBeenCalled() + }) + + it('falls back to the original text when no API key is configured', async () => { + delete process.env.DEEPSEEK_APIKEY + delete process.env.DEEPSEEK_API_KEY + const fetchMock = jest.fn() + global.fetch = fetchMock as unknown as typeof fetch + + const result = await translateText('未翻译-nokey', 'en') + + expect(result.translatedText).toBe('未翻译-nokey') + expect(fetchMock).not.toHaveBeenCalled() + }) +}) + +function restoreEnv(name: string, value: string | undefined): void { + if (value === undefined) { + delete process.env[name] + } else { + process.env[name] = value + } +} diff --git a/app/api/translate/route.ts b/app/api/translate/route.ts new file mode 100644 index 00000000..611e96ca --- /dev/null +++ b/app/api/translate/route.ts @@ -0,0 +1,76 @@ +import { translateMarkdown, translateText, shouldTranslate } from '@/lib/translate' +import { NextRequest, NextResponse } from 'next/server' + +export interface TranslateRequestBody { + text: string + targetLocale: string + /** If true, content is treated as markdown and markdown syntax is preserved. */ + isMarkdown?: boolean +} + +export interface TranslateResponseBody { + translatedText: string + error?: string +} + +/** + * POST /api/translate + * + * Translates the provided text into the target locale. + * Content in Chinese (zh) will be translated; Chinese locales return the original. 
/**
 * Locale codes accepted by the endpoint. A regioned tag such as `en-US` or
 * `zh-CN` is accepted when its primary language subtag is listed here.
 */
const SUPPORTED_LOCALES: ReadonlySet<string> = new Set([
  'en', 'zh', 'zh-TW', 'zh-HK', 'ja', 'ko', 'fr', 'de', 'es',
  'pt', 'ru', 'ar', 'hi', 'it', 'nl', 'tr', 'pl', 'vi', 'th', 'id',
])

/** Upper bound on the input size, to limit abuse and upstream API cost. */
const MAX_TEXT_LENGTH = 50_000

/** True when the exact tag, or its primary language subtag, is supported. */
function isSupportedLocale(locale: string): boolean {
  const [language = ''] = locale.split('-')
  return SUPPORTED_LOCALES.has(locale) || SUPPORTED_LOCALES.has(language)
}

/**
 * POST /api/translate
 *
 * Translates the provided text into the target locale. Chinese target locales
 * are a no-op and receive the original text back.
 *
 * Body:
 *   { text: string, targetLocale: string, isMarkdown?: boolean }
 *
 * Responses:
 *   200 – `{ translatedText }`
 *   400 – body is not valid JSON, a required field is missing or not a string,
 *         or the locale is unsupported
 *   413 – `text` exceeds 50,000 characters
 *   500 – the translation itself failed (details are logged, not returned)
 */
export async function POST(request: NextRequest): Promise<NextResponse<TranslateResponseBody>> {
  // Parse separately from the translation so that a malformed body is reported
  // as a client error (400) instead of falling into the 500 handler below.
  let payload: unknown
  try {
    payload = await request.json()
  } catch {
    return NextResponse.json(
      { translatedText: '', error: 'Request body must be valid JSON' },
      { status: 400 },
    )
  }

  // The payload is untrusted: treat every field as unknown until checked.
  const body: Partial<Record<keyof TranslateRequestBody, unknown>> =
    typeof payload === 'object' && payload !== null
      ? (payload as Partial<Record<keyof TranslateRequestBody, unknown>>)
      : {}
  const { text, targetLocale } = body

  if (typeof text !== 'string' || typeof targetLocale !== 'string' || !text || !targetLocale) {
    return NextResponse.json(
      { translatedText: '', error: 'Missing required fields: text, targetLocale' },
      { status: 400 },
    )
  }

  if (text.length > MAX_TEXT_LENGTH) {
    return NextResponse.json(
      { translatedText: '', error: 'Text too long (max 50,000 characters)' },
      { status: 413 },
    )
  }

  if (!isSupportedLocale(targetLocale)) {
    return NextResponse.json(
      { translatedText: '', error: `Unsupported locale: ${targetLocale}` },
      { status: 400 },
    )
  }

  // No-op for Chinese locales (blog content is primarily Chinese).
  if (!shouldTranslate(targetLocale)) {
    return NextResponse.json({ translatedText: text })
  }

  try {
    const translatedText = body.isMarkdown
      ? await translateMarkdown(text, targetLocale)
      : (await translateText(text, targetLocale)).translatedText

    return NextResponse.json({ translatedText })
  } catch (error) {
    // Upstream errors can embed the provider's raw response body; keep that in
    // the server log and hand the client a generic message only.
    console.error('[translate API] Error:', error)
    return NextResponse.json(
      { translatedText: '', error: 'Internal translation error' },
      { status: 500 },
    )
  }
}
+

+ {translatedTitle} + {isTranslating && translating...} +

+

+ {formatDate(post.date)} +

+
+ + ); +}; export const BlogCard = ({ post }: { post: BlogPost }) => (
@@ -17,20 +43,7 @@ export const BlogCard = ({ post }: { post: BlogPost }) => (
)} - -
-

- {post.title} -

-

- {formatDate(post.date)} -

-
- + ); diff --git a/components/BlogPostContent.tsx b/components/BlogPostContent.tsx index c6642bf3..f97bf569 100644 --- a/components/BlogPostContent.tsx +++ b/components/BlogPostContent.tsx @@ -13,6 +13,9 @@ import remarkMath from 'remark-math' import { tomorrow } from 'react-syntax-highlighter/dist/esm/styles/prism' import Image from 'next/image' import LikeButton from './LikeButton' +import { useTranslation } from '@/hooks/useTranslation' +import { TranslationIndicator } from './TranslationIndicator' +import { useLocale } from 'next-intl' interface BlogPostContentProps { title: string date: string @@ -27,6 +30,22 @@ interface BlogPostContentProps { } export function BlogPostContent({ title, date, content, slug, headerContent, discussionsComponent, location }: BlogPostContentProps) { + const locale = useLocale() + + // Auto-translate title and content + const { + translatedText: translatedContent, + isTranslating: contentTranslating, + toggleOriginal: toggleContentOriginal, + showOriginal: contentShowOriginal, + actuallyTranslated: contentActuallyTranslated, + } = useTranslation(content, true, `blog-content:${slug}`) + + const { + translatedText: translatedTitle, + isTranslating: titleTranslating, + } = useTranslation(title, false, `blog-title:${slug}`) + return (
{headerContent && ( @@ -36,7 +55,10 @@ export function BlogPostContent({ title, date, content, slug, headerContent, dis )}
-

{title}

+

+ {translatedTitle} + {titleTranslating && translating...} +

+ + +
- {content} + {translatedContent}
diff --git a/components/MemoCard.tsx b/components/MemoCard.tsx index b17902ed..ce090f38 100755 --- a/components/MemoCard.tsx +++ b/components/MemoCard.tsx @@ -18,8 +18,10 @@ import rehypeKatex from 'rehype-katex' import remarkGfm from 'remark-gfm' import remarkMath from 'remark-math' import { tomorrow } from 'react-syntax-highlighter/dist/esm/styles/prism' -import { useTranslations } from 'next-intl' +import { useTranslations, useLocale } from 'next-intl' import LikeButton from './LikeButton' +import { useTranslation } from '@/hooks/useTranslation' +import { TranslationIndicator } from './TranslationIndicator' interface MemoCardProps { memo: Memo @@ -43,8 +45,17 @@ function memoLocationLabel(memo: Memo): string | null { export const MemoCard = ({ memo, onDelete, onEdit, isDeleting = false }: MemoCardProps) => { const t = useTranslations('HomePage') + const locale = useLocale() const location = memoLocationLabel(memo) + const { + translatedText: translatedContent, + isTranslating, + toggleOriginal, + showOriginal, + actuallyTranslated, + } = useTranslation(memo.content, true, `memo:${memo.id}`) + return (
- {memo.content} + {translatedContent} + +
{/* Footer — date + location on the left, actions on the right */} diff --git a/components/TranslationIndicator.tsx b/components/TranslationIndicator.tsx new file mode 100644 index 00000000..4a98c02e --- /dev/null +++ b/components/TranslationIndicator.tsx @@ -0,0 +1,60 @@ +'use client' + +import { localeToLabel } from '@/lib/translate.shared' + +interface TranslationIndicatorProps { + /** The user's locale (used to label the target language). */ + locale: string + /** True while a translation request is in flight. */ + isTranslating: boolean + /** True once a translation actually differs from the original. */ + actuallyTranslated: boolean + /** Whether the original (untranslated) text is currently shown. */ + showOriginal: boolean + /** Toggle between the translated and original text. */ + onToggleOriginal: () => void + /** 'full' for article pages (verbose), 'compact' for cards. */ + variant?: 'full' | 'compact' +} + +/** + * Auto-translation status row: a "show original / show translation" toggle, + * an "auto-translated" badge, and an in-flight spinner. Renders nothing when + * there is neither an in-flight request nor an applied translation. + */ +export function TranslationIndicator({ + locale, + isTranslating, + actuallyTranslated, + showOriginal, + onToggleOriginal, + variant = 'full', +}: TranslationIndicatorProps) { + if (!isTranslating && !actuallyTranslated) return null + + const full = variant === 'full' + + return ( +
+ {actuallyTranslated && ( + + )} + {actuallyTranslated && !showOriginal && ( + + + {full ? `Auto-translated to ${localeToLabel(locale)}` : 'Translated'} + + )} + {isTranslating && ( + translating... + )} +
'use client'

import { useState, useEffect, useCallback, useMemo, useRef } from 'react'
import { useLocale } from 'next-intl'
import { shouldTranslate } from '@/lib/translate.shared'

interface UseTranslationResult {
  /** Translated text, or original text if translation is not needed / pending. */
  translatedText: string
  /** True while a translation request is in flight. */
  isTranslating: boolean
  /** Error message if translation failed, or null. */
  error: string | null
  /** Manually retry translation. */
  retry: () => void
  /** Toggle between translated and original text. */
  toggleOriginal: () => void
  /** Whether the original text is being shown. */
  showOriginal: boolean
  /** True if a translation was actually applied (text differs from original). */
  actuallyTranslated: boolean
}

/**
 * Cheap, non-cryptographic 32-bit string hash rendered in base 36.
 * Used only to build sessionStorage keys; collisions are possible but unlikely.
 */
function hashText(text: string): string {
  let hash = 0
  for (let i = 0; i < text.length; i++) {
    hash = ((hash << 5) - hash) + text.charCodeAt(i)
    hash |= 0 // clamp to a signed 32-bit integer
  }
  return Math.abs(hash).toString(36)
}

/** Read a cached translation. Returns null if absent or sessionStorage is unavailable (e.g. SSR). */
function readSessionCache(key: string): string | null {
  try {
    return sessionStorage.getItem(key)
  } catch {
    return null
  }
}

/** Persist a translation. Silently no-ops if sessionStorage is unavailable or over quota. */
function writeSessionCache(key: string, value: string): void {
  try {
    sessionStorage.setItem(key, value)
  } catch {
    /* unavailable or quota exceeded */
  }
}

/**
 * Hook that auto-translates text content into the active locale.
 *
 * - For Chinese (zh*) locales, returns the original text (no-op).
 * - For non-Chinese locales, calls /api/translate and caches in sessionStorage.
 * - Cache entries are keyed by (contentId, content hash, locale), so editing a
 *   piece of content invalidates its cached translation.
 * - Only the most recently started request may update state; a response that
 *   arrives after `text` or `locale` changed is ignored.
 *
 * @param text       The original text to translate.
 * @param isMarkdown Whether to preserve markdown syntax during translation.
 * @param contentId  Optional unique ID that namespaces the cache key.
 * @returns The text to display plus request status and display toggles.
 */
export function useTranslation(
  text: string,
  isMarkdown = true,
  contentId?: string,
): UseTranslationResult {
  const locale = useLocale()
  const [translatedText, setTranslatedText] = useState(text)
  const [isTranslating, setIsTranslating] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [showOriginal, setShowOriginal] = useState(false)
  const mountedRef = useRef(true)
  // Monotonic id of the newest request; anything older is stale.
  const latestRequestRef = useRef(0)

  const needsTranslation = shouldTranslate(locale)
  const contentHash = useMemo(() => hashText(text), [text])
  const cacheKey = contentId
    ? `translate:${contentId}:${contentHash}:${locale}`
    : `translate:${contentHash}:${locale}`

  useEffect(() => {
    mountedRef.current = true
    return () => { mountedRef.current = false }
  }, [])

  const translate = useCallback(async () => {
    const requestId = ++latestRequestRef.current
    const isCurrent = (): boolean =>
      mountedRef.current && latestRequestRef.current === requestId

    setIsTranslating(true)
    setError(null)
    setShowOriginal(false)

    try {
      const response = await fetch('/api/translate', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text, targetLocale: locale, isMarkdown }),
      })

      if (!response.ok) {
        const errData: { error?: string } = await response.json().catch(() => ({}))
        throw new Error(errData.error ?? `Translation failed (${response.status})`)
      }

      const data: { translatedText?: unknown } = await response.json()
      if (typeof data.translatedText !== 'string') {
        throw new Error('Malformed translation response')
      }

      // The result is valid for the key it was requested under, so cache it
      // even when a newer request has since taken over the displayed state.
      writeSessionCache(cacheKey, data.translatedText)
      if (!isCurrent()) return

      setTranslatedText(data.translatedText)
    } catch (err) {
      if (!isCurrent()) return
      console.error('[useTranslation] Error:', err)
      setError(err instanceof Error ? err.message : 'Translation error')
      setTranslatedText(text) // fall back to original
    } finally {
      if (isCurrent()) setIsTranslating(false)
    }
  }, [text, locale, isMarkdown, cacheKey])

  // Resolve the displayed text whenever the inputs change: show the original
  // for Chinese/empty content, serve a cached translation when present, or
  // kick off a fresh translation while showing the original in the meantime.
  useEffect(() => {
    if (!needsTranslation || !text.trim()) {
      latestRequestRef.current += 1 // invalidate any request still in flight
      setTranslatedText(text)
      setError(null)
      setIsTranslating(false)
      setShowOriginal(false)
      return
    }

    const cached = readSessionCache(cacheKey)
    if (cached !== null) {
      latestRequestRef.current += 1 // invalidate any request still in flight
      setTranslatedText(cached)
      setError(null)
      setIsTranslating(false)
      return
    }

    setTranslatedText(text) // show original while the request is in flight
    void translate() // translate() handles its own errors and never rejects
  }, [text, locale, needsTranslation, cacheKey, translate])

  const displayedText = showOriginal ? text : translatedText
  const actuallyTranslated = needsTranslation && translatedText !== text

  return {
    translatedText: displayedText,
    isTranslating,
    error,
    retry: translate,
    toggleOriginal: () => setShowOriginal((prev) => !prev),
    showOriginal,
    actuallyTranslated,
  }
}
/** Region/script subtags (lower-cased) that mark a Chinese tag as Traditional. */
const TRADITIONAL_CHINESE_SUBTAGS: ReadonlySet<string> = new Set(['tw', 'hk', 'mo', 'hant'])

/**
 * Canonicalise a locale code for use as a stable cache key and for Chinese
 * detection.
 *
 * - Every Chinese tag collapses to one of two codes: `zh-TW` when it carries a
 *   Traditional marker (`TW`, `HK`, `MO`, `Hant`), otherwise `zh-CN`
 *   (`zh`, `zh-CN`, `zh-Hans`, `zh-Hans-CN`, …). The mapping is idempotent.
 * - Every other tag is reduced to its lower-cased primary language subtag
 *   (`en-US` → `en`, `pt_BR` → `pt`).
 *
 * @param locale A BCP 47-style tag; `_` is accepted as a separator.
 * @returns The canonical code, or `''` for an empty input.
 */
export function normalizeLocale(locale: string): string {
  const [language = '', ...subtags] = locale.trim().replace(/_/g, '-').split('-')
  const primary = language.toLowerCase()
  if (primary !== 'zh') return primary

  const isTraditional = subtags.some((subtag) =>
    TRADITIONAL_CHINESE_SUBTAGS.has(subtag.toLowerCase()),
  )
  return isTraditional ? 'zh-TW' : 'zh-CN'
}

/** Locales that should NOT trigger translation (reader likely understands Chinese). */
const CHINESE_LOCALES = new Set(['zh', 'zh-CN', 'zh-TW', 'zh-HK'])

/**
 * Whether translation is needed for a given user locale.
 * Returns false for Chinese locales since the blog content is primarily in Chinese.
 */
export function shouldTranslate(targetLocale: string): boolean {
  return !CHINESE_LOCALES.has(normalizeLocale(targetLocale))
}

/** Native-language display names, keyed by locale code. */
const LOCALE_LABELS: ReadonlyMap<string, string> = new Map([
  ['en', 'English'],
  ['ja', '日本語'],
  ['ko', '한국어'],
  ['fr', 'Français'],
  ['de', 'Deutsch'],
  ['es', 'Español'],
  ['pt', 'Português'],
  ['ru', 'Русский'],
  ['ar', 'العربية'],
  ['hi', 'हिन्दी'],
  ['it', 'Italiano'],
  ['nl', 'Nederlands'],
  ['tr', 'Türkçe'],
  ['pl', 'Polski'],
  ['vi', 'Tiếng Việt'],
  ['th', 'ไทย'],
  ['id', 'Bahasa Indonesia'],
  ['zh', '中文'],
  ['zh-CN', '简体中文'],
  ['zh-TW', '繁體中文'],
  ['zh-HK', '繁體中文'],
])

/**
 * Locale → human-readable label.
 *
 * Looks up the exact code first, then its normalised form (so `en-US` yields
 * "English"), and finally falls back to the raw code for unknown locales.
 */
export function localeToLabel(locale: string): string {
  return LOCALE_LABELS.get(locale) ?? LOCALE_LABELS.get(normalizeLocale(locale)) ?? locale
}

/** Every non-Chinese locale the translator supports. */
const TRANSLATABLE_LOCALES: readonly string[] = [
  'en', 'ja', 'ko', 'fr', 'de', 'es', 'pt', 'ru', 'ar',
  'hi', 'it', 'nl', 'tr', 'pl', 'vi', 'th', 'id',
]

/**
 * List the locales we can translate into (all non-Chinese supported locales).
 * Returns a fresh array on every call, so callers may mutate the result.
 */
export function getTranslatableLocales(): string[] {
  return [...TRANSLATABLE_LOCALES]
}
/**
 * Auto-translation utilities for blog.minghe.me
 *
 * Uses DeepSeek Chat API for translation, with file-based caching to minimise
 * API calls and avoid re-translating unchanged content.
 *
 * Environment variables:
 *   DEEPSEEK_APIKEY / DEEPSEEK_API_KEY – API key for DeepSeek Chat. When neither
 *                         is set (or both are empty) translation is an identity no-op.
 *   TRANSLATE_CACHE_DIR – custom cache dir (default: data/translations)
 *
 * NOTE: This module uses Node.js built-in modules (fs, path, crypto) and
 * can only be imported by server-side code (API routes, server components, etc.).
 * Client code should import from './translate.shared' instead.
 */

import fs from 'node:fs'
import path from 'node:path'
import crypto from 'node:crypto'
import { normalizeLocale } from './translate.shared'

export { shouldTranslate, localeToLabel, getTranslatableLocales } from './translate.shared'

const DEEPSEEK_API_URL = 'https://api.deepseek.com/chat/completions'
const DEEPSEEK_MODEL = 'deepseek-chat'

export interface TranslateResult {
  translatedText: string
  detectedSourceLanguage?: string
}

/** SHA-256 hex digest of `text`; used as the content part of the cache file name. */
function hash(text: string): string {
  return crypto.createHash('sha256').update(text, 'utf-8').digest('hex')
}

/** English language names, keyed by normalised locale, as used in the LLM prompt. */
const LANGUAGE_NAMES: Readonly<Record<string, string>> = {
  en: 'English', ja: 'Japanese', ko: 'Korean', fr: 'French', de: 'German',
  es: 'Spanish', pt: 'Portuguese', ru: 'Russian', ar: 'Arabic', hi: 'Hindi',
  it: 'Italian', nl: 'Dutch', tr: 'Turkish', pl: 'Polish', vi: 'Vietnamese',
  th: 'Thai', id: 'Indonesian',
}

/** Map a locale code to the language name for the prompt; unknown locales fall back to English. */
function localeToLanguageName(locale: string): string {
  return LANGUAGE_NAMES[normalizeLocale(locale)] ?? 'English'
}

/** Build the system prompt instructing the model to translate Chinese into `targetLanguage`. */
function buildTranslationPrompt(targetLanguage: string): string {
  return `You are a professional translator. Translate the following text from Chinese to ${targetLanguage}.

Rules:
1. Preserve ALL markdown formatting exactly as-is (headings, lists, tables, bold, italic, etc.)
2. Preserve ALL code blocks, inline code, URLs, and special characters verbatim
3. Preserve all LaTeX math expressions ($$...$$, $...$) exactly
4. Preserve all image markdown ![alt](url) unchanged
5. Only translate the natural language text — do not modify or translate code, URLs, or syntax
6. Keep the same line breaks and paragraph structure
7. Do not add any explanations, notes, or commentary — output ONLY the translated text`
}

/**
 * Resolve the DeepSeek API key from either supported variable name.
 *
 * Uses `||` rather than `??` on purpose: `.env.example` ships `DEEPSEEK_APIKEY=`,
 * which yields an empty string, and an empty string must not shadow a key
 * provided through `DEEPSEEK_API_KEY`.
 */
function deepSeekApiKey(): string | undefined {
  return process.env.DEEPSEEK_APIKEY?.trim() || process.env.DEEPSEEK_API_KEY?.trim() || undefined
}

/**
 * Send one translation request to DeepSeek.
 *
 * @returns The trimmed translation, or the input unchanged when no API key is configured.
 * @throws Error when the API answers with a non-2xx status or an unexpected payload.
 */
async function callDeepSeek(text: string, targetLanguage: string): Promise<TranslateResult> {
  const apiKey = deepSeekApiKey()
  if (!apiKey) {
    console.warn('[translate] No DEEPSEEK_APIKEY / DEEPSEEK_API_KEY set — falling back to identity.')
    return { translatedText: text }
  }

  const res = await fetch(DEEPSEEK_API_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`,
    },
    body: JSON.stringify({
      model: DEEPSEEK_MODEL,
      messages: [
        { role: 'system', content: buildTranslationPrompt(targetLanguage) },
        { role: 'user', content: text },
      ],
      temperature: 0.1,
      max_tokens: 8192,
    }),
  })

  if (!res.ok) {
    const body = await res.text()
    throw new Error(`DeepSeek API error (${res.status}): ${body}`)
  }

  const json = (await res.json()) as { choices?: Array<{ message?: { content?: unknown } }> }
  const content = json.choices?.[0]?.message?.content

  if (typeof content !== 'string' || !content) {
    throw new Error(`Unexpected DeepSeek API response: ${JSON.stringify(json)}`)
  }

  return {
    translatedText: content.trim(),
    detectedSourceLanguage: 'zh',
  }
}

/** Memoised cache directory; resolved on first use so tests can set the env var first. */
let cacheDir: string | null = null

/** Resolve the cache directory (creating it if needed) and memoise the result. */
function ensureCacheDir(): string {
  if (cacheDir) return cacheDir
  // `||` so that an empty TRANSLATE_CACHE_DIR falls back to the default path.
  cacheDir = process.env.TRANSLATE_CACHE_DIR?.trim() || path.join(process.cwd(), 'data', 'translations')
  if (!fs.existsSync(cacheDir)) {
    fs.mkdirSync(cacheDir, { recursive: true })
  }
  return cacheDir
}

/** Path of the cache file for one (content hash, normalised locale) pair. */
function cachePath(contentHash: string, targetLocale: string): string {
  return path.join(ensureCacheDir(), `${contentHash}-${targetLocale}.json`)
}

/** Read a cached translation; returns null when it is absent, unreadable, or malformed. */
function readCache(contentHash: string, targetLocale: string): string | null {
  try {
    const file = cachePath(contentHash, targetLocale)
    if (!fs.existsSync(file)) return null
    const entry: unknown = JSON.parse(fs.readFileSync(file, 'utf-8'))
    if (typeof entry !== 'object' || entry === null) return null
    const { translatedText } = entry as { translatedText?: unknown }
    return typeof translatedText === 'string' ? translatedText : null
  } catch {
    return null
  }
}

/** Persist a translation. Best effort: a failure is logged and otherwise ignored. */
function writeCache(contentHash: string, targetLocale: string, translatedText: string): void {
  try {
    const file = cachePath(contentHash, targetLocale)
    fs.writeFileSync(file, JSON.stringify({ translatedText, cachedAt: new Date().toISOString() }), 'utf-8')
  } catch (err) {
    console.warn('[translate] Failed to write translation cache:', err)
  }
}

/**
 * Translate a block of text.
 *
 * Checks the file-based cache first. If a cache hit exists, returns instantly.
 * Otherwise calls the translation API, stores the result, and returns it.
 * Blank input is returned unchanged without touching the cache or the API, and
 * an identity result (e.g. no API key) is never cached.
 *
 * @throws Error when the translation API call fails.
 */
export async function translateText(text: string, targetLocale: string): Promise<TranslateResult> {
  if (!text.trim()) return { translatedText: text }

  const normalised = normalizeLocale(targetLocale)
  const contentHash = hash(text)
  const cached = readCache(contentHash, normalised)

  if (cached !== null) {
    return { translatedText: cached }
  }

  const result = await callDeepSeek(text, localeToLanguageName(targetLocale))

  if (result.translatedText !== text) {
    writeCache(contentHash, normalised, result.translatedText)
  }

  return result
}

const MAX_SINGLE_REQUEST_CHARS = 8_000

/** Matches a code-fence line: up to 3 spaces of indent, a run of 3+ backticks or tildes, then the rest. */
const FENCE_LINE = /^ {0,3}(`{3,}|~{3,})(.*)$/

/**
 * Split markdown on blank lines without cutting through fenced code blocks.
 *
 * A blank line inside an open fence does not end the block, so each code block
 * stays in one piece together with its fences.
 * Joining the result with '\n\n' reproduces `content` exactly.
 */
function splitMarkdownBlocks(content: string): string[] {
  const blocks: string[] = []
  let pending: string[] = []
  let openFence: string | null = null

  for (const piece of content.split('\n\n')) {
    pending.push(piece)

    for (const line of piece.split('\n')) {
      const match = FENCE_LINE.exec(line.trimEnd())
      if (!match) continue
      const marker = match[1] ?? ''
      const rest = match[2] ?? ''

      if (openFence === null) {
        openFence = marker
      } else if (marker[0] === openFence[0] && marker.length >= openFence.length && rest.trim() === '') {
        // A closing fence uses the same character, is at least as long as the
        // opener, and carries no info string.
        openFence = null
      }
    }

    if (openFence === null) {
      blocks.push(pending.join('\n\n'))
      pending = []
    }
  }

  // An unterminated fence runs to the end of the document.
  if (pending.length > 0) blocks.push(pending.join('\n\n'))
  return blocks
}

/**
 * Translate markdown content while preserving markdown syntax.
 *
 * Sends the full content in one request when short (better context for the LLM).
 * Falls back to block-by-block translation for longer content, where blocks are
 * separated by blank lines and fenced code blocks are never split.
 *
 * @throws Error when a translation API call fails.
 */
export async function translateMarkdown(
  content: string,
  targetLocale: string,
): Promise<string> {
  if (!content.trim()) return content

  if (content.length < MAX_SINGLE_REQUEST_CHARS) {
    const result = await translateText(content, targetLocale)
    return result.translatedText
  }

  const translatedBlocks: string[] = []

  // Blocks are translated one at a time, in document order.
  for (const block of splitMarkdownBlocks(content)) {
    const result = await translateText(block, targetLocale)
    translatedBlocks.push(result.translatedText)
  }

  return translatedBlocks.join('\n\n')
}