-
GPU
+
{{ $t('statusBar.gpu') }}
{
>
{{ models.readyCount }}/{{ models.total }}
- ready
+ {{ $t('statusBar.ready') }}
{{ connMeta.label }}
-
diff --git a/apps/frontend_llmops/src/composables/useModelControl.ts b/apps/frontend_llmops/src/composables/useModelControl.ts
index 3e99bf1..af02adf 100644
--- a/apps/frontend_llmops/src/composables/useModelControl.ts
+++ b/apps/frontend_llmops/src/composables/useModelControl.ts
@@ -1,4 +1,5 @@
import { computed } from 'vue'
+import i18n from '@/i18n'
import { useModelsStore } from '@/stores/models'
import { toast } from '@/lib/toast'
import { ApiError } from '@/lib/api'
@@ -14,6 +15,7 @@ type Action = 'start' | 'stop'
export function useModelControl() {
const models = useModelsStore()
const { ensureUnlocked } = useAuth()
+ const t = i18n.global.t
// Only one LLM may be in the `starting` phase at a time — loading two model
// weights at once OOMs a single GPU. Multiple *ready* LLMs are fine, and
@@ -58,31 +60,43 @@ export function useModelControl() {
+ /**
+  * Start or stop one model instance and report the outcome through a toast.
+  *
+  * Errors are not rethrown: every failure path below ends in a toast.
+  *
+  * @param key - Instance key; the text before the first `::` is shown as the model name in toasts.
+  * @param action - `'start'` or `'stop'`.
+  * @param force - Passed through to `models.start`; the "force start" toast action re-invokes this function with `true`.
+  */
async function runOne(key: string, action: Action, force = false) {
const name = key.split('::')[0]
+ // Blocked starts are rejected here with a warning toast; `models.start` is never called.
if (action === 'start' && isStartBlocked(key)) {
- toast.warning('一次只能啟動一顆模型', {
- description: `「${startingLlmName()}」正在啟動中,請待其完成後再啟動 ${name}。`,
+ toast.warning(t('modelControl.oneAtATime'), {
+ description: t('modelControl.alreadyStarting', {
+ current: startingLlmName(),
+ name,
+ }),
})
return
}
try {
if (action === 'start') {
await models.start(key, force)
- toast.success(`正在啟動 ${name}`, { description: '等待 /health 通過…' })
+ toast.success(t('modelControl.starting', { name }), {
+ description: t('modelControl.startingDesc'),
+ })
} else {
await models.stop(key)
- toast.info(`正在停止 ${name}`, { description: '釋放 GPU 資源…' })
+ toast.info(t('modelControl.stopping', { name }), {
+ description: t('modelControl.stoppingDesc'),
+ })
}
} catch (e) {
// A VRAM pre-flight block (409 mentioning force) gets a one-click override.
if (action === 'start' && e instanceof ApiError && e.status === 409 && /force=true/i.test(e.message)) {
- toast.warning(`${name}:VRAM 不足`, {
+ toast.warning(t('modelControl.vramInsufficient', { name }), {
description: e.message,
duration: 10000,
- action: { label: '強制啟動', onClick: () => void runOne(key, 'start', true) },
+ action: { label: t('modelControl.forceStart'), onClick: () => void runOne(key, 'start', true) },
})
return
}
+ // Every other failure: show "<status>: <message>" for an ApiError, otherwise the stringified error.
const msg = e instanceof ApiError ? `${e.status}: ${e.message}` : String(e)
- toast.error(`${action === 'start' ? '啟動' : '停止'} ${name} 失敗`, { description: msg })
+ toast.error(
+ action === 'start'
+ ? t('modelControl.startFailed', { name })
+ : t('modelControl.stopFailed', { name }),
+ { description: msg },
+ )
}
}
diff --git a/apps/frontend_llmops/src/i18n/index.ts b/apps/frontend_llmops/src/i18n/index.ts
new file mode 100644
index 0000000..0621ace
--- /dev/null
+++ b/apps/frontend_llmops/src/i18n/index.ts
@@ -0,0 +1,24 @@
+import { createI18n } from 'vue-i18n'
+import en from './locales/en'
+import zhTW from './locales/zh-TW'
+
+/** localStorage key under which the user's chosen UI locale is persisted. */
+const STORAGE_KEY = 'llmops-locale'
+
+/** Locale used when nothing usable has been persisted. */
+const DEFAULT_LOCALE = 'en'
+
+/** Locales that have a message catalogue registered in `messages` below. */
+const SUPPORTED_LOCALES: readonly string[] = ['en', 'zh-TW']
+
+/**
+ * Work out which locale to boot with.
+ *
+ * Runs at module load, so it must never throw: reading `localStorage` can fail
+ * when the browser blocks storage, and the persisted value may be stale or
+ * hand-edited. Anything that is not a registered locale falls back to
+ * `DEFAULT_LOCALE`.
+ *
+ * @returns A locale that is guaranteed to be one of `SUPPORTED_LOCALES`.
+ */
+function initialLocale(): string {
+  try {
+    const stored = localStorage.getItem(STORAGE_KEY)
+    return stored !== null && SUPPORTED_LOCALES.includes(stored) ? stored : DEFAULT_LOCALE
+  } catch {
+    // Storage unavailable — start with the default instead of aborting startup.
+    return DEFAULT_LOCALE
+  }
+}
+
+// Single application-wide i18n instance (`legacy: false`, global `$t` injection enabled).
+const i18n = createI18n({
+  legacy: false,
+  globalInjection: true,
+  locale: initialLocale(),
+  fallbackLocale: DEFAULT_LOCALE,
+  messages: { en, 'zh-TW': zhTW },
+})
+
+/**
+ * Switch the active UI locale and remember the choice for the next visit.
+ *
+ * The in-memory switch happens first. Persisting it is best-effort: a storage
+ * failure is logged and does not propagate, because the switch has already
+ * taken effect for this session.
+ *
+ * @param locale - Locale to activate.
+ */
+export function setLocale(locale: 'en' | 'zh-TW') {
+  // NOTE(review): the cast is kept from the original code; confirm whether
+  // `i18n.global.locale.value` type-checks directly in this vue-i18n version.
+  const activeLocale = i18n.global.locale as unknown as { value: string }
+  activeLocale.value = locale
+  try {
+    localStorage.setItem(STORAGE_KEY, locale)
+  } catch (err) {
+    console.warn('[i18n] could not persist locale preference', err)
+  }
+}
+
+/**
+ * Report the locale that is active right now.
+ *
+ * @returns The current value held by the global i18n locale (e.g. `'en'`).
+ */
+export function currentLocale(): string {
+  const { value: active } = i18n.global.locale as unknown as { value: string }
+  return active
+}
+
+export default i18n
diff --git a/apps/frontend_llmops/src/i18n/locales/en.ts b/apps/frontend_llmops/src/i18n/locales/en.ts
new file mode 100644
index 0000000..9637009
--- /dev/null
+++ b/apps/frontend_llmops/src/i18n/locales/en.ts
@@ -0,0 +1,1001 @@
+export default {
+ // ---- Common / shared ----
+ common: {
+ refresh: 'Refresh',
+ delete: 'Delete',
+ cancel: 'Cancel',
+ save: 'Save',
+ create: 'Create',
+ add: 'Add',
+ remove: 'Remove',
+ edit: 'Edit',
+ close: 'Close',
+ done: 'Done',
+ loading: 'Loading…',
+ download: 'Download',
+ retry: 'Retry',
+ start: 'Start',
+ stop: 'Stop',
+ clear: 'Clear',
+ clearAll: 'Clear all',
+ search: 'Search',
+ all: 'All',
+ none: 'None',
+ model: 'Model',
+ models: 'Models',
+ status: 'Status',
+ latency: 'Latency',
+ tokens: 'Tokens',
+ time: 'Time',
+ path: 'Path',
+ cost: 'Cost',
+ key: 'Key',
+ name: 'Name',
+ ready: 'ready',
+ failed: 'failed',
+ stopped: 'stopped',
+ starting: 'starting',
+ stopping: 'stopping',
+ noData: 'No data.',
+ errorsOnly: 'Errors only',
+ allModels: 'All models',
+ preDownload: 'Pre-download',
+ manage: 'Manage →',
+ viewAll: 'All →',
+ more: 'more',
+ groups: 'groups',
+ embedding: 'Embedding',
+ reranking: 'Reranking',
+ completed: 'Completed',
+ running: 'Running',
+ queued: 'Queued',
+ cancelled: 'Cancelled',
+ dataset: 'Dataset',
+ target: 'Target',
+ },
+
+ // ---- Sidebar ----
+ sidebar: {
+ console: 'Console',
+ overview: 'Overview',
+ models: 'Models',
+ traffic: 'Traffic',
+ requests: 'Requests',
+ monitoring: 'Monitoring',
+ playground: 'Playground',
+ benchmark: 'Benchmark',
+ eval: 'Evaluation',
+ library: 'Model Library',
+ loraLibrary: 'LoRA Library',
+ datasets: 'Datasets',
+ keys: 'API Keys',
+ usage: 'Usage Guide',
+ resources: 'Resources',
+ activity: 'Activity',
+ modelCount: 'Models · {ready}/{total} ready',
+ noModels: 'No models configured',
+ },
+
+ // ---- StatusBar ----
+ statusBar: {
+ gpu: 'GPU',
+ ready: 'ready',
+ switchLight: 'Switch to light',
+ switchDark: 'Switch to dark',
+ switchToChinese: 'Switch to Chinese',
+ switchToEnglish: 'Switch to English',
+ gpuUtilTitle: 'Average GPU utilization',
+ updatedAgo: 'Updated {ago}',
+ live: 'Live',
+ polling: 'Polling',
+ connecting: 'Connecting',
+ offline: 'Offline',
+ },
+
+ // ---- Overview ----
+ overview: {
+ readyModels: 'Ready Models',
+ requestCount: 'Requests',
+ latencyP95: 'Latency p95',
+ gpuUtil: 'GPU Utilization',
+ failedCount: '{n} failed | {n} failed',
+ allNormal: 'All normal',
+ errorRate: '{rate} error rate',
+ processedTokens: 'Processed {n} tokens',
+ deviceCount: '{n} devices | {n} devices',
+ models: 'Models',
+ noModelsYet: 'No models configured.',
+ activity: 'Activity',
+ noRecentEvents: 'No recent events.',
+ recentRequests: 'Recent Requests',
+ traffic: 'Traffic →',
+ noRequestRecords: 'No request records.',
+ tableTime: 'Time',
+ tableModel: 'Model',
+ tablePath: 'Path',
+ tableStatus: 'Status',
+ tableLatency: 'Latency',
+ },
+
+ // ---- Models ----
+ models: {
+ searchPlaceholder: 'Search models…',
+ all: 'All',
+ llm: 'LLM',
+ embedding: 'Embedding',
+ groupCount: '{n} groups · {ready} ready · {failed} failed',
+ addModel: 'Add Model',
+ noMatch: 'No matching models found.',
+ clearFilter: 'Try clearing the filter.',
+ noConfig: 'No models configured in config.yaml.',
+ },
+
+ // ---- Traffic ----
+ traffic: {
+ modelUsage: 'Model Usage',
+ tableModel: 'Model',
+ tableRequests: 'Requests',
+ tableErrors: 'Errors',
+ tableAvg: 'Avg',
+ noUsage: 'No usage records.',
+ routerLoadBalancing: 'Router Load Balancing',
+ routerLoadDesc: 'Line width = actual traffic share (from request logs) · ★ = lowest-score instance the router picks next',
+ strategy: 'Strategy',
+ scenarioTitle: 'Scenario guide for each strategy',
+ scenarioFooter: 'This dropdown controls the global default; setting routing_strategy in config.yaml for a specific group overrides it.',
+ noLlmGroups: 'No LLM groups configured.',
+ requestLog: 'Request Log',
+ noRequests: 'No requests.',
+ strategyChanged: 'Routing strategy changed to "{name}"',
+ strategyChangedDesc: 'Effective from the next request. Not persisted — reverts on router restart.',
+ strategyChangeFailed: 'Failed to change routing strategy',
+ strategyInfo: {
+ least_load: 'Requests vary in length; evenly saturate replicas. The safe general-purpose default.',
+ round_robin: 'Homogeneous GPUs, similar request sizes, or when you want predictable even splits / a baseline.',
+ random: 'Many short requests; lowest decision cost with stateless splitting.',
+ least_inflight: 'Requests take similar time and you don\'t want ~1s metric-scrape lag to matter.',
+ p2c: 'Under bursty traffic, avoids the thundering-herd rush to the single "currently least loaded" replica.',
+ session_affinity: 'Multi-turn chat / Playground: same session sticks to one instance for KV cache reuse (needs X-Session-Id or user; falls back to least-load otherwise).',
+ prefix_affinity: 'Fixed system prompts, RAG / few-shot templates, or other high-prefix-overlap requests.',
+ },
+ },
+
+ // ---- Requests ----
+ requests: {
+ searchPlaceholder: 'Search model / path / key…',
+ errorsOnly: 'Errors only',
+ costPerMTok: '$/1M tokens',
+ recent: 'Recent {n}',
+ requestCount: 'Requests',
+ totalTokens: 'Total tokens',
+ errorRate: 'Error rate',
+ estimatedCost: 'Estimated cost',
+ tableTime: 'Time',
+ tableModel: 'Model',
+ tablePath: 'Path',
+ tableStatus: 'Status',
+ tableLatency: 'Latency',
+ tableTokens: 'tokens',
+ tableCost: 'Cost',
+ tableKey: 'Key',
+ noRecords: 'No request records.',
+ loadFailed: 'Failed to load request records',
+ },
+
+ // ---- Monitoring ----
+ monitoring: {
+ overview: 'Overview',
+ vllmCapacity: 'vLLM Capacity',
+ vllmPerf: 'vLLM Performance',
+ vllmQuery: 'vLLM Queries',
+ gpu: 'GPU',
+ host: 'Host',
+ openGrafana: 'Open in Grafana',
+ },
+
+ // ---- Playground ----
+ playground: {
+ chat: 'Chat',
+ embeddingRerank: 'Embedding / Rerank',
+ thinkingProcess: '💭 Thinking',
+ sendPrompt: 'Send a message to test {model} via the router.',
+ inputPlaceholder: 'Type a message… (Enter to send, Shift+Enter for newline)',
+ comparePlaceholder: 'Same question sent to all selected models… (Enter to send)',
+ selectModelsHint: 'Select at least one model on the right to start side-by-side comparison.',
+ waitingPrompt: 'Waiting for prompt…',
+ modelsParallel: '{n} models side by side',
+ params: 'Parameters',
+ compareMode: 'Compare mode (multi-model same prompt)',
+ systemPrompt: 'System prompt',
+ systemPromptPlaceholder: 'Leave empty to omit. e.g. You are a professional assistant.',
+ modelReady: 'Model (only showing ready)',
+ noReadyModels: 'No ready models — please start one on the Models page.',
+ modelMulti: 'Model (multi-select, only showing ready)',
+ noReadyModelsShort: 'No ready models available.',
+ maxTokens: 'Max Tokens',
+ temperature: 'Temperature',
+ streamResponse: 'Stream response',
+ request: 'Request',
+ result: 'Result',
+ modelLabel: 'Model ({mode}, only showing ready)',
+ noModelsForMode: 'No models available for this mode.',
+ embNotStarted: 'Embedding server not started — please start it on the Models page.',
+ queryLabel: 'Query (enter to switch to rerank mode)',
+ queryPlaceholder: 'Leave empty for pure embedding mode',
+ inputLabel: 'Input (one per line)',
+ rerank: 'Rerank',
+ embed: 'Embed',
+ dimVector: '{n}-dim vector',
+ runToSee: 'Run a request to see {type}.',
+ relevanceScores: 'relevance scores',
+ vectorDimensions: 'vector dimensions',
+ firstToken: 'First',
+ chatFailed: 'Chat request failed',
+ embedFailed: 'Embedding request failed',
+ emptyResponse: '(empty response)',
+ stopGeneration: 'Stop generation',
+ },
+
+ // ---- Benchmark ----
+ benchmark: {
+ title: 'Benchmark',
+ description: 'Measure model "speed" (throughput / latency — different from eval accuracy). Supports concurrency sweep, open-loop rate, multi-turn, SLA auto-tune, and embedding / rerank throughput plus single-request speed baseline. Some modes can pre-download datasets from the',
+ datasetsLink: 'Datasets',
+ descriptionEnd: ' library.',
+ configTitle: 'Benchmark Config',
+ modelLabel: 'Model',
+ notStarted: '(not started)',
+ embModelLabel: 'Embedding model',
+ rerankModelLabel: 'Rerank model',
+ embNotStarted: 'Embedding server not started — please start it on the Models page.',
+ noModelsForMode: 'No models available for this mode.',
+ modeSweep: 'Concurrency Sweep',
+ modeOpenloop: 'Rate Open-loop',
+ modeMultiturn: 'Multi-turn',
+ modeSla: 'SLA Tune',
+ modeSpeed: 'Speed Baseline',
+ modeEmbedding: 'Embedding',
+ modeRerank: 'Rerank',
+ targetLabel: 'Target',
+ targetRouter: 'Router (overall + load balancing)',
+ targetInstance: 'Single instance',
+ instanceLabel: 'Instance',
+ datasetLabel: 'Dataset',
+ endpointLabel: 'Endpoint',
+ parallelLabel: 'Concurrency points (comma-separated, sweep)',
+ reqPerPoint: 'Requests per point',
+ warmupRatio: 'Warmup ratio',
+ rateLabel: 'Rate (req/s, comma-separated, sweep)',
+ reqPerRate: 'Requests per rate',
+ maxDuration: 'Max seconds (0=unlimited)',
+ mtDatasetLabel: 'Dataset',
+ mtShareGpt: 'ShareGPT Chinese (real conversations)',
+ mtRandom: 'random (generated)',
+ mtCustom: 'custom (JSONL)',
+ mtDatasetPath: 'Dataset path (server-side JSONL)',
+ mtConcurrent: 'Concurrent conversations (comma sweep)',
+ mtConvPerPoint: 'Conversations per point',
+ mtMinTurns: 'Min turns',
+ mtMaxTurns: 'Max turns',
+ mtMaxDuration: 'Max seconds (0=unlimited, prevents runaway wall-clock)',
+ slaVariable: 'Search variable',
+ slaParallel: 'Concurrency parallel',
+ slaRate: 'Rate (open-loop)',
+ slaConditions: 'SLA conditions (multiple = OR)',
+ slaLower: 'Lower bound',
+ slaUpper: 'Upper bound',
+ slaRunsPerPoint: 'Runs per point',
+ slaFixedParallel: 'Fixed concurrency (rate mode)',
+ speedDesc: 'Single-request standard speed (concurrency 1). Fixed prompt lengths{lengths}, hitting /v1/completions to avoid chat-template interference.',
+ speedLengthsShort: ' 1 / 6k / 14k / 30k',
+ speedLengthsLong: ' 63k / 129k',
+ speedLongCtx: 'Long context (63k / 129k)',
+ embParallel: 'Concurrency points (comma-separated, sweep)',
+ embReqPerPoint: 'Requests per point',
+ embInputLen: 'Input length (token)',
+ embDocsPerReq: 'Documents per request',
+ outputTokens: 'Output tokens',
+ outputTokensFixed: 'Output tokens (fixed)',
+ inputLength: 'Input length',
+ prefixLength: 'Prefix length (prefix cache)',
+ nameLabel: 'Name (optional)',
+ namePlaceholder: 'e.g. q05b-baseline',
+ streamLabel: 'Stream (required for TTFT measurement)',
+ runningBusy: 'Benchmark in progress…',
+ startBenchmark: 'Start Benchmark',
+ history: 'Benchmark History',
+ historyCount: '({n})',
+ compareHint: 'Check ≥2 to compare',
+ noHistory: 'No benchmark records.',
+ comparison: 'Benchmark Comparison',
+ failed: 'Failed',
+ runSummary: 'Run Summary',
+ fullReport: 'Full Report',
+ slaMaxVar: 'max {var}',
+ slaVarRate: 'rate',
+ slaVarParallel: 'concurrency',
+ multiTurnMetrics: 'Multi-turn Metrics',
+ turnsPerConv: 'Turns/conv',
+ kvCacheHit: 'KV cache hit',
+ firstTtft: 'First-turn TTFT',
+ subsequentTtft: 'Subsequent TTFT',
+ chartRps: 'RPS (req/s)',
+ chartGenTps: 'Output throughput Gen/s (tok/s)',
+ chartTtft: 'TTFT p99 (ms)',
+ chartAvgLatency: 'Avg latency (ms)',
+ chartP99Latency: 'Latency p99 (ms)',
+ tableParallel: 'Concurrency',
+ tableRate: 'Rate',
+ tableAvgLatency: 'Avg latency',
+ tableInputOutput: 'Input/Output',
+ tableSuccessTotal: 'Success/Total',
+ latencyDetail: 'Latency (s)',
+ executionLog: 'Execution log',
+ waitingOutput: '(waiting for output…)',
+ stoppingHint: 'Stopping… takes up to ~10s; still stuck, press "Force".',
+ forceKill: 'Force kill',
+ forceConfirm: 'Force kill benchmark #{id}? Will immediately SIGKILL, results will be invalid.',
+ cancelFailed: 'Cancel failed',
+ deleteFailed: 'Delete failed',
+ deleteConfirm: 'Delete benchmark #{id}?',
+ loadResultFailed: 'Failed to load result',
+ startedToast: 'Started benchmark #{id}',
+ startedDesc: 'You can leave this page — it runs in the background.',
+ startFailed: 'Failed to start benchmark',
+ compareMax: 'Compare up to {n} runs',
+ compareLoadFailed: 'Failed to load comparison data',
+ selectModel: 'Please select a model',
+ enterRate: 'Enter at least one rate',
+ enterParallel: 'Enter at least one concurrency value',
+ enterMtConcurrent: 'Enter at least one concurrent conversation count',
+ addSlaCondition: 'Add at least one SLA condition',
+ instanceNotReady: 'Instance not ready — cannot direct-connect',
+ modeLabel: 'Mode',
+ totalTestDuration: 'Total test time',
+ totalGenerated: 'Total generated',
+ avgOutputRate: 'Avg output rate',
+ stoppingBadge: 'Stopping…',
+ cancelStop: 'Cancel (stop)',
+ forceKillHint: 'Force kill (immediate SIGKILL)',
+ addToCompare: 'Add to compare',
+ slaMetrics: {
+ p99_latency: 'p99 latency (s)',
+ avg_latency: 'Avg latency (s)',
+ p99_ttft: 'p99 TTFT (ms)',
+ avg_ttft: 'Avg TTFT (ms)',
+ p99_tpot: 'p99 TPOT (ms)',
+ avg_tpot: 'Avg TPOT (ms)',
+ rps: 'RPS (req/s)',
+ tps: 'Output tok/s',
+ },
+ },
+
+ // ---- Evaluation ----
+ eval: {
+ title: 'Model Evaluation',
+ description: 'Evaluate model "accuracy / quality" (different from benchmark speed). Pre-download datasets on the',
+ descriptionEnd: 'page to avoid waiting. Low scores on small models are normal — focus on comparing across models / parameter changes.',
+ configTitle: 'Evaluation Config',
+ modelLabel: 'Model',
+ selected: 'selected',
+ noModels: 'No models configured',
+ offline: '· offline',
+ multiModelHint: 'Multi-select = each model queues a separate eval, running in parallel / queued against shared concurrency budget.',
+ targetRouter: 'Router',
+ targetInstance: 'Direct instance',
+ instanceOnly: 'Direct instance supports single model only',
+ datasetLabel: 'Datasets',
+ cachedHint: '● = cached; uncached will download at runtime; ⏬ downloading, wait until complete to select. ⚖ = needs judge model for scoring.',
+ longContextWarn: '⚠ Long-context dataset selected — model needs a large max_model_len (tens of thousands of tokens), otherwise truncation or 400 errors.',
+ toolParserWarn: '⚠ Real function-call dataset selected — model needs vLLM tool parser enabled (enable_auto_tool_choice + tool_call_parser), otherwise scores will be 0.',
+ subsetLabel: 'Selected datasets — Subjects',
+ subsetHint: 'Subjects (subset, {n} total, none selected = all)',
+ clearSubsets: 'Clear ({n})',
+ samplesPerDataset: 'Samples per dataset (0=all)',
+ repeats: 'Repeat count',
+ temperature: 'Temperature',
+ maxOutputTokens: 'Max output tokens',
+ batchSize: 'Concurrency (batch size)',
+ batchSizeHint: 'How many concurrent requests to fire at the model; higher fills vLLM batch better (faster) but too high can cause queue timeouts. Does not affect scores.',
+ advanced: 'Advanced settings',
+ fewShot: 'Few-shot examples (0=dataset default)',
+ datasetArgsJson: 'dataset_args JSON (per-dataset override, optional)',
+ datasetArgsHint: 'e.g. few_shot_num, subset_list, shuffle. For math multi-sample use "Repeat count" + aggregation.',
+ judgeTitle: '⚖ Judge Model',
+ judgeHint: 'Selected QA datasets need LLM scoring',
+ judgeInternal: 'Internal model',
+ judgeExternal: 'External API',
+ judgeApiUrl: 'API URL, e.g. https://api.openai.com/v1',
+ judgeModelId: 'Judge model ID, e.g. gpt-4o-mini',
+ judgeApiKey: 'API Key (optional)',
+ judgeQualityHint: 'Stronger judge = more reliable scores; small models as judges are for reference only.',
+ nameLabel: 'Name (optional)',
+ namePlaceholder: 'e.g. Qwen3-0.6B baseline',
+ startEvalMulti: 'Queue {n} evaluations',
+ startEval: 'Start Evaluation',
+ runHistory: 'Evaluation History',
+ runningCount: 'Running {n}',
+ queuedCount: 'Queued {n}',
+ concurrencyBudget: 'Concurrency budget',
+ budgetUsage: 'Usage {used}/{total}',
+ noHistory: 'No evaluation records.',
+ scoreComparison: 'Score Comparison',
+ compareDataset: 'Dataset',
+ compareBestHint: 'Best score per row is highlighted in accent color.',
+ detailReport: 'Full report',
+ queuedHint: 'Queued… waiting for concurrency budget or other benchmarks to finish.',
+ runningHint: 'Evaluation running… (you can leave this page)',
+ failedPrefix: 'Failed: ',
+ scores: 'Scores',
+ metric: 'Metric',
+ samples: 'Samples',
+ score: 'Score',
+ loadingDetail: 'Loading detailed data…',
+ latencyP50P99: 'Latency p50 / p99',
+ outputThroughput: 'Output throughput',
+ avgInputTok: 'Avg input tok',
+ avgOutputTok: 'Avg output tok',
+ subsetBreakdown: '· by subject',
+ whatDoesItTest: 'What does this benchmark test?',
+ collapseSamples: 'Collapse sample browser',
+ expandSamples: 'Sample browser (view per-question correctness / answers)',
+ executionLog: 'Execution log',
+ noLog: '(no log)',
+ loadResultFailed: 'Failed to load result',
+ loadCatalogFailed: 'Failed to load eval datasets',
+ selectModelRequired: 'Please select at least one model',
+ selectDatasetRequired: 'Please select at least one dataset',
+ instanceSingleOnly: 'Direct instance supports single model only',
+ instanceNotReady: 'Instance not ready — cannot direct-connect',
+ judgeModelRequired: 'Please set a judge model',
+ judgeApiRequired: 'External judge requires an API URL',
+ invalidDatasetArgs: 'Advanced dataset_args is not valid JSON',
+ startedToast: 'Queued {n} evaluations',
+ startedDesc: 'You can leave this page — they run in the background.',
+ evalFailed: 'Failed to evaluate {model}',
+ cancelRequested: 'Cancel requested',
+ cancelFailed: 'Cancel failed',
+ deleteConfirm: 'Delete evaluation #{id}?',
+ deleteFailed: 'Delete failed',
+ budgetSet: 'Concurrency budget set to {n}',
+ budgetSetFailed: 'Failed to adjust budget',
+ addCompare: 'Add to comparison',
+ cancelQueue: 'Cancel queue',
+ downloading: '⏬Downloading',
+ defaultFewShot: '· default {n}-shot',
+ datasetArgsPlaceholder: 'e.g. {"arc": {"subset_list": ["ARC-Challenge"]}}',
+ concurrencyBudgetHint: 'Sum of batch_size across all parallel evals; when full, new evals queue. Takes effect immediately, resets on router restart.',
+ },
+
+ // ---- Library (Model Library) ----
+ library: {
+ title: 'Model Library',
+ description: 'Pre-download Hugging Face weights to the shared cache — model startup won\'t have to wait. Gated models need',
+ descriptionEnd: 'set on the backend.',
+ cachedModels: 'Cached models',
+ diskRemaining: 'Disk remaining',
+ diskUsed: 'Disk used',
+ downloadNew: 'Download new model',
+ repoId: 'Hugging Face repo id',
+ repoPlaceholder: 'e.g. Qwen/Qwen3-0.6B',
+ noCachedModels: 'No cached models.',
+ files: 'files',
+ size: 'Size',
+ updated: 'Updated',
+ deleteCache: 'Delete cache',
+ deleteConfirm: 'Delete cached {repo}? This will free disk space.',
+ deleteFailed: 'Delete failed',
+ deleted: 'Deleted {repo}',
+ downloadStarted: 'Started downloading {repo}',
+ downloadStartedDesc: 'You can leave this page — download continues in the background.',
+ downloadFailed: 'Failed to start download',
+ loadCacheFailed: 'Failed to load cache',
+ occupiedSpace: 'Disk usage',
+ downloading: 'Downloading',
+ downloadComplete: 'Done',
+ downloadFailedBadge: 'Failed',
+ },
+
+ // ---- LoRA Library ----
+ loraLibrary: {
+ title: 'LoRA Library',
+ description: 'Manage local LoRA adapters. Download from Hugging Face, or place adapter folders directly into',
+ descriptionEnd: 'and they will appear here. When adding / editing models, pick from the "LoRA Adapters" section to mount.',
+ adapters: 'Adapters',
+ diskUsed: 'Disk usage',
+ diskRemaining: 'Disk remaining',
+ downloadAdapter: 'Download adapter',
+ repoId: 'Hugging Face repo id',
+ repoPlaceholder: 'e.g. jeeejeee/llama32-3b-text2sql-spider',
+ localName: 'Local name (optional)',
+ localNamePlaceholder: 'Defaults to repo tail',
+ localAdapters: 'Local adapters',
+ unknownBase: 'Unknown base',
+ deleteConfirm: 'Delete LoRA "{name}"? This will remove the adapter files from disk.',
+ deleteFailed: 'Delete failed',
+ deleted: 'Deleted {name}',
+ downloadStarted: 'Started downloading {repo}',
+ downloadStartedDesc: 'You can leave this page — download continues in the background.',
+ downloadFailed: 'Failed to start download',
+ loadFailed: 'Failed to load LoRA library',
+ noAdapters: 'No adapters in the library. Download from above, or place folders into',
+ },
+
+ // ---- Datasets ----
+ datasets: {
+ title: 'Datasets',
+ description: 'Pre-download datasets to the shared ModelScope cache — benchmarks / evaluations won\'t wait for the first download.',
+ descriptionRandom: 'random / speed baseline are generated on-the-fly; no download needed.',
+ cachedTotal: 'Cached / Total',
+ diskUsed: 'Disk usage',
+ diskRemaining: 'Disk remaining',
+ diskTotal: 'total {size}',
+ perfDatasets: 'Benchmark datasets',
+ perfHint: 'After download, selectable on the Benchmark page',
+ evalDatasets: 'Evaluation datasets',
+ evalHint: 'After download, selectable on the Evaluation page',
+ downloadFailed: 'Download "{label}" failed: ',
+ loadFailed: 'Failed to load datasets',
+ downloadStarted: 'Download started',
+ downloadStartedDesc: 'You can leave this page — download continues in the background.',
+ downloadStartFailed: 'Failed to start download',
+ deleteConfirm: 'Delete cached "{label}"? This will free disk space.',
+ deleted: 'Deleted {label}',
+ deleteFailed: 'Delete failed',
+ },
+
+ // ---- Keys ----
+ keys: {
+ title: 'API Keys',
+ description: 'Used to send inference requests to the router (Authorization: Bearer …). Keys are shown only once at creation.',
+ authDisabled: 'Backend has no LLMOPS_ADMIN_TOKEN set — authentication is currently disabled. Keys can be created but the router does not enforce them yet (set LLMOPS_REQUIRE_API_KEY=true).',
+ locked: 'Admin token required to manage keys.',
+ unlock: 'Unlock',
+ createNew: 'Create new key',
+ nameLabel: 'Name (for usage attribution)',
+ namePlaceholder: 'e.g. team-rag, ci-bot',
+ rateLimit: 'Rate limit (req/min)',
+ ratePlaceholder: 'Unlimited',
+ issuedKeys: 'Issued keys',
+ revoked: 'Revoked',
+ perMin: '/min',
+ requestCount: 'requests',
+ lastUsed: 'Last used: ',
+ noKeys: 'No keys yet.',
+ revokeTitle: 'Revoke key',
+ keyCreated: 'Key created',
+ copyImmediate: 'Copy this key now — it will not be shown again after closing.',
+ copySuccess: 'Copied to clipboard',
+ copyFailed: 'Copy failed — please select manually',
+ createFailed: 'Failed to create key',
+ revokeFailed: 'Revoke failed',
+ revokeSuccess: 'Revoked {name}',
+ loadFailed: 'Failed to load keys',
+ },
+
+ // ---- Usage (Guide) ----
+ usage: {
+ quickStart: 'Quick Start',
+ quickStartDesc: 'Router is an OpenAI-compatible unified endpoint. Set the model field in requests to the "group name" — the Router auto-selects the lowest-load instance.',
+ step1: 'Confirm Router is running: ',
+ step2: 'Start the desired model to',
+ step2End: 'on the Models page',
+ step3: 'Call with any language below; API key can be anything (Router does not verify)',
+ chatCompletions: 'Chat Completions',
+ modelLabel: 'Model',
+ streamLabel: 'Stream',
+ loraDropdownHint: 'LoRA items in the dropdown: just change the model field to its served name — the rest of the request stays the same.',
+ loraTitle: 'LoRA Adapters',
+ loraDesc: 'Calling LoRA is the same as a regular model — just change model to the adapter\'s served name (e.g.',
+ loraDescEnd: '), Router routes to the corresponding base model\'s instances.',
+ listModelsLabel: 'List available models / LoRA · cURL',
+ filterLoraLabel: 'Filter LoRA only · Python',
+ loraNote1: '· Prerequisite: base model must be started with enable_lora, and adapter must be mounted (static in config, or hot-loaded from the model detail drawer).',
+ loraNote2: '· Requesting an unmounted name returns 404 — Model not found.',
+ loraNote3: '· Base vs LoRA A/B: same request, just change model; Playground compare mode for side-by-side.',
+ embTitle: 'Embeddings & Rerank',
+ embDesc: 'Same /v1/embeddings endpoint: add a query field to switch to reranking.',
+ embCurlLabel: 'Embedding · cURL',
+ embPyLabel: 'Embedding · Python',
+ rerankCurlLabel: 'Rerank · cURL',
+ samplePrompt: 'Write a one-sentence summary of the moon landing.',
+ sampleInputA: 'The quick brown fox',
+ sampleInputB: 'A lazy dog sleeps',
+ sampleQuery: 'Which passage is more relevant to the question?',
+ sampleDocA: 'Passage A',
+ sampleDocB: 'Passage B',
+ routerNoAuthComment: 'Router does not validate the API key',
+ streamComment: 'stream raw SSE `data:` frames',
+ modelsComment: 'Items with a `parent` field are LoRA adapters; the parent is the base model.',
+ loraListComment: 'Pass any listed item in the chat request `model` field',
+ vectorLengthComment: 'vector length',
+ rerankComment: 'Including the query field switches the endpoint into rerank mode and returns a relevance score for each candidate.',
+ },
+
+ // ---- Resources ----
+ resources: {
+ cpu: 'CPU',
+ memory: 'Memory',
+ memorySummary: '{used} used / {total} total · {available} available',
+ gpuProcesses: 'GPU Processes',
+ gpuProcessesDesc: 'Processes using GPU memory (updates ~every 5s)',
+ tablePid: 'PID',
+ tableUser: 'User',
+ tableName: 'Name',
+ tableCommand: 'Command',
+ tableGpuMem: 'GPU Memory',
+ noGpuProcesses: 'No GPU processes.',
+ },
+
+ // ---- Activity ----
+ activity: {
+ eventCount: '{n} events',
+ noEvents: 'No event records.',
+ },
+
+ // ---- Topology ----
+ topology: {
+ title: 'System Topology',
+ clickHint: '· Click a node for details',
+ legendData: 'Data',
+ legendPlacement: 'Placement',
+ legendControl: 'Control',
+ legendLora: 'LoRA',
+ client: 'Client',
+ clientRequests: '{n} requests',
+ router: 'Router',
+ routerError: '{rate} error',
+ backend: 'Backend',
+ backendReady: '{ready}/{total} ready',
+ groupRunWait: 'Run {running} · Wait {waiting}',
+ embeddingNode: 'Embedding',
+ gpuConfigured: 'Configured · not detected',
+ kvSharedLabel: 'Shared KV cache',
+ },
+
+ // ---- AddModelDialog ----
+ addModel: {
+ createTitle: 'Add Model',
+ editTitle: 'Edit Model',
+ pasteCommand: 'Paste vLLM launch command',
+ parseCommand: 'Parse command',
+ parseFailed: 'Failed to parse command',
+ groupLabel: 'Group',
+ groupExists: 'Group already exists — will add as a new replica.',
+ instanceLabel: 'Instance ID',
+ keyExists: '{key} already exists.',
+ hostLabel: 'Host',
+ portLabel: 'Port',
+ portInUse: 'Port {port} is already used by another instance.',
+ gpuLabel: 'GPU (cuda_device)',
+ gpuAuto: 'None / Auto',
+ modelTagLabel: 'Model tag',
+ routingLabel: 'Routing strategy (load balancing)',
+ routingDefault: 'Follow global default',
+ routingHint: 'Traffic distribution policy for this group; empty inherits the global setting (switchable on the Traffic page). Only effective with multiple replicas.',
+ kvShareLabel: 'Shared KV Cache (cross-instance)',
+ kvShareDesc: 'Replicas within the group share a KV store (/kv_cache) to reuse computed KV across instances. Saves re-computation for matching prefixes. Best with multiple replicas + high prefix overlap (fixed system prompts / RAG / multi-turn chat); off = each instance has independent KV.',
+ groupSharedWarn: 'vLLM parameters are group-shared — this change applies to all {n} replicas in group {group}.',
+ weightsCached: 'Weights cached',
+ weightsDownloading: 'Downloading weights…',
+ weightsDownloadFailed: 'Download failed: ',
+ weightsNotCached: 'Weights not cached — first start will download live (slower).',
+ accelTitle: '⚡ Acceleration settings (vLLM inference params)',
+ accelTemplates: 'Scenario templates:',
+ accelLatency: 'Low latency (chat / agent)',
+ accelThroughput: 'High throughput (concurrent)',
+ accelClear: 'Clear (reset to defaults)',
+ accelHint: 'Empty = vLLM default. Changes require model restart. Combinations have the most impact.',
+ advancedTitle: 'Advanced (speculative decoding / prefix hash / chunked prefill / offload)',
+ ngramSpec: 'N-gram speculative decoding (reduces single-request latency at low QPS)',
+ offloadHint: 'Offloads weights to CPU RAM for "it runs" (slower), not acceleration.',
+ partialPrefillHint: 'With mixed short + long prompts: set partial >1 and long small so short requests can cut in line.',
+ vllmParams: 'vLLM parameters (model_config)',
+ addParam: 'Add',
+ noExtraParams: 'No extra parameters.',
+ flagPlaceholder: 'Flag (snake_case)',
+ valuePlaceholder: 'Value',
+ toolCallingTitle: '🛠 Tool calling parameter reference',
+ toolCallingDesc: 'To enable tool_choice="auto", add enable_auto_tool_choice=true + tool_call_parser=
; reasoning models also need reasoning_parser. Parser must match the model\'s output format.',
+ toolCallingPresetHint: 'Click to apply recommended parameters:',
+ toolCallingFooter: 'No matching parser = don\'t add it.',
+ loraTitle: 'LoRA Adapters',
+ addLora: 'Add',
+ loraServedName: 'served name (e.g. sql-lora)',
+ loraPickAdapter: '— Pick adapter / type path —',
+ loraTyped: '(typed)',
+ noLora: 'No LoRA. Adding a row auto-adds enable_lora=true; served name is the name to put in the model field for inference. Pick adapters from the LoRA Library, or type a path.',
+ loraAutoHint: 'Picking from the library auto-fills base and sets max_lora_rank to the aligned rank. Base model must support LoRA (vLLM SupportsLoRA).',
+ loraBaseMismatch: 'This adapter\'s base is {adapterBase}, which doesn\'t match the model {modelTag}',
+ newGroup: 'New group',
+ saveChanges: 'Save Changes',
+ addModelBtn: 'Add Model',
+ createSuccess: 'Added {key}',
+ createSuccessDesc: 'Currently stopped — press "Start" to activate.',
+ editSuccess: 'Updated {key}',
+ editSuccessDesc: 'Changes take effect on next startup.',
+ createFailed: 'Failed to add model',
+ editFailed: 'Failed to update model',
+ routerReloadFailed: 'Router not refreshed',
+ routerReloadCreateDesc: 'Model added but router unreachable — model won\'t be routable until reloaded.',
+ routerReloadEditDesc: 'Changes saved but router unreachable.',
+ downloadStarted: 'Started downloading {repo}',
+ downloadStartedDesc: 'You can close this dialog — download continues in the background.',
+ downloadFailed: 'Failed to start download',
+ quantHint: 'Pre-quantized models (AWQ / GPTQ): just change model_tag to the quantized version — vLLM auto-detects; no need to set this field.',
+ defaultLabel: 'Default',
+ autoLabel: 'Auto',
+ notSet: 'Not set',
+ forceOn: 'Force on',
+ off: 'Off',
+ on: 'On',
+ onExperimental: 'On (experimental)',
+ fastestStartup: 'Fastest startup',
+ mostAggressive: 'Most aggressive',
+ gpuMemHint: 'Keep current; empty = vLLM default',
+ kvCacheDefault: 'Default (auto, no quantization)',
+ prefixCacheDefault: 'Default (on)',
+ chunkedPrefillDefault: 'Default (on)',
+ asyncSchedDefault: 'Default (off)',
+ quantDefault: 'Default (no quantization, or auto-detect quantized model)',
+ quantBnb: 'bitsandbytes (4-bit NF4, online quantization for any model)',
+ quantFp8Tensor: 'fp8_per_tensor (8-bit; Ampere falls back to W8A16)',
+ quantFp8Block: 'fp8_per_block (8-bit; Ampere falls back to W8A16)',
+ quantInt8: 'int8_per_channel_weight_only (8-bit INT8)',
+ hashDefault: 'Default (sha256)',
+ hashSha256: 'sha256 (most stable)',
+ hashXxhash: 'xxhash (faster, not cryptographically secure)',
+ qwen3Thinking: 'Qwen3 (with thinking)',
+ toolCallingDocRef: 'See full reference in docs/vllm_auto_tool_reference.md. No matching parser = don\'t add it.', // NOTE(review): the zh-TW locale points at docs/vllm_auto_tool_整理.md — confirm which file name actually exists
+ },
+
+ // ---- ModelDetailDrawer ----
+ modelDetail: {
+ overview: 'Overview',
+ events: 'Events',
+ logs: 'Logs',
+ endpoint: 'Endpoint',
+ gpu: 'GPU',
+ pid: 'Process ID',
+ managedLabel: 'Managed',
+ managedYes: 'Yes (controllable)',
+ managedExternal: 'External',
+ uptime: 'Uptime',
+ autoRestarts: 'Auto-restarts',
+ startupSnapshot: 'Startup Capacity Snapshot',
+ kvCacheCapacity: 'KV cache capacity',
+ maxConcurrency: 'Max concurrency',
+ concurrencyReqTok: '{n} tok/req',
+ concurrencyHint: '≈ Can serve {n} concurrent requests',
+ memWeights: 'Weights',
+ memCudaGraph: 'CUDA graph',
+ memKvCache: 'KV cache',
+ startupWeightsLoad: 'Weights load',
+ startupModelLoad: 'Model load',
+ startupCompile: 'torch.compile',
+ startupWarmup: 'warmup',
+ gpuMemUtilTitle: '⚙ About gpu_memory_utilization',
+ gpuMemUtilDesc: 'Newer vLLM includes CUDA graph memory in this budget. You set {current}; after deducting, the effective KV cache space equals {effective} in older versions. To increase KV cache (higher concurrency / longer context), edit to {suggested} after stopping — but VRAM headroom decreases and OOM risk increases (especially on small GPUs).',
+ servedModels: 'Served models',
+ routingPolicy: 'Routing policy (load balancing)',
+ vllmParams: 'vLLM parameters (model_config)',
+ loraAdapters: 'LoRA Adapters',
+ hotLoadEnabled: 'Hot-load enabled',
+ hotLoad: 'Load',
+ hotUnload: 'Hot-unload',
+ hotLoadPick: '— Pick adapter from LoRA Library to hot-load —',
+ hotLoadSuccess: 'Hot-loaded {name}',
+ hotLoadSuccessDesc: 'Applied to all ready instances and routing updated.',
+ hotLoadFailed: 'Hot-load failed',
+ hotUnloadConfirm: 'Unload LoRA "{name}"? Will remove from all ready instances and stop routing.',
+ hotUnloadSuccess: 'Unloaded {name}',
+ hotUnloadFailed: 'Unload failed',
+ hotLoadHint: 'Hot-load applies to all ready instances and updates routing; written to overlay (persists after restart).',
+ coldHint: 'Put the served name in the model field for inference (Playground / Eval / Benchmark all support it).',
+ hotLoadEnableHint: 'To hot-load LoRA without restart: edit this model, enable enable_lora + allow_runtime_lora, then restart.',
+ liveMetrics: 'Live load (router /metrics)',
+ metricsRunning: 'Running',
+ metricsWaiting: 'Waiting',
+ metricsKvCache: 'KV Cache',
+ metricsGenTokens: 'Gen tokens',
+ noMetrics: 'No live metrics (router unreachable or model idle).',
+ usageSection: 'Usage',
+ requestCount: 'Requests',
+ lastError: 'Last error',
+ noEventRecords: 'No event records.',
+ filterLogs: 'Filter log lines…',
+ downloadLogs: 'Download full logs',
+ noLogContent: 'No log content.',
+ noFilterMatch: 'No log lines match the filter.',
+ startLabel: 'Start',
+ startLocked: 'Another model is starting ({name}), please wait',
+ stopLabel: 'Stop',
+ terminateLabel: 'Terminate',
+ abortLabel: 'Abort startup',
+ editParams: 'Edit parameters (must stop first; vLLM params are group-shared)',
+ removeModel: 'Remove model (dynamic models only)',
+ removeSuccess: 'Removed {key}',
+ removeFailed: 'Failed to remove model',
+ externalModel: 'External model — not managed by this backend',
+ editEmbedding: 'Edit parameters (must stop embedding server first)',
+ },
+
+ // ---- ModelGroupCard ----
+ modelGroup: {
+ readyCount: '{ready}/{total} ready',
+ embedding: 'Embedding',
+ reranking: 'Reranking',
+ crashRestart: 'Crashed and auto-restarted {n} times',
+ liveLoad: 'Live load (router /metrics)',
+ runningDesc: ' Running — currently generating requests',
+ waitingDesc: ' Waiting — queued requests for this instance',
+ kvCacheUsed: 'KV Cache {pct} used',
+ externalNotManaged: 'External model — not managed by this backend',
+ terminateHint: 'Terminate residual process',
+ abortStartup: 'Abort startup',
+ stopHint: 'Stop',
+ startAll: 'Start All',
+ stopAll: 'Stop All',
+ addInstance: 'Add Instance',
+ showMore: 'Show {n} more',
+ collapse: 'Collapse',
+ servedModels: 'Served models',
+ },
+
+ // ---- Routing strategies ----
+ routingStrategies: {
+ least_load: 'Least Load',
+ round_robin: 'Round Robin',
+ random: 'Random',
+ least_inflight: 'Least In-flight',
+ p2c: 'Power of 2 Choices',
+ session_affinity: 'Session Affinity',
+ prefix_affinity: 'Prefix Affinity',
+ },
+
+ // ---- Router fan diagram ----
+ routerFan: {
+ nextPick: 'Next pick',
+ kvShared: 'Shared KV',
+ kvIndependent: 'Independent KV',
+ kvSharedTooltip: 'Replicas in this group share KV cache (/kv_cache)',
+ kvIndependentTooltip: 'Each replica has independent KV cache',
+ kvStoreLabel: 'Shared KV Cache · /kv_cache',
+ instancesCount: '{n} instances',
+ idleHint: 'idle — no instance running',
+ readyCount: '{ready}/{total} ready',
+ waiting: 'Waiting',
+ },
+
+ systemTopology: { // ---- System topology diagram ---- NOTE(review): the visible zh-TW section is keyed `topology` with different child keys — confirm both locales define the same namespaces
+ title: 'System Topology',
+ clickHint: '· Click a node to drill in',
+ dataPlane: 'Data',
+ placementPlane: 'Placement',
+ controlPlane: 'Control',
+ client: 'Client',
+ requests: 'requests',
+ router: 'Router',
+ errors: 'errors',
+ backend: 'Backend',
+ waiting: 'Waiting',
+ configuredUndetected: 'Configured · not detected',
+ },
+
+ modelControl: { // ---- useModelControl composable (start/stop toasts) ----
+ oneAtATime: 'Only one model can start at a time',
+ alreadyStarting: '"{current}" is already starting, please wait before starting {name}.', // {current} = model currently starting, {name} = model the user tried to start
+ starting: 'Starting {name}',
+ startingDesc: 'Waiting for /health to pass…',
+ stopping: 'Stopping {name}',
+ stoppingDesc: 'Releasing GPU resources…',
+ vramInsufficient: '{name}: insufficient VRAM',
+ forceStart: 'Force start',
+ startFailed: 'Failed to start {name}',
+ stopFailed: 'Failed to stop {name}',
+ title: 'Admin verification', // NOTE(review): title…confirm read like admin-unlock dialog strings — confirm they are meant to live under modelControl
+ description: 'This operation requires an admin token (backend LLMOPS_ADMIN_TOKEN).',
+ tokenPlaceholder: 'Admin token',
+ tokenInvalid: 'Token invalid.',
+ confirm: 'Confirm',
+ },
+
+ // ---- GpuGauge ----
+ gpuGauge: {
+ util: 'Utilization',
+ vram: 'VRAM',
+ power: 'Power',
+ temp: 'Temp',
+ memory: 'Memory',
+ },
+
+ // ---- CodeBlock ----
+ codeBlock: {
+ copy: 'Copy',
+ copied: 'Copied!',
+ copyFailed: 'Copy failed',
+ },
+
+ // ---- AddInstanceDialog ----
+ addInstance: {
+ title: 'Add Instance',
+ sharedSettingsTitle: 'Inherited shared settings from this group',
+ sharedSettingsHint: 'All instances in this group share vLLM parameters; only set the location for this instance here.',
+ instanceIdLabel: 'Instance ID',
+ instanceIdPlaceholder: 'e.g. qwen3-5',
+ idConflict: 'This ID already exists in the group.',
+ portConflict: 'This port is already in use.',
+ cudaDeviceLabel: 'CUDA device',
+ createSuccess: 'Added instance {key}',
+ createSuccessDesc: 'Currently stopped — press "Start" to activate.',
+ createFailed: 'Failed to add instance',
+ },
+
+ // ---- DatasetCard ----
+ datasetCard: {
+ cached: 'Cached',
+ notCached: 'Not cached',
+ downloading: 'Downloading',
+ downloadFailed: 'Failed',
+ preview: 'Preview',
+ deleteCache: 'Delete cache',
+ warmingHint: 'Building cache, preview will be instant once complete',
+ warming: 'Warming…',
+ },
+
+ // ---- DatasetPreviewDialog ----
+ datasetPreview: {
+ title: 'Preview · {key}',
+ loading: 'Loading data… (first preview needs to build cache, may take a moment)',
+ introTab: 'Introduction',
+ samplesTab: 'Sample data ({n})',
+ subjectCount: '{n} subjects',
+ noDescription: 'This dataset has no description.',
+ showingFirst: 'Showing first {n} rows (dataset is larger)',
+ totalRows: 'Total {n} rows',
+ answer: 'Answer: ',
+ },
+
+ // ---- EmbeddingModelDialog ----
+ embeddingModel: {
+ editTitle: 'Edit {type} model',
+ typeEmbedding: 'Embedding',
+ typeReranking: 'Reranking',
+ paramHint: 'Edit parameters after embedding server is stopped; changes take effect on next start.',
+ paramsLabel: 'Parameters',
+ paramKeyPlaceholder: 'Param name (snake_case)',
+ paramValuePlaceholder: 'Value (true/false/number/string)',
+ noParams: 'No parameters.',
+ updateSuccess: 'Updated {name}',
+ updateSuccessDesc: 'Changes take effect on next embedding server start.',
+ updateFailed: 'Update failed',
+ },
+
+ // ---- EvalSampleBrowser ----
+ evalSamples: {
+ dataset: 'Dataset',
+ loadFailed: 'Failed to load samples',
+ loadDetailFailed: 'Failed to load sample detail',
+ filterAll: 'All',
+ filterCorrect: 'Correct',
+ filterWrong: 'Wrong',
+ pageInfo: 'Page {page} / {total} · {count} samples',
+ colResult: 'Result',
+ colModelAnswer: 'Model answer (excerpt)',
+ colModelOutput: 'Model output (excerpt)',
+ colStandardAnswer: 'Standard answer',
+ correct: 'Correct',
+ wrong: 'Wrong',
+ ruleScoreHint: 'This is a rule-scored evaluation (checks instruction compliance), no single correct answer; see individual metrics above.',
+ standardAnswer: 'Standard answer',
+ modelAnswer: 'Model answer',
+ promptLabel: 'Prompt / Question',
+ noMatchingSamples: 'No matching samples.',
+ },
+
+ // ---- PerfSweepChart / PerfCompareChart ----
+ perfChart: {
+ noData: 'No data',
+ concurrency: 'Concurrency',
+ },
+}
diff --git a/apps/frontend_llmops/src/i18n/locales/zh-TW.ts b/apps/frontend_llmops/src/i18n/locales/zh-TW.ts
new file mode 100644
index 0000000..60239f2
--- /dev/null
+++ b/apps/frontend_llmops/src/i18n/locales/zh-TW.ts
@@ -0,0 +1,969 @@
+export default {
+ common: {
+ refresh: '重新整理',
+ delete: '刪除',
+ cancel: '取消',
+ save: '儲存',
+ create: '建立',
+ add: '新增',
+ remove: '移除',
+ edit: '編輯',
+ close: '關閉',
+ done: '完成',
+ loading: '載入中…',
+ download: '下載',
+ retry: '重試',
+ start: '啟動',
+ stop: '停止',
+ clear: '清除',
+ clearAll: '清除全部',
+ search: '搜尋',
+ all: '全部',
+ none: '無',
+ model: '模型',
+ models: '模型',
+ status: '狀態',
+ latency: '延遲',
+ tokens: 'Token 數',
+ time: '時間',
+ path: '路徑',
+ cost: '成本',
+ key: '金鑰',
+ name: '名稱',
+ ready: '就緒',
+ failed: '失敗',
+ stopped: '已停止',
+ starting: '啟動中',
+ stopping: '停止中',
+ noData: '無資料。',
+ errorsOnly: '僅錯誤',
+ allModels: '全部模型',
+ preDownload: '先下載',
+ manage: '管理 →',
+ viewAll: '全部 →',
+ more: '更多',
+ groups: '個群組',
+ embedding: '嵌入',
+ reranking: '重排序',
+ completed: '完成',
+ running: '執行中',
+ queued: '排隊中',
+ cancelled: '已取消',
+ dataset: '資料集',
+ target: '目標',
+ },
+
+ sidebar: {
+ console: '控制台',
+ overview: '總覽',
+ models: '模型',
+ traffic: '流量',
+ requests: '請求',
+ monitoring: '監控',
+ playground: '測試台',
+ benchmark: '壓測',
+ eval: '評測',
+ library: '模型庫',
+ loraLibrary: 'LoRA 庫',
+ datasets: '資料集庫',
+ keys: 'API 金鑰',
+ usage: '使用指南',
+ resources: '資源',
+ activity: '活動',
+ modelCount: '模型 · {ready}/{total} 就緒',
+ noModels: '尚未設定模型',
+ },
+
+ statusBar: {
+ gpu: 'GPU',
+ ready: '就緒',
+ switchLight: '切換至淺色',
+ switchDark: '切換至深色',
+ switchToChinese: '切換至中文',
+ switchToEnglish: '切換至英文',
+ gpuUtilTitle: 'GPU 平均使用率',
+ updatedAgo: '更新於 {ago}',
+ live: '即時',
+ polling: '輪詢',
+ connecting: '連線中',
+ offline: '離線',
+ },
+
+ overview: {
+ readyModels: '就緒模型',
+ requestCount: '請求次數',
+ latencyP95: '延遲 p95',
+ gpuUtil: 'GPU 使用率',
+ failedCount: '{n} 個失敗',
+ allNormal: '全部正常',
+ errorRate: '{rate} 錯誤率',
+ processedTokens: '已處理 {n} tokens',
+ deviceCount: '{n} 個裝置',
+ models: '模型',
+ noModelsYet: '尚未設定模型。',
+ activity: '活動',
+ noRecentEvents: '無最近事件。',
+ recentRequests: '最近請求',
+ traffic: '流量 →',
+ noRequestRecords: '尚無請求記錄。',
+ tableTime: '時間',
+ tableModel: '模型',
+ tablePath: '路徑',
+ tableStatus: '狀態',
+ tableLatency: '延遲',
+ },
+
+ models: {
+ searchPlaceholder: '搜尋模型…',
+ all: '全部',
+ llm: 'LLM',
+ embedding: '嵌入',
+ groupCount: '{n} 個群組 · {ready} 就緒 · {failed} 失敗',
+ addModel: '新增模型',
+ noMatch: '找不到符合的模型。',
+ clearFilter: '試著清除篩選條件。',
+ noConfig: 'config.yaml 中尚未設定模型。',
+ },
+
+ traffic: {
+ modelUsage: '各模型用量',
+ tableModel: '模型',
+ tableRequests: '請求次數',
+ tableErrors: '錯誤數',
+ tableAvg: '平均',
+ noUsage: '尚無用量記錄。',
+ routerLoadBalancing: '路由器負載均衡',
+ routerLoadDesc: '線條粗細 = 實際流量佔比(來自請求記錄)· ★ = 路由器下次選擇的最低分實例',
+ strategy: '策略',
+ scenarioTitle: '各策略適合的場景',
+ scenarioFooter: '此下拉切換的是全域預設;在 config.yaml 為某群組設定 routing_strategy 會覆寫此處。',
+ noLlmGroups: '尚未設定 LLM 群組。',
+ requestLog: '請求記錄',
+ noRequests: '尚無請求。',
+ strategyChanged: '路由策略已切換為「{name}」',
+ strategyChangedDesc: '下一個請求起生效。未持久化,router 重啟後回到預設。',
+ strategyChangeFailed: '切換路由策略失敗',
+ strategyInfo: {
+ least_load: '請求長短不一、要平均各副本飽和度。通用安全的預設。',
+ round_robin: '同質 GPU、請求差異不大,或想要可預測的均分 / 當 baseline。',
+ random: '大量短請求、要最低決策成本的無狀態分流。',
+ least_inflight: '請求耗時相近,且不想被 ~1 秒 metrics 抓取延遲影響時。',
+ p2c: '突發流量下,想避免大家一窩蜂衝向同一個「目前最閒」的副本。',
+ session_affinity: '多輪對話 / Playground:同一會話黏同一台,提升 KV cache 重用(需帶 X-Session-Id 或 user,否則退回最低負載)。',
+ prefix_affinity: '固定 system prompt、RAG / few-shot 模板等高前綴重複率的請求。',
+ },
+ },
+
+ requests: {
+ searchPlaceholder: '搜尋模型 / 路徑 / 金鑰…',
+ errorsOnly: '僅錯誤',
+ costPerMTok: '每百萬 tokens 成本',
+ recent: '最近 {n}',
+ requestCount: '請求數',
+ totalTokens: '總 Token 數',
+ errorRate: '錯誤率',
+ estimatedCost: '估算成本',
+ tableTime: '時間',
+ tableModel: '模型',
+ tablePath: '路徑',
+ tableStatus: '狀態',
+ tableLatency: '延遲',
+ tableTokens: 'Token 數',
+ tableCost: '成本',
+ tableKey: '金鑰',
+ noRecords: '尚無請求紀錄。',
+ loadFailed: '無法載入請求紀錄',
+ },
+
+ monitoring: {
+ overview: '總覽',
+ vllmCapacity: 'vLLM 容量',
+ vllmPerf: 'vLLM 效能',
+ vllmQuery: 'vLLM 請求',
+ gpu: 'GPU',
+ host: '主機',
+ openGrafana: '在 Grafana 開啟',
+ },
+
+ playground: {
+ chat: '對話',
+ embeddingRerank: '嵌入 / 重排序',
+ thinkingProcess: '💭 思考過程',
+ sendPrompt: '發送訊息以透過路由器測試 {model}。',
+ inputPlaceholder: '輸入訊息…(Enter 送出,Shift+Enter 換行)',
+ comparePlaceholder: '同一個問題會同時送給所有選中的模型…(Enter 送出)',
+ selectModelsHint: '在右側選擇至少一個模型以開始並排對比。',
+ waitingPrompt: '等待提問…',
+ modelsParallel: '{n} 個模型並排',
+ params: '參數',
+ compareMode: '對比模式(多模型同問題)',
+ systemPrompt: '系統提示(System prompt)',
+ systemPromptPlaceholder: '留空則不送。例如:你是一位專業的繁體中文助理。',
+ modelReady: '模型(僅顯示已就緒)',
+ noReadyModels: '目前沒有已啟動的模型,請先至「模型」頁啟動。',
+ modelMulti: '模型(可多選,僅顯示已就緒)',
+ noReadyModelsShort: '目前沒有已啟動的模型。',
+ maxTokens: '最大 Tokens',
+ temperature: '溫度',
+ streamResponse: '串流回應',
+ request: '請求',
+ result: '結果',
+ modelLabel: '模型({mode},僅顯示已就緒)',
+ noModelsForMode: '此模式無可用模型。',
+ embNotStarted: 'embedding server 未啟動,請先至「模型」頁啟動。',
+ queryLabel: '查詢(設定後進入重排序模式)',
+ queryPlaceholder: '留空則為純嵌入模式',
+ inputLabel: '輸入(每行一筆)',
+ rerank: '重排序',
+ embed: '嵌入',
+ dimVector: '{n} 維向量',
+ runToSee: '執行請求以查看{type}。',
+ relevanceScores: '相關性分數',
+ vectorDimensions: '向量維度',
+ firstToken: '首字',
+ chatFailed: '對話請求失敗',
+ embedFailed: '嵌入請求失敗',
+ emptyResponse: '(空回應)',
+ stopGeneration: '停止生成',
+ },
+
+ benchmark: {
+ title: '壓測',
+ description: '評測模型的「速度」(吞吐 / 延遲,與評測的答對率不同)。支援並發 sweep、速率 open-loop、多輪對話、SLA 自動調優,以及 embedding / rerank 吞吐與單請求速度基準。部分模式可先在',
+ datasetsLink: '資料集庫',
+ descriptionEnd: '下載資料集。',
+ configTitle: '壓測設定',
+ modelLabel: '模型',
+ notStarted: '(未啟動)',
+ embModelLabel: '嵌入模型',
+ rerankModelLabel: '重排序模型',
+ embNotStarted: 'embedding server 未啟動,請先至「模型」頁啟動。',
+ noModelsForMode: '此模式無可用模型。',
+ modeSweep: '並發 Sweep',
+ modeOpenloop: '速率 Open-loop',
+ modeMultiturn: '多輪對話',
+ modeSla: 'SLA 調優',
+ modeSpeed: '速度基準',
+ modeEmbedding: '嵌入 Embedding',
+ modeRerank: '重排序 Rerank',
+ targetLabel: '目標',
+ targetRouter: '路由器(整體 + 負載平衡)',
+ targetInstance: '單一實例',
+ instanceLabel: '實例',
+ datasetLabel: '資料集',
+ endpointLabel: '端點',
+ parallelLabel: '並發點(逗號分隔,掃描)',
+ reqPerPoint: '每點請求數',
+ warmupRatio: '預熱比例',
+ rateLabel: '速率(req/s,逗號分隔,掃描)',
+ reqPerRate: '每速率請求數',
+ maxDuration: '最長秒數(0=不限)',
+ mtDatasetLabel: '資料集',
+ mtShareGpt: 'ShareGPT 中文(真實對話)',
+ mtRandom: 'random(隨機生成)',
+ mtCustom: 'custom(自備 JSONL)',
+ mtDatasetPath: '資料路徑(伺服器端 JSONL)',
+ mtConcurrent: '並發對話數(可逗號掃描)',
+ mtConvPerPoint: '每點對話數',
+ mtMinTurns: '最少輪數',
+ mtMaxTurns: '最多輪數',
+ mtMaxDuration: '最長秒數(0=不限,避免牆鐘失控)',
+ slaVariable: '搜尋變數',
+ slaParallel: '並發 parallel',
+ slaRate: '速率 rate(open-loop)',
+ slaConditions: 'SLA 條件(多條 = OR)',
+ slaLower: '下界',
+ slaUpper: '上界',
+ slaRunsPerPoint: '每點 runs',
+ slaFixedParallel: '固定並發(rate 模式)',
+ speedDesc: '單請求標準速度(並發 1)。固定 prompt 長度{lengths},打 /v1/completions 避免 chat template 干擾。',
+ speedLengthsShort: ' 1 / 6k / 14k / 30k',
+ speedLengthsLong: ' 63k / 129k',
+ speedLongCtx: '長上下文(63k / 129k)',
+ embParallel: '並發點(逗號分隔,掃描)',
+ embReqPerPoint: '每點請求數',
+ embInputLen: '輸入長度(token)',
+ embDocsPerReq: '每筆文件數',
+ outputTokens: '輸出 tokens',
+ outputTokensFixed: '輸出 tokens(固定)',
+ inputLength: '輸入長度',
+ prefixLength: '前綴長度(prefix cache)',
+ nameLabel: '名稱(選填)',
+ namePlaceholder: '例如:q05b-baseline',
+ streamLabel: '串流(量 TTFT 必須)',
+ runningBusy: '有壓測進行中…',
+ startBenchmark: '開始壓測',
+ history: '壓測歷史',
+ historyCount: '({n})',
+ compareHint: '勾選 ≥2 筆比較',
+ noHistory: '尚無壓測紀錄。',
+ comparison: '壓測比較',
+ failed: '失敗',
+ runSummary: '基本資訊',
+ fullReport: '完整報告',
+ slaMaxVar: '最大 {var}',
+ slaVarRate: '速率',
+ slaVarParallel: '並發',
+ multiTurnMetrics: '多輪指標',
+ turnsPerConv: '輪數/對話',
+ kvCacheHit: 'KV 快取命中',
+ firstTtft: '首輪 TTFT',
+ subsequentTtft: '後續輪 TTFT',
+ chartRps: 'RPS(req/s)',
+ chartGenTps: '輸出吞吐 Gen/s(tok/s)',
+ chartTtft: 'TTFT p99(ms)',
+ chartAvgLatency: '平均延遲(ms)',
+ chartP99Latency: '延遲 p99(ms)',
+ tableParallel: '並發',
+ tableRate: '速率',
+ tableAvgLatency: '平均延遲',
+ tableInputOutput: '輸入/輸出',
+ tableSuccessTotal: '成功/總數',
+ latencyDetail: '延遲 Latency (s)',
+ executionLog: '執行日誌',
+ waitingOutput: '(等待輸出…)',
+ stoppingHint: '正在停止,最多約 10 秒;仍卡住可按「強制」。',
+ forceKill: '強制終止',
+ forceConfirm: '強制終止壓測 #{id}?將立即 SIGKILL,結果會作廢。',
+ cancelFailed: '取消失敗',
+ deleteFailed: '刪除失敗',
+ deleteConfirm: '刪除壓測 #{id}?',
+ loadResultFailed: '無法載入結果',
+ startedToast: '已開始壓測 #{id}',
+ startedDesc: '可離開此頁,背景持續執行。',
+ startFailed: '無法開始壓測',
+ compareMax: '最多比較 {n} 筆',
+ compareLoadFailed: '無法載入比較資料',
+ selectModel: '請選擇一個模型',
+ enterRate: '請輸入至少一個速率',
+ enterParallel: '請輸入至少一個並發數',
+ enterMtConcurrent: '請輸入至少一個並發對話數',
+ addSlaCondition: '請至少加入一個 SLA 條件',
+ instanceNotReady: '該實例尚未就緒,無法直連',
+ modeLabel: '模式',
+ totalTestDuration: '總測試時間',
+ totalGenerated: '總生成',
+ avgOutputRate: '平均輸出速率',
+ stoppingBadge: '停止中…',
+ cancelStop: '取消(停止)',
+ forceKillHint: '強制終止(立即 SIGKILL)',
+ addToCompare: '加入比較',
+ slaMetrics: {
+ p99_latency: 'p99 延遲 (s)',
+ avg_latency: '平均延遲 (s)',
+ p99_ttft: 'p99 TTFT (ms)',
+ avg_ttft: '平均 TTFT (ms)',
+ p99_tpot: 'p99 TPOT (ms)',
+ avg_tpot: '平均 TPOT (ms)',
+ rps: 'RPS (req/s)',
+ tps: '輸出 tok/s',
+ },
+ },
+
+ eval: {
+ title: '模型評測',
+ description: '評測模型的「答對率 / 品質」(與壓測的速度不同)。先在',
+ descriptionEnd: '下載資料集,跑評測就不必等。小模型分數偏低屬正常,重點是換模型 / 調參時的比較。',
+ configTitle: '評測設定',
+ modelLabel: '模型',
+ selected: '已選',
+ noModels: '尚未設定模型',
+ offline: '· 離線',
+ multiModelHint: '多選 = 每個模型各排一個評測,依共用並發預算並行 / 排隊。',
+ targetRouter: '路由器',
+ targetInstance: '直連實例',
+ instanceOnly: '直連實例僅支援單一模型',
+ datasetLabel: '資料集',
+ cachedHint: '● = 已快取,未快取的會在執行時下載;⏬下載中的需等完成才可選。⚖ = 需裁判模型評分。',
+ longContextWarn: '⚠ 已選長上下文資料集,需模型有夠大的 max_model_len(數萬 token),否則會被截斷或回 400。',
+ toolParserWarn: '⚠ 已選真實函數調用資料集,需模型啟用 vLLM tool parser(enable_auto_tool_choice + tool_call_parser),否則分數恆為 0。',
+ subsetLabel: '已選資料集 — 主題',
+ subsetHint: '主題(subset,{n} 個,不選=全部)',
+ clearSubsets: '清除({n})',
+ samplesPerDataset: '每集樣本數(0=全部)',
+ repeats: '重複次數',
+ temperature: '溫度',
+ maxOutputTokens: '最大輸出 tokens',
+ batchSize: '並發數(batch size)',
+ batchSizeHint: '一次對模型發幾個並發請求;調高可跑更快,太高會排隊逾時。不影響分數。',
+ advanced: '進階設定',
+ fewShot: 'few-shot 範例數(0=各資料集預設)',
+ datasetArgsJson: 'dataset_args JSON(依資料集名覆寫,選填)',
+ datasetArgsHint: '如 few_shot_num、subset_list、shuffle。math 多採樣可配「重複次數」+ aggregation。',
+ judgeTitle: '⚖ 裁判模型',
+ judgeHint: '所選問答資料集需要 LLM 評分',
+ judgeInternal: '內部模型',
+ judgeExternal: '外部 API',
+ judgeApiUrl: 'API URL,例:https://api.openai.com/v1',
+ judgeModelId: '裁判模型 ID,例:gpt-4o-mini',
+ judgeApiKey: 'API Key(選填)',
+ judgeQualityHint: '裁判越強分數越可靠;小模型當裁判僅供參考。',
+ nameLabel: '名稱(選填)',
+ namePlaceholder: '例如:Qwen3-0.6B 基線',
+ startEvalMulti: '排入 {n} 個評測',
+ startEval: '開始評測',
+ runHistory: '評測紀錄',
+ runningCount: '執行中 {n}',
+ queuedCount: '排隊 {n}',
+ concurrencyBudget: '並發預算',
+ budgetUsage: '用量 {used}/{total}',
+ noHistory: '尚無評測紀錄。',
+ scoreComparison: '分數比較',
+ compareDataset: '資料集',
+ compareBestHint: '每列最高分以主色標示。',
+ detailReport: '完整報告',
+ queuedHint: '排隊中…等待並發預算或壓測結束。',
+ runningHint: '評測執行中…(可離開此頁)',
+ failedPrefix: '失敗:',
+ scores: '分數',
+ metric: '指標',
+ samples: '樣本',
+ score: '分數',
+ loadingDetail: '載入詳細數據…',
+ latencyP50P99: '延遲 p50 / p99',
+ outputThroughput: '輸出吞吐',
+ avgInputTok: '平均輸入 tok',
+ avgOutputTok: '平均輸出 tok',
+ subsetBreakdown: '· 分科目',
+ whatDoesItTest: '這個評測在測什麼?',
+ collapseSamples: '收起逐題瀏覽',
+ expandSamples: '逐題瀏覽(看每題對錯 / 答案)',
+ executionLog: '執行日誌',
+ noLog: '(無日誌)',
+ loadResultFailed: '無法載入結果',
+ loadCatalogFailed: '無法讀取評測資料集',
+ selectModelRequired: '請至少選擇一個模型',
+ selectDatasetRequired: '請至少選擇一個資料集',
+ instanceSingleOnly: '直連實例僅支援單一模型',
+ instanceNotReady: '該實例尚未就緒,無法直連',
+ judgeModelRequired: '請設定裁判模型',
+ judgeApiRequired: '外部裁判需要 API URL',
+ invalidDatasetArgs: '進階 dataset_args 不是合法 JSON',
+ startedToast: '已排入 {n} 個評測',
+ startedDesc: '可離開此頁,背景持續執行。',
+ evalFailed: '無法評測 {model}',
+ cancelRequested: '已要求取消',
+ cancelFailed: '取消失敗',
+ deleteConfirm: '刪除評測 #{id}?',
+ deleteFailed: '刪除失敗',
+ budgetSet: '並發預算已設為 {n}',
+ budgetSetFailed: '無法調整預算',
+ addCompare: '加入比較',
+ cancelQueue: '取消排隊',
+ downloading: '⏬下載中',
+ defaultFewShot: '· 預設 {n}-shot',
+ datasetArgsPlaceholder: `例:{'{'}"arc": {'{'}"subset_list": ["ARC-Challenge"]{'}'}{'}'}`, // literal braces written as {'{'} / {'}'}: vue-i18n parses bare { } as interpolation; renders as 例:{"arc": {"subset_list": ["ARC-Challenge"]}}
+ concurrencyBudgetHint: '所有並行評測的 batch_size 加總上限;填滿就排隊。即時生效,重啟回預設。',
+ },
+
+ library: {
+ title: '模型庫',
+ description: '預先下載 Hugging Face 權重到共用快取,啟動模型時就不必等待。Gated 模型需在後端設定',
+ descriptionEnd: '。',
+ cachedModels: '已快取模型',
+ diskRemaining: '磁碟剩餘',
+ diskUsed: '佔用空間',
+ downloadNew: '下載新模型',
+ repoId: 'Hugging Face repo id',
+ repoPlaceholder: '例如:Qwen/Qwen3-0.6B',
+ noCachedModels: '快取中尚無模型。',
+ files: '檔',
+ size: '大小',
+ updated: '更新',
+ deleteCache: '刪除快取',
+ deleteConfirm: '確定刪除已快取的 {repo}?此操作會釋放磁碟空間。',
+ deleteFailed: '刪除失敗',
+ deleted: '已刪除 {repo}',
+ downloadStarted: '開始下載 {repo}',
+ downloadStartedDesc: '可離開此頁,下載會在背景繼續。',
+ downloadFailed: '無法開始下載',
+ loadCacheFailed: '無法讀取快取',
+ occupiedSpace: '佔用空間',
+ downloading: '下載中',
+ downloadComplete: '完成',
+ downloadFailedBadge: '失敗',
+ },
+
+ loraLibrary: {
+ title: 'LoRA 庫',
+ description: '管理本地 LoRA adapter。從 Hugging Face 下載、或把 adapter 資料夾直接放進',
+ descriptionEnd: '也會出現在這。新增 / 編輯模型時可在「LoRA Adapters」區從這裡挑選掛載。',
+ adapters: 'Adapters',
+ diskUsed: '佔用空間',
+ diskRemaining: '磁碟剩餘',
+ downloadAdapter: '下載 adapter',
+ repoId: 'Hugging Face repo id',
+ repoPlaceholder: '例如:jeeejeee/llama32-3b-text2sql-spider',
+ localName: '本地名稱(選填)',
+ localNamePlaceholder: '預設取 repo 結尾',
+ localAdapters: '本地 adapters',
+ unknownBase: '未知 base',
+ deleteConfirm: '確定刪除 LoRA「{name}」?此操作會移除磁碟上的 adapter 檔案。',
+ deleteFailed: '刪除失敗',
+ deleted: '已刪除 {name}',
+ downloadStarted: '開始下載 {repo}',
+ downloadStartedDesc: '可離開此頁,下載會在背景繼續。',
+ downloadFailed: '無法開始下載',
+ loadFailed: '無法讀取 LoRA 庫',
+ noAdapters: '庫中尚無 adapter。從上方下載,或把資料夾放進',
+ },
+
+ datasets: {
+ title: '資料集庫',
+ description: '預先下載資料集到共用 ModelScope 快取,跑壓測 / 評測時就不必等首次下載。',
+ descriptionRandom: 'random / 速度基準為即時生成,無需下載。',
+ cachedTotal: '已快取 / 總數',
+ diskUsed: '佔用空間',
+ diskRemaining: '磁碟剩餘',
+ diskTotal: '共 {size}',
+ perfDatasets: '壓測資料集',
+ perfHint: '下載後可在「壓測」頁選用',
+ evalDatasets: '評測資料集',
+ evalHint: '下載後可在「評測」頁選用',
+ downloadFailed: '下載「{label}」失敗:',
+ loadFailed: '無法讀取資料集',
+ downloadStarted: '開始下載',
+ downloadStartedDesc: '可離開此頁,下載會在背景繼續。',
+ downloadStartFailed: '無法開始下載',
+ deleteConfirm: '確定刪除已快取的「{label}」?此操作會釋放磁碟空間。',
+ deleted: '已刪除 {label}',
+ deleteFailed: '刪除失敗',
+ },
+
+ keys: {
+ title: 'API 金鑰',
+ description: '用於向路由器發送推論請求(Authorization: Bearer …)。金鑰只在建立時顯示一次。',
+ authDisabled: '後端未設定 LLMOPS_ADMIN_TOKEN,目前驗證為關閉狀態 — 金鑰可建立,但路由器尚未強制要求金鑰(需設定 LLMOPS_REQUIRE_API_KEY=true)。',
+ locked: '需要管理員權杖才能管理金鑰。',
+ unlock: '解鎖',
+ createNew: '建立新金鑰',
+ nameLabel: '名稱(用於用量歸屬)',
+ namePlaceholder: '例如:team-rag、ci-bot',
+ rateLimit: '速率上限(次/分)',
+ ratePlaceholder: '不限',
+ issuedKeys: '已發行金鑰',
+ revoked: '已撤銷',
+ perMin: '/分',
+ requestCount: '次',
+ lastUsed: '最後使用:',
+ noKeys: '尚無金鑰。',
+ revokeTitle: '撤銷金鑰',
+ keyCreated: '金鑰已建立',
+ copyImmediate: '請立即複製此金鑰,關閉後將無法再次顯示。',
+ copySuccess: '已複製到剪貼簿',
+ copyFailed: '複製失敗,請手動選取',
+ createFailed: '建立金鑰失敗',
+ revokeFailed: '撤銷失敗',
+ revokeSuccess: '已撤銷 {name}',
+ loadFailed: '無法載入金鑰',
+ },
+
+ usage: {
+ quickStart: '快速開始',
+ quickStartDesc: 'Router 是 OpenAI 相容的統一入口,請求的 model 欄位填「群組名」,Router 會自動選負載最低的實例。',
+ step1: '確認 Router 已啟動:',
+ step2: '在 Models 頁把要用的模型 Start 到',
+ step2End: '',
+ step3: '用下方任一語言呼叫;金鑰可任意填(Router 不驗證)',
+ chatCompletions: 'Chat Completions',
+ modelLabel: '模型',
+ streamLabel: '串流 (stream)',
+ loraDropdownHint: '下拉選單裡的 LoRA 項:把 model 換成它的 served name 即可,其餘請求完全不變。',
+ loraTitle: 'LoRA Adapters',
+ loraDesc: '呼叫 LoRA 與一般模型相同,只把 model 改成 adapter 的 served name(例',
+ loraDescEnd: '),Router 會路由到對應 base 模型的實例。',
+ listModelsLabel: '列出可用模型 / LoRA · cURL',
+ filterLoraLabel: '只挑出 LoRA · Python',
+ loraNote1: '· 前提:base 模型需以 enable_lora 啟動,且 adapter 已掛載(config 靜態掛,或在模型詳情抽屜熱載入)。',
+ loraNote2: '· 打一個未掛載的名稱會回 404 — Model not found。',
+ loraNote3: '· Base vs LoRA A/B:同一請求只換 model;Playground 比較模式可並排對照。',
+ embTitle: 'Embeddings & Rerank',
+ embDesc: '同一個 /v1/embeddings 端點:帶 query 欄位即切換為 reranking。',
+ embCurlLabel: 'Embedding · cURL',
+ embPyLabel: 'Embedding · Python',
+ rerankCurlLabel: 'Rerank · cURL',
+ samplePrompt: '請用一句話總結登月。',
+ sampleInputA: '敏捷的棕狐跳過圍欄',
+ sampleInputB: '慵懶的狗正在打盹',
+ sampleQuery: '哪一段更符合這個問題?',
+ sampleDocA: '段落 A',
+ sampleDocB: '段落 B',
+ routerNoAuthComment: 'Router 不會驗證 API key',
+ streamComment: '直接輸出原始 SSE `data:` frame',
+ modelsComment: '有 `parent` 欄位的項目就是 LoRA,父模型即為它所掛載的 base。',
+ loraListComment: '把列出的名稱放進聊天請求的 `model` 欄位即可',
+ vectorLengthComment: '向量長度',
+ rerankComment: '帶上 query 欄位後,端點會切成 rerank 模式,回傳每個候選的相關性分數。',
+ },
+
+ resources: {
+ cpu: '處理器',
+ memory: '記憶體',
+ memorySummary: '已用 {used} / 總計 {total} · 可用 {available}',
+ gpuProcesses: 'GPU 程序',
+ gpuProcessesDesc: '佔用 GPU 記憶體的程序(約每 5 秒更新)',
+ tablePid: 'PID',
+ tableUser: '使用者',
+ tableName: '名稱',
+ tableCommand: '指令',
+ tableGpuMem: 'GPU 記憶體',
+ noGpuProcesses: '無 GPU 程序。',
+ },
+
+ activity: {
+ eventCount: '{n} 個事件',
+ noEvents: '尚無事件記錄。',
+ },
+
+ topology: {
+ title: '系統拓撲',
+ clickHint: '· 點擊節點可深入查看',
+ legendData: '資料',
+ legendPlacement: '部署',
+ legendControl: '控制',
+ legendLora: 'LoRA',
+ client: '用戶端',
+ clientRequests: '{n} 請求',
+ router: '路由器',
+ routerError: '{rate} 錯誤',
+ backend: '後端',
+ backendReady: '{ready}/{total} 就緒',
+ groupRunWait: '執行 {running} · 等待 {waiting}',
+ embeddingNode: '嵌入',
+ gpuConfigured: '已設定 · 未偵測到',
+ kvSharedLabel: '共用 KV cache',
+ },
+
+ addModel: {
+ createTitle: '新增模型',
+ editTitle: '編輯模型',
+ pasteCommand: '貼上 vLLM 啟動指令',
+ parseCommand: '解析指令',
+ parseFailed: '無法解析指令',
+ groupLabel: '群組',
+ groupExists: '已存在群組 — 將新增為新副本。',
+ instanceLabel: '實例 ID',
+ keyExists: '{key} 已存在。',
+ hostLabel: '主機',
+ portLabel: '連接埠',
+ portInUse: '連接埠 {port} 已被其他實例使用。',
+ gpuLabel: 'GPU(cuda_device)',
+ gpuAuto: '無 / 自動',
+ modelTagLabel: '模型標籤',
+ routingLabel: '路由策略(負載平衡)',
+ routingDefault: '跟隨全域預設',
+ routingHint: '此群組請求的分流方式;留空則跟隨全域設定(可在「流量」頁切換)。多副本才有效。',
+ kvShareLabel: '共用 KV Cache(跨 instance)',
+ kvShareDesc: '同群組各副本透過共享 store(/kv_cache)重用彼此算過的 KV,相同前綴不必重算。多副本 + 高前綴重複率(固定 system prompt / RAG / 多輪對話)效益最大;關閉則各副本各自獨立 KV。',
+ groupSharedWarn: 'vLLM 參數為群組共用 — 此變更將套用至群組 {group} 的全部 {n} 個副本。',
+ weightsCached: '權重已快取',
+ weightsDownloading: '下載權重中…',
+ weightsDownloadFailed: '下載失敗:',
+ weightsNotCached: '權重尚未快取 — 首次啟動會即時下載(較慢)。',
+ accelTitle: '⚡ 加速設定(vLLM 推理參數)',
+ accelTemplates: '情境模板:',
+ accelLatency: '低延遲(聊天 / agent)',
+ accelThroughput: '高吞吐(多併發)',
+ accelClear: '清除(回預設)',
+ accelHint: '空白=用 vLLM 預設。改了需重啟模型生效。組合才有感。',
+ advancedTitle: '進階(推測解碼 / prefix hash / chunked prefill / offload)',
+ ngramSpec: 'N-gram 推測解碼(低 QPS 降單請求延遲)',
+ offloadHint: '把權重 offload 到 CPU RAM 換「跑得起來」(會變慢),非加速。',
+ partialPrefillHint: '同時有短問題+長 prompt 時:把 partial 設 >1 且 long 設小,短請求可插隊不被長的卡住。',
+ vllmParams: 'vLLM 參數(model_config)',
+ addParam: '新增',
+ noExtraParams: '無額外參數。',
+ flagPlaceholder: '旗標(snake_case)',
+ valuePlaceholder: '值',
+ toolCallingTitle: '🛠 工具調用(tool calling)參數參考',
+ toolCallingDesc: '要讓模型支援 tool_choice="auto",需加 enable_auto_tool_choice=true + tool_call_parser=<parser>,reasoning 模型再加 reasoning_parser。parser 要對得上模型輸出格式,別看品牌猜。', // NOTE(review): value after "tool_call_parser=" was missing; "<parser>" is a reconstructed placeholder — confirm wording
+ toolCallingPresetHint: '點一下帶入推薦參數:',
+ toolCallingFooter: '沒有對應 parser 的模型請勿亂加。',
+ loraTitle: 'LoRA Adapters',
+ addLora: '新增',
+ loraServedName: 'served name(如 sql-lora)',
+ loraPickAdapter: '— 選 adapter / 自填 path —',
+ loraTyped: '(自填)',
+ noLora: '無 LoRA。新增一列會自動補上 enable_lora=true;served name 即推論時 model 欄位要填的名稱。adapter 從 LoRA 庫挑選,或自填 path。',
+ loraAutoHint: '從庫選 adapter 會自動帶入 base 並把 max_lora_rank 設到對齊的 rank。Base model 須支援 LoRA(vLLM SupportsLoRA)。',
+ loraBaseMismatch: '此 adapter 的 base 是 {adapterBase},與本模型 {modelTag} 不符',
+ newGroup: '新群組',
+ saveChanges: '儲存變更',
+ addModelBtn: '新增模型',
+ createSuccess: '已新增 {key}',
+ createSuccessDesc: '目前已停止 — 請按「啟動」以啟用。',
+ editSuccess: '已更新 {key}',
+ editSuccessDesc: '變更將於下次啟動時生效。',
+ createFailed: '新增模型失敗',
+ editFailed: '更新模型失敗',
+ routerReloadFailed: '路由器未重新整理',
+ routerReloadCreateDesc: '模型已新增,但路由器無法連線 — 重新載入前將無法路由至此模型。',
+ routerReloadEditDesc: '變更已儲存,但路由器無法連線。',
+ downloadStarted: '開始下載 {repo}',
+ downloadStartedDesc: '可關閉此視窗,下載會在背景繼續。',
+ downloadFailed: '無法開始下載',
+ quantHint: '預量化模型(AWQ / GPTQ)直接把 model_tag 換成量化版即可,vLLM 會自動偵測,不用設此欄。',
+ defaultLabel: '預設',
+ autoLabel: '自動',
+ notSet: '未設定',
+ forceOn: '強制開',
+ off: '關',
+ on: '開',
+ onExperimental: '開(實驗性)',
+ fastestStartup: '最快啟動',
+ mostAggressive: '最激進',
+ gpuMemHint: '沿用現值;留空=vLLM 預設',
+ kvCacheDefault: '預設(auto,不量化)',
+ prefixCacheDefault: '預設(開)',
+ chunkedPrefillDefault: '預設(開)',
+ asyncSchedDefault: '預設(關)',
+ quantDefault: '預設(不量化,或載入量化版自動偵測)',
+ quantBnb: 'bitsandbytes(4-bit NF4,任何模型線上量化)',
+ quantFp8Tensor: 'fp8_per_tensor(8-bit;Ampere 退 W8A16)',
+ quantFp8Block: 'fp8_per_block(8-bit;Ampere 退 W8A16)',
+ quantInt8: 'int8_per_channel_weight_only(8-bit INT8)',
+ hashDefault: '預設(sha256)',
+ hashSha256: 'sha256(最穩)',
+ hashXxhash: 'xxhash(較快,非密碼安全)',
+ qwen3Thinking: 'Qwen3(含 thinking)',
+ toolCallingDocRef: '完整對照見 docs/vllm_auto_tool_整理.md。沒有對應 parser 的模型請勿亂加。',
+ },
+
+ modelDetail: {
+ overview: '概覽',
+ events: '事件',
+ logs: '日誌',
+ endpoint: '端點',
+ gpu: '顯示卡',
+ pid: '程序 ID',
+ managedLabel: '管理方式',
+ managedYes: '是(可控制)',
+ managedExternal: '外部',
+ uptime: '運行時間',
+ autoRestarts: '自動重啟次數',
+ startupSnapshot: '啟動容量快照',
+ kvCacheCapacity: 'KV cache 容量',
+ maxConcurrency: '最大並發',
+ concurrencyReqTok: '{n} tok/req',
+ concurrencyHint: '≈ 可同時服務 {n} 個請求',
+ memWeights: '權重',
+ memCudaGraph: 'CUDA graph',
+ memKvCache: 'KV cache',
+ startupWeightsLoad: '載權重',
+ startupModelLoad: '模型載入',
+ startupCompile: 'torch.compile',
+ startupWarmup: 'warmup',
+ gpuMemUtilTitle: '⚙ 關於 gpu_memory_utilization',
+ gpuMemUtilDesc: '新版 vLLM 把 CUDA graph 記憶體也算進這個額度。你設 {current},扣掉後實際給 KV cache 的空間只等於舊版的 {effective}。想要更大 KV cache(更高並發 / 更長 context)可在停止後於「編輯參數」提到 {suggested},但顯存餘裕會變小、OOM 風險上升(小顯卡尤其要保守)。',
+ servedModels: '服務的模型',
+ routingPolicy: '路由策略(負載平衡)',
+ vllmParams: 'vLLM 參數(model_config)',
+ loraAdapters: 'LoRA Adapters',
+ hotLoadEnabled: '熱加載已啟用',
+ hotLoad: '載入',
+ hotUnload: '熱卸載',
+ hotLoadPick: '— 從 LoRA 庫選 adapter 熱載入 —',
+ hotLoadSuccess: '已熱載入 {name}',
+ hotLoadSuccessDesc: '已套用到所有就緒實例並更新路由。',
+ hotLoadFailed: '熱載入失敗',
+ hotUnloadConfirm: '卸載 LoRA「{name}」?會從所有就緒實例移除並停止路由。',
+ hotUnloadSuccess: '已卸載 {name}',
+ hotUnloadFailed: '卸載失敗',
+ hotLoadHint: '熱載入會套用到所有就緒實例並更新路由,且寫入 overlay(重啟後仍在)。',
+ coldHint: '推論時把 served name 填進 model 欄位即可(Playground / 評測 / 壓測皆可選)。',
+ hotLoadEnableHint: '要在不重啟下熱加載 LoRA:編輯本模型、勾選 enable_lora + allow_runtime_lora,重啟後即可在此熱載入 / 卸載。',
+ liveMetrics: '即時負載(路由器 /metrics)',
+ metricsRunning: '執行中',
+ metricsWaiting: '等待中',
+ metricsKvCache: 'KV 快取',
+ metricsGenTokens: '生成 tokens',
+ noMetrics: '無即時指標(路由器無法連線或模型閒置)。',
+ usageSection: '用量',
+ requestCount: '請求次數',
+ lastError: '最後錯誤',
+ noEventRecords: '尚無事件記錄。',
+ filterLogs: '篩選日誌行…',
+ downloadLogs: '下載完整日誌',
+ noLogContent: '無日誌內容。',
+ noFilterMatch: '無符合篩選的日誌行。',
+ startLabel: '啟動',
+ startLocked: '已有模型啟動中({name}),請待其完成',
+ stopLabel: '停止',
+ terminateLabel: '終止',
+ abortLabel: '中止啟動',
+ editParams: '編輯參數(需先停止;vLLM 參數為群組共用)',
+ removeModel: '移除模型(僅限動態新增的模型)',
+ removeSuccess: '已移除 {key}',
+ removeFailed: '無法移除模型',
+ externalModel: '外部模型 — 非本後端管理',
+ editEmbedding: '編輯參數(需先停止 embedding server)',
+ },
+
+ modelGroup: {
+ readyCount: '{ready}/{total} 就緒',
+ embedding: '嵌入',
+ reranking: '重排序',
+ crashRestart: '崩潰後自動重啟 {n} 次',
+ liveLoad: '即時負載(路由器 /metrics)',
+ runningDesc: ' 執行中 — 目前正在生成的請求',
+ waitingDesc: ' 等待中 — 此實例的排隊請求',
+ kvCacheUsed: 'KV 快取 {pct} 已使用',
+ externalNotManaged: '外部模型 — 非本後端管理',
+ terminateHint: '終止殘留進程',
+ abortStartup: '中止啟動',
+ stopHint: '停止',
+ startAll: '全部啟動',
+ stopAll: '全部停止',
+ addInstance: '新增實例',
+ showMore: '顯示更多 {n} 個',
+ collapse: '收起',
+ servedModels: '服務的模型',
+ },
+
+ routingStrategies: {
+ least_load: '最低負載',
+ round_robin: '輪詢',
+ random: '隨機',
+ least_inflight: '最低在途',
+ p2c: '二擇一',
+ session_affinity: '會話親和',
+ prefix_affinity: '前綴親和',
+ },
+
+ routerFan: {
+ nextPick: '下次選擇',
+ kvShared: '共用 KV',
+ kvIndependent: '各自 KV',
+ kvSharedTooltip: '此群組各副本共用 KV cache(/kv_cache)',
+ kvIndependentTooltip: '各副本各自獨立 KV cache',
+ kvStoreLabel: '共用 KV Cache · /kv_cache',
+ instancesCount: '{n} 個實例',
+ idleHint: '閒置中 — 沒有執行中的實例',
+ readyCount: '{ready}/{total} 就緒',
+ waiting: '等待',
+ },
+
+ systemTopology: {
+ title: '系統拓撲',
+ clickHint: '· 點擊節點可深入查看',
+ dataPlane: '資料',
+ placementPlane: '部署',
+ controlPlane: '控制',
+ client: '用戶端',
+ requests: '請求',
+ router: '路由器',
+ errors: '錯誤',
+ backend: '後端',
+ waiting: '等待',
+ configuredUndetected: '已設定 · 未偵測到',
+ },
+
+ modelControl: {
+ oneAtATime: '一次只能啟動一顆模型',
+ alreadyStarting: '「{current}」正在啟動中,請待其完成後再啟動 {name}。',
+ starting: '正在啟動 {name}',
+ startingDesc: '等待 /health 通過…',
+ stopping: '正在停止 {name}',
+ stoppingDesc: '釋放 GPU 資源…',
+ vramInsufficient: '{name}:VRAM 不足',
+ forceStart: '強制啟動',
+ startFailed: '啟動 {name} 失敗',
+ stopFailed: '停止 {name} 失敗',
+ title: '管理員驗證',
+ description: '此操作需要管理員權杖(後端 LLMOPS_ADMIN_TOKEN)。',
+ tokenPlaceholder: '管理員權杖',
+ tokenInvalid: '權杖無效。',
+ confirm: '確認',
+ },
+
+ gpuGauge: {
+ util: '使用率',
+ vram: '顯存',
+ power: '功耗',
+ temp: '溫度',
+ memory: '記憶體',
+ },
+
+ codeBlock: {
+ copy: '複製',
+ copied: '已複製!',
+ copyFailed: '複製失敗',
+ },
+
+ addInstance: {
+ title: '新增實例',
+ sharedSettingsTitle: '沿用此群組的共用設定',
+ sharedSettingsHint: '同群組的所有實例共用 vLLM 參數,這裡只需設定本實例的位置。',
+ instanceIdLabel: '實例 ID',
+ instanceIdPlaceholder: '例如:qwen3-5',
+ idConflict: '此 ID 已存在於群組中。',
+ portConflict: '此 port 已被佔用。',
+ cudaDeviceLabel: 'CUDA 裝置',
+ createSuccess: '已新增實例 {key}',
+ createSuccessDesc: '目前已停止 — 請按「啟動」以啟用。',
+ createFailed: '新增實例失敗',
+ },
+
+ datasetCard: {
+ cached: '已快取',
+ notCached: '未快取',
+ downloading: '下載中',
+ downloadFailed: '失敗',
+ preview: '預覽',
+ deleteCache: '刪除快取',
+ warmingHint: '正在建立快取,完成後預覽即可秒開',
+ warming: '預熱中…',
+ },
+
+ datasetPreview: {
+ title: '預覽 · {key}',
+ loading: '載入資料中…(首次預覽需建立快取,可能較久)',
+ introTab: '介紹',
+ samplesTab: '範例資料({n})',
+ subjectCount: '{n} 個主題',
+ noDescription: '此資料集沒有提供介紹。',
+ showingFirst: '顯示前 {n} 筆(資料集更大)',
+ totalRows: '共 {n} 筆',
+ answer: '答案:',
+ },
+
+ embeddingModel: {
+ editTitle: '編輯{type}模型',
+ typeEmbedding: '嵌入',
+ typeReranking: '重排序',
+ paramHint: '參數於 embedding server 停止後編輯,下次啟動生效。',
+ paramsLabel: '參數',
+ paramKeyPlaceholder: '參數名(snake_case)',
+ paramValuePlaceholder: '值(true/false/數字/字串)',
+ noParams: '無參數。',
+ updateSuccess: '已更新 {name}',
+ updateSuccessDesc: '變更將於 embedding server 下次啟動時生效。',
+ updateFailed: '更新失敗',
+ },
+
+ evalSamples: {
+ dataset: '資料集',
+ loadFailed: '無法載入逐題資料',
+ loadDetailFailed: '無法載入此題詳情',
+ filterAll: '全部',
+ filterCorrect: '答對',
+ filterWrong: '答錯',
+ pageInfo: '第 {page} / {total} 頁 · 共 {count} 題',
+ colResult: '結果',
+ colModelAnswer: '模型答案(節錄)',
+ colModelOutput: '模型輸出(節錄)',
+ colStandardAnswer: '標準答案',
+ correct: '答對',
+ wrong: '答錯',
+ ruleScoreHint: '此為規則評分(檢查是否遵守指令),無單一標準答案;對錯看上方各項指標。',
+ standardAnswer: '標準答案',
+ modelAnswer: '模型回答',
+ promptLabel: '題目 / Prompt',
+ noMatchingSamples: '沒有符合的題目。',
+ },
+
+ perfChart: {
+ noData: '無資料',
+ concurrency: '並發',
+ },
+}
diff --git a/apps/frontend_llmops/src/lib/routingStrategies.ts b/apps/frontend_llmops/src/lib/routingStrategies.ts
index a09cee6..6590c62 100644
--- a/apps/frontend_llmops/src/lib/routingStrategies.ts
+++ b/apps/frontend_llmops/src/lib/routingStrategies.ts
@@ -1,3 +1,5 @@
+import i18n from '@/i18n'
+
/** Router load-balancing strategies — shared by the Traffic page selector and the
* model edit dialog. Keep in sync with router-server's STRATEGIES registry
* (apps/router-server/src/llm_router/routing_strategies.py).
@@ -14,14 +16,8 @@ export const ROUTING_STRATEGIES = [
export type RoutingStrategy = (typeof ROUTING_STRATEGIES)[number]
-export const ROUTING_STRATEGY_LABELS: Record = {
- least_load: '最低負載(預設)',
- round_robin: '輪詢',
- random: '隨機',
- least_inflight: '最少進行中',
- p2c: '二選一取優',
- session_affinity: '會話黏性',
- prefix_affinity: '前綴黏性',
+export const routingStrategyLabel = (s: string) => {
+ const messageKey = `routingStrategies.${s}`
+ const translated = i18n.global.t(messageKey) // assumes t() echoes the key back when no message exists
+ return translated !== messageKey ? translated : s
}
-
-export const routingStrategyLabel = (s: string) => ROUTING_STRATEGY_LABELS[s] ?? s
diff --git a/apps/frontend_llmops/src/lib/utils.ts b/apps/frontend_llmops/src/lib/utils.ts
index 8ca370a..25478e7 100644
--- a/apps/frontend_llmops/src/lib/utils.ts
+++ b/apps/frontend_llmops/src/lib/utils.ts
@@ -1,6 +1,7 @@
import type { ClassValue } from "clsx"
import { clsx } from "clsx"
import { twMerge } from "tailwind-merge"
+import { currentLocale } from '@/i18n'
export function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs))
@@ -51,17 +52,19 @@ export function formatPercent(n: number | null | undefined): string {
export function timeAgo(unixSeconds: number | null | undefined): string {
if (unixSeconds == null) return '—'
const diff = Date.now() / 1000 - unixSeconds
- if (diff < 5) return 'just now'
- if (diff < 60) return `${Math.floor(diff)}s ago`
- if (diff < 3600) return `${Math.floor(diff / 60)}m ago`
- if (diff < 86400) return `${Math.floor(diff / 3600)}h ago`
- return `${Math.floor(diff / 86400)}d ago`
+ const zh = currentLocale().startsWith('zh') // locale codes starting with 'zh' get the Chinese phrasing; all others get the English abbreviations
+ if (diff < 5) return zh ? '剛剛' : 'just now'
+ if (diff < 60) return zh ? `${Math.floor(diff)} 秒前` : `${Math.floor(diff)}s ago`
+ if (diff < 3600) return zh ? `${Math.floor(diff / 60)} 分前` : `${Math.floor(diff / 60)}m ago`
+ if (diff < 86400) return zh ? `${Math.floor(diff / 3600)} 小時前` : `${Math.floor(diff / 3600)}h ago`
+ return zh ? `${Math.floor(diff / 86400)} 天前` : `${Math.floor(diff / 86400)}d ago`
}
/** Clock time (HH:MM:SS) from Unix seconds. */
export function formatTime(unixSeconds: number | null | undefined): string {
if (unixSeconds == null) return '—'
- return new Date(unixSeconds * 1000).toLocaleTimeString('en-GB')
+ const locale = currentLocale().startsWith('zh') ? 'zh-TW' : 'en-GB'
+ return new Date(unixSeconds * 1000).toLocaleTimeString(locale, { hourCycle: 'h23' }) // zh-TW defaults to a 12-hour 上午/下午 clock; pin 24-hour so the documented HH:MM:SS shape holds in every locale
}
/** Duration between two Unix-seconds stamps as e.g. "1m 34s". */
diff --git a/apps/frontend_llmops/src/main.ts b/apps/frontend_llmops/src/main.ts
index 5dcad83..e1ed49b 100644
--- a/apps/frontend_llmops/src/main.ts
+++ b/apps/frontend_llmops/src/main.ts
@@ -5,10 +5,12 @@ import { createPinia } from 'pinia'
import App from './App.vue'
import router from './router'
+import i18n from './i18n'
const app = createApp(App)
app.use(createPinia())
app.use(router)
+app.use(i18n) // install the i18n plugin on the app instance before it is mounted
app.mount('#app')
diff --git a/apps/frontend_llmops/src/router/index.ts b/apps/frontend_llmops/src/router/index.ts
index dc0e50d..52ba0e6 100644
--- a/apps/frontend_llmops/src/router/index.ts
+++ b/apps/frontend_llmops/src/router/index.ts
@@ -6,91 +6,91 @@ const router = createRouter({
{
path: '/',
name: 'overview',
- meta: { title: 'Overview' },
+ meta: { title: 'overview' },
component: () => import('@/views/OverviewView.vue'),
},
{
path: '/models',
name: 'models',
- meta: { title: 'Models' },
+ meta: { title: 'models' },
component: () => import('@/views/ModelsView.vue'),
},
{
path: '/traffic',
name: 'traffic',
- meta: { title: 'Traffic' },
+ meta: { title: 'traffic' },
component: () => import('@/views/TrafficView.vue'),
},
{
path: '/requests',
name: 'requests',
- meta: { title: 'Requests' },
+ meta: { title: 'requests' },
component: () => import('@/views/RequestsView.vue'),
},
{
path: '/monitoring',
name: 'monitoring',
- meta: { title: 'Monitoring' },
+ meta: { title: 'monitoring' },
component: () => import('@/views/MonitoringView.vue'),
},
{
path: '/benchmark',
name: 'benchmark',
- meta: { title: 'Benchmark' },
+ meta: { title: 'benchmark' },
component: () => import('@/views/BenchmarkView.vue'),
},
{
path: '/playground',
name: 'playground',
- meta: { title: 'Playground' },
+ meta: { title: 'playground' },
component: () => import('@/views/PlaygroundView.vue'),
},
{
path: '/library',
name: 'library',
- meta: { title: 'Model Library' },
+ meta: { title: 'library' },
component: () => import('@/views/LibraryView.vue'),
},
{
path: '/lora-library',
name: 'lora-library',
- meta: { title: 'LoRA Library' },
+ meta: { title: 'loraLibrary' },
component: () => import('@/views/LoraLibraryView.vue'),
},
{
path: '/datasets',
name: 'datasets',
- meta: { title: 'Datasets' },
+ meta: { title: 'datasets' },
component: () => import('@/views/DatasetsView.vue'),
},
{
path: '/eval',
name: 'eval',
- meta: { title: 'Evaluation' },
+ meta: { title: 'eval' },
component: () => import('@/views/EvalView.vue'),
},
{
path: '/keys',
name: 'keys',
- meta: { title: 'API Keys' },
+ meta: { title: 'keys' },
component: () => import('@/views/KeysView.vue'),
},
{
path: '/usage',
name: 'usage',
- meta: { title: '使用指南' },
+ meta: { title: 'usage' },
component: () => import('@/views/UsageView.vue'),
},
{
path: '/resources',
name: 'resources',
- meta: { title: 'Resources' },
+ meta: { title: 'resources' },
component: () => import('@/views/ResourcesView.vue'),
},
{
path: '/activity',
name: 'activity',
- meta: { title: 'Activity' },
+ meta: { title: 'activity' },
component: () => import('@/views/ActivityView.vue'),
},
{ path: '/:pathMatch(.*)*', redirect: '/' },
diff --git a/apps/frontend_llmops/src/views/ActivityView.vue b/apps/frontend_llmops/src/views/ActivityView.vue
index 796d9ff..b2f6609 100644
--- a/apps/frontend_llmops/src/views/ActivityView.vue
+++ b/apps/frontend_llmops/src/views/ActivityView.vue
@@ -49,13 +49,13 @@ const stateColor: Record = {
v-model="filter"
class="h-8 rounded-md border border-input bg-background/40 px-2 text-xs"
>
-
+
- {{ filtered.length }} 個事件
- 重新整理
+ {{ $t('activity.eventCount', { n: filtered.length }) }}
+ {{ $t('common.refresh') }}