SloMR · SloMR · Apr 25, 2026 · Apr 25, 2026 · Apr 25, 2026 · Apr 25, 2026
diff --git a/README.md b/README.md
@@ -85,6 +85,12 @@ Frequently used flags:
 | `--force` | Re-translate even if the output exists |
 | `-v, --verbose` | Show retry/validation warnings (hidden by default) |
 | `-o, --output` | Output path (single file only) |
+| `--scan-budget` | Chars sent to the prepass scan (default **24000**). Lower on tight-context local models (~8k window); raise on large-context cloud models for full-file scans. |
+| `--context-overlap` | Number of previous-batch source blocks shown as read-only context (default **2**, `0` to disable). Helps speaker continuity across batch boundaries. |
+| `--no-review` | Disable the post-edit review pass. Saves one extra LLM call per batch — useful on metered providers. |
+| `--no-refine-attribution` | Disable per-block speaker attribution for mixed-gender scenes (saves one small call per ambiguous scene). |
+
+The defaults are tuned for best translation quality. On metered cloud providers you can pass `--no-review` and/or `--no-refine-attribution` to cut LLM calls. On tight-context local models, lower `--scan-budget` (e.g. `8000`) so the scan prompt fits.
 
 Set `NO_COLOR=1` to disable ANSI colors; output auto-falls back to plain lines when piped.
 

diff --git a/cli/core/batch_runner.py b/cli/core/batch_runner.py
@@ -8,15 +8,20 @@
 
 import httpx
 
+from .config import TranslationConfig
+from .constants import (
+    ATTEMPTS_BEFORE_SPLIT,
+    CRED_QUERY_PARAMS,
+    REQUEST_TIMEOUT_SECS,
+)
 from .context_pass import FileContext
+from .prompt import (
+    REVIEW_SYSTEM_PROMPT,
+    SYSTEM_PROMPT,
+    build_review_user_message,
+    build_translate_user_message,
+)
 from .srt_parser import SubtitleBlock, parse_lite, serialize_lite, validate_batch
-from .config import TranslationConfig
-from .prompt import SYSTEM_PROMPT
-
-
-REQUEST_TIMEOUT_SECS = 120.0
-
-_CRED_QUERY_PARAMS = {"key", "api_key", "apikey", "access_token"}
 
 
 class FileTranslationError(Exception):
@@ -31,7 +36,7 @@ def sanitize_api_url(url: str) -> str:
     try:
         parts = urlsplit(url)
         kept = [(k, v) for k, v in parse_qsl(parts.query, keep_blank_values=True)
-                if k.lower() not in _CRED_QUERY_PARAMS]
+                if k.lower() not in CRED_QUERY_PARAMS]
         return urlunsplit((parts.scheme, parts.netloc, parts.path,
                            urlencode(kept), parts.fragment))
     except Exception:
@@ -90,24 +95,35 @@ async def call_chat_api(
     return resp.json()["choices"][0]["message"]["content"]
 
 
-def _build_user_message(
+async def _review_pass(
+    client: httpx.AsyncClient,
+    batch: list[SubtitleBlock],
+    first_pass: list[SubtitleBlock],
     cfg: TranslationConfig,
-    batch_wire: str,
     file_context: FileContext | None,
-    batch: list[SubtitleBlock],
-) -> str:
-    if cfg.source_lang:
-        header = f"Translate from {cfg.source_lang} to {cfg.target_lang}:"
-    else:
-        header = f"Translate to {cfg.target_lang}:"
-    if file_context is not None:
-        ctx = file_context.render_for_batch(batch)
-        if ctx:
-            return f"Glossary for this scene:\n{ctx}\n\n{header}\n\n{batch_wire}"
-    return f"{header}\n\n{batch_wire}"
-
-
-_ATTEMPTS_BEFORE_SPLIT = 2
+) -> list[SubtitleBlock]:
+    """Re-check first-pass against the glossary; returns first-pass unchanged
+    if there's no glossary, the review call raises, or the review output is invalid."""
+    glossary = file_context.render_for_batch(batch) if file_context else ""
+    if not glossary:
+        return first_pass
+    user_msg = build_review_user_message(batch, first_pass, glossary)
+    try:
+        raw = await call_chat_api(
+            client, REVIEW_SYSTEM_PROMPT, user_msg, cfg, max(len(batch), 1) * 120)
+    except Exception as e:
+        cfg.warn(f"    Review failed, keeping first-pass: {e}")
+        return first_pass
+    parsed = parse_lite(strip_markdown_fences(raw))
+    if len(parsed) != len(batch):
+        return first_pass
+    revised = [
+        SubtitleBlock(number=batch[i].number,
+                      timestamp=batch[i].timestamp,
+                      text=parsed[i].text)
+        for i in range(len(batch))
+    ]
+    return revised if validate_batch(batch, revised).ok else first_pass
 
 
 async def translate_batch_with_retry(
@@ -117,6 +133,7 @@ async def translate_batch_with_retry(
     cfg: TranslationConfig,
     file_context: FileContext | None = None,
     _split_path: str = "",
+    prev_tail: list[SubtitleBlock] | None = None,
 ) -> list[SubtitleBlock]:
     """Translate one batch; on repeated validation failure, halve and recurse.
 
@@ -125,12 +142,15 @@ async def translate_batch_with_retry(
     because at N=1 a count mismatch is impossible.
     """
     batch_wire = serialize_lite(batch)
-    user_msg = _build_user_message(cfg, batch_wire, file_context, batch)
+    glossary = file_context.render_for_batch(batch) if file_context else ""
+    user_msg = build_translate_user_message(
+        cfg.source_lang, cfg.target_lang, batch_wire, glossary, prev_tail or [],
+    )
     label = f"Batch {batch_idx + 1}" + (f".{_split_path}" if _split_path else "")
     first_block = batch[0].number
 
     can_split = len(batch) > 1
-    attempts = _ATTEMPTS_BEFORE_SPLIT if can_split else cfg.max_retries
+    attempts = ATTEMPTS_BEFORE_SPLIT if can_split else cfg.max_retries
     hit_validation_failure = False
 
     for attempt in range(1, attempts + 1):
@@ -149,6 +169,10 @@ async def translate_batch_with_retry(
                 ]
             check = validate_batch(batch, output)
             if check.ok:
+                if cfg.review:
+                    output = await _review_pass(
+                        client, batch, output, cfg, file_context,
+                    )
                 return output
             hit_validation_failure = True
             cfg.warn(f"    {label} validation failed ({tag}): {check.error}")
@@ -185,9 +209,14 @@ async def translate_batch_with_retry(
         # Sequential: parallel halves would oversubscribe the outer semaphore.
         left_result = await translate_batch_with_retry(
             client, batch_idx, left, cfg, file_context, left_path,
+            prev_tail=prev_tail,
         )
+        # Clamp: a negative overlap would turn the tail slice into left[k:].
+        overlap = max(cfg.context_overlap, 0)
+        right_prev = left[-overlap:] if overlap > 0 else []
         right_result = await translate_batch_with_retry(
             client, batch_idx, right, cfg, file_context, right_path,
+            prev_tail=right_prev,
         )
         return left_result + right_result
 

diff --git a/cli/core/config.py b/cli/core/config.py
@@ -4,8 +4,13 @@
 from dataclasses import dataclass, field
 from typing import Callable
 
-
-DEFAULT_MAX_RETRIES = 5
+from .constants import (
+    DEFAULT_BATCH_SIZE,
+    DEFAULT_CONCURRENCY,
+    DEFAULT_CONTEXT_OVERLAP,
+    DEFAULT_MAX_RETRIES,
+    DEFAULT_SCAN_CHAR_BUDGET,
+)
 
 
 def _silent_warn(msg: str) -> None:
@@ -25,9 +30,15 @@ class TranslationConfig:
     api_url: str
     api_key: str
     model: str | None = None
-    batch_size: int = 10
-    concurrency: int = 1
+    batch_size: int = DEFAULT_BATCH_SIZE
+    concurrency: int = DEFAULT_CONCURRENCY
     max_retries: int = DEFAULT_MAX_RETRIES
+    scan_char_budget: int = DEFAULT_SCAN_CHAR_BUDGET
+    context_overlap: int = DEFAULT_CONTEXT_OVERLAP
+    # One small LLM call per ambiguous scene; fixes cross-gender addressee slips.
+    refine_attribution: bool = True
+    # One extra call per batch; fixes gender/number/consistency slips. Doubles cost.
+    review: bool = True
     quiet: bool = False
     verbose: bool = False
     warn: Callable[[str], None] = field(default=_silent_warn)
diff --git a/cli/core/constants.py b/cli/core/constants.py
@@ -0,0 +1,24 @@
+"""Public defaults and tuning constants shared across CLI modules."""
+
+# === Translation defaults (used directly as TranslationConfig field defaults) ===
+DEFAULT_BATCH_SIZE = 10
+DEFAULT_CONCURRENCY = 1
+DEFAULT_MAX_RETRIES = 5
+# Sized for full-quality scans on typical TV episodes; lower on tight-context
+# local models (~8k window), raise on large-context cloud models.
+DEFAULT_SCAN_CHAR_BUDGET = 24_000
+DEFAULT_CONTEXT_OVERLAP = 2
+
+# === Prepass / attribution scan ===
+SCAN_MAX_TOKENS = 3000
+# 2-char names collide with common target-language words.
+MIN_NAME_LEN = 3
+# Single-block scenes never need per-block speaker attribution.
+ATTRIB_MIN_BLOCKS = 3
+
+# === Batch retry/split ===
+ATTEMPTS_BEFORE_SPLIT = 2
+
+# === HTTP ===
+REQUEST_TIMEOUT_SECS = 120.0
+CRED_QUERY_PARAMS = frozenset({"key", "api_key", "apikey", "access_token"})