From dec778fe22b47a175fb3005ae3b6fa4ce15c201a Mon Sep 17 00:00:00 2001 From: mgros Date: Fri, 5 Jun 2026 23:06:37 +0200 Subject: [PATCH 1/4] Add source language option to translation audit tool Allow audit-translations to compare a target language against any source language via --source, defaulting to English for existing workflows. Update CLI output, docs, and tests for dynamic source/target labels, including Swedish/Norwegian comparison coverage. Normalize Rich ANSI output in tests so golden and string assertions remain stable when terminal color is forced. --- PythonScripts/audit_translations/README.md | 19 ++-- PythonScripts/audit_translations/__init__.py | 2 +- PythonScripts/audit_translations/auditor.py | 46 ++++++---- PythonScripts/audit_translations/cli.py | 5 +- PythonScripts/audit_translations/differ.py | 2 +- PythonScripts/audit_translations/models.py | 10 +- PythonScripts/audit_translations/renderer.py | 23 +++-- .../audit_translations/tests/conftest.py | 34 +++++++ .../golden/rich/cli_calculus_verbose.golden | 6 +- .../rich/structure_diff_nonverbose.golden | 2 +- .../golden/rich/structure_diff_verbose.golden | 2 +- .../audit_translations/tests/test_auditor.py | 92 ++++++++++++++++--- .../tests/test_cli_end_to_end.py | 37 ++++++-- 13 files changed, 213 insertions(+), 67 deletions(-) diff --git a/PythonScripts/audit_translations/README.md b/PythonScripts/audit_translations/README.md index ee4af211d..b4d611d94 100644 --- a/PythonScripts/audit_translations/README.md +++ b/PythonScripts/audit_translations/README.md @@ -1,15 +1,15 @@ # MathCAT Translation Audit Tool -This tool compares English YAML rule files with translated versions to identify translation gaps and formatting issues. It assists translators in ensuring their translations are complete, consistent, and properly formatted. +This tool compares YAML rule files from a source language with translated versions to identify translation gaps and formatting issues. It assists translators in ensuring their translations are complete, consistent, and properly formatted. ### 🔍 Detection Capabilities The tool analyzes rule files to detect the following issues: -* **Missing Rules:** Rules present in the master English file but missing in the target translation. -* **Extra Rules:** Rules present in the translation but absent in English (flagged as potentially intentional language-specific additions). +* **Missing Rules:** Rules present in the source file but missing in the target translation. +* **Extra Rules:** Rules present in the target translation but absent in the source (flagged as potentially intentional language-specific additions). * **Untranslated Text:** Detects text keys that still use **lowercase** formatting, indicating they haven't been verified or translated yet. -* **Rule Differences:** Structural changes (match expressions, conditions, variables, or test/replace layout) between English and the translation. +* **Rule Differences:** Structural changes (match expressions, conditions, variables, or test/replace layout) between the source and target translation. Add `# audit-ignore` to a rule block to suppress auditing that rule. @@ -57,20 +57,23 @@ The tool automatically adjusts its matching logic based on the file type: **Syntax:** ```bash uv run audit-translations [--file ] +uv run audit-translations --source uv run audit-translations --list # If running from the repo root, point uv at the project: uv run --project PythonScripts audit-translations +uv run --project PythonScripts audit-translations --source uv run --project PythonScripts audit-translations --list ``` **Convenience Features:** * `--list`: Displays all available languages. * Region variants are shown as `lang-region` (e.g., `zz-aa`) based on subdirectories under `Rules/Languages/`. +* `--source`: Sets the source/reference language. Defaults to `en`. * `--file`: Audits a single specific file instead of the whole directory. * `--rules-dir`: Override the Rules/Languages directory path. * `--only`: Filter issue types (comma-separated): `missing`, `untranslated`, `extra`, `diffs`, `all`. -* `--verbose`: Show detailed output including English/translated snippets for rule differences. +* `--verbose`: Show detailed output including source/target snippets for rule differences. * **Summary Stats:** Provides a statistical summary after every run. **Examples:** @@ -88,19 +91,23 @@ uv run audit-translations es # Audit German translations uv run audit-translations de +# Compare Norwegian Bokmal against Swedish instead of English +uv run audit-translations nb --source sv + # Audit only a specific file uv run audit-translations es --file SharedRules/default.yaml # Audit a regional variant (merges Rules/Languages/de and Rules/Languages/de/CH) uv run audit-translations de-CH -# Show detailed output with English/translated snippets for rule differences +# Show detailed output with source/target snippets for rule differences uv run audit-translations es --verbose ``` **Running from the repo root (without `cd PythonScripts`):** ```bash uv run --project PythonScripts audit-translations es +uv run --project PythonScripts audit-translations nb --source sv uv run --project PythonScripts audit-translations --list ``` diff --git a/PythonScripts/audit_translations/__init__.py b/PythonScripts/audit_translations/__init__.py index 1d0764943..5773302a7 100644 --- a/PythonScripts/audit_translations/__init__.py +++ b/PythonScripts/audit_translations/__init__.py @@ -1,7 +1,7 @@ """ MathCAT Translation Audit Tool -Compares English YAML rule files with translated versions to identify translation +Compares source YAML rule files with translated versions to identify translation gaps and issues. This tool helps translators ensure their translations are complete and properly formatted. diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index 3c333628b..efc14790e 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -1,7 +1,7 @@ """ Auditing and comparison logic. -Contains functions for comparing English and translated files, +Contains functions for comparing source and translated files, and for performing full language audits. """ @@ -60,7 +60,7 @@ def compare_files( translated_region_path: Path | None = None, english_region_path: Path | None = None, ) -> ComparisonResult: - """Compare English and translated YAML files""" + """Compare source and translated YAML files""" def load_rules(path: Path | None) -> list[RuleInfo]: if path and path.exists(): @@ -95,14 +95,14 @@ def merge_rules(base_rules: list[RuleInfo], region_rules: list[RuleInfo]) -> lis include_extra = include_all or "extra" in issue_filter include_diffs = include_all or "diffs" in issue_filter - # Find missing rules (in English but not in translation) + # Find missing rules (in source but not in translation) missing_rules = [] if include_missing: for key, rule in english_by_key.items(): if key not in translated_by_key: missing_rules.append(rule) - # Find extra rules (in translation but not in English) + # Find extra rules (in translation but not in source) extra_rules = [] if include_extra: for key, rule in translated_by_key.items(): @@ -142,29 +142,35 @@ def audit_language( rules_dir: str | None = None, issue_filter: set[str] | None = None, verbose: bool = False, + source_language: str = "en", ) -> int: """Audit translations for a specific language. Returns total issue count.""" rules_dir_path = get_rules_dir(rules_dir) - english_dir = rules_dir_path / "en" - base_language, region = split_language_into_base_and_region(language) - translated_dir = rules_dir_path / base_language - translated_region_dir = translated_dir / region if region else None - english_region_dir = english_dir / region if region else None + source_base_language, source_region = split_language_into_base_and_region(source_language) + source_dir = rules_dir_path / source_base_language + source_region_dir = source_dir / source_region if source_region else None - if not english_dir.exists(): - raise AuditError(f"English rules directory not found: {english_dir}") + target_base_language, target_region = split_language_into_base_and_region(language) + translated_dir = rules_dir_path / target_base_language + translated_region_dir = translated_dir / target_region if target_region else None + + if not source_dir.exists(): + raise AuditError(f"Source rules directory not found: {source_dir}") + + if source_region and not (source_region_dir and source_region_dir.exists()): + raise AuditError(f"Source region directory not found: {source_region_dir}") if not translated_dir.exists(): - raise AuditError(f"Translation directory not found: {translated_dir}") + raise AuditError(f"Target rules directory not found: {translated_dir}") - if region and not (translated_region_dir and translated_region_dir.exists()): - raise AuditError(f"Region directory not found: {translated_region_dir}") + if target_region and not (translated_region_dir and translated_region_dir.exists()): + raise AuditError(f"Target region directory not found: {translated_region_dir}") # Get list of files to audit - files = [specific_file] if specific_file else get_yaml_files(english_dir, english_region_dir) + files = [specific_file] if specific_file else get_yaml_files(source_dir, source_region_dir) - print_audit_header(language, len(files)) + print_audit_header(language, len(files), source_language) total_issues = 0 total_missing = 0 @@ -175,13 +181,13 @@ def audit_language( files_ok = 0 for file_name in files: - english_path = english_dir / file_name + english_path = source_dir / file_name translated_path = translated_dir / file_name translated_region_path = translated_region_dir / file_name if translated_region_dir else None - english_region_path = english_region_dir / file_name if english_region_dir else None + english_region_path = source_region_dir / file_name if source_region_dir else None if not english_path.exists(): - console.print(f"\n[yellow]⚠ Warning:[/] English file not found: {english_path}") + console.print(f"\n[yellow]⚠ Warning:[/] Source file not found: {english_path}") continue result = compare_files( @@ -193,7 +199,7 @@ def audit_language( ) if result.has_issues: - issues = print_warnings(result, file_name, verbose, language) + issues = print_warnings(result, file_name, verbose, language, source_language) if issues > 0: files_with_issues += 1 total_issues += issues diff --git a/PythonScripts/audit_translations/cli.py b/PythonScripts/audit_translations/cli.py index aaba14449..ee1b48739 100644 --- a/PythonScripts/audit_translations/cli.py +++ b/PythonScripts/audit_translations/cli.py @@ -17,17 +17,19 @@ def main() -> None: sys.stdout.reconfigure(encoding="utf-8") parser = argparse.ArgumentParser( - description="Audit MathCAT translation files against English originals", + description="Audit MathCAT translation files against a source language", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: uv run audit-translations es + uv run audit-translations nb --source sv uv run audit-translations de --file SharedRules/default.yaml uv run audit-translations --list """, ) parser.add_argument("language", nargs="?", help="Language code to audit (e.g., 'es', 'de', 'fi')") + parser.add_argument("--source", default="en", help="Source/reference language code (default: 'en')") parser.add_argument("--file", dest="specific_file", help="Audit only a specific file (e.g., 'SharedRules/default.yaml')") parser.add_argument("--list", action="store_true", help="List available languages") parser.add_argument("--rules-dir", help="Override Rules/Languages directory path") @@ -68,6 +70,7 @@ def main() -> None: args.rules_dir, issue_filter, args.verbose, + args.source, ) except AuditError as exc: console.print(f"\n[red]✗ Error:[/] {exc}") diff --git a/PythonScripts/audit_translations/differ.py b/PythonScripts/audit_translations/differ.py index 574c43993..5ae4483ce 100644 --- a/PythonScripts/audit_translations/differ.py +++ b/PythonScripts/audit_translations/differ.py @@ -1,7 +1,7 @@ """ Rule diffing logic. -Compares English and translated rules to find fine-grained structural differences. +Compares source and translated rules to find fine-grained structural differences. """ from .extractors import ( diff --git a/PythonScripts/audit_translations/models.py b/PythonScripts/audit_translations/models.py index a54dadb72..350ffed33 100644 --- a/PythonScripts/audit_translations/models.py +++ b/PythonScripts/audit_translations/models.py @@ -25,7 +25,7 @@ class IssueType(StrEnum): class DiffType(StrEnum): """Rule-difference subcategories used for fine-grained diagnostics.""" - MATCH = "match" # `match` XPath differs between English and translation. + MATCH = "match" # `match` XPath differs between source and translation. CONDITION = "condition" # `if` / `test` condition expressions differ. VARIABLES = "variables" # Variable names defined in `variables` differ. STRUCTURE = "structure" # Control-flow block shape/order differs (if/then/else/with/replace). @@ -91,7 +91,7 @@ def untranslated_keys(self) -> list[str]: @dataclass class RuleDifference: - """Fine-grained difference between English and translated rule""" + """Fine-grained difference between source and translated rule""" english_rule: RuleInfo translated_rule: RuleInfo @@ -107,10 +107,10 @@ def __post_init__(self) -> None: @dataclass class ComparisonResult: - """Results from comparing English and translated files""" + """Results from comparing source and translated files""" - missing_rules: list[RuleInfo] # Rules in English but not in translation - extra_rules: list[RuleInfo] # Rules in translation but not in English + missing_rules: list[RuleInfo] # Rules in source but not in translation + extra_rules: list[RuleInfo] # Rules in translation but not in source untranslated_text: list[tuple[RuleInfo, list[UntranslatedEntry]]] english_rule_count: int translated_rule_count: int diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index 2d0f975d3..a49d9d5c5 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -44,16 +44,23 @@ def rule_label(rule: RuleInfo) -> str: return f"[cyan]{escape(rule.name)}[/] [dim]({escape(tag)})[/]" +def language_label(language: str) -> str: + """Normalize a language code for display.""" + return language.lower().replace("_", "-") + + def print_warnings( result: ComparisonResult, file_name: str, verbose: bool = False, target_language: str = "tr", + source_language: str = "en", ) -> int: """Print warnings to console. Returns count of issues found.""" issues = 0 display_name = Path(file_name).as_posix() - target_label = target_language.lower().replace("_", "-") + source_label = language_label(source_language) + target_label = language_label(target_language) if not result.has_issues: return issues @@ -68,7 +75,7 @@ def print_warnings( console.print() console.rule(style="cyan") console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]") - console.print(f" [dim]English: {result.english_rule_count} rules → Translated: {result.translated_rule_count} rules[/]") + console.print(f" [dim]{source_label}: {result.english_rule_count} rules → {target_label}: {result.translated_rule_count} rules[/]") console.rule(style="cyan") grouped_issues: dict[str, dict[str, Any]] = {} @@ -120,7 +127,7 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any]) console.print(f" [dim]{ISSUE_GROUP_LABELS[group_key]} [{len(entries)}][/]") for entry in entries: if issue_type is IssueType.MISSING_RULE: - console.print(f" [dim]•[/] [dim](line {entry['line_en']} in English)[/]") + console.print(f" [dim]•[/] [dim](line {entry['line_en']} in {source_label})[/]") elif issue_type is IssueType.EXTRA_RULE: console.print(f" [dim]•[/] [dim](line {entry['line_tr']} in {target_label})[/]") elif issue_type is IssueType.UNTRANSLATED_TEXT: @@ -131,11 +138,11 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any]) else: diff: RuleDifference = entry["diff"] console.print( - f" [dim]•[/] [dim](line {entry['line_en']} en, {entry['line_tr']} {target_label})[/]" + f" [dim]•[/] [dim](line {entry['line_en']} {source_label}, {entry['line_tr']} {target_label})[/]" ) console.print(f" [dim]{diff.description}[/]") if verbose: - console.print(f" [green]en:[/] {escape(diff.english_snippet)}") + console.print(f" [green]{source_label}:[/] {escape(diff.english_snippet)}") console.print(f" [red]{target_label}:[/] {escape(diff.translated_snippet)}") issues += len(entries) @@ -155,10 +162,10 @@ def file_count_color(file_count: int) -> str: return "red" -def print_audit_header(language: str, file_count: int) -> None: +def print_audit_header(language: str, file_count: int, source_language: str = "en") -> None: """Print the audit header panel.""" console.print(Panel(f"MathCAT Translation Audit: {language.upper()}", style="bold cyan")) - console.print("\n [dim]Comparing against English (en) reference files[/]") + console.print(f"\n [dim]Comparing against {language_label(source_language)} reference files[/]") console.print(f" [dim]Files to check: {file_count}[/]") @@ -197,4 +204,4 @@ def print_language_list(languages: list[tuple[str, int]]) -> None: table.add_row(code, f"[{color}]{count}[/] files") console.print(table) - console.print("\n [dim]Reference: en (English) - base translation[/]\n") + console.print("\n [dim]Default reference: en; use --source to compare against another language[/]\n") diff --git a/PythonScripts/audit_translations/tests/conftest.py b/PythonScripts/audit_translations/tests/conftest.py index c58afaada..b5c844b41 100644 --- a/PythonScripts/audit_translations/tests/conftest.py +++ b/PythonScripts/audit_translations/tests/conftest.py @@ -1,4 +1,38 @@ +""" +Shared pytest configuration for audit translation tests. + +Rich can emit ANSI styling codes into captured test output when a terminal or +environment variable forces color output. That made string and golden-output +assertions fail on some machines even though the visible CLI output was correct. +These helpers normalize captured renderer/CLI output so tests compare the text +users see, not terminal control bytes. +""" + import sys +import re + +import pytest + +from audit_translations.renderer import console # needed for running tests on Windows sys.stdout.reconfigure(encoding="utf-8") + +ANSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]") + + +def strip_ansi(text: str) -> str: + """Remove ANSI escape sequences from Rich output captured in tests.""" + return ANSI_RE.sub("", text) + + +@pytest.fixture(autouse=True) +def deterministic_rich_output(): + """Keep Rich output assertions stable when the shell forces ANSI colors.""" + old_no_color = console.no_color + old_force_terminal = console._force_terminal + console.no_color = True + console._force_terminal = False + yield + console.no_color = old_no_color + console._force_terminal = old_force_terminal diff --git a/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden b/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden index 776f9f3b2..a75c81d3d 100644 --- a/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden +++ b/PythonScripts/audit_translations/tests/golden/rich/cli_calculus_verbose.golden @@ -2,18 +2,18 @@ │ MathCAT Translation Audit: ES │ ╰──────────────────────────────────────────────────────────────────────────────╯ - Comparing against English (en) reference files + Comparing against en reference files Files to check: 1 ──────────────────────────────────────────────────────────────────────────────── ⚠ SharedRules/calculus.yaml - English: 4 rules → Translated: 3 rules + en: 4 rules → es: 3 rules ──────────────────────────────────────────────────────────────────────────────── ≠ Rule Issues [13] (grouped by rule and issue type) • laplacian (laplacian) Missing in Translation [1] - • (line 4 in English) + • (line 4 in en) • divergence (divergence) Untranslated Text [3] • (line 10 es) "divergence" diff --git a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden index 868bf0071..ad5320aa8 100644 --- a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden +++ b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_nonverbose.golden @@ -1,7 +1,7 @@ ──────────────────────────────────────────────────────────────────────────────── ✓ structure_diff.yaml - English: 1 rules → Translated: 1 rules + en: 1 rules → tr: 1 rules ──────────────────────────────────────────────────────────────────────────────── ≠ Rule Issues [1] (grouped by rule and issue type) diff --git a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden index bde66a075..efd4de940 100644 --- a/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden +++ b/PythonScripts/audit_translations/tests/golden/rich/structure_diff_verbose.golden @@ -1,7 +1,7 @@ ──────────────────────────────────────────────────────────────────────────────── ✓ structure_diff.yaml - English: 1 rules → Translated: 1 rules + en: 1 rules → tr: 1 rules ──────────────────────────────────────────────────────────────────────────────── ≠ Rule Issues [1] (grouped by rule and issue type) diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index e1cd94b73..fafc59a3b 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -6,10 +6,11 @@ import pytest -from ..auditor import compare_files, get_yaml_files, list_languages +from ..auditor import audit_language, compare_files, get_yaml_files, list_languages from ..line_resolver import resolve_diff_lines from ..models import ComparisonResult, DiffType, RuleDifference, RuleInfo, UntranslatedEntry from ..renderer import console, print_warnings +from .conftest import strip_ansi @pytest.fixture() @@ -234,6 +235,73 @@ def test_compare_files_skips_untranslated_and_diffs_when_audit_ignored(tmp_path) assert result.rule_differences == [] +def test_audit_language_uses_configurable_source_language(tmp_path, fixed_console_width) -> None: + """ + Ensure non-English source comparisons remain directional. + + Source rules define missing items and source-side snippets. Target rules + define extra items and target-only untranslated text. + """ + rules_dir = tmp_path / "Rules" / "Languages" + source_dir = rules_dir / "sv" + target_dir = rules_dir / "nb" + source_dir.mkdir(parents=True) + target_dir.mkdir(parents=True) + + (source_dir / "sample.yaml").write_text( + """- name: common-rule + tag: mo + match: "self::m:mo" + replace: + - T: "source" +- name: source-only + tag: mi + match: "." + replace: + - T: "source only" +""", + encoding="utf-8", + ) + (target_dir / "sample.yaml").write_text( + """- name: common-rule + tag: mo + match: "self::m:mi" + replace: + - t: "target" +- name: target-only + tag: mi + match: "." + replace: + - T: "target only" +""", + encoding="utf-8", + ) + + with console.capture() as capture: + total_issues = audit_language( + "nb", + specific_file="sample.yaml", + rules_dir=str(rules_dir), + verbose=True, + source_language="sv", + ) + output = strip_ansi(capture.get()) + + assert total_issues == 4 + assert "Comparing against sv reference files" in output + assert "sv: 2 rules → nb: 2 rules" in output + assert "Missing in Translation [1]" in output + assert "(line 6 in sv)" in output + assert "Extra in Translation [1]" in output + assert "(line 6 in nb)" in output + assert 'Untranslated Text [1]' in output + assert '(line 5 nb) "target"' in output + assert "Match Pattern Differences [1]" in output + assert "(line 3 sv, 3 nb)" in output + assert "sv: self::m:mo" in output + assert "nb: self::m:mi" in output + + def test_get_yaml_files_includes_region(tmp_path) -> None: """ Ensures get_yaml_files merges base and region file lists. @@ -273,7 +341,7 @@ def test_list_languages_includes_region_codes(tmp_path) -> None: with console.capture() as capture: list_languages(str(rules_dir)) - output = capture.get() + output = strip_ansi(capture.get()) assert "zz" in output assert "zz-aa" in output @@ -298,7 +366,7 @@ def test_list_languages_ignores_sharedrules_as_region(tmp_path) -> None: with console.capture() as capture: list_languages(str(rules_dir)) - output = capture.get() + output = strip_ansi(capture.get()) assert "zz-aa" in output assert "zz-SharedRules" not in output @@ -320,7 +388,7 @@ def test_print_warnings_omits_snippets_when_not_verbose(fixed_console_width) -> with console.capture() as capture: print_warnings(result, "structure_diff.yaml", verbose=False) - output = capture.get() + output = strip_ansi(capture.get()) assert output == golden_path.read_text(encoding="utf-8") @@ -341,7 +409,7 @@ def test_print_warnings_includes_snippets_when_verbose(fixed_console_width) -> N with console.capture() as capture: print_warnings(result, "structure_diff.yaml", verbose=True) - output = capture.get() + output = strip_ansi(capture.get()) assert output == golden_path.read_text(encoding="utf-8") @@ -511,7 +579,7 @@ def test_print_warnings_shows_misaligned_structures() -> None: with console.capture() as capture: issues_count = print_warnings(result, "structure_misaligned.yaml", verbose=False) - output = capture.get() + output = strip_ansi(capture.get()) # Misaligned structure differences should be rendered. assert "Rule structure differs" in output, "Expected misaligned structure differences to be shown in display" @@ -538,7 +606,7 @@ def test_print_warnings_still_shows_missing_else() -> None: with console.capture() as capture: issues_count = print_warnings(result, "structure_missing_else.yaml", verbose=False) - output = capture.get() + output = strip_ansi(capture.get()) # CRITICAL: This legitimate difference should appear in output assert "Rule structure differs" in output, "Expected missing else block to be shown in output" @@ -597,7 +665,7 @@ def test_print_warnings_groups_multiple_subgroups_for_single_rule(fixed_console_ with console.capture() as capture: issues_count = print_warnings(result, "grouped.yaml", verbose=False) - output = capture.get() + output = strip_ansi(capture.get()) assert output.count("• grouped-rule (mi)") == 1 assert "Untranslated Text [2]" in output @@ -648,7 +716,7 @@ def test_print_warnings_groups_missing_and_extra_by_rule(fixed_console_width) -> with console.capture() as capture: issues_count = print_warnings(result, "mixed.yaml", verbose=False) - output = capture.get() + output = strip_ansi(capture.get()) assert output.count("• missing-rule (mn)") == 1 assert output.count("• extra-rule (mo)") == 1 @@ -692,13 +760,13 @@ def test_print_warnings_verbose_shows_snippets_only_for_differences(fixed_consol with console.capture() as capture: issues_count = print_warnings(result, "verbose.yaml", verbose=True) - output = capture.get() + output = strip_ansi(capture.get()) assert "Missing in Translation [1]" in output assert "Untranslated Text [1]" in output assert "Match Pattern Differences [1]" in output - assert output.count("en:") == 1 - assert output.count("tr:") == 1 + assert output.count(" en:") == 1 + assert output.count(" tr:") == 1 assert "en-snippet" in output assert "tr-snippet" in output assert issues_count == 3 diff --git a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py index 2547b9eca..c4c8364b7 100644 --- a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py +++ b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py @@ -13,6 +13,7 @@ from .. import cli as audit_cli from ..renderer import console +from .conftest import strip_ansi def fixture_rules_dir() -> Path: @@ -33,7 +34,7 @@ def test_cli_main_rich_only_filters_issue_groups(capsys, monkeypatch) -> None: try: monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) audit_cli.main() - output = capsys.readouterr().out + output = strip_ansi(capsys.readouterr().out) finally: console.width = old_width @@ -46,6 +47,26 @@ def test_cli_main_rich_only_filters_issue_groups(capsys, monkeypatch) -> None: assert "Structure Differences" not in output +def test_cli_main_accepts_source_language(capsys, monkeypatch) -> None: + """ + Ensure --source changes the reference language without changing target semantics. + """ + args = ["en", "--source", "es", "--rules-dir", str(fixture_rules_dir()), "--file", "overview.yaml"] + + old_width = console.width + console.width = 80 + try: + monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) + audit_cli.main() + output = strip_ansi(capsys.readouterr().out) + finally: + console.width = old_width + + assert "Comparing against es reference files" in output + assert "es:" in output + assert "en:" in output + + def test_cli_main_rich_output_groups_by_rule_and_type(capsys, monkeypatch) -> None: """ Ensure rich CLI output is grouped by rule and subgrouped by issue type. @@ -61,7 +82,7 @@ def test_cli_main_rich_output_groups_by_rule_and_type(capsys, monkeypatch) -> No try: monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) audit_cli.main() - output = capsys.readouterr().out + output = strip_ansi(capsys.readouterr().out) finally: console.width = old_width @@ -93,7 +114,7 @@ def test_cli_main_rich_output_matches_grouped_golden(capsys, monkeypatch) -> Non try: monkeypatch.setattr(sys, "argv", ["audit_translations", *args]) audit_cli.main() - output = capsys.readouterr().out + output = strip_ansi(capsys.readouterr().out) finally: console.width = old_width @@ -110,7 +131,7 @@ def test_cli_main_requires_language_or_list(capsys, monkeypatch) -> None: with pytest.raises(SystemExit) as exc: audit_cli.main() - output = capsys.readouterr().out + output = strip_ansi(capsys.readouterr().out) assert exc.value.code == 1 assert "Please specify a language code or use --list" in output @@ -127,7 +148,7 @@ def test_cli_main_rejects_unknown_only_token(capsys, monkeypatch) -> None: with pytest.raises(SystemExit) as exc: audit_cli.main() - output = capsys.readouterr().out + output = strip_ansi(capsys.readouterr().out) assert exc.value.code == 1 assert "Unknown issue types: bogus" in output @@ -144,10 +165,10 @@ def test_cli_main_reports_missing_region_directory(capsys, monkeypatch) -> None: with pytest.raises(SystemExit) as exc: audit_cli.main() - output = capsys.readouterr().out + output = strip_ansi(capsys.readouterr().out) assert exc.value.code == 1 - assert "Region directory not found" in output + assert "Target region directory not found" in output def test_cli_module_rich_output_groups_by_rule_and_type() -> None: @@ -173,7 +194,7 @@ def test_cli_module_rich_output_groups_by_rule_and_type() -> None: check=True, ) - output = result.stdout + output = strip_ansi(result.stdout) assert "≠ Rule Issues [13] (grouped by rule and issue type)" in output assert "• laplacian (laplacian)" in output assert "• divergence (divergence)" in output From 7ee775a9fcced17dacdd7fad201ba3055c49fb59 Mon Sep 17 00:00:00 2001 From: mgros Date: Fri, 5 Jun 2026 23:14:07 +0200 Subject: [PATCH 2/4] fix ruff --- PythonScripts/audit_translations/renderer.py | 8 ++++++-- PythonScripts/audit_translations/tests/conftest.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index a49d9d5c5..38da74346 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -75,7 +75,10 @@ def print_warnings( console.print() console.rule(style="cyan") console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]") - console.print(f" [dim]{source_label}: {result.english_rule_count} rules → {target_label}: {result.translated_rule_count} rules[/]") + console.print( + f" [dim]{source_label}: {result.english_rule_count} rules → " + f"{target_label}: {result.translated_rule_count} rules[/]" + ) console.rule(style="cyan") grouped_issues: dict[str, dict[str, Any]] = {} @@ -138,7 +141,8 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any]) else: diff: RuleDifference = entry["diff"] console.print( - f" [dim]•[/] [dim](line {entry['line_en']} {source_label}, {entry['line_tr']} {target_label})[/]" + f" [dim]•[/] [dim](line {entry['line_en']} {source_label}, " + f"{entry['line_tr']} {target_label})[/]" ) console.print(f" [dim]{diff.description}[/]") if verbose: diff --git a/PythonScripts/audit_translations/tests/conftest.py b/PythonScripts/audit_translations/tests/conftest.py index b5c844b41..a78b24283 100644 --- a/PythonScripts/audit_translations/tests/conftest.py +++ b/PythonScripts/audit_translations/tests/conftest.py @@ -8,8 +8,8 @@ users see, not terminal control bytes. """ -import sys import re +import sys import pytest From d86d7e5e6d254f76d5d1f047351196aeea92ee18 Mon Sep 17 00:00:00 2001 From: mgros Date: Fri, 5 Jun 2026 23:14:07 +0200 Subject: [PATCH 3/4] fix ruff --- PythonScripts/audit_translations/renderer.py | 7 +++++-- PythonScripts/audit_translations/tests/conftest.py | 2 +- PythonScripts/audit_translations/tests/test_auditor.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index a49d9d5c5..c76da6b2c 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -75,7 +75,9 @@ def print_warnings( console.print() console.rule(style="cyan") console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]") - console.print(f" [dim]{source_label}: {result.english_rule_count} rules → {target_label}: {result.translated_rule_count} rules[/]") + console.print( + f" [dim]{source_label}: {result.english_rule_count} rules → {target_label}: {result.translated_rule_count} rules[/]" + ) console.rule(style="cyan") grouped_issues: dict[str, dict[str, Any]] = {} @@ -138,7 +140,8 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any]) else: diff: RuleDifference = entry["diff"] console.print( - f" [dim]•[/] [dim](line {entry['line_en']} {source_label}, {entry['line_tr']} {target_label})[/]" + f" [dim]•[/] [dim](line {entry['line_en']} {source_label}, " + f"{entry['line_tr']} {target_label})[/]" ) console.print(f" [dim]{diff.description}[/]") if verbose: diff --git a/PythonScripts/audit_translations/tests/conftest.py b/PythonScripts/audit_translations/tests/conftest.py index b5c844b41..a78b24283 100644 --- a/PythonScripts/audit_translations/tests/conftest.py +++ b/PythonScripts/audit_translations/tests/conftest.py @@ -8,8 +8,8 @@ users see, not terminal control bytes. """ -import sys import re +import sys import pytest diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index fafc59a3b..cec10fe40 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -294,7 +294,7 @@ def test_audit_language_uses_configurable_source_language(tmp_path, fixed_consol assert "(line 6 in sv)" in output assert "Extra in Translation [1]" in output assert "(line 6 in nb)" in output - assert 'Untranslated Text [1]' in output + assert "Untranslated Text [1]" in output assert '(line 5 nb) "target"' in output assert "Match Pattern Differences [1]" in output assert "(line 3 sv, 3 nb)" in output From e0180a3e73ffe61556e3e294131a23c8ce582ca0 Mon Sep 17 00:00:00 2001 From: mgros Date: Fri, 5 Jun 2026 23:30:07 +0200 Subject: [PATCH 4/4] fix ruff --- PythonScripts/audit_translations/renderer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index 38da74346..c76da6b2c 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -76,8 +76,7 @@ def print_warnings( console.rule(style="cyan") console.print(f"[{style}]{icon}[/] [bold]{escape(display_name)}[/]") console.print( - f" [dim]{source_label}: {result.english_rule_count} rules → " - f"{target_label}: {result.translated_rule_count} rules[/]" + f" [dim]{source_label}: {result.english_rule_count} rules → {target_label}: {result.translated_rule_count} rules[/]" ) console.rule(style="cyan")