From ea7a8e7f8dfb13d076590598f00bc5d3c3521dfc Mon Sep 17 00:00:00 2001 From: heznpc Date: Tue, 2 Jun 2026 13:22:26 +0900 Subject: [PATCH 1/2] fix(i18n): re-translate Italian locale (was ~51% Spanish) + add contamination guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit src/data/it.json had been populated from es.json and only partially re-translated: 632 long strings were byte-identical Spanish, and the _protected brand map mistranslated Claude→Claudio / Anthropic→Antropico / Claude Code→Codice Claudio — silently breaking runtime brand-term restoration for Italian, which is our #1 install market (verified via CWS CSV; growth driven by an Italian dev-blog recommendation). - Re-translated every contaminated string (exact es-copies + Spanish/hybrid forms) from the English source keys via the extension's own GT endpoint, restored brand/technical terms to canonical English, rebuilt _protected with correct Italian wrong-forms. it↔es overlap: 51% → 0.1% (parity with 10 others). - New guard scripts/check-locale-contamination.js (npm run check:locales, wired into ci.yml) fails when a locale shares >8% of long strings with another — catches the wrong-language bug class that check-i18n/dict-coverage miss (they verify key/shape, not language). Gates green: 488 tests, lint, i18n, dict-coverage, glossary, academy, guard. --- .github/workflows/ci.yml | 3 + CHANGELOG.md | 6 + package.json | 1 + scripts/check-locale-contamination.js | Bin 0 -> 2744 bytes src/data/it.json | 1591 +++++++++++++------------ 5 files changed, 814 insertions(+), 787 deletions(-) create mode 100644 scripts/check-locale-contamination.js diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e21e4a9..8a7e677 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,9 @@ jobs: - name: Check i18n key coverage run: node scripts/check-i18n-keys.js + - name: Check locale cross-contamination (wrong-language values) + run: node scripts/check-locale-contamination.js + - name: Check dictionary coverage (POSITIONING.md "48h" SLA) run: node scripts/check-dict-coverage.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ee7a27..922fedc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Fixed +- **Italian locale was ~51% Spanish.** `src/data/it.json` had been built from `es.json` and only partially re-translated — 632 of its long strings were byte-identical Spanish, and the `_protected` brand map mistranslated `Claude → Claudio`, `Anthropic → Antropico`, `Claude Code → Codice Claudio`, silently breaking runtime brand-term restoration for Italian (our #1 install market). Re-translated every contaminated string from the English source via the same Google Translate endpoint the extension uses, restored brand/technical terms to canonical English, and rebuilt `_protected` with the correct Italian wrong-forms. Italian↔Spanish overlap is now 0.1% (parity with the other 10 locales). + +### Added +- **Locale cross-contamination guard** (`scripts/check-locale-contamination.js`, `npm run check:locales`, wired into CI). Fails when any locale shares >8% of its long strings with another — the bug class the key/shape checks (`check-i18n`, `check-dict-coverage`) cannot see because they only verify structure, not language. Clean locales sit at ≤2.1%; the contaminated Italian file was 51%. + ## [3.5.39] - 2026-06-01 ### Security diff --git a/package.json b/package.json index 542574b..a34ba01 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "check:dicts": "node scripts/check-dicts.js", "check:sync": "node scripts/check-bg-sync.js", "check:i18n": "node scripts/check-i18n-keys.js", + "check:locales": "node scripts/check-locale-contamination.js", "check:dict-coverage": "node scripts/check-dict-coverage.js", "check:academy": "node scripts/check-academy-courses.js", "check:cws-drift": "node scripts/check-cws-drift.js", diff --git a/scripts/check-locale-contamination.js b/scripts/check-locale-contamination.js new file mode 100644 index 0000000000000000000000000000000000000000..8f5a586c4458bcf7da90dda704122a67e204faa7 GIT binary patch literal 2744 zcmaJ@+iu)85bg7R#neS2$zE&M23!Po?Z8Om)J0+kaezLgfx(q1Exj_SlGLsjxga0V zxAd|7xqL~_kXn1=6n%(;rDld_&YT&}58sbEXGasIM^ev8n^F!&o0|jLr01p(jikaF z=e7%@y|`9dcx5!zov>vNBer`Yd@+@d{8ZAUtEp&&bF`RBE6Hd{O^A~?DpkSW!Y*kc z9BftNX|klkv`eLHjwQ4~7~A_$D)Txgq*hq3 zMAIxel$G1w3hjbNPOpJZ-YDmB69I2AbCfb7q&AmU)N)8_=aJ3`zf`rPnOu(CRJ4+V zA+qvIK=yQY{u-$YAM)Lti>8wfz>%!d;moRf>ZuVLEY*^}eEN%=AwiV_o9xK}m7~4! zgJ8d~29R#;Td)_|yk^ue3~Yl|6VGvVE!q|PzsSyVR>eM5#*!4pv}bP!Vml2aQdH!; zMdC=gk(!QZcRVC-$jwwsNYcuZK~2C{iSwD%jv6(SwDID}%Z<^-@ypYVEMO0bPL^J7 zt5RxQD;f^OwRhrLGHc9(;Bcj?Du{p~1PSEy1Xy(c%aK!_kj%$%zW3$R=iLWzEJ9So zB%tAlmOWdV(Bwm=!Z)ETok8bH*}Q|0Io zYWbs6R;EeilI)=W(h5Ia|Hi-hYybJ@(^vS)hxwIJI=#FsmDONjNJ*Y2`0H%J|CPdv zyQnWt&n}-oJ%j7+_<%+uQnfaakDD6n`)c$>=r1}sK7D@q>iG5PIh-B^pH|ue%pO1% z>y)w#e>&XFcewbgjzzR22wwy zcOMRL!oo`9@r+DG=aVa0_#Cx%93zWX9voh*dP0tHOH zmcVRJ+Zt&VlNK(BOfmm&D#o;`#IG0UXE}z9lW7m&1b701JADBQsL3Fx&R36cE2see zMnb~^<|PLd;3ewUG%{~YohGY6ek%wb;b8<~z}|?$Q^3*V0O665 zfd7?rQHg9N=-X@h?&RV?lXI|1v@q8B?Y^1W<9}F*44(9?fFG_`uR16V=e<&b?oRH? zG7Dunf#7mLt+DX!-bPv( Date: Tue, 2 Jun 2026 13:25:34 +0900 Subject: [PATCH 2/2] style: prettier-format check-locale-contamination.js --- scripts/check-locale-contamination.js | Bin 2744 -> 2758 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/scripts/check-locale-contamination.js b/scripts/check-locale-contamination.js index 8f5a586c4458bcf7da90dda704122a67e204faa7..08d6bb990513dc125d108aa0c8aca456f65de0fd 100644 GIT binary patch delta 49 wcmdlXdQ5af5GSL~!x#-A$_06B6DRR910 delta 33 pcmX>mxPLAcu1OT>O3bz0N