From 80b8acd78fd1b9602eca6f08adc1a8adc04f46e8 Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Wed, 6 May 2026 09:32:38 -0700 Subject: [PATCH] Fix ht channel_id and document chef behavior Restore the legacy `ht` source_id override (`channel_PhET_Interactive_Simulations_TEST-ht`) so the chef updates the existing Haitian Creole channel on Studio instead of creating a new one under the default-derived id `phet-html5-simulations-ht`. Add two missing Haitian category labels and drop the now-redundant hardcoded `ht` channel-title fallback in `_localized_channel_title`. Annotate the chef with inline comments covering locale handling, source_id stability, the PhET-API/Kolibri metadata mapping, and the offline-mode HTML patches. Co-Authored-By: Claude Opus 4.7 (1M context) --- phet/chef.py | 145 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 7 deletions(-) diff --git a/phet/chef.py b/phet/chef.py index ee544d2c..f2d1e9cc 100755 --- a/phet/chef.py +++ b/phet/chef.py @@ -39,6 +39,9 @@ from metadata_tags import METADATA_BY_CAT from ricecooker.config import MAX_CHAR_LIMITS +# Manually-curated localized channel metadata keyed by locale code (row[1] in the CSV). +# The CSV holds human-translated title, tagline, and description for each locale and +# takes priority over the machine-translated fallbacks defined later in this file. CHANNEL_METADATA = {} # see https://docs.google.com/spreadsheets/d/1M5NQLtXUrGymxlvb4reqJmtUGLPCoXYvVB2P8vBVz_k/edit?gid=1471576465#gid=1471576465 @@ -50,6 +53,7 @@ line_count = 0 for row in csv_reader: if line_count == 0: + # Skip the header row. line_count += 1 continue else: @@ -59,12 +63,21 @@ "description": row[4], } +# Raise Ricecooker's description char limit so our longer localized descriptions aren't truncated. 
MAX_CHAR_LIMITS.update({"description": {"max": 4096}}) +# "en" is the baseline locale: all sims exist in English, so non-English locales +# fall back to the English metadata whenever localized data is missing. DEFAULT_LANG = "en" BASE_TITLE = "PhET Interactive Simulations" +# Channel source_ids for locales without a special override are derived as f"{BASE_SOURCE_ID}-{locale}". +# Studio derives a deterministic channel_id from (source_domain, source_id), so changing this value +# would silently redirect every non-overridden locale to a brand-new channel on Studio. BASE_SOURCE_ID = "phet-html5-simulations" +# Shared HTTP session with retries and an on-disk HTTP cache. The cache lives in `.webcache/` and +# honors the Cache-Control headers from PhET's API so repeated chef runs don't re-fetch +# everything from the network. retry_strategy = Retry(total=5, backoff_factor=1) # adapter = HTTPAdapter(max_retries=retry_strategy) @@ -73,6 +86,10 @@ sess = requests.Session() sess.mount("http://", adapter) sess.mount("https://", adapter) + +# Per-locale list of PhET category `name`s to skip entirely while walking the category tree. +# "html" / "by-device" / "new" / "quantum" / "general" are duplicates or non-content buckets, +# and the Arabic channel additionally excludes "by-level" because it is not meant to have grade-level grouping. ID_BLACKLIST_BY_LANG = { "en": ["html", "by-device", "new", "quantum", "general"], "ar": ["html", "by-device", "new", "quantum", "general", "by-level"], @@ -81,6 +98,9 @@ BASE_URL = "https://phet-api.colorado.edu" BASE_URL_DOWNLOAD = "https://phet.colorado.edu" +# Manual translations for category/sim titles where either PhET's API lacks a localized string +# or we want to override PhET's default. Used as a last-mile lookup right before the label +# is rendered to a TopicNode/HTML5AppNode. 
ARABIC_NAME_CATEGORY = { "Physics": "الفيزياء", "Biology": "الأحياء", @@ -103,6 +123,8 @@ } HAITIAN_NAME_CATEGORY = { + "Math and Statistics": "Matematik ak Estatistik", + "Earth and Space": "Latè ak Lespas", "Elementary School": "Lekòl Elemantè", "By Level": "Pa Nivo", "Middle School": "Lekòl mwayen", @@ -131,6 +153,8 @@ "Quantum": "Quantum", } +# Spelling corrections for Arabic sim titles returned by the PhET API. +# These are applied only when building the Arabic channel. SIM_TYPO = { "أشكال الجزئ": "أشكال الجزيء", "مولارية": "المولارية", @@ -143,6 +167,9 @@ "رؤية اللّون": "رؤية الألوان", } +# Hard-coded fallback channel descriptions. Only hit when `CHANNEL_METADATA` (from the CSV) +# has no entry for a given locale; if that fallback is also missing we machine-translate +# the English description in run_with_locale. CHANNEL_DESCRIPTIONS = { "en": "The PhET Interactive Simulations project created by the University of Colorado Boulder provides interactive math and science simulations that engage students with intuitive, game-like environments. Students can learn about math, physics, biology, and chemistry through hands-on exploration and discovery. The simulations are appropriate for all ages and include guiding teacher lesson plans.", "ar": "تزوّد هذه القناة والمعمول بمحتواها من قبل جامعة كونيتيكيت الأمريكية مجموعة من برمجيات المحاكاة التي يمكن للمتعلمين في المرحلة الإعدادية والثانوية التفاعل معها لفهم أكبر لما قد يدرسونه من قوانين وتجارب في الرياضيات والعلوم المختلفة وخاصة مادتي الكيمياء والفيزياء.", @@ -165,7 +192,8 @@ def get_channel_description(lang): ) -# Maps of IDs to their metadata +# Maps of IDs to their metadata. Fetched once at import time (module-level) and re-used +# across every locale run. The cached HTTP session makes subsequent chef runs fast. """ The structure / keys we care about: { "": { "strings": [ { "": "