diff --git a/skills/bmad-story-automator/src/story_automator/commands/state.py b/skills/bmad-story-automator/src/story_automator/commands/state.py index 38990141..af0baaf2 100644 --- a/skills/bmad-story-automator/src/story_automator/commands/state.py +++ b/skills/bmad-story-automator/src/story_automator/commands/state.py @@ -11,6 +11,14 @@ from ..core.utils import count_matches, ensure_dir, file_exists, get_project_root, now_utc, now_utc_z, read_text, write_json +def _yaml_value(value: Any) -> str: + # Emit non-ASCII as raw UTF-8, not \uXXXX escapes. These values are read back + # with unquote_scalar (frontmatter.py), which strips the surrounding quotes + # WITHOUT JSON-decoding, so an ensure_ascii escape would round-trip as the + # literal text "\uXXXX" and corrupt the value. + return json.dumps(value, ensure_ascii=False) + + def cmd_build_state_doc(args: list[str]) -> int: template = "" output_folder = "" @@ -76,7 +84,7 @@ def cmd_build_state_doc(args: list[str]) -> int: f" maxParallel: {int(overrides.get('maxParallel', 1) or 1)}\n", text, ) - custom_instructions = json.dumps(config.get("customInstructions", "")) + custom_instructions = _yaml_value(config.get("customInstructions", "")) text = re.sub(r"(?m)^customInstructions:.*$", lambda m: f"customInstructions: {custom_instructions}", text) agent_config = config.get("agentConfig") if isinstance(agent_config, dict): @@ -97,8 +105,8 @@ def cmd_build_state_doc(args: list[str]) -> int: lines = [ "agentConfig:", - f" defaultPrimary: {json.dumps(default_primary)}", - f" defaultFallback: {json.dumps(default_fallback)}", + f" defaultPrimary: {_yaml_value(default_primary)}", + f" defaultFallback: {_yaml_value(default_fallback)}", ] # Model serialization preserves three states so round-trips through # `_load_agent_config_from_state` + `resolve_agent` keep the same @@ -113,7 +121,7 @@ def cmd_build_state_doc(args: list[str]) -> int: # motivated this — without preserving the explicit clear, retro/dev # tasks silently re-inherited `defaultModel` after persistence. if "defaultModel" in agent_config: - lines.append(f" defaultModel: {json.dumps(_model_or_none(agent_config.get('defaultModel')))}") + lines.append(f" defaultModel: {_yaml_value(_model_or_none(agent_config.get('defaultModel')))}") if isinstance(per_task, dict) and per_task: lines.append(" perTask:") for task in sorted(per_task): @@ -122,12 +130,12 @@ def cmd_build_state_doc(args: list[str]) -> int: continue lines.append(f" {task}:") if "primary" in entry: - lines.append(f" primary: {json.dumps(entry['primary'])}") + lines.append(f" primary: {_yaml_value(entry['primary'])}") if "fallback" in entry: value = entry["fallback"] - lines.append(f" fallback: {'false' if value is False else json.dumps(value)}") + lines.append(f" fallback: {'false' if value is False else _yaml_value(value)}") if "model" in entry: - lines.append(f" model: {json.dumps(_model_or_none(entry.get('model')))}") + lines.append(f" model: {_yaml_value(_model_or_none(entry.get('model')))}") complexity_overrides = agent_config.get("complexityOverrides", {}) if isinstance(complexity_overrides, dict) and complexity_overrides: lines.append(" complexityOverrides:") @@ -142,16 +150,19 @@ def cmd_build_state_doc(args: list[str]) -> int: continue lines.append(f" {task}:") if "primary" in entry: - lines.append(f" primary: {json.dumps(entry['primary'])}") + lines.append(f" primary: {_yaml_value(entry['primary'])}") if "fallback" in entry: value = entry["fallback"] - lines.append(f" fallback: {'false' if value is False else json.dumps(value)}") + lines.append(f" fallback: {'false' if value is False else _yaml_value(value)}") if "model" in entry: - lines.append(f" model: {json.dumps(_model_or_none(entry.get('model')))}") + lines.append(f" model: {_yaml_value(_model_or_none(entry.get('model')))}") block = "\n".join(lines) + "\n" - text = re.sub(r"(?m)^agentConfig:\n(?:(?:\s{2}.*\n)*)", block, text) + # Callable repl: the block is inserted verbatim, never parsed for \g / \1 + # backrefs, so a "\uXXXX" or backslash in an agent/model name can't raise + # re.error: bad escape (the same hazard fixed below for scalar values). + text = re.sub(r"(?m)^agentConfig:\n(?:(?:\s{2}.*\n)*)", lambda m: block, text) for key, value in replacements.items(): - text = re.sub(rf"(?m)^{re.escape(key)}:.*$", lambda m, k=key, v=value: f"{k}: {json.dumps(v)}", text) + text = re.sub(rf"(?m)^{re.escape(key)}:.*$", lambda m, k=key, v=value: f"{k}: {_yaml_value(v)}", text) story_range = [item for item in config.get("storyRange", []) if isinstance(item, str)] progress_rows = "\n".join(f"| {story_id} | ⏳ | ⏳ | ⏳ | ⏳ | ⏳ | pending |" for story_id in story_range) body = { @@ -166,7 +177,7 @@ def cmd_build_state_doc(args: list[str]) -> int: for key, value in body.items(): text = text.replace(key, value) text = text.replace("", progress_rows) - output_path.write_text(text) + output_path.write_text(text, encoding="utf-8") write_json({"ok": True, "path": str(output_path), "createdAt": now}) return 0 diff --git a/tests/test_replacement_unicode.py b/tests/test_replacement_unicode.py index e5286dbd..6c71615a 100644 --- a/tests/test_replacement_unicode.py +++ b/tests/test_replacement_unicode.py @@ -19,6 +19,7 @@ from story_automator.commands.orchestrator import cmd_orchestrator_helper from story_automator.commands.state import cmd_build_state_doc +from story_automator.core.frontmatter import parse_simple_frontmatter REPO_ROOT = Path(__file__).resolve().parents[1] @@ -111,6 +112,42 @@ def test_replacement_value_with_backslash_in_string(self) -> None: text = state_file.read_text(encoding="utf-8") self.assertIn(r"\to", text) + def test_frontmatter_scalars_round_trip_non_ascii(self) -> None: + # Regression: scalars must survive a write->parse cycle. The frontmatter + # reader (unquote_scalar) strips quotes without JSON-decoding, so values + # have to be written as raw UTF-8 — an ensure_ascii "\uXXXX" escape would + # read back as literal "\uXXXX" text. assertIn on the whole document does + # NOT catch this because the body {{token}} carries the real char. + config = self._default_config() + config["epicName"] = "upmon — Epic 3 obsłuż" + config["aiCommand"] = "claude — go" + config["customInstructions"] = "run probe — ciężki ż" + fields = parse_simple_frontmatter(self._build_state(config).read_text(encoding="utf-8")) + self.assertEqual(fields.get("epicName"), "upmon — Epic 3 obsłuż") + self.assertEqual(fields.get("aiCommand"), "claude — go") + self.assertEqual(fields.get("customInstructions"), "run probe — ciężki ż") + + def test_agent_config_special_chars_do_not_break_resub(self) -> None: + # The agentConfig block is spliced in with re.sub too. A non-ASCII model + # id is unlikely, but a backslash in a model/agent string is realistic + # (e.g. a "\g"- or "\b"-looking token) — as a raw repl it would raise + # re.error: bad escape or corrupt "\b" into a backspace. Both must + # survive verbatim and never become a "\uXXXX" escape. + config = self._default_config() + config["agentConfig"] = { + "defaultPrimary": "claude", + "defaultFallback": "codex", + "perTask": {"dev": {"primary": r"agent\g<0>", "model": "opus — żółw"}}, + } + # _build_state returning a path at all proves the agentConfig re.sub did + # not raise re.error on the "\g<0>" backslash content. + text = self._build_state(config).read_text(encoding="utf-8") + self.assertIn("opus — żółw", text) + # JSON doubles the backslash, so the literal block carries "agent\\g<0>"; + # asserting it proves the value survived the splice, not just that it ran. + self.assertIn(r"agent\\g<0>", text) + self.assertNotIn(r"\u", text) + class StateUpdateUnicodeTests(_FixtureMixin, unittest.TestCase): def test_state_update_with_unicode_value(self) -> None: