Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 24 additions & 13 deletions skills/bmad-story-automator/src/story_automator/commands/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
from ..core.utils import count_matches, ensure_dir, file_exists, get_project_root, now_utc, now_utc_z, read_text, write_json


def _yaml_value(value: Any) -> str:
# Emit non-ASCII as raw UTF-8, not \uXXXX escapes. These values are read back
# with unquote_scalar (frontmatter.py), which strips the surrounding quotes
# WITHOUT JSON-decoding, so an ensure_ascii escape would round-trip as the
# literal text "\uXXXX" and corrupt the value.
return json.dumps(value, ensure_ascii=False)

@augmentcode augmentcode Bot Jun 7, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_yaml_value() now emits raw non-ASCII characters, but cmd_build_state_doc writes via Path.write_text() without an explicit encoding, which can raise UnicodeEncodeError or corrupt output on non-UTF-8 default locales (notably some Windows setups). This makes the new behavior potentially non-portable unless the state doc is always written as UTF-8.

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — fixed in 605554f. write_text was safe before this PR because the output was pure ASCII (\uXXXX); emitting raw UTF-8 made the bare call locale-dependent. Now write_text(text, encoding="utf-8"), matching the repo's read_text helper. Verified the unicode tests stay green under LC_ALL=C, where the unencoded write would have raised UnicodeEncodeError.

Comment thread
coderabbitai[bot] marked this conversation as resolved.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fixes raw non-ASCII output, but it still leaves the scalar writer and readers on different contracts. _yaml_value() emits JSON string escapes for backslashes, quotes, and control characters, while parse_simple_frontmatter() / unquote_scalar() only strip the surrounding quotes. Those values are persisted differently when the generated state is read back.

Repro:

PYTHONPATH=skills/bmad-story-automator/src python - <<'PY'
from story_automator.commands.state import _yaml_value
from story_automator.core.frontmatter import parse_simple_frontmatter

for value in [r"path\\to\\file", 'say "hi"', "line1\nline2", r"agent\\g<0>", "obsłuż"]:
    doc = f"---\naiCommand: {_yaml_value(value)}\n---\n"
    parsed = parse_simple_frontmatter(doc)["aiCommand"]
    print({"input": value, "serialized": _yaml_value(value), "parsed": parsed, "equal": parsed == value})
PY

The backslash, quote, and newline cases print equal: False; only the non-ASCII case prints True. Please make scalar serialization/parsing a single contract, or update the readers to decode the emitted representation, and add parity coverage for these escaped scalar cases including agentConfig values.



def cmd_build_state_doc(args: list[str]) -> int:
template = ""
output_folder = ""
Expand Down Expand Up @@ -76,7 +84,7 @@ def cmd_build_state_doc(args: list[str]) -> int:
f" maxParallel: {int(overrides.get('maxParallel', 1) or 1)}\n",
text,
)
custom_instructions = json.dumps(config.get("customInstructions", ""))
custom_instructions = _yaml_value(config.get("customInstructions", ""))
text = re.sub(r"(?m)^customInstructions:.*$", lambda m: f"customInstructions: {custom_instructions}", text)
agent_config = config.get("agentConfig")
if isinstance(agent_config, dict):
Expand All @@ -97,8 +105,8 @@ def cmd_build_state_doc(args: list[str]) -> int:

lines = [
"agentConfig:",
f" defaultPrimary: {json.dumps(default_primary)}",
f" defaultFallback: {json.dumps(default_fallback)}",
f" defaultPrimary: {_yaml_value(default_primary)}",
f" defaultFallback: {_yaml_value(default_fallback)}",
]
# Model serialization preserves three states so round-trips through
# `_load_agent_config_from_state` + `resolve_agent` keep the same
Expand All @@ -113,7 +121,7 @@ def cmd_build_state_doc(args: list[str]) -> int:
# motivated this — without preserving the explicit clear, retro/dev
# tasks silently re-inherited `defaultModel` after persistence.
if "defaultModel" in agent_config:
lines.append(f" defaultModel: {json.dumps(_model_or_none(agent_config.get('defaultModel')))}")
lines.append(f" defaultModel: {_yaml_value(_model_or_none(agent_config.get('defaultModel')))}")
if isinstance(per_task, dict) and per_task:
lines.append(" perTask:")
for task in sorted(per_task):
Expand All @@ -122,12 +130,12 @@ def cmd_build_state_doc(args: list[str]) -> int:
continue
lines.append(f" {task}:")
if "primary" in entry:
lines.append(f" primary: {json.dumps(entry['primary'])}")
lines.append(f" primary: {_yaml_value(entry['primary'])}")
if "fallback" in entry:
value = entry["fallback"]
lines.append(f" fallback: {'false' if value is False else json.dumps(value)}")
lines.append(f" fallback: {'false' if value is False else _yaml_value(value)}")
if "model" in entry:
lines.append(f" model: {json.dumps(_model_or_none(entry.get('model')))}")
lines.append(f" model: {_yaml_value(_model_or_none(entry.get('model')))}")
complexity_overrides = agent_config.get("complexityOverrides", {})
if isinstance(complexity_overrides, dict) and complexity_overrides:
lines.append(" complexityOverrides:")
Expand All @@ -142,16 +150,19 @@ def cmd_build_state_doc(args: list[str]) -> int:
continue
lines.append(f" {task}:")
if "primary" in entry:
lines.append(f" primary: {json.dumps(entry['primary'])}")
lines.append(f" primary: {_yaml_value(entry['primary'])}")
if "fallback" in entry:
value = entry["fallback"]
lines.append(f" fallback: {'false' if value is False else json.dumps(value)}")
lines.append(f" fallback: {'false' if value is False else _yaml_value(value)}")
if "model" in entry:
lines.append(f" model: {json.dumps(_model_or_none(entry.get('model')))}")
lines.append(f" model: {_yaml_value(_model_or_none(entry.get('model')))}")
block = "\n".join(lines) + "\n"
text = re.sub(r"(?m)^agentConfig:\n(?:(?:\s{2}.*\n)*)", block, text)
# Callable repl: the block is inserted verbatim, never parsed for \g / \1
# backrefs, so a "\uXXXX" or backslash in an agent/model name can't raise
# re.error: bad escape (the same hazard fixed below for scalar values).
text = re.sub(r"(?m)^agentConfig:\n(?:(?:\s{2}.*\n)*)", lambda m: block, text)
for key, value in replacements.items():
text = re.sub(rf"(?m)^{re.escape(key)}:.*$", lambda m, k=key, v=value: f"{k}: {json.dumps(v)}", text)
text = re.sub(rf"(?m)^{re.escape(key)}:.*$", lambda m, k=key, v=value: f"{k}: {_yaml_value(v)}", text)
story_range = [item for item in config.get("storyRange", []) if isinstance(item, str)]
progress_rows = "\n".join(f"| {story_id} | ⏳ | ⏳ | ⏳ | ⏳ | ⏳ | pending |" for story_id in story_range)
body = {
Expand All @@ -166,7 +177,7 @@ def cmd_build_state_doc(args: list[str]) -> int:
for key, value in body.items():
text = text.replace(key, value)
text = text.replace("<!-- Progress rows will be appended here -->", progress_rows)
output_path.write_text(text)
output_path.write_text(text, encoding="utf-8")
write_json({"ok": True, "path": str(output_path), "createdAt": now})
return 0

Expand Down
37 changes: 37 additions & 0 deletions tests/test_replacement_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from story_automator.commands.orchestrator import cmd_orchestrator_helper
from story_automator.commands.state import cmd_build_state_doc
from story_automator.core.frontmatter import parse_simple_frontmatter


REPO_ROOT = Path(__file__).resolve().parents[1]
Expand Down Expand Up @@ -111,6 +112,42 @@ def test_replacement_value_with_backslash_in_string(self) -> None:
text = state_file.read_text(encoding="utf-8")
self.assertIn(r"\to", text)

def test_frontmatter_scalars_round_trip_non_ascii(self) -> None:
# Regression: scalars must survive a write->parse cycle. The frontmatter
# reader (unquote_scalar) strips quotes without JSON-decoding, so values
# have to be written as raw UTF-8 — an ensure_ascii "\uXXXX" escape would
# read back as literal "\uXXXX" text. assertIn on the whole document does
# NOT catch this because the body {{token}} carries the real char.
config = self._default_config()
config["epicName"] = "upmon — Epic 3 obsłuż"
config["aiCommand"] = "claude — go"
config["customInstructions"] = "run probe — ciężki ż"
fields = parse_simple_frontmatter(self._build_state(config).read_text(encoding="utf-8"))
self.assertEqual(fields.get("epicName"), "upmon — Epic 3 obsłuż")
self.assertEqual(fields.get("aiCommand"), "claude — go")
self.assertEqual(fields.get("customInstructions"), "run probe — ciężki ż")

def test_agent_config_special_chars_do_not_break_resub(self) -> None:
# The agentConfig block is spliced in with re.sub too. A non-ASCII model
# id is unlikely, but a backslash in a model/agent string is realistic
# (e.g. a "\g"- or "\b"-looking token) — as a raw repl it would raise
# re.error: bad escape or corrupt "\b" into a backspace. Both must
# survive verbatim and never become a "\uXXXX" escape.
config = self._default_config()
config["agentConfig"] = {
"defaultPrimary": "claude",
"defaultFallback": "codex",
"perTask": {"dev": {"primary": r"agent\g<0>", "model": "opus — żółw"}},
}
# _build_state returning a path at all proves the agentConfig re.sub did
# not raise re.error on the "\g<0>" backslash content.
text = self._build_state(config).read_text(encoding="utf-8")
self.assertIn("opus — żółw", text)
# JSON doubles the backslash, so the literal block carries "agent\\g<0>";
# asserting it proves the value survived the splice, not just that it ran.
self.assertIn(r"agent\\g<0>", text)
self.assertNotIn(r"\u", text)


class StateUpdateUnicodeTests(_FixtureMixin, unittest.TestCase):
def test_state_update_with_unicode_value(self) -> None:
Expand Down