-
Notifications
You must be signed in to change notification settings - Fork 16
fix(build-state-doc): write frontmatter scalars as UTF-8 so non-ASCII round-trips #39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,14 @@ | |
| from ..core.utils import count_matches, ensure_dir, file_exists, get_project_root, now_utc, now_utc_z, read_text, write_json | ||
|
|
||
|
|
||
| def _yaml_value(value: Any) -> str: | ||
| # Emit non-ASCII as raw UTF-8, not \uXXXX escapes. These values are read back | ||
| # with unquote_scalar (frontmatter.py), which strips the surrounding quotes | ||
| # WITHOUT JSON-decoding, so an ensure_ascii escape would round-trip as the | ||
| # literal text "\uXXXX" and corrupt the value. | ||
| return json.dumps(value, ensure_ascii=False) | ||
|
coderabbitai[bot] marked this conversation as resolved.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This fixes raw non-ASCII output, but it still leaves the scalar writer and readers on different contracts. Repro: PYTHONPATH=skills/bmad-story-automator/src python - <<'PY'
from story_automator.commands.state import _yaml_value
from story_automator.core.frontmatter import parse_simple_frontmatter
for value in [r"path\\to\\file", 'say "hi"', "line1\nline2", r"agent\\g<0>", "obsłuż"]:
doc = f"---\naiCommand: {_yaml_value(value)}\n---\n"
parsed = parse_simple_frontmatter(doc)["aiCommand"]
print({"input": value, "serialized": _yaml_value(value), "parsed": parsed, "equal": parsed == value})
PYThe backslash, quote, and newline cases print |
||
|
|
||
|
|
||
| def cmd_build_state_doc(args: list[str]) -> int: | ||
| template = "" | ||
| output_folder = "" | ||
|
|
@@ -76,7 +84,7 @@ def cmd_build_state_doc(args: list[str]) -> int: | |
| f" maxParallel: {int(overrides.get('maxParallel', 1) or 1)}\n", | ||
| text, | ||
| ) | ||
| custom_instructions = json.dumps(config.get("customInstructions", "")) | ||
| custom_instructions = _yaml_value(config.get("customInstructions", "")) | ||
| text = re.sub(r"(?m)^customInstructions:.*$", lambda m: f"customInstructions: {custom_instructions}", text) | ||
| agent_config = config.get("agentConfig") | ||
| if isinstance(agent_config, dict): | ||
|
|
@@ -97,8 +105,8 @@ def cmd_build_state_doc(args: list[str]) -> int: | |
|
|
||
| lines = [ | ||
| "agentConfig:", | ||
| f" defaultPrimary: {json.dumps(default_primary)}", | ||
| f" defaultFallback: {json.dumps(default_fallback)}", | ||
| f" defaultPrimary: {_yaml_value(default_primary)}", | ||
| f" defaultFallback: {_yaml_value(default_fallback)}", | ||
| ] | ||
| # Model serialization preserves three states so round-trips through | ||
| # `_load_agent_config_from_state` + `resolve_agent` keep the same | ||
|
|
@@ -113,7 +121,7 @@ def cmd_build_state_doc(args: list[str]) -> int: | |
| # motivated this — without preserving the explicit clear, retro/dev | ||
| # tasks silently re-inherited `defaultModel` after persistence. | ||
| if "defaultModel" in agent_config: | ||
| lines.append(f" defaultModel: {json.dumps(_model_or_none(agent_config.get('defaultModel')))}") | ||
| lines.append(f" defaultModel: {_yaml_value(_model_or_none(agent_config.get('defaultModel')))}") | ||
| if isinstance(per_task, dict) and per_task: | ||
| lines.append(" perTask:") | ||
| for task in sorted(per_task): | ||
|
|
@@ -122,12 +130,12 @@ def cmd_build_state_doc(args: list[str]) -> int: | |
| continue | ||
| lines.append(f" {task}:") | ||
| if "primary" in entry: | ||
| lines.append(f" primary: {json.dumps(entry['primary'])}") | ||
| lines.append(f" primary: {_yaml_value(entry['primary'])}") | ||
| if "fallback" in entry: | ||
| value = entry["fallback"] | ||
| lines.append(f" fallback: {'false' if value is False else json.dumps(value)}") | ||
| lines.append(f" fallback: {'false' if value is False else _yaml_value(value)}") | ||
| if "model" in entry: | ||
| lines.append(f" model: {json.dumps(_model_or_none(entry.get('model')))}") | ||
| lines.append(f" model: {_yaml_value(_model_or_none(entry.get('model')))}") | ||
| complexity_overrides = agent_config.get("complexityOverrides", {}) | ||
| if isinstance(complexity_overrides, dict) and complexity_overrides: | ||
| lines.append(" complexityOverrides:") | ||
|
|
@@ -142,16 +150,19 @@ def cmd_build_state_doc(args: list[str]) -> int: | |
| continue | ||
| lines.append(f" {task}:") | ||
| if "primary" in entry: | ||
| lines.append(f" primary: {json.dumps(entry['primary'])}") | ||
| lines.append(f" primary: {_yaml_value(entry['primary'])}") | ||
| if "fallback" in entry: | ||
| value = entry["fallback"] | ||
| lines.append(f" fallback: {'false' if value is False else json.dumps(value)}") | ||
| lines.append(f" fallback: {'false' if value is False else _yaml_value(value)}") | ||
| if "model" in entry: | ||
| lines.append(f" model: {json.dumps(_model_or_none(entry.get('model')))}") | ||
| lines.append(f" model: {_yaml_value(_model_or_none(entry.get('model')))}") | ||
| block = "\n".join(lines) + "\n" | ||
| text = re.sub(r"(?m)^agentConfig:\n(?:(?:\s{2}.*\n)*)", block, text) | ||
| # Callable repl: the block is inserted verbatim, never parsed for \g / \1 | ||
| # backrefs, so a "\uXXXX" or backslash in an agent/model name can't raise | ||
| # re.error: bad escape (the same hazard fixed below for scalar values). | ||
| text = re.sub(r"(?m)^agentConfig:\n(?:(?:\s{2}.*\n)*)", lambda m: block, text) | ||
| for key, value in replacements.items(): | ||
| text = re.sub(rf"(?m)^{re.escape(key)}:.*$", lambda m, k=key, v=value: f"{k}: {json.dumps(v)}", text) | ||
| text = re.sub(rf"(?m)^{re.escape(key)}:.*$", lambda m, k=key, v=value: f"{k}: {_yaml_value(v)}", text) | ||
| story_range = [item for item in config.get("storyRange", []) if isinstance(item, str)] | ||
| progress_rows = "\n".join(f"| {story_id} | ⏳ | ⏳ | ⏳ | ⏳ | ⏳ | pending |" for story_id in story_range) | ||
| body = { | ||
|
|
@@ -166,7 +177,7 @@ def cmd_build_state_doc(args: list[str]) -> int: | |
| for key, value in body.items(): | ||
| text = text.replace(key, value) | ||
| text = text.replace("<!-- Progress rows will be appended here -->", progress_rows) | ||
| output_path.write_text(text) | ||
| output_path.write_text(text, encoding="utf-8") | ||
| write_json({"ok": True, "path": str(output_path), "createdAt": now}) | ||
| return 0 | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_yaml_value()now emits raw non-ASCII characters, butcmd_build_state_docwrites viaPath.write_text()without an explicit encoding, which can raiseUnicodeEncodeErroror corrupt output on non-UTF-8 default locales (notably some Windows setups). This makes the new behavior potentially non-portable unless the state doc is always written as UTF-8.Severity: medium
🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch — fixed in 605554f.
write_textwas safe before this PR because the output was pure ASCII (\uXXXX); emitting raw UTF-8 made the bare call locale-dependent. Nowwrite_text(text, encoding="utf-8"), matching the repo'sread_texthelper. Verified the unicode tests stay green underLC_ALL=C, where the unencoded write would have raisedUnicodeEncodeError.