From 9b8688d850430c6b3ce964ef71f3c491627ac0a4 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Wed, 24 Jun 2026 11:48:40 +0800 Subject: [PATCH 1/2] fix: std.manifestToml passes through Unicode characters natively MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation: TomlRenderer hardcoded unicode = true, causing all non-ASCII characters to be escaped as \uXXXX sequences. C++ jsonnet passes through Unicode characters directly in TOML output. Modification: - TomlRenderer.scala: Change unicode from true to false in both visitString (line 54) and writeEscapedKey (line 164). - UnicodeHandlingTests.scala: Update objectFieldOrdering test expectation to use native Unicode instead of escaped form. Result: std.manifestToml({name: "世界"}) now outputs name = "世界" instead of name = "\u4e16\u754c", matching C++ jsonnet. --- sjsonnet/src/sjsonnet/TomlRenderer.scala | 4 ++-- sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sjsonnet/src/sjsonnet/TomlRenderer.scala b/sjsonnet/src/sjsonnet/TomlRenderer.scala index 5d89f4d8d..868d1e254 100644 --- a/sjsonnet/src/sjsonnet/TomlRenderer.scala +++ b/sjsonnet/src/sjsonnet/TomlRenderer.scala @@ -51,7 +51,7 @@ class TomlRenderer( if (s == null) { visitNull(index) } else { - BaseRenderer.escape(out, s, unicode = true) + BaseRenderer.escape(out, s, unicode = false) flush } } @@ -161,7 +161,7 @@ object TomlRenderer { def writeEscapedKey(out: StringBuilderWriter, key: CharSequence): Unit = { if (isBareKey(key)) out.write(key.toString) - else BaseRenderer.escape(out, key, unicode = true) + else BaseRenderer.escape(out, key, unicode = false) } def escapeKey(key: String): String = if (isBareKey(key)) key diff --git a/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala b/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala index a47814cca..f46cf1c04 100644 --- a/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala +++ b/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala @@ -294,7 +294,7 @@ object UnicodeHandlingTests extends TestSuite { // TOML manifest: eval(s"std.manifestTomlEx($testObject, ' ')") ==> - ujson.Str("a = 1\nz = 2\n\"\\uffff\" = 3\n\"\\ud800\\udc00\" = 4") + ujson.Str("a = 1\nz = 2\n\"\uFFFF\" = 3\n\"\uD800\uDC00\" = 4") } test("findSubstr") { From d12bb7b28324b0b195c5fe1e282263e477dab18f Mon Sep 17 00:00:00 2001 From: He-Pin Date: Wed, 24 Jun 2026 12:21:15 +0800 Subject: [PATCH 2/2] test: add TOML Unicode native output regression test (golden from C++ jsonnet) --- .../new_test_suite/toml_unicode_native_output.jsonnet | 11 +++++++++++ .../toml_unicode_native_output.jsonnet.golden | 1 + 2 files changed, 12 insertions(+) create mode 100644 sjsonnet/test/resources/new_test_suite/toml_unicode_native_output.jsonnet create mode 100644 sjsonnet/test/resources/new_test_suite/toml_unicode_native_output.jsonnet.golden diff --git a/sjsonnet/test/resources/new_test_suite/toml_unicode_native_output.jsonnet b/sjsonnet/test/resources/new_test_suite/toml_unicode_native_output.jsonnet new file mode 100644 index 000000000..ae8f92763 --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/toml_unicode_native_output.jsonnet @@ -0,0 +1,11 @@ +// Regression test: TOML output must pass through Unicode natively. +// Golden verified against cpp-jsonnet 0.21.0, go-jsonnet 0.22.0, jrsonnet 0.5.0-pre99. +std.assertEqual( + std.manifestToml({name: "世界", drink: "café", emoji: "🌍"}), + "drink = \"café\"\nemoji = \"🌍\"\nname = \"世界\"" +) && +std.assertEqual( + std.manifestToml({section: {key: "日本語"}}), + "\n\n[section]\n key = \"日本語\"" +) && +true diff --git a/sjsonnet/test/resources/new_test_suite/toml_unicode_native_output.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/toml_unicode_native_output.jsonnet.golden new file mode 100644 index 000000000..27ba77dda --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/toml_unicode_native_output.jsonnet.golden @@ -0,0 +1 @@ +true