Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/hocon_scanner.xrl
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,14 @@ Float = {Integer}?{Fraction}|{Integer}{Fraction}{Exponent}

%% String
Hex = [0-9A-Fa-f]
Escape = ["\\bfnrt]
EscapeNoQuote = [\\bfnrt]
Escape = "|{EscapeNoQuote}
UnicodeEscape = u{Hex}{Hex}{Hex}{Hex}
Char = ([^\"{LineFeed}]|\\{Escape}|\\{UnicodeEscape})
String = "{Char}*"
MultilineChar = ([^"]|"[^"]|""[^"]|\\{Escape}|\\{UnicodeEscape})
%% Special handling for trailing quote: if we don't assert it's not followed by two other
%% quotes, `{Escape}` would "eat" one of the quotes in the triple quote...
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you help to explain why this "eat" happens?

This should work?

([^"]|"[^"]|""[^"]|\\{EscapeNoQuote}|\\{UnicodeEscape})

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The eat happens because the dangling backslash is the last thing the triple quoted string. ...\\""" essentially eats the last quote and breaks out of the triple quote.

"""aaa\"""\nx = 10\ny = """~...

Since \ was not being escaped when "unparsing" the config, it would eat the triple quote, and everything after that gets interpreted as the string until it hits another triple quote.

It's easy to see if you try the test case out without the code changes.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record (with added print debug to hocon_scanner:unindent's 3rd clause):

======================== EUnit ========================
hocon_pp_tests: triple_quote_string_ending_in_backslash_test...
----------------------------------------------------
2025-04-22 15:10:25.849
{nonode@nohost,hocon_scanner,132,<0.388.0>}>>>>>>>>>
  #{chars => "\t\"\\\"\\t\\\"\"\"\n}\nroot2 {\n  x = "}

*failed*
in function erlymatch:run/5 (/home/thales/dev/emqx/hocon/_build/test/lib/erlymatch/src/erlymatch.erl, line 36)
in call from hocon_pp_tests:triple_quote_string_ending_in_backslash_test/0 (/home/thales/dev/emqx/hocon/test/hocon_pp_tests.erl, line 359)
in call from eunit_test:'-mf_wrapper/2-fun-0-'/2 (eunit_test.erl, line 274)
in call from eunit_test:run_testfun/1 (eunit_test.erl, line 72)
in call from eunit_proc:run_test/1 (eunit_proc.erl, line 544)
in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 369)
in call from eunit_proc:handle_test/2 (eunit_proc.erl, line 527)
in call from eunit_proc:tests_inorder/3 (eunit_proc.erl, line 469)
**throw:{mismatch,{hocon_pp_tests,359,
                          {details,tuple,mismatch,
                                   [{1,value,mismatch,{ok,error}},
                                    {2,value,mismatch,
                                     {#{<<"root1">> =>
                                            #{<<"x">> => <<"\t\"\\\""...>>},
                                        <<"root2">> =>
                                            #{<<"x">> => <<"sele"...>>}},
                                      {scan_error,#{...}}}}]}}}
  output:<<"Warning: ct_logs not started
{nonode@nohost,hocon_scanner,132,<0.388.0>}>>>>>>>>>
  #{chars => "\t\"\\\"\\t\\\"\"\"\n}\nroot2 {\n  x = "}Match failed in module 'hocon_pp_tests' at line 359:
  details: {...}
            1: EXPECT = ok
                  GOT = error
            2: EXPECT = #{<<"root1">> => #{<<"x">> => <<"\t\"\\\"\\t\\">>},<<"root2">> => #{<<"x">> => <<"select \n from\n \"hello\" ">>}}
                  GOT = {scan_error,#{line => 5,reason => "illegal characters \"~\""}}
">>

=======================================================
  Failed: 1.  Skipped: 0.  Passed: 0.
===> Error running tests

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I had tried your suggestion before, but paired with other changes (escaping or not escaping the string, various combinations of changes), but didn't work before, but now the added test case works with that regex. 🤔

MultilineChar = ([^"]|"[^"]|""[^"]|\\{EscapeNoQuote}|\\{UnicodeEscape})
MultilineString = """{MultilineChar}*"""

%% Bytesize and Duration
Expand Down
25 changes: 25 additions & 0 deletions test/hocon_pp_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,31 @@ no_triple_quote_string_when_oneliner_test_() ->
?_assertEqual([<<"root {a = \"a\\nb\"}">>], hocon_pp:do(Value, #{newline => <<>>}))
].

%% Tests that having an one liner with characters that should be escaped do not interfere
%% badly with other values which are triple quoted with indentation.
%%
%% At the time of writing, the below example does not trigger the original bug if only
%% root2 is present and expected. Also, if the trailing backslash in root1 is removed, it
%% also does not trigger the bug.
triple_quote_string_ending_in_backslash_test() ->
Raw = #{
<<"root1">> => #{<<"x">> => <<"\t\"\\\"\\t\\">>},
<<"root2">> => #{<<"x">> => <<"select \n from\n \"hello\" ">>}
},
Sc = #{
roots => [root1, root2],
fields => #{
root1 => [{"x", hoconsc:mk(binary())}],
root2 => [{"x", hoconsc:mk(binary())}]
}
},
%% Parses fine.
Raw = hocon_tconf:check_plain(Sc, Raw, #{}),
PP = hocon_pp:do(Raw, #{}),
%% Roundtrip: must read back the same thing.
?assertEqual({ok, Raw}, hocon:binary(PP)),
ok.

crlf_multiline_test_() ->
Value = #{<<"root">> => #{<<"x">> => <<"\r\n\r\na\r\nb\n">>}},
CRLF = <<"\r\n">>,
Expand Down