diff --git a/src/hocon_scanner.xrl b/src/hocon_scanner.xrl index 9510bca..f52d173 100644 --- a/src/hocon_scanner.xrl +++ b/src/hocon_scanner.xrl @@ -50,11 +50,14 @@ Float = {Integer}?{Fraction}|{Integer}{Fraction}{Exponent} %% String Hex = [0-9A-Fa-f] -Escape = ["\\bfnrt] +EscapeNoQuote = [\\bfnrt] +Escape = "|{EscapeNoQuote} UnicodeEscape = u{Hex}{Hex}{Hex}{Hex} Char = ([^\"{LineFeed}]|\\{Escape}|\\{UnicodeEscape}) String = "{Char}*" -MultilineChar = ([^"]|"[^"]|""[^"]|\\{Escape}|\\{UnicodeEscape}) +%% Special handling for trailing quote: if we don't assert it's not followed by two other +%% quotes, `{Escape}` would "eat" one of the quotes in the triple quote... +MultilineChar = ([^"]|"[^"]|""[^"]|\\{EscapeNoQuote}|\\{UnicodeEscape}) MultilineString = """{MultilineChar}*""" %% Bytesize and Duration diff --git a/test/hocon_pp_tests.erl b/test/hocon_pp_tests.erl index 8e66324..4042b7d 100644 --- a/test/hocon_pp_tests.erl +++ b/test/hocon_pp_tests.erl @@ -334,6 +334,31 @@ no_triple_quote_string_when_oneliner_test_() -> ?_assertEqual([<<"root {a = \"a\\nb\"}">>], hocon_pp:do(Value, #{newline => <<>>})) ]. +%% Tests that having a one-liner with characters that should be escaped does not interfere +%% badly with other values which are triple quoted with indentation. +%% +%% At the time of writing, the below example does not trigger the original bug if only +%% root2 is present and expected. Also, if the trailing backslash in root1 is removed, it +%% also does not trigger the bug. +triple_quote_string_ending_in_backslash_test() -> + Raw = #{ + <<"root1">> => #{<<"x">> => <<"\t\"\\\"\\t\\">>}, + <<"root2">> => #{<<"x">> => <<"select \n from\n \"hello\" ">>} + }, + Sc = #{ + roots => [root1, root2], + fields => #{ + root1 => [{"x", hoconsc:mk(binary())}], + root2 => [{"x", hoconsc:mk(binary())}] + } + }, + %% Parses fine. + Raw = hocon_tconf:check_plain(Sc, Raw, #{}), + PP = hocon_pp:do(Raw, #{}), + %% Roundtrip: must read back the same thing. 
+ ?assertEqual({ok, Raw}, hocon:binary(PP)), + ok. + crlf_multiline_test_() -> Value = #{<<"root">> => #{<<"x">> => <<"\r\n\r\na\r\nb\n">>}}, CRLF = <<"\r\n">>,