From 448ca8a209c24814eeb5f676b6cb69f411710732 Mon Sep 17 00:00:00 2001 From: RobertDober Date: Wed, 26 Mar 2025 09:24:05 +0100 Subject: [PATCH 1/7] scaffolding a simple, first parser --- lib/earmark_parser/nimble_parsers.ex | 9 +++++++++ .../nimble_parsers/html_atts_parser.ex | 13 +++++++++++++ mix.exs | 8 ++++++-- mix.lock | 1 + test/nimble_parsers/html_att_test.exs | 17 +++++++++++++++++ test/support/nimble_test_case.ex | 13 +++++++++++++ test/support/nimble_tests.ex | 19 +++++++++++++++++++ 7 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 lib/earmark_parser/nimble_parsers.ex create mode 100644 lib/earmark_parser/nimble_parsers/html_atts_parser.ex create mode 100644 test/nimble_parsers/html_att_test.exs create mode 100644 test/support/nimble_test_case.ex create mode 100644 test/support/nimble_tests.ex diff --git a/lib/earmark_parser/nimble_parsers.ex b/lib/earmark_parser/nimble_parsers.ex new file mode 100644 index 0000000..d1d64bd --- /dev/null +++ b/lib/earmark_parser/nimble_parsers.ex @@ -0,0 +1,9 @@ +defmodule EarmarkParser.NimbleParsers do + @moduledoc ~S""" + coming soon + """ + + defdelegate parse_html_atts(input), to: __MODULE__.HtmlAttsParser + +end +# SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/lib/earmark_parser/nimble_parsers/html_atts_parser.ex b/lib/earmark_parser/nimble_parsers/html_atts_parser.ex new file mode 100644 index 0000000..09e6a26 --- /dev/null +++ b/lib/earmark_parser/nimble_parsers/html_atts_parser.ex @@ -0,0 +1,13 @@ +defmodule EarmarkParser.NimbleParsers.HtmlAttsParser do + @moduledoc ~S""" + Parses an HTML tag + """ + import NimbleParsec + + end_html_att = + ignore(ascii_char([?>..?>])) + + defparsec(:parse_html_atts, end_html_att) +end + +# SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/mix.exs b/mix.exs index 2666494..343c0d7 100644 --- a/mix.exs +++ b/mix.exs @@ -1,17 +1,21 @@ defmodule EarmarkParser.MixProject do use Mix.Project - @version "1.4.44" + @version "1.4.45" @url 
"https://github.com/RobertDober/earmark_parser" @deps [ + # production environnement + {:nimble_parsec, "~> 1.4.2", runtime: false}, + + # dev and test environnements {:benchee, "~> 1.3.1", only: [:dev]}, # {:credo, "~> 1.7.5", only: [:dev]}, {:dialyxir, "~> 1.4.5", only: [:dev], runtime: false}, {:earmark_ast_dsl, "~> 0.3.7", only: [:test]}, {:excoveralls, "~> 0.18.3", only: [:test]}, {:extractly, "~> 0.5.3", only: [:dev]}, - {:floki, "~> 0.36", only: [:dev, :test]} + {:floki, "~> 0.36", only: [:dev, :test]}, ] def project do diff --git a/mix.lock b/mix.lock index 36e1702..fad7842 100644 --- a/mix.lock +++ b/mix.lock @@ -17,6 +17,7 @@ "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, "parse_trans": {:hex, :parse_trans, "3.3.1", "16328ab840cc09919bd10dab29e431da3af9e9e7e7e6f0089dd5a2d2820011d8", [:rebar3], [], "hexpm", "07cd9577885f56362d414e8c4c4e6bdf10d43a8767abb92d24cbe8b24c54888b"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"}, "statistex": {:hex, :statistex, "1.0.0", 
"f3dc93f3c0c6c92e5f291704cf62b99b553253d7969e9a5fa713e5481cd858a5", [:mix], [], "hexpm", "ff9d8bee7035028ab4742ff52fc80a2aa35cece833cf5319009b52f1b5a86c27"}, diff --git a/test/nimble_parsers/html_att_test.exs b/test/nimble_parsers/html_att_test.exs new file mode 100644 index 0000000..34d51c2 --- /dev/null +++ b/test/nimble_parsers/html_att_test.exs @@ -0,0 +1,17 @@ +defmodule Test.NimbleParsers.HtmlAttTest do + use Support.NimbleTestCase + + describe "empty att list" do + test "returns an empty list if end char (>) is present" do + parse_html_atts(">") + |> parsed_ok([]) + end + + test "does not parse an empty string" do + parse_html_atts("") + |> parsed_error("expected ASCII character in the range \">\" to \">\"") + end + end +end + +# SPDX-License-Identifier: Apache-2.0 diff --git a/test/support/nimble_test_case.ex b/test/support/nimble_test_case.ex new file mode 100644 index 0000000..7789276 --- /dev/null +++ b/test/support/nimble_test_case.ex @@ -0,0 +1,13 @@ +defmodule Support.NimbleTestCase do + defmacro __using__(_options) do + quote do + use ExUnit.Case, async: true + + import EarmarkParser.NimbleParsers + import Support.NimbleTests + + end + end +end + +# SPDX-License-Identifier: Apache-2.0 diff --git a/test/support/nimble_tests.ex b/test/support/nimble_tests.ex new file mode 100644 index 0000000..39d5425 --- /dev/null +++ b/test/support/nimble_tests.ex @@ -0,0 +1,19 @@ +defmodule Support.NimbleTests do + @moduledoc ~S""" + Makes asserting on NimbleParsec results simpler + """ + defmacro parsed_error(parsed, expected) do + quote do + {:error, message, _, _, _, _} = unquote(parsed) # |> IO.inspect() + assert message == unquote(expected) + end + end + + defmacro parsed_ok(parsed, expected) do + quote do + {:ok, result, _, _, _, _} = unquote(parsed) # |> IO.inspect() + assert result == unquote(expected) + end + end +end +# SPDX-License-Identifier: Apache-2.0 From 199820262c73818a950274808c7a8b2a5b2bc9a6 Mon Sep 17 00:00:00 2001 From: RobertDober Date: Wed, 26 
Mar 2025 20:20:03 +0100 Subject: [PATCH 2/7] A still very primitive NimbleParsec parser for a subset of HTML attributes --- lib/earmark_parser/nimble_parsers.ex | 2 +- .../nimble_parsers/html_atts_parser.ex | 44 +++++++++++++++++-- mix.exs | 2 +- test/nimble_parsers/html_att_test.exs | 21 ++++++++- test/support/nimble_test_case.ex | 1 - test/support/nimble_tests.ex | 7 ++- 6 files changed, 68 insertions(+), 9 deletions(-) diff --git a/lib/earmark_parser/nimble_parsers.ex b/lib/earmark_parser/nimble_parsers.ex index d1d64bd..927cab7 100644 --- a/lib/earmark_parser/nimble_parsers.ex +++ b/lib/earmark_parser/nimble_parsers.ex @@ -4,6 +4,6 @@ defmodule EarmarkParser.NimbleParsers do """ defdelegate parse_html_atts(input), to: __MODULE__.HtmlAttsParser - end + # SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/lib/earmark_parser/nimble_parsers/html_atts_parser.ex b/lib/earmark_parser/nimble_parsers/html_atts_parser.ex index 09e6a26..7dada46 100644 --- a/lib/earmark_parser/nimble_parsers/html_atts_parser.ex +++ b/lib/earmark_parser/nimble_parsers/html_atts_parser.ex @@ -4,10 +4,48 @@ defmodule EarmarkParser.NimbleParsers.HtmlAttsParser do """ import NimbleParsec - end_html_att = - ignore(ascii_char([?>..?>])) + string_value = + ascii_char([?"]) + |> ignore() + |> repeat( + lookahead_not(ascii_char([?"])) + |> choice([ + ~S(\") |> string() |> replace(?"), + utf8_char([]) + ]) + ) + |> ignore(ascii_char([?"])) - defparsec(:parse_html_atts, end_html_att) + html_att_name = + ascii_string([?a..?z, ?A..?z, ?-..?-], min: 1) + + html_att = + html_att_name + |> choice([ + "=" |> string() |> ignore() |> concat(string_value), + empty() + ]) + |> reduce(:reduce_att) + + html_att_end = + string(">") + + html_atts = + html_att + |> repeat(" " |> string() |> times(min: 1) |> ignore() |> concat(html_att)) + + defparsec(:parse_html_atts, html_atts |> optional() |> ignore(html_att_end)) + + @doc false + def reduce_att(att_ast) + + def reduce_att([name]) do + {name, true} + end + + 
def reduce_att([name | values]) do + {name, IO.chardata_to_string(values)} + end end # SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/mix.exs b/mix.exs index 343c0d7..305b7f4 100644 --- a/mix.exs +++ b/mix.exs @@ -15,7 +15,7 @@ defmodule EarmarkParser.MixProject do {:earmark_ast_dsl, "~> 0.3.7", only: [:test]}, {:excoveralls, "~> 0.18.3", only: [:test]}, {:extractly, "~> 0.5.3", only: [:dev]}, - {:floki, "~> 0.36", only: [:dev, :test]}, + {:floki, "~> 0.36", only: [:dev, :test]} ] def project do diff --git a/test/nimble_parsers/html_att_test.exs b/test/nimble_parsers/html_att_test.exs index 34d51c2..2575726 100644 --- a/test/nimble_parsers/html_att_test.exs +++ b/test/nimble_parsers/html_att_test.exs @@ -9,7 +9,26 @@ defmodule Test.NimbleParsers.HtmlAttTest do test "does not parse an empty string" do parse_html_atts("") - |> parsed_error("expected ASCII character in the range \">\" to \">\"") + |> parsed_error("expected string \">\"") + end + end + + describe "boolean attribute" do + test "just it's presence" do + parse_html_atts("hidden>") + |> parsed_ok([{"hidden", true}]) + end + + test "two boolean attributes" do + parse_html_atts("hidden and-visible>") + |> parsed_ok([{"hidden", true}, {"and-visible", true}]) + end + end + + describe "a string attribute" do + test "elixir, what else?" 
do + parse_html_atts(~S{lang="elixir">}) + |> parsed_ok([{"lang", "elixir"}]) end end end diff --git a/test/support/nimble_test_case.ex b/test/support/nimble_test_case.ex index 7789276..8be15f8 100644 --- a/test/support/nimble_test_case.ex +++ b/test/support/nimble_test_case.ex @@ -5,7 +5,6 @@ defmodule Support.NimbleTestCase do import EarmarkParser.NimbleParsers import Support.NimbleTests - end end end diff --git a/test/support/nimble_tests.ex b/test/support/nimble_tests.ex index 39d5425..2d4b459 100644 --- a/test/support/nimble_tests.ex +++ b/test/support/nimble_tests.ex @@ -4,16 +4,19 @@ defmodule Support.NimbleTests do """ defmacro parsed_error(parsed, expected) do quote do - {:error, message, _, _, _, _} = unquote(parsed) # |> IO.inspect() + # |> IO.inspect() + {:error, message, _, _, _, _} = unquote(parsed) assert message == unquote(expected) end end defmacro parsed_ok(parsed, expected) do quote do - {:ok, result, _, _, _, _} = unquote(parsed) # |> IO.inspect() + # |> IO.inspect() + {:ok, result, _, _, _, _} = unquote(parsed) assert result == unquote(expected) end end end + # SPDX-License-Identifier: Apache-2.0 From 6622c17643c29eb2859e447dc259b3e2c81c5159 Mon Sep 17 00:00:00 2001 From: RobertDober Date: Thu, 27 Mar 2025 20:53:32 +0100 Subject: [PATCH 3/7] String parser combinator into own file --- .../nimble_parsers/html_atts_parser.ex | 15 ++---------- .../nimble_parsers/string_parser.ex | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+), 13 deletions(-) create mode 100644 lib/earmark_parser/nimble_parsers/string_parser.ex diff --git a/lib/earmark_parser/nimble_parsers/html_atts_parser.ex b/lib/earmark_parser/nimble_parsers/html_atts_parser.ex index 7dada46..a48d121 100644 --- a/lib/earmark_parser/nimble_parsers/html_atts_parser.ex +++ b/lib/earmark_parser/nimble_parsers/html_atts_parser.ex @@ -3,18 +3,7 @@ defmodule EarmarkParser.NimbleParsers.HtmlAttsParser do Parses an HTML tag """ import NimbleParsec - - string_value = - ascii_char([?"]) - |> 
ignore() - |> repeat( - lookahead_not(ascii_char([?"])) - |> choice([ - ~S(\") |> string() |> replace(?"), - utf8_char([]) - ]) - ) - |> ignore(ascii_char([?"])) + alias EarmarkParser.NimbleParsers.StringParser html_att_name = ascii_string([?a..?z, ?A..?z, ?-..?-], min: 1) @@ -22,7 +11,7 @@ defmodule EarmarkParser.NimbleParsers.HtmlAttsParser do html_att = html_att_name |> choice([ - "=" |> string() |> ignore() |> concat(string_value), + "=" |> string() |> ignore() |> parsec({StringParser, :string_value}), empty() ]) |> reduce(:reduce_att) diff --git a/lib/earmark_parser/nimble_parsers/string_parser.ex b/lib/earmark_parser/nimble_parsers/string_parser.ex new file mode 100644 index 0000000..ba39932 --- /dev/null +++ b/lib/earmark_parser/nimble_parsers/string_parser.ex @@ -0,0 +1,23 @@ +defmodule EarmarkParser.NimbleParsers.StringParser do + @moduledoc ~S""" + String combinator + """ + + import NimbleParsec + + defcombinator( + :string_value, + ascii_char([?"]) + |> ignore() + |> repeat( + lookahead_not(ascii_char([?"])) + |> choice([ + ~S(\") |> string() |> replace(?"), + utf8_char([]) + ]) + ) + |> ignore(ascii_char([?"])) + ) +end + +# SPDX-License-Identifier: AGPL-3.0-or-later From a21b90af013042fbc5c1eb3391886a8749197d57 Mon Sep 17 00:00:00 2001 From: RobertDober Date: Thu, 27 Mar 2025 21:23:37 +0100 Subject: [PATCH 4/7] Adapted formatting to the NimbleParsec convention --- .formatter.exs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.formatter.exs b/.formatter.exs index 6aebc5f..cfeb452 100644 --- a/.formatter.exs +++ b/.formatter.exs @@ -1,3 +1,14 @@ +locals_without_parens = [ + defparsec: 2, + defparsec: 3, + defparsecp: 2, + defparsecp: 3, + defcombinator: 2, + defcombinator: 3, + defcombinatorp: 2, + defcombinatorp: 3 +] + [ force_do_end_blocks: true, inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"], From a6a90b001183aef586922cbf0947f131897f7e9c Mon Sep 17 00:00:00 2001 From: RobertDober Date: Fri, 28 Mar 2025 07:52:44 
+0100 Subject: [PATCH 5/7] Reusing combinators with compile-time functions --- .../nimble_parsers/string_parser.ex | 29 ++++++++++++++----- test/nimble_parsers/html_att_test.exs | 14 +++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/lib/earmark_parser/nimble_parsers/string_parser.ex b/lib/earmark_parser/nimble_parsers/string_parser.ex index ba39932..58f63f5 100644 --- a/lib/earmark_parser/nimble_parsers/string_parser.ex +++ b/lib/earmark_parser/nimble_parsers/string_parser.ex @@ -5,18 +5,33 @@ defmodule EarmarkParser.NimbleParsers.StringParser do import NimbleParsec - defcombinator( - :string_value, - ascii_char([?"]) - |> ignore() + inner_string = fn combinator, ch -> + combinator |> repeat( - lookahead_not(ascii_char([?"])) + lookahead_not(ascii_char([ch])) |> choice([ - ~S(\") |> string() |> replace(?"), + IO.chardata_to_string(["\\", ch]) + |> string() + |> replace(ch), utf8_char([]) ]) ) - |> ignore(ascii_char([?"])) + end + + quoted_string = fn ch -> + empty() + |> ascii_char([ch]) + |> ignore() + |> inner_string.(ch) + |> ignore(ascii_char([ch])) + end + + defcombinator( + :string_value, + choice([ + quoted_string.(?"), + quoted_string.(?') + ]) ) end diff --git a/test/nimble_parsers/html_att_test.exs b/test/nimble_parsers/html_att_test.exs index 2575726..cadbcf5 100644 --- a/test/nimble_parsers/html_att_test.exs +++ b/test/nimble_parsers/html_att_test.exs @@ -30,6 +30,20 @@ defmodule Test.NimbleParsers.HtmlAttTest do parse_html_atts(~S{lang="elixir">}) |> parsed_ok([{"lang", "elixir"}]) end + + test "escaped double quote" do + parse_html_atts(~S{lang="\"lua\"">}) + |> parsed_ok([{"lang", "\"lua\""}]) + end + + test "single quoted string too" do + parse_html_atts(~S{lang="\"pt-br\"" lang='fr-fr' lang='de-\'at\''>}) + |> parsed_ok([ + {"lang", "\"pt-br\""}, + {"lang", "fr-fr"}, + {"lang", "de-'at'"} + ]) + end end end From 3449a26eb4b18cad3bd45deebb12858fb6fb85a4 Mon Sep 17 00:00:00 2001 From: RobertDober Date: Sat, 25 Oct 2025 
12:38:34 +0200 Subject: [PATCH 6/7] Fixing mix.exs deprecation warning --- mix.exs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/mix.exs b/mix.exs index 305b7f4..fb83c70 100644 --- a/mix.exs +++ b/mix.exs @@ -18,6 +18,17 @@ defmodule EarmarkParser.MixProject do {:floki, "~> 0.36", only: [:dev, :test]} ] + def cli do + [ + preferred_cli_env: [ + coveralls: :test, + "coveralls.detail": :test, + "coveralls.post": :test, + "coveralls.html": :test + ] + ] + end + def project do [ app: :earmark_parser, @@ -28,12 +39,6 @@ defmodule EarmarkParser.MixProject do deps: @deps, description: "AST parser and generator for Markdown", package: package(), - preferred_cli_env: [ - coveralls: :test, - "coveralls.detail": :test, - "coveralls.post": :test, - "coveralls.html": :test - ], test_coverage: [tool: ExCoveralls], aliases: [docs: &build_docs/1] ] From 5e8a4a61705ce0612c59216b1e54e25d84c4957c Mon Sep 17 00:00:00 2001 From: RobertDober Date: Sat, 25 Oct 2025 13:06:05 +0200 Subject: [PATCH 7/7] Working on test coverage and HTML tag parser --- .../nimble_parsers/html_tag_parser.ex | 10 ++++++++++ lib/earmark_parser/options.ex | 4 ++-- mix.exs | 12 ++++++------ test/nimble_parsers/html_oneline_tag_test.exs | 13 +++++++++++++ test/support/nimble_tests.ex | 4 ++-- 5 files changed, 33 insertions(+), 10 deletions(-) create mode 100644 lib/earmark_parser/nimble_parsers/html_tag_parser.ex create mode 100644 test/nimble_parsers/html_oneline_tag_test.exs diff --git a/lib/earmark_parser/nimble_parsers/html_tag_parser.ex b/lib/earmark_parser/nimble_parsers/html_tag_parser.ex new file mode 100644 index 0000000..2046ca1 --- /dev/null +++ b/lib/earmark_parser/nimble_parsers/html_tag_parser.ex @@ -0,0 +1,10 @@ +defmodule EarmarkParser.NimbleParsers.HtmlTagParser do + + @moduledoc ~S""" + Parses an HTML tag + """ + import NimbleParsec + alias EarmarkParser.NimbleParsers.HtmlAttsParser + +end +# SPDX-License-Identifier: Apache-2.0 diff --git 
a/lib/earmark_parser/options.ex b/lib/earmark_parser/options.ex index 8242c7d..d8a2b91 100644 --- a/lib/earmark_parser/options.ex +++ b/lib/earmark_parser/options.ex @@ -104,8 +104,8 @@ defmodule EarmarkParser.Options do Use normalize before passing it into any API function iex(1)> options = normalize(annotations: "%%") - ...(1)> options.annotations - ~r{\A(.*)(%%.*)} + ...(1)> options.annotations.source + "\\A(.*)(%%.*)" """ @spec normalize(t() | keyword()) :: t() def normalize(options) diff --git a/mix.exs b/mix.exs index fb83c70..1caec0f 100644 --- a/mix.exs +++ b/mix.exs @@ -20,12 +20,6 @@ defmodule EarmarkParser.MixProject do def cli do [ - preferred_cli_env: [ - coveralls: :test, - "coveralls.detail": :test, - "coveralls.post": :test, - "coveralls.html": :test - ] ] end @@ -39,6 +33,12 @@ defmodule EarmarkParser.MixProject do deps: @deps, description: "AST parser and generator for Markdown", package: package(), + preferred_cli_env: [ + coveralls: :test, + "coveralls.detail": :test, + "coveralls.post": :test, + "coveralls.html": :test + ], test_coverage: [tool: ExCoveralls], aliases: [docs: &build_docs/1] ] diff --git a/test/nimble_parsers/html_oneline_tag_test.exs b/test/nimble_parsers/html_oneline_tag_test.exs new file mode 100644 index 0000000..df2e1cc --- /dev/null +++ b/test/nimble_parsers/html_oneline_tag_test.exs @@ -0,0 +1,13 @@ +defmodule Test.NimbleParsers.HtmlOnelineTagTest do + + use Support.NimbleTestCase + + describe "no attributes" do + test "br" do + # parse_html_tag("
") + # |> parsed_ok({"br", [], [], %{verbatim: true}}) + end + end + +end +# SPDX-License-Identifier: Apache-2.0 diff --git a/test/support/nimble_tests.ex b/test/support/nimble_tests.ex index 2d4b459..7dd2edf 100644 --- a/test/support/nimble_tests.ex +++ b/test/support/nimble_tests.ex @@ -4,7 +4,7 @@ defmodule Support.NimbleTests do """ defmacro parsed_error(parsed, expected) do quote do - # |> IO.inspect() + # |> IO.inspect() {:error, message, _, _, _, _} = unquote(parsed) assert message == unquote(expected) end @@ -12,7 +12,7 @@ defmodule Support.NimbleTests do defmacro parsed_ok(parsed, expected) do quote do - # |> IO.inspect() + # |> IO.inspect() {:ok, result, _, _, _, _} = unquote(parsed) assert result == unquote(expected) end