From 6dee609e14aff5e99394792b413b096f4a0a9ebf Mon Sep 17 00:00:00 2001 From: Alejandro Klever <45394625+alejandroklever@users.noreply.github.com> Date: Wed, 17 Apr 2024 00:11:28 -0400 Subject: [PATCH 1/7] create lalr grammar tests --- pyjapt/parsing.py | 13 +++++++------ pyproject.toml | 2 ++ tests/test_arithmetic_grammar.py | 21 ++++++++++++++++---- tests/test_ll1_grammar.py | 33 ++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 10 deletions(-) create mode 100644 tests/test_ll1_grammar.py diff --git a/pyjapt/parsing.py b/pyjapt/parsing.py index 7189aa0..4bedf90 100644 --- a/pyjapt/parsing.py +++ b/pyjapt/parsing.py @@ -2,14 +2,15 @@ import re import sys from typing import ( - List, - FrozenSet, - Optional, - Tuple, - Iterable, Callable, Dict, + FrozenSet, + Iterable, + List, + Literal, + Optional, Set, + Tuple, Union, ) @@ -500,7 +501,7 @@ def get_lexer(self) -> Lexer: self.lexical_error_handler, ) - def get_parser(self, name: str, verbose: bool = False): + def get_parser(self, name: Literal['slr', 'lalr1', 'lr1'], verbose: bool = False): if name == "slr": return SLRParser(self, verbose) diff --git a/pyproject.toml b/pyproject.toml index fd96fe7..50d919a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,5 +21,7 @@ build-backend = "poetry.masonry.api" [tool.pytest.ini_options] markers = [ "slr: test using the slr parser", + "lr1: test using the lr1 parser", + "lalr1: test using the lalr1 parser", "serial", ] \ No newline at end of file diff --git a/tests/test_arithmetic_grammar.py b/tests/test_arithmetic_grammar.py index 152f808..73a0e9d 100644 --- a/tests/test_arithmetic_grammar.py +++ b/tests/test_arithmetic_grammar.py @@ -1,3 +1,4 @@ +from typing import Literal import pytest from pyjapt import Grammar @@ -17,7 +18,7 @@ ] -def get_artithmetic_exception_grammar() -> Grammar: +def get_artithmetic_expressions_grammar() -> Grammar: g = Grammar() expr = g.add_non_terminal("expr", True) term, fact = g.add_non_terminals("term fact") @@ -47,14
+48,26 @@ def empty_expression(s: RuleList): return g -def parse(parser_name: str, text: str): - g = get_artithmetic_exception_grammar() +def parse(parser_name: Literal['slr', 'lr1', 'lalr1'], text: str): + g = get_artithmetic_expressions_grammar() lexer = g.get_lexer() parser = g.get_parser(parser_name) return parser(lexer(text)) - +@pytest.mark.slr @pytest.mark.parametrize("test,expected", tests) def test_slr(test, expected): assert parse("slr", test) == expected, "Bad Parsing" + + +@pytest.mark.lr1 +@pytest.mark.parametrize("test,expected", tests) +def test_lr1(test, expected): + assert parse("lr1", test) == expected, "Bad Parsing" + + +@pytest.mark.lalr1 +@pytest.mark.parametrize("test,expected", tests) +def test_lalr1(test, expected): + assert parse("lalr1", test) == expected, "Bad Parsing" diff --git a/tests/test_ll1_grammar.py b/tests/test_ll1_grammar.py new file mode 100644 index 0000000..50dc036 --- /dev/null +++ b/tests/test_ll1_grammar.py @@ -0,0 +1,33 @@ +import pytest + +from pyjapt import Grammar, Lexer +from pyjapt.parsing import RuleList + + +def grammar(): + g = Grammar() + + S = g.add_non_terminal("S", True) + A, B = g.add_non_terminals("A B") + + g.add_terminals("a b c d") + + S %= "A a" + S %= "B a" + A %= "A c" + A %= "d" + B %= "B c" + B %= "d" + + return g + +def test_lalr(): + g = grammar() + + lexer = g.get_lexer() + parser = g.get_parser('lalr1') + + tokens = lexer('ab') + result = parser(tokens) + + assert result == None \ No newline at end of file From 4a0a8a40940e2d84835b9cd3784184cfbb55ff08 Mon Sep 17 00:00:00 2001 From: Alejandro Klever <45394625+alejandroklever@users.noreply.github.com> Date: Thu, 27 Jun 2024 00:49:16 -0400 Subject: [PATCH 2/7] feat: add grammar comparison tests --- .gitignore | 1 + .vscode/settings.json | 7 ------ build.py | 7 ++---- pyjapt/parsing.py | 19 +++++++++------ tests/test_arithmetic_grammar.py | 9 +++---- tests/test_lalr1_but_not_slr.py | 31 ++++++++++++++++++++++++ tests/test_ll1_grammar.py | 33 
-------------------------- tests/test_lr1_but_not_lalr_grammar.py | 31 ++++++++++++++++++++++++ 8 files changed, 82 insertions(+), 56 deletions(-) delete mode 100644 .vscode/settings.json create mode 100644 tests/test_lalr1_but_not_slr.py delete mode 100644 tests/test_ll1_grammar.py create mode 100644 tests/test_lr1_but_not_lalr_grammar.py diff --git a/.gitignore b/.gitignore index 3708df5..1de3273 100644 --- a/.gitignore +++ b/.gitignore @@ -142,6 +142,7 @@ cython_debug/ # project folders .idea +.vscode/settings.json build dist pyjapt.egg-info \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9b38853..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "python.testing.pytestArgs": [ - "tests" - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true -} \ No newline at end of file diff --git a/build.py b/build.py index f4cf679..95ecc37 100644 --- a/build.py +++ b/build.py @@ -1,13 +1,10 @@ import os import re +import pyjapt with open("pyjapt/__init__.py", "r") as f: - version = ( - re.search(r"__version__ = \"\d\.\d\.\d\"", f.read()) - .group() - .replace('__version__ = "', "")[:-1] - ) + version = pyjapt.__version__ with open("pyproject.toml", "r") as f: s = f.read() diff --git a/pyjapt/parsing.py b/pyjapt/parsing.py index 4bedf90..087fca5 100644 --- a/pyjapt/parsing.py +++ b/pyjapt/parsing.py @@ -501,7 +501,7 @@ def get_lexer(self) -> Lexer: self.lexical_error_handler, ) - def get_parser(self, name: Literal['slr', 'lalr1', 'lr1'], verbose: bool = False): + def get_parser(self, name: Literal["slr", "lalr1", "lr1"], verbose: bool = False): if name == "slr": return SLRParser(self, verbose) @@ -735,7 +735,7 @@ def compute_local_first(firsts, alpha): return first_alpha -def compute_firsts(grammar: Grammar): +def compute_firsts(grammar: Grammar) -> Dict[Symbol, ContainerSet]: firsts = {} change = True @@ -809,7 +809,7 @@ def compute_follows(grammar: 
Grammar, firsts): ######################### # LR0 AUTOMATA BUILDING # ######################### -def closure_lr0(items: Iterable[Item]): +def closure_lr0(items: Iterable[Item]) -> FrozenSet[Item]: closure = set(items) pending = set(items) @@ -927,7 +927,7 @@ def goto_lr1(items, symbol, firsts=None, just_kernel=False): return items if just_kernel else closure_lr1(items, firsts) -def build_lr1_automaton(grammar, firsts=None): +def build_lr1_automaton(grammar: Grammar, firsts=None): assert len(grammar.start_symbol.productions) == 1, "Grammar must be augmented" if not firsts: @@ -1058,6 +1058,7 @@ def __init__( sys.stderr.write( f"Warning: {self.shift_reduce_count} Shift-Reduce Conflicts\n" ) + sys.stderr.write( f"Warning: {self.reduce_reduce_count} Reduce-Reduce Conflicts\n" ) @@ -1085,6 +1086,10 @@ def error(parser: "ShiftReduceParser"): f' "{parser.current_token.column}"', ) + @property + def has_conflicts(self) -> bool: + return self.conflicts != [] + ############# # End # ############# @@ -1128,9 +1133,9 @@ def _register(self, table, key, value): action, tag = table[key] if action != value[0]: if action == self.SHIFT: - table[ - key - ] = value # By default shifting if exists a Shift-Reduce Conflict + table[key] = ( + value # By default shifting if exists a Shift-Reduce Conflict + ) self.shift_reduce_count += 1 self.conflicts.append(("SR", value[1], tag)) else: diff --git a/tests/test_arithmetic_grammar.py b/tests/test_arithmetic_grammar.py index 73a0e9d..7f8a4e2 100644 --- a/tests/test_arithmetic_grammar.py +++ b/tests/test_arithmetic_grammar.py @@ -2,7 +2,7 @@ import pytest from pyjapt import Grammar -from pyjapt.typing import RuleList, Lexer, SLRParser, LR1Parser, LALR1Parser +from pyjapt.typing import RuleList, Lexer tests = [ @@ -18,7 +18,7 @@ ] -def get_artithmetic_expressions_grammar() -> Grammar: +def get_arithmetic_expressions_grammar() -> Grammar: g = Grammar() expr = g.add_non_terminal("expr", True) term, fact = g.add_non_terminals("term fact") @@ 
-48,13 +48,14 @@ def empty_expression(s: RuleList): return g -def parse(parser_name: Literal['slr', 'lr1', 'lalr1'], text: str): - g = get_artithmetic_expressions_grammar() +def parse(parser_name: Literal["slr", "lr1", "lalr1"], text: str): + g = get_arithmetic_expressions_grammar() lexer = g.get_lexer() parser = g.get_parser(parser_name) return parser(lexer(text)) + @pytest.mark.slr @pytest.mark.parametrize("test,expected", tests) def test_slr(test, expected): diff --git a/tests/test_lalr1_but_not_slr.py b/tests/test_lalr1_but_not_slr.py new file mode 100644 index 0000000..e06c479 --- /dev/null +++ b/tests/test_lalr1_but_not_slr.py @@ -0,0 +1,31 @@ +from pyjapt import Grammar + + +def grammar(): + g = Grammar() + + S = g.add_non_terminal("S", True) + (A,) = g.add_non_terminals("A") + + g.add_terminals("a b c d") + + S %= "A a" + S %= "b A c" + S %= "d c" + S %= "b d a" + A %= "d" + + return g + + +def test_slr(): + g = grammar() + parser = g.get_parser("slr") + print(parser.conflicts) + assert parser.has_conflicts + + +def test_lalr(): + g = grammar() + parser = g.get_parser("lalr1") + assert not parser.has_conflicts diff --git a/tests/test_ll1_grammar.py b/tests/test_ll1_grammar.py deleted file mode 100644 index 50dc036..0000000 --- a/tests/test_ll1_grammar.py +++ /dev/null @@ -1,33 +0,0 @@ -import pytest - -from pyjapt import Grammar, Lexer -from pyjapt.parsing import RuleList - - -def grammar(): - g = Grammar() - - S = g.add_non_terminal("S", True) - A, B = g.add_non_terminals("A B") - - g.add_terminals("a b c d") - - S %= "A a" - S %= "B a" - A %= "A c" - A %= "d" - B %= "B c" - B %= "d" - - return g - -def test_lalr(): - g = grammar() - - lexer = g.get_lexer() - parser = g.get_parser('lalr1') - - tokens = lexer('ab') - result = parser(tokens) - - assert result == None \ No newline at end of file diff --git a/tests/test_lr1_but_not_lalr_grammar.py b/tests/test_lr1_but_not_lalr_grammar.py new file mode 100644 index 0000000..d2aab13 --- /dev/null +++ 
b/tests/test_lr1_but_not_lalr_grammar.py @@ -0,0 +1,31 @@ +from pyjapt import Grammar + + +def grammar(): + g = Grammar() + + S = g.add_non_terminal("S", True) + A, B = g.add_non_terminals("A B") + + g.add_terminals("a b c d") + + S %= "A a" + S %= "b A c" + S %= "B c" + S %= "b B a" + A %= "d" + B %= "d" + + return g + + +def test_lalr(): + g = grammar() + parser = g.get_parser("lalr1") + assert parser.has_conflicts + + +def test_lr1(): + g = grammar() + parser = g.get_parser("lr1") + assert not parser.has_conflicts From 4d65e782cfcf94df8b28cf1953ab8b6c6b0b31ce Mon Sep 17 00:00:00 2001 From: Alejandro Klever <45394625+alejandroklever@users.noreply.github.com> Date: Fri, 27 Feb 2026 21:24:06 -0500 Subject: [PATCH 3/7] Update tests/test_lalr1_but_not_slr.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_lalr1_but_not_slr.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_lalr1_but_not_slr.py b/tests/test_lalr1_but_not_slr.py index e06c479..5b39d13 100644 --- a/tests/test_lalr1_but_not_slr.py +++ b/tests/test_lalr1_but_not_slr.py @@ -21,7 +21,6 @@ def grammar(): def test_slr(): g = grammar() parser = g.get_parser("slr") - print(parser.conflicts) assert parser.has_conflicts From 348b81b5cba00529d7e7a6553d9aa3c04929933a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 02:24:37 +0000 Subject: [PATCH 4/7] Initial plan From f53f8d2b524f61be07310ee30f39f9f39dee7177 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 02:25:24 +0000 Subject: [PATCH 5/7] Initial plan From 6946c2479b8fc9785cf37cc6b5cc0806ca83e74f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 02:25:30 +0000 Subject: [PATCH 6/7] Add @pytest.mark.lalr1 decorator and import pytest to test_lalr1_but_not_slr.py Co-authored-by: alejandroklever 
<45394625+alejandroklever@users.noreply.github.com> --- tests/test_lalr1_but_not_slr.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_lalr1_but_not_slr.py b/tests/test_lalr1_but_not_slr.py index 5b39d13..2498209 100644 --- a/tests/test_lalr1_but_not_slr.py +++ b/tests/test_lalr1_but_not_slr.py @@ -1,3 +1,5 @@ +import pytest + from pyjapt import Grammar @@ -24,6 +26,7 @@ def test_slr(): assert parser.has_conflicts +@pytest.mark.lalr1 def test_lalr(): g = grammar() parser = g.get_parser("lalr1") From bc291d71a21b5c7eafa1b50f020c4c8877c50903 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 02:26:04 +0000 Subject: [PATCH 7/7] Remove unnecessary file open in build.py, use pyjapt.__version__ directly Co-authored-by: alejandroklever <45394625+alejandroklever@users.noreply.github.com> --- build.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build.py b/build.py index 95ecc37..3e4c38f 100644 --- a/build.py +++ b/build.py @@ -3,8 +3,7 @@ import pyjapt -with open("pyjapt/__init__.py", "r") as f: - version = pyjapt.__version__ +version = pyjapt.__version__ with open("pyproject.toml", "r") as f: s = f.read()