diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fc2365..cdaabad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Fixed +* Properly bound repetitions for machine parsers ([#53]) * Group quantized 2+ char literals in regex optimization ([#54]) @@ -219,4 +220,5 @@ descent parser and a work-in-progress state-machine parser. [#38]: https://github.com/goodmami/pe/issues/38 [#44]: https://github.com/goodmami/pe/issues/44 [#46]: https://github.com/goodmami/pe/issues/46 +[#53]: https://github.com/goodmami/pe/issues/53 [#54]: https://github.com/goodmami/pe/issues/54 diff --git a/pe/_cy_machine.pyx b/pe/_cy_machine.pyx index bd352d2..6fe98e0 100644 --- a/pe/_cy_machine.pyx +++ b/pe/_cy_machine.pyx @@ -272,7 +272,8 @@ cdef class _Parser: continue elif instr.opcode == UPDATE: - if instr.maxcount == -1 or state.count < instr.maxcount: + state.count += 1 + if instr.maxcount < 0 or state.count < instr.maxcount: state.pos = pos state.argidx = len(args) state.kwidx = len(kwargs) @@ -433,7 +434,7 @@ def _loop(defn, mincount, maxcount): *(pi.copy() for _ in range(mincount) for pi in pis), Instruction(BRANCH, len(pis) + 2), *pis, - Instruction(UPDATE, -len(pis), maxcount=maxcount) + Instruction(UPDATE, -len(pis), maxcount=(maxcount - mincount)) ] diff --git a/pe/_py_machine.py b/pe/_py_machine.py index f5a5984..81597c7 100644 --- a/pe/_py_machine.py +++ b/pe/_py_machine.py @@ -205,8 +205,9 @@ def _match( # noqa: C901 elif opcode == UPDATE: next_idx, _, count, prev_mark, _, _ = pop() - if maxcount == -1 or count < maxcount: - push((next_idx, pos, count + 1, prev_mark, len(args), len(kwargs))) + count += 1 + if maxcount < 0 or count < maxcount: + push((next_idx, pos, count, prev_mark, len(args), len(kwargs))) idx += oploc else: idx += 1 @@ -355,7 +356,7 @@ def _loop(defn, mincount: int, maxcount: int): return [*(pis * mincount), # risk of billion laughs attack Instruction(BRANCH, len(pis) + 2), *pis, - Instruction(UPDATE, -len(pis), maxcount=maxcount)] + Instruction(UPDATE, -len(pis), maxcount=(maxcount - mincount))] def _sym(defn): diff --git a/test/test_parsers.py b/test/test_parsers.py index aad0fb5..8066bc7 100644 --- a/test/test_parsers.py +++ b/test/test_parsers.py @@ -91,6 +91,7 @@ 'aabbcc', 0, 3, _blank), ('Rpt3', Rpt(abc, min=3), 'aaxx', 0, FAIL, None), ('Rpt4', Rpt(abc, max=1), 'aabbcc', 0, 1, _blank), + ('Rpt5', Rpt('a', max=2), 'aaaaaa', 0, 2, _blank), ('And0', And(abc), 'a', 0, 0, _blank), ('And1', And(abc), 'd', 0, FAIL, None),