From 3578fd87cdd804607a6a1e8a1d5bffd1bf6c3b55 Mon Sep 17 00:00:00 2001 From: Toddr Bot Date: Tue, 7 Apr 2026 05:34:02 +0000 Subject: [PATCH 1/3] =?UTF-8?q?fix:=20branch=5Freset=20qr()=20crash=20?= =?UTF-8?q?=E2=80=94=20wrong=20family/type=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The qr() method in Parser.pm checked `family eq 'group'` to detect non-capturing groups and extract their flags. Since branch_reset also has family 'group' but no on()/off() methods, calling qr() on a regex like (?|(a)|(b)) crashed with "Can't locate object method 'on'". Fix: check `type eq 'group'` instead, which only matches the non-capturing group construct that actually has flag accessors. Co-Authored-By: Claude Opus 4.6 --- lib/Regexp/Parser.pm | 2 +- t/12perl542.t | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/Regexp/Parser.pm b/lib/Regexp/Parser.pm index 9412df5..fe578af 100644 --- a/lib/Regexp/Parser.pm +++ b/lib/Regexp/Parser.pm @@ -248,7 +248,7 @@ sub qr { no warnings 'regexp'; use re 'eval'; - if (@$rx == 1 and $rx->[0]->family eq 'group') { + if (@$rx == 1 and $rx->[0]->type eq 'group') { my $vis = join "", map $_->qr, @{ $rx->[0]->{data} }; my $flags = $rx->[0]->on; $flags =~ s/^\^//; # strip caret prefix for qr// modifier diff --git a/t/12perl542.t b/t/12perl542.t index 7c6f855..efa2915 100644 --- a/t/12perl542.t +++ b/t/12perl542.t @@ -114,6 +114,17 @@ subtest '(?|...) branch reset groups' => sub { my $root = $r->root; is($root->[0]->family, 'group', 'branch_reset family'); is($root->[0]->type, 'branch_reset', 'branch_reset type'); + + # qr() must work on branch_reset (was crashing: "on" method missing) + { + my $p = Regexp::Parser->new; + $p->regex('(?|(a)|(b))'); + my $qr = $p->qr; + ok(defined $qr, 'branch_reset qr() returns defined'); + ok('a' =~ $qr, 'branch_reset qr matches first alternative'); + ok('b' =~ $qr, 'branch_reset qr matches second alternative'); + ok('c' !~ $qr, 'branch_reset qr rejects non-matching'); + } }; # ======================================== From eed2aaf5e0cf51967bf1a6c5559b50803a2f2062 Mon Sep 17 00:00:00 2001 From: Toddr Bot Date: Tue, 7 Apr 2026 05:35:48 +0000 Subject: [PATCH 2/3] test: add 86-test qr() functional test suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Comprehensive test that verifies the full pipeline: source pattern → parse → tree → qr() → match. Covers literals, anchors, quantifiers, possessive quantifiers, character classes, shorthand classes, flags, caret flags, lookahead/lookbehind, atomic groups, named captures, branch reset, backtracking verbs, \K, script runs, quotemeta, Unicode properties, POSIX classes, \R/\h/\v/\X, conditionals, named characters, and escape sequences. Previously qr() was only tested incidentally in ~5 files. Co-Authored-By: Claude Opus 4.6 --- MANIFEST | 1 + t/18qr_functional.t | 188 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 t/18qr_functional.t diff --git a/MANIFEST b/MANIFEST index fc423c7..623c9c1 100644 --- a/MANIFEST +++ b/MANIFEST @@ -29,4 +29,5 @@ t/14quotemeta.t t/15boundary.t t/16define.t t/17extcharclass.t +t/18qr_functional.t t/99misc.t diff --git a/t/18qr_functional.t b/t/18qr_functional.t new file mode 100644 index 0000000..ac2f605 --- /dev/null +++ b/t/18qr_functional.t @@ -0,0 +1,188 @@ +use strict; +use warnings; +use Test::More; +use Regexp::Parser; + +# Functional test: parse a regex, produce qr//, verify match behavior. +# This tests the full pipeline: source → parse → tree → qr() → match. + +my @tests = ( + # [pattern, test_string, should_match, description] + + # Literals + ['abc', 'abc', 1, 'literal match'], + ['abc', 'def', 0, 'literal non-match'], + + # Anchors + ['^abc$', 'abc', 1, 'anchored match'], + ['^abc$', 'xabc', 0, 'anchored non-match'], + ['\Aabc\z', 'abc', 1, '\A \z anchors'], + + # Quantifiers + ['a+', 'aaa', 1, 'plus quantifier'], + ['a+', '', 0, 'plus needs at least one'], + ['a*', '', 1, 'star matches empty'], + ['a?', 'b', 1, 'question matches empty'], + ['a{3}', 'aaa', 1, 'exact count'], + ['a{3}', 'aa', 0, 'exact count too few'], + ['a{2,4}', 'aaa', 1, 'range quantifier'], + ['a+?', 'aaa', 1, 'lazy plus'], + ['a{2,5}?', 'aaaa', 1, 'lazy range'], + + # Possessive quantifiers + ['a++b', 'aab', 1, 'possessive plus match'], + ['a*+b', 'aab', 1, 'possessive star match'], + + # Character classes + ['[abc]', 'b', 1, 'char class match'], + ['[abc]', 'd', 0, 'char class non-match'], + ['[^abc]', 'd', 1, 'negated class match'], + ['[^abc]', 'a', 0, 'negated class non-match'], + ['[a-z]', 'm', 1, 'range class'], + ['[a-z]', 'A', 0, 'range class case mismatch'], + + # Shorthand classes + ['\d+', '123', 1, '\d matches digits'], + ['\d+', 'abc', 0, '\d rejects letters'], + ['\w+', 'hello', 1, '\w matches word chars'], + ['\s+', ' ', 1, '\s matches whitespace'], + ['\D', 'x', 1, '\D matches non-digit'], + ['\W', '!', 1, '\W matches non-word'], + ['\S', 'x', 1, '\S matches non-space'], + + # Dot + ['a.b', 'axb', 1, 'dot matches any char'], + ['a.b', "a\nb", 0, 'dot skips newline by default'], + + # Alternation + ['a|b', 'a', 1, 'alternation first'], + ['a|b', 'b', 1, 'alternation second'], + ['a|b', 'c', 0, 'alternation non-match'], + ['abc|def', 'def', 1, 'multi-char alternation'], + + # Groups - non-capturing + ['(?:abc)+', 'abcabc', 1, 'quantified nc group'], + ['(?:a|b)c', 'ac', 1, 'nc group with alternation'], + ['(?:a|b)c', 'bc', 1, 'nc group alt second branch'], + + # Groups - capturing + ['(a)(b)\2', 'abb', 1, 'backref match'], + ['(a)(b)\2', 'abc', 0, 'backref non-match'], + ['(a+)(b+)\2', 'aabbb', 1, 'backref with quantifiers'], + + # Flags + ['(?i:abc)', 'ABC', 1, 'case-insensitive flag'], + ['(?i:abc)', 'abc', 1, 'case-insensitive lowercase too'], + ['(?s:a.b)', "a\nb", 1, 'single-line (dotall) flag'], + ['(?i)abc', 'ABC', 1, 'flag-only toggle'], + + # Caret flags + ['(?^:abc)', 'abc', 1, 'caret flag basic'], + ['(?^i:abc)', 'ABC', 1, 'caret flag with i'], + + # Lookahead + ['(?=foo)foo', 'foo', 1, 'positive lookahead'], + ['(?!bar)foo', 'foo', 1, 'negative lookahead match'], + ['(?!foo)foo', 'foo', 0, 'negative lookahead non-match'], + + # Lookbehind + ['(?<=a)b', 'ab', 1, 'positive lookbehind'], + ['(?abc)', 'abc', 1, 'atomic group match'], + + # Named captures + ['(?abc)\k', 'abcabc', 1, 'named capture + backref'], + ['(?abc)\k', 'abcdef', 0, 'named capture backref non-match'], + ["(?abc)\\k'n'", 'abcabc', 1, 'named capture single-quote backref'], + ['(?Px)(?P=n)', 'xx', 1, 'Python named capture'], + + # Branch reset + ['(?|(a)|(b))', 'a', 1, 'branch reset first alt'], + ['(?|(a)|(b))', 'b', 1, 'branch reset second alt'], + ['(?|(a)|(b))', 'c', 0, 'branch reset non-match'], + + # Backtracking verbs + ['(*FAIL)', 'x', 0, 'FAIL always fails'], + ['a(*ACCEPT)b', 'a', 1, 'ACCEPT succeeds early'], + + # \K (keep) + ['foo\Kbar', 'foobar', 1, '\K in match context'], + + # Script run + ['(*sr:\d+)', '123', 1, 'script_run match'], + + # Quotemeta + ['\Qa.b\E', 'a.b', 1, 'quotemeta literal dot'], + ['\Qa.b\E', 'axb', 0, 'quotemeta rejects wildcard'], + + # Unicode properties + ['\p{Lu}', 'A', 1, 'uppercase property'], + ['\P{Lu}', 'a', 1, 'negated uppercase property'], + + # POSIX classes + ['[[:alpha:]]', 'a', 1, 'POSIX alpha'], + ['[[:digit:]]', '5', 1, 'POSIX digit'], + ['[[:^alpha:]]', '5', 1, 'POSIX negated alpha'], + + # \R, \h, \v, \X + ['\R', "\n", 1, '\R matches newline'], + ['\h', ' ', 1, '\h matches space'], + ['\H', 'x', 1, '\H matches non-hspace'], + ['\v', "\n", 1, '\v matches vertical space'], + ['\V', 'x', 1, '\V matches non-vspace'], + ['\X', 'a', 1, '\X matches grapheme cluster'], + + # Conditional + ['(?(1)a|b)', 'b', 1, 'conditional false branch'], + + # Named characters + ['\N{LATIN SMALL LETTER A}', 'a', 1, 'named character'], + + # Hex and control escapes + ['\x41', 'A', 1, 'hex escape'], + ['\x{41}', 'A', 1, 'braced hex escape'], + ['\cA', "\x01", 1, 'control char'], + + # Escape sequences + ['\t', "\t", 1, 'tab escape'], + ['\n', "\n", 1, 'newline escape'], + ['\r', "\r", 1, 'return escape'], +); + +plan tests => scalar @tests; + +for my $t (@tests) { + my ($pat, $str, $expect, $desc) = @$t; + my $r = Regexp::Parser->new; + my $ok = $r->regex($pat); + unless (defined $ok) { + fail("$desc: parse failed for '$pat'"); + next; + } + + my $qr = eval { $r->qr }; + if ($@) { + fail("$desc: qr() died: $@"); + next; + } + unless (defined $qr) { + fail("$desc: qr() returned undef"); + next; + } + + my $got = eval { $str =~ $qr ? 1 : 0 }; + if ($@) { + fail("$desc: match died: $@"); + next; + } + + if ($expect) { + ok($got, $desc); + } + else { + ok(!$got, $desc); + } +} From b00ecf6fad1ebbbe013e84f8d28b4e6c64e98760 Mon Sep 17 00:00:00 2001 From: Toddr Bot Date: Tue, 14 Apr 2026 08:45:01 +0000 Subject: [PATCH 3/3] fix: skip qr() functional tests for constructs unsupported on older Perls (*script_run:) requires Perl 5.30+ and \N{NAME} in runtime patterns requires lexer-time resolution only available in 5.30+. Both tests now skip gracefully on older Perl versions instead of failing. Co-Authored-By: Claude Opus 4.6 --- t/18qr_functional.t | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/t/18qr_functional.t b/t/18qr_functional.t index ac2f605..dd5842c 100644 --- a/t/18qr_functional.t +++ b/t/18qr_functional.t @@ -7,7 +7,8 @@ use Regexp::Parser; # This tests the full pipeline: source → parse → tree → qr() → match. my @tests = ( - # [pattern, test_string, should_match, description] + # [pattern, test_string, should_match, description, min_perl_version] + # min_perl_version is optional; when present, the test is skipped on older Perls # Literals ['abc', 'abc', 1, 'literal match'], @@ -111,8 +112,8 @@ my @tests = ( # \K (keep) ['foo\Kbar', 'foobar', 1, '\K in match context'], - # Script run - ['(*sr:\d+)', '123', 1, 'script_run match'], + # Script run (Perl 5.30+; verb not recognized by older regex engines) + ['(*sr:\d+)', '123', 1, 'script_run match', 5.030], # Quotemeta ['\Qa.b\E', 'a.b', 1, 'quotemeta literal dot'], @@ -138,8 +139,9 @@ my @tests = ( # Conditional ['(?(1)a|b)', 'b', 1, 'conditional false branch'], - # Named characters - ['\N{LATIN SMALL LETTER A}', 'a', 1, 'named character'], + # Named characters (\N{NAME} in runtime qr// requires 5.30+; older Perls + # demand lexer-time resolution that can't happen in a string-built pattern) + ['\N{LATIN SMALL LETTER A}', 'a', 1, 'named character', 5.030], # Hex and control escapes ['\x41', 'A', 1, 'hex escape'], @@ -155,7 +157,16 @@ my @tests = ( plan tests => scalar @tests; for my $t (@tests) { - my ($pat, $str, $expect, $desc) = @$t; + my ($pat, $str, $expect, $desc, $min_perl) = @$t; + + if ($min_perl && $] < $min_perl) { + my $need = sprintf("%.3f", $min_perl); + SKIP: { + skip "$desc requires Perl $need (have $])", 1; + } + next; + } + my $r = Regexp::Parser->new; my $ok = $r->regex($pat); unless (defined $ok) {