From cc7e2f8209d46f67963e25fedd5d3dd88d98d933 Mon Sep 17 00:00:00 2001 From: Toddr Bot Date: Sun, 5 Apr 2026 08:00:04 +0000 Subject: [PATCH] feat: add {,n} quantifier (Perl 5.34+) and fix \p{^Name} negation Two parsing fixes: 1. {,n} quantifier syntax (Perl 5.34+): upper bound only with implicit min=0. Previously treated as literal text. Normalizes to {0,n} in visual output. Supports lazy ({,n}?) and possessive ({,n}+) modifiers. 2. \p{^Name} caret negation: the ^ prefix inside \p{} is equivalent to \P{Name} but the parser was storing "^Name" as the type with neg=0. Now correctly strips ^ and sets neg=1, normalizing to \P{Name}. Co-Authored-By: Claude Opus 4.6 --- MANIFEST | 1 + lib/Regexp/Parser/Handlers.pm | 16 +++- t/18quantprop.t | 141 ++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 t/18quantprop.t diff --git a/MANIFEST b/MANIFEST index fc423c7..ab7d644 100644 --- a/MANIFEST +++ b/MANIFEST @@ -29,4 +29,5 @@ t/14quotemeta.t t/15boundary.t t/16define.t t/17extcharclass.t +t/18quantprop.t t/99misc.t diff --git a/lib/Regexp/Parser/Handlers.pm b/lib/Regexp/Parser/Handlers.pm index e700f73..dfdacb5 100644 --- a/lib/Regexp/Parser/Handlers.pm +++ b/lib/Regexp/Parser/Handlers.pm @@ -270,8 +270,14 @@ sub init { $name = $1; } - return $S->force_object(anyof_class => $S->force_object(prop => $name, 0)) if $cc; - return $S->object(prop => $name, 0); + # \p{^Name} is equivalent to \P{Name} (Perl negation syntax) + my $neg = 0; + if ($name =~ s/^\^//) { + $neg = 1; + } + + return $S->force_object(anyof_class => $S->force_object(prop => $name, $neg)) if $cc; + return $S->object(prop => $name, $neg); }); # nspace (not a space) @@ -498,6 +504,12 @@ sub init { $S->error($S->RPe_BCURLY) if length($max) and $min > $max; return $S->object(quant => $min, $max); } + # {,n} syntax (Perl 5.34+): upper bound only, min defaults to 0 + if (${&Rx} =~ m{ \G , (\d+) \} }xgc) { + my $max = $1; + push @{ $S->{next} }, qw< minmod >; + return $S->object(quant => 0, $max); + } return $S->object(exact => '{'); }); diff --git a/t/18quantprop.t b/t/18quantprop.t new file mode 100644 index 0000000..78921f0 --- /dev/null +++ b/t/18quantprop.t @@ -0,0 +1,141 @@ +use strict; +use warnings; +use Test::More; +use Regexp::Parser; + +# Tests for {,n} quantifier syntax (Perl 5.34+) and \p{^Name} negation + +my $r = Regexp::Parser->new; + +# --- {,n} quantifier (Perl 5.34+): upper bound only, min=0 --- + +subtest '{,n} quantifier basics' => sub { + $r->regex('a{,5}'); + my $vis = $r->visual; + # {,n} normalizes to {0,n} in visual output + is($vis, 'a{0,5}', '{,5} parses as quantifier (normalizes to {0,5})'); + + my @nodes = @{ $r->root }; + # quant wraps the exact node — root has 1 top-level quant node + is(scalar @nodes, 1, '{,5} produces one top-level node'); + is($nodes[0]->family, 'quant', 'top-level node is quant'); + is($nodes[0]->{min}, 0, 'quant min is 0'); + is($nodes[0]->{max}, 5, 'quant max is 5'); +}; + +subtest '{,n} with different values' => sub { + for my $n (1, 2, 10, 100) { + $r->regex("x{,$n}"); + my @nodes = @{ $r->root }; + is($nodes[0]->family, 'quant', "{,$n} produces quant node"); + is($nodes[0]->{min}, 0, "{,$n} min is 0"); + is($nodes[0]->{max}, $n, "{,$n} max is $n"); + } +}; + +subtest '{,n} with lazy and possessive modifiers' => sub { + $r->regex('a{,3}?'); + is($r->visual, 'a{0,3}?', '{,3}? parses with lazy modifier'); + + $r->regex('a{,3}+'); + is($r->visual, 'a{0,3}+', '{,3}+ parses with possessive modifier'); +}; + +subtest '{,n} in groups and classes' => sub { + $r->regex('(?:ab){,2}'); + is($r->visual, '(?:ab){0,2}', '{,n} works on groups'); + + $r->regex('[abc]{,4}'); + is($r->visual, '[abc]{0,4}', '{,n} works on character classes'); +}; + +subtest '{,n} round-trip' => sub { + for my $pat ('a{,5}', 'x{,1}', '(?:ab){,3}', 'a{,5}?', 'a{,5}+') { + $r->regex($pat); + my $vis1 = $r->visual; + # Re-parse the normalized output + $r->regex($vis1); + my $vis2 = $r->visual; + is($vis2, $vis1, "round-trip: $pat -> $vis1"); + } +}; + +subtest '{,n} not confused with literal' => sub { + # {,} without digits should still be literal + $r->regex('a{,}'); + is($r->visual, 'a{,}', '{,} without digits is literal'); + + # {,0} is valid — matches zero times + $r->regex('a{,0}'); + my @nodes = @{ $r->root }; + is($nodes[0]->family, 'quant', '{,0} is a valid quantifier'); + is($nodes[0]->{min}, 0, '{,0} min is 0'); + is($nodes[0]->{max}, 0, '{,0} max is 0'); +}; + +# --- \p{^Name} property negation --- + +subtest '\\p{^Name} negation' => sub { + $r->regex('\\p{^Greek}'); + my @nodes = @{ $r->root }; + is(scalar @nodes, 1, '\\p{^Greek} produces one node'); + is($nodes[0]->family, 'prop', 'node is a prop'); + is($nodes[0]->type, 'Greek', 'type is Greek (without ^)'); + is($nodes[0]->neg, 1, 'neg flag is set'); + is($nodes[0]->visual, '\\P{Greek}', '\\p{^Greek} normalizes to \\P{Greek}'); +}; + +subtest '\\p{^Name} vs \\P{Name} equivalence' => sub { + $r->regex('\\p{^Alpha}'); + my @n1 = @{ $r->root }; + + $r->regex('\\P{Alpha}'); + my @n2 = @{ $r->root }; + + is($n1[0]->neg, $n2[0]->neg, 'same neg flag'); + is($n1[0]->type, $n2[0]->type, 'same type'); + is($n1[0]->visual, $n2[0]->visual, 'same visual output'); +}; + +subtest '\\p{Name} without ^ is still positive' => sub { + $r->regex('\\p{Greek}'); + my @nodes = @{ $r->root }; + is($nodes[0]->neg, 0, '\\p{Greek} neg is 0'); + is($nodes[0]->type, 'Greek', 'type is Greek'); + is($nodes[0]->visual, '\\p{Greek}', 'visual is \\p{Greek}'); +}; + +subtest '\\p{^Name} in character class' => sub { + $r->regex('[\\p{^Digit}]'); + my $vis = $r->visual; + is($vis, '[\\P{Digit}]', '\\p{^Digit} in char class normalizes to \\P{Digit}'); + + my @nodes = @{ $r->root }; + is($nodes[0]->family, 'anyof', 'outer node is anyof'); + # Inspect the data array for the anyof_class containing the prop + my @data = @{ $nodes[0]->{data} }; + my $found_neg_prop = 0; + for my $child (@data) { + if ($child->isa('Regexp::Parser::anyof_class')) { + my $inner = $child->{data}; + if ($inner->isa('Regexp::Parser::prop')) { + is($inner->neg, 1, 'prop neg flag set in character class'); + is($inner->type, 'Digit', 'prop type is Digit'); + $found_neg_prop = 1; + } + } + } + ok($found_neg_prop, 'found negated prop inside character class'); +}; + +subtest '\\p{^Name} round-trip' => sub { + # After normalization, \p{^Greek} becomes \P{Greek} + # Round-trip: \P{Greek} -> \P{Greek} + $r->regex('\\P{Greek}'); + my $vis1 = $r->visual; + $r->regex($vis1); + my $vis2 = $r->visual; + is($vis2, $vis1, 'round-trip \\P{Greek}'); +}; + +done_testing;