diff --git a/MANIFEST b/MANIFEST index fc423c7..c56366f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -29,4 +29,5 @@ t/14quotemeta.t t/15boundary.t t/16define.t t/17extcharclass.t +t/18single_quote_capture.t t/99misc.t diff --git a/lib/Regexp/Parser/Handlers.pm b/lib/Regexp/Parser/Handlers.pm index e700f73..49ade55 100644 --- a/lib/Regexp/Parser/Handlers.pm +++ b/lib/Regexp/Parser/Handlers.pm @@ -211,7 +211,15 @@ sub init { $S->error($S->RPe_RBRACE, 'g'); } - # \gN form (no braces, positive only) + # \g-N form (no braces, negative relative) + if (${&Rx} =~ m{ \G ( - \d+ ) }xgc) { + my $num = $1; + my $abs = (&SIZE_ONLY ? $S->{maxpar} : $S->{nparen}) + $num + 1; + $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and $abs < 1; + return $S->object(ref => $abs, "\\g$num"); + } + + # \gN form (no braces, absolute) if (${&Rx} =~ m{ \G (\d+) }xgc) { my $num = $1; $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and $num > $S->{maxpar}; @@ -703,6 +711,22 @@ sub init { if (${&Rx} =~ m{ \G (.) }xgcs) { my $n = "$c$1"; + + # (?'name'...) alternate named capture — handle here because + # the single-quote is a package separator in Perl, so + # add_handler/can() dispatch fails for method name "(?\'" + if ($1 eq "'") { + if (${&Rx} =~ m{ \G ([A-Za-z_]\w*) ' }xgc) { + my $name = $1; + push @{ $S->{next} }, qw< c) atom >; + &SIZE_ONLY ? ++$S->{maxpar} : ++$S->{nparen}; + push @{ $S->{flags} }, &Rf; + $S->{named_captures}{$name} = $S->{nparen} unless &SIZE_ONLY; + return $S->object(named_open => $S->{nparen}, $name, "(?'"); + } + $S->error($S->RPe_NOTREC, 1, substr(${&Rx}, &RxPOS - 1)); + } + return $S->$n if $S->can($n); &RxPOS--; } @@ -1109,21 +1133,8 @@ sub init { $S->error($S->RPe_NOTREC, 2, substr(${&Rx}, &RxPOS - 2)); }); - # (?'name'...) alternate named capture syntax - $self->add_handler("(?\'" => sub { - my ($S) = @_; - - if (${&Rx} =~ m{ \G ([A-Za-z_]\w*) ' }xgc) { - my $name = $1; - push @{ $S->{next} }, qw< c) atom >; - &SIZE_ONLY ? ++$S->{maxpar} : ++$S->{nparen}; - push @{ $S->{flags} }, &Rf; - $S->{named_captures}{$name} = $S->{nparen} unless &SIZE_ONLY; - return $S->object(named_open => $S->{nparen}, $name); - } - - $S->error($S->RPe_NOTREC, 1, substr(${&Rx}, &RxPOS - 1)); - }); + # (?'name'...) is handled inline in the (? handler above + # because ' is a Perl package separator, breaking add_handler/can() dispatch # (?R) -- whole-pattern recursion (Perl 5.10+) $self->add_handler('(?R' => sub { diff --git a/lib/Regexp/Parser/Objects.pm b/lib/Regexp/Parser/Objects.pm index cea754c..e6965bb 100644 --- a/lib/Regexp/Parser/Objects.pm +++ b/lib/Regexp/Parser/Objects.pm @@ -1651,15 +1651,24 @@ our @ISA = qw( Regexp::Parser::__object__ ); sub new { - my ($class, $rx, $nparen, $name, @data) = @_; + my ($class, $rx, $nparen, $name, @rest) = @_; + # Optional prefix arg for alternate syntax: "(?'" vs default "(?<" + my $prefix = (@rest && !ref $rest[0] && $rest[0] =~ /^\(\?/) ? shift @rest : undef; + my $raw; + if ($prefix && $prefix eq "(?'") { + $raw = "(?'$name'"; + } + else { + $raw = "(?<$name>"; + } my $self = bless { rx => $rx, flags => $rx->{flags}[-1], family => 'open', nparen => $nparen, name => $name, - data => \@data, - raw => "(?<$name>", + data => \@rest, + raw => $raw, down => 1, }, $class; $self->{rx}{captures}[$nparen - 1] = $self; diff --git a/t/18single_quote_capture.t b/t/18single_quote_capture.t new file mode 100644 index 0000000..947314a --- /dev/null +++ b/t/18single_quote_capture.t @@ -0,0 +1,99 @@ +use strict; +use warnings; +use Test::More; +use Regexp::Parser; + +my $r = Regexp::Parser->new; + +# === (?'name'...) single-quote named capture === + +ok( $r->regex("(?'word'\\w+)"), "(?'name'...) parses" ); +is( $r->visual, "(?'word'\\w+)", "(?'name'...) visual preserves single quotes" ); + +# Tree structure +{ + $r->regex("(?'foo'bar)"); + $r->parse; + my @nodes = @{ $r->root }; + is( $nodes[0]->family, 'open', "(?'foo'...) family is open" ); + is( $nodes[0]->nparen, 1, "(?'foo'...) is capture 1" ); + is( $nodes[0]->name, 'foo', "(?'foo'...) name is foo" ); + is( $nodes[0]->raw, "(?'foo'", "(?'foo'...) raw uses single quotes" ); +} + +# Captures array +{ + $r->regex("(?'x'a)(?'y'b)"); + $r->parse; + my @cap = @{ $r->captures }; + is( scalar @cap, 2, "two captures" ); + is( $cap[0]->name, 'x', "first capture name" ); + is( $cap[1]->name, 'y', "second capture name" ); +} + +# Round-trip: parse -> visual -> re-parse -> visual +{ + my $rx = "(?'name'\\d+)"; + $r->regex($rx); + my $v1 = $r->visual; + ok( $r->regex($v1), "round-trip re-parse" ); + is( $r->visual, $v1, "round-trip visual stable" ); +} + +# Mixed with (?...) angle-bracket syntax +{ + ok( $r->regex("(?x)(?'b'y)"), "mixed angle + single-quote" ); + $r->parse; + my @cap = @{ $r->captures }; + is( $cap[0]->raw, "(?", "angle bracket raw" ); + is( $cap[1]->raw, "(?'b'", "single quote raw" ); +} + +# Named backref to single-quote captured group +{ + ok( $r->regex("(?'q'[\"']).*?\\k"), "named backref to single-quote group" ); + is( $r->visual, "(?'q'[\"']).*?\\k", "backref visual" ); +} + +# qr() output +{ + $r->regex("(?'name'\\w+)"); + $r->parse; + my $qr = $r->qr; + like( $qr, qr/\(\?'name'/, "qr() preserves single-quote syntax" ); +} + +# === bare \g-N (negative relative backref without braces) === + +ok( $r->regex("(a)\\g-1"), "\\g-1 bare parses" ); +is( $r->visual, "(a)\\g-1", "\\g-1 bare visual" ); + +ok( $r->regex("(a)(b)\\g-2"), "\\g-2 bare parses" ); +is( $r->visual, "(a)(b)\\g-2", "\\g-2 bare visual" ); + +# Tree structure for bare \g-N +{ + $r->regex("(x)(y)\\g-1"); + $r->parse; + my @nodes = @{ $r->root }; + # nodes: open(1), open(2), ref + my $ref = $nodes[2]; + is( $ref->family, 'ref', "\\g-1 family is ref" ); + # \g-1 from 2 groups = group 2 + is( $ref->visual, "\\g-1", "\\g-1 visual" ); +} + +# Braced forms still work +ok( $r->regex("(a)\\g{-1}"), "\\g{-1} braced still works" ); +ok( $r->regex("(a)\\g{1}"), "\\g{1} braced still works" ); +ok( $r->regex("(a)\\g1"), "\\g1 bare still works" ); + +# Invalid bare \g-N (reference before group 1) +# Error fires on tree-building pass, so must call visual/parse +{ + $r->regex("(a)\\g-2"); + my $vis = eval { $r->visual }; + ok( $@, "\\g-2 with 1 group errors on parse" ); +} + +done_testing;