From 7bed179d631fa6e8606edc7ece9ada1e79ed8525 Mon Sep 17 00:00:00 2001 From: Toddr Bot Date: Wed, 8 Apr 2026 10:24:41 +0000 Subject: [PATCH] fix: validate named backreferences against existing capture groups The parser accepted \k, \g{name}, (?P=name), (?&name), and (?P>name) even when no matching named capture group existed anywhere in the pattern. Perl itself rejects these with "Reference to nonexistent named group". Root cause: named_captures hash was only populated during the second (tree-building) pass, and no validation checked it for backref/recursion handlers. Numbered backrefs were already validated against maxpar. Fix: populate named_captures during both passes (SIZE_ONLY uses maxpar, tree-building uses nparen), then validate all named ref forms during the second pass. Forward references work correctly because the first pass has already recorded all group names. Co-Authored-By: Claude Opus 4.6 --- lib/Regexp/Parser/Handlers.pm | 43 +++++++++++++++++++++++++------- t/11errors.t | 46 +++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 9 deletions(-) diff --git a/lib/Regexp/Parser/Handlers.pm b/lib/Regexp/Parser/Handlers.pm index e700f73..ba9bef2 100644 --- a/lib/Regexp/Parser/Handlers.pm +++ b/lib/Regexp/Parser/Handlers.pm @@ -185,6 +185,7 @@ sub init { # \g{name} — named backref if (${&Rx} =~ m{ \G ([a-zA-Z_]\w*) \} }xgc) { my $name = $1; + $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and !exists $S->{named_captures}{$name}; return $S->object(gref => $name, "\\g{$name}"); } # \g{N}, \g{-N}, or \g{+N} — numeric (possibly relative) @@ -1071,14 +1072,20 @@ sub init { } if (${&Rx} =~ m{ \G < ([^>]+) > }xgc) { - return $S->object(named_ref => $1, "\\k<$1>"); + my $name = $1; + $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and !exists $S->{named_captures}{$name}; + return $S->object(named_ref => $name, "\\k<$name>"); } elsif (${&Rx} =~ m{ \G ' ([^']+) ' }xgc) { - return $S->object(named_ref => $1, "\\k'$1'"); + my $name = $1; + $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and !exists $S->{named_captures}{$name}; + return $S->object(named_ref => $name, "\\k'$name'"); } elsif (${&Rx} =~ m{ \G \{ ([^\}]+) \} }xgc) { # \k{name} — brace-delimited named backref (Perl 5.32+) - return $S->object(named_ref => $1, "\\k{$1}"); + my $name = $1; + $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and !exists $S->{named_captures}{$name}; + return $S->object(named_ref => $name, "\\k{$name}"); } $S->error($S->RPe_BADESC, "k", ""); @@ -1100,9 +1107,14 @@ sub init { if (${&Rx} =~ m{ \G ([A-Za-z_]\w*) > }xgc) { my $name = $1; push @{ $S->{next} }, qw< c) atom >; - &SIZE_ONLY ? ++$S->{maxpar} : ++$S->{nparen}; + if (&SIZE_ONLY) { + ++$S->{maxpar}; + $S->{named_captures}{$name} = $S->{maxpar}; + } else { + ++$S->{nparen}; + $S->{named_captures}{$name} = $S->{nparen}; + } push @{ $S->{flags} }, &Rf; - $S->{named_captures}{$name} = $S->{nparen} unless &SIZE_ONLY; return $S->object(named_open => $S->{nparen}, $name); } @@ -1116,9 +1128,14 @@ sub init { if (${&Rx} =~ m{ \G ([A-Za-z_]\w*) ' }xgc) { my $name = $1; push @{ $S->{next} }, qw< c) atom >; - &SIZE_ONLY ? ++$S->{maxpar} : ++$S->{nparen}; + if (&SIZE_ONLY) { + ++$S->{maxpar}; + $S->{named_captures}{$name} = $S->{maxpar}; + } else { + ++$S->{nparen}; + $S->{named_captures}{$name} = $S->{nparen}; + } push @{ $S->{flags} }, &Rf; - $S->{named_captures}{$name} = $S->{nparen} unless &SIZE_ONLY; return $S->object(named_open => $S->{nparen}, $name); } @@ -1141,6 +1158,7 @@ sub init { if (${&Rx} =~ m{ \G ([A-Za-z_]\w*) \) }xgc) { my $name = $1; + $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and !exists $S->{named_captures}{$name}; return $S->object(named_recurse => $name, "(?&$name)"); } @@ -1157,21 +1175,28 @@ sub init { if (${&Rx} =~ m{ \G < ([A-Za-z_]\w*) > }xgc) { my $name = $1; push @{ $S->{next} }, qw< c) atom >; - &SIZE_ONLY ? ++$S->{maxpar} : ++$S->{nparen}; + if (&SIZE_ONLY) { + ++$S->{maxpar}; + $S->{named_captures}{$name} = $S->{maxpar}; + } else { + ++$S->{nparen}; + $S->{named_captures}{$name} = $S->{nparen}; + } push @{ $S->{flags} }, &Rf; - $S->{named_captures}{$name} = $S->{nparen} unless &SIZE_ONLY; return $S->object(named_open => $S->{nparen}, $name); } # (?P=name) named backreference if (${&Rx} =~ m{ \G = ([A-Za-z_]\w*) \) }xgc) { my $name = $1; + $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and !exists $S->{named_captures}{$name}; return $S->object(named_ref => $name, "(?P=$name)"); } # (?P>name) named recursion if (${&Rx} =~ m{ \G > ([A-Za-z_]\w*) \) }xgc) { my $name = $1; + $S->error($S->RPe_BGROUP) if !&SIZE_ONLY and !exists $S->{named_captures}{$name}; return $S->object(named_recurse => $name, "(?P>$name)"); } diff --git a/t/11errors.t b/t/11errors.t index a8f5233..1017b95 100644 --- a/t/11errors.t +++ b/t/11errors.t @@ -342,4 +342,50 @@ parses_ok('(?{1+1})', 'valid code block'); parses_ok('(??{1+1})', 'valid logical code block'); parses_ok('a(?:b|c)d', 'valid alternation in group'); +# --- RPe_BGROUP: named backreferences to nonexistent groups --- +# All named backref forms must reject when no matching named group exists. +# Perl: "Reference to nonexistent named group" + +{ + my $r = Regexp::Parser->new; + my $RPe_BGROUP = ($r->RPe_BGROUP)[0]; + + # \k forms + fails_visual('\k', $RPe_BGROUP, '\k nonexistent group'); + fails_visual('(a)\k', $RPe_BGROUP, '\k no named capture'); + fails_visual('(?a)\k', $RPe_BGROUP, '\k wrong name'); + fails_visual("\\k'foo'", $RPe_BGROUP, "\\k'foo' nonexistent group"); + fails_visual('\k{foo}', $RPe_BGROUP, '\k{foo} nonexistent group'); + + # \g{name} form + fails_visual('\g{foo}', $RPe_BGROUP, '\g{foo} nonexistent group'); + fails_visual('(?a)\g{foo}', $RPe_BGROUP, '\g{foo} wrong name'); + + # (?P=name) Python-style backref + fails_visual('(?P=foo)', $RPe_BGROUP, '(?P=foo) nonexistent group'); + fails_visual('(?a)(?P=foo)', $RPe_BGROUP, '(?P=foo) wrong name'); + + # (?&name) named recursion + fails_visual('(?&foo)', $RPe_BGROUP, '(?&foo) nonexistent group'); + fails_visual('(?a)(?&foo)', $RPe_BGROUP, '(?&foo) wrong name'); + + # (?P>name) Python-style named recursion + fails_visual('(?P>foo)', $RPe_BGROUP, '(?P>foo) nonexistent group'); + fails_visual('(?a)(?P>foo)', $RPe_BGROUP, '(?P>foo) wrong name'); + + # Valid named refs must still parse correctly + parses_ok('(?a)\k', 'valid \k with matching group'); + parses_ok("(?a)\\k'foo'", "valid \\k'foo' with matching group"); + parses_ok('(?a)\k{foo}', 'valid \k{foo} with matching group'); + parses_ok('(?a)\g{foo}', 'valid \g{foo} with matching group'); + parses_ok('(?a)(?P=foo)', 'valid (?P=foo) with matching group'); + parses_ok('(?a)(?&foo)', 'valid (?&foo) with matching group'); + parses_ok('(?a)(?P>foo)', 'valid (?P>foo) with matching group'); + parses_ok('(?Pa)\k', 'valid Python-style capture + \k'); + + # Forward references must work (group defined after ref) + parses_ok('(?&foo)(?a)', 'forward named recursion'); + parses_ok('(?P>foo)(?a)', 'forward Python named recursion'); +} + done_testing;