Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,15 @@ ERROR HANDLING
RPe_IRANGE (-28)
Invalid [] range "%s-%s"

RPe_DUPLCH (-29)
Regexp modifier "%s" may not appear twice

RPe_EXCLCH (-30)
Regexp modifiers "%s" and "%s" are mutually exclusive

RPe_NEGCHR (-31)
Regexp modifier "%s" may not appear after the "-"

EXTENSIONS
Here are some ideas for extensions (sub-classes) for this module. Some
of them may be absorbed into the core functionality of Regexp::Parser in
Expand Down
12 changes: 12 additions & 0 deletions lib/Regexp/Parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,18 @@ False [] range "%s-%s"

Invalid [] range "%s-%s"

=item RPe_DUPLCH (-29)

Regexp modifier "%s" may not appear twice

=item RPe_EXCLCH (-30)

Regexp modifiers "%s" and "%s" are mutually exclusive

=item RPe_NEGCHR (-31)

Regexp modifier "%s" may not appear after the "-"

=back

=head1 EXTENSIONS
Expand Down
3 changes: 3 additions & 0 deletions lib/Regexp/Parser/Diagnostics.pm
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,8 @@ use constant RPe_OUTPOS => --$ENUM, 'POSIX syntax [%s %s] belongs inside charact
use constant RPe_EMPTYB => --$ENUM, 'Empty \%s{}';
use constant RPe_FRANGE => --$ENUM, 'False [] range "%s-%s"';
use constant RPe_IRANGE => --$ENUM, 'Invalid [] range "%s-%s"';
use constant RPe_DUPLCH => --$ENUM, 'Regexp modifier "%s" may not appear twice';
use constant RPe_EXCLCH => --$ENUM, 'Regexp modifiers "%s" and "%s" are mutually exclusive';
use constant RPe_NEGCHR => --$ENUM, 'Regexp modifier "%s" may not appear after the "-"';

1;
22 changes: 22 additions & 0 deletions lib/Regexp/Parser/Handlers.pm
Original file line number Diff line number Diff line change
Expand Up @@ -734,10 +734,28 @@ sub init {
&RxPOS -= length($on.$off);
my $old = &RxPOS;

my $charset_seen = ''; # track first charset flag (a/d/l/u)
for (split //, $on) {
&RxPOS++;
if (my $f = $S->can("FLAG_$_")) {
my $v = $S->$f(1) and $r_on .= $_;
# charset flag conflict detection (Perl 5.14+)
if ($v && ($v & 0xF0)) { # this is a charset flag
if ($charset_seen) {
if ($charset_seen eq $_ && $_ eq 'a') {
# aa (strict ASCII) is valid — continue
}
elsif ($charset_seen eq $_) {
# dd, ll, uu — doubled non-a charset flag
$S->error($S->RPe_DUPLCH, $_);
}
else {
# different charset flags — mutually exclusive
$S->error($S->RPe_EXCLCH, $charset_seen, $_);
}
}
$charset_seen = $_;
}
# /xx: if x is already on, set the xx bit (Perl 5.26+)
if ($_ eq 'x' && ($f_on & $v)) {
$f_on |= 0x200; # FLAG_xx
Expand All @@ -756,6 +774,10 @@ sub init {
&RxPOS++;
if (my $f = $S->can("FLAG_$_")) {
my $v = $S->$f(0) and $r_off .= $_;
# charset flags may not appear after - (Perl 5.14+)
if ($v && ($v & 0xF0)) {
$S->error($S->RPe_NEGCHR, $_);
}
# -xx: also turn off the xx bit (Perl 5.26+)
if ($_ eq 'x' && ($f_off & $v)) {
$f_off |= 0x200; # FLAG_xx
Expand Down
40 changes: 40 additions & 0 deletions t/11errors.t
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,46 @@ warns_regex('(?o)', 'useless (?o) flag warning');
# Negative flag forms too
warns_regex('(?-g)', 'useless (?-g) flag warning');

##
## 9b. CHARSET FLAG CONFLICTS — RPe_DUPLCH, RPe_EXCLCH, RPe_NEGCHR
##

# Doubled non-a charset flags (Perl rejects these)
fails_regex('(?dd:a)', ($r->RPe_DUPLCH)[0], 'doubled d flag');
fails_regex('(?ll:a)', ($r->RPe_DUPLCH)[0], 'doubled l flag');
fails_regex('(?uu:a)', ($r->RPe_DUPLCH)[0], 'doubled u flag');

# Mutually exclusive charset flags
fails_regex('(?al:a)', ($r->RPe_EXCLCH)[0], 'a and l mutually exclusive');
fails_regex('(?ad:a)', ($r->RPe_EXCLCH)[0], 'a and d mutually exclusive');
fails_regex('(?au:a)', ($r->RPe_EXCLCH)[0], 'a and u mutually exclusive');
fails_regex('(?du:a)', ($r->RPe_EXCLCH)[0], 'd and u mutually exclusive');
fails_regex('(?dl:a)', ($r->RPe_EXCLCH)[0], 'd and l mutually exclusive');
fails_regex('(?lu:a)', ($r->RPe_EXCLCH)[0], 'l and u mutually exclusive');

# Charset flags not allowed after - (negation section)
fails_regex('(?-a:a)', ($r->RPe_NEGCHR)[0], 'charset a after -');
fails_regex('(?-d:a)', ($r->RPe_NEGCHR)[0], 'charset d after -');
fails_regex('(?-l:a)', ($r->RPe_NEGCHR)[0], 'charset l after -');
fails_regex('(?-u:a)', ($r->RPe_NEGCHR)[0], 'charset u after -');
fails_regex('(?i-a:a)', ($r->RPe_NEGCHR)[0], 'charset a after - with on-flags');
fails_regex('(?i-al:a)', ($r->RPe_NEGCHR)[0], 'charset a after - mixed');

# Valid charset flag usage
parses_ok('(?aa:a)', 'aa (strict ASCII) is valid');
parses_ok('(?a:a)', 'single a flag valid');
parses_ok('(?d:a)', 'single d flag valid');
parses_ok('(?l:a)', 'single l flag valid');
parses_ok('(?u:a)', 'single u flag valid');
parses_ok('(?ai:a)', 'charset a + modifier i valid');
parses_ok('(?aai:a)', 'strict ASCII + modifier i valid');
parses_ok('(?^a:a)', 'caret + charset a valid');
parses_ok('(?^aa:a)', 'caret + strict ASCII valid');

# Caret with charset conflicts should also fail
fails_regex('(?^al:a)', ($r->RPe_EXCLCH)[0], 'caret + a and l exclusive');
fails_regex('(?^du:a)', ($r->RPe_EXCLCH)[0], 'caret + d and u exclusive');

##
## 10. BAD ESCAPE IN CHARACTER CLASS WARNINGS — RPe_BADESC
##
Expand Down
Loading