From 916c07aecd7277d126d71ca4181bced1bb20991c Mon Sep 17 00:00:00 2001
From: Toddr Bot <toddbot@rinaldo.us>
Date: Tue, 31 Mar 2026 08:03:48 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20fix=20\N=20escape=20handling=20?=
 =?UTF-8?q?=E2=80=94=20bare=20\N,=20\N{U+HHHH},=20quantifier=20disambiguat?=
 =?UTF-8?q?ion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three bugs fixed in \N handling:

1. Bare \N now parses as "not newline" (Perl 5.12+) instead of erroring.
   Creates a nonnewline node type. Still errors inside character classes,
   matching Perl's behavior.

2. \N{U+HHHH} no longer produces "isn't numeric in chr" warnings.
   The nchar() method now detects U+HHHH format and uses chr(hex())
   directly instead of passing through charnames::vianame() which
   returns the character (not code point) for this format.

3. \N{3,5} is now correctly parsed as \N + quantifier {3,5}, not as
   a named character lookup for "3,5". The handler checks if braced
   content looks like a quantifier pattern before consuming it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 MANIFEST                      |   1 +
 lib/Regexp/Parser.pm          |  12 ++-
 lib/Regexp/Parser/Handlers.pm |  28 ++++--
 lib/Regexp/Parser/Objects.pm  |  32 ++++++-
 t/10roundtrip.t               |   8 ++
 t/11errors.t                  |   5 +-
 t/18nonnewline.t              | 174 ++++++++++++++++++++++++++++++++++
 7 files changed, 251 insertions(+), 9 deletions(-)
 create mode 100644 t/18nonnewline.t

diff --git a/MANIFEST b/MANIFEST
index fc423c7..7fc2304 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -29,4 +29,5 @@ t/14quotemeta.t
 t/15boundary.t
 t/16define.t
 t/17extcharclass.t
+t/18nonnewline.t
 t/99misc.t
diff --git a/lib/Regexp/Parser.pm b/lib/Regexp/Parser.pm
index 9412df5..6df114e 100644
--- a/lib/Regexp/Parser.pm
+++ b/lib/Regexp/Parser.pm
@@ -137,7 +137,17 @@ sub named_captures {
 
 sub nchar {
   my $self = shift;
-  return map chr(/^\^(\S)/ ? (64 ^ ord $1) : charnames::vianame($_)), @_;
+  return map {
+    if (/^\^(\S)/) {
+      chr(64 ^ ord $1);
+    }
+    elsif (/^U\+([0-9a-fA-F]+)$/) {
+      chr(hex($1));
+    }
+    else {
+      chr(charnames::vianame($_));
+    }
+  } @_;
 }
 
 
diff --git a/lib/Regexp/Parser/Handlers.pm b/lib/Regexp/Parser/Handlers.pm
index e700f73..4ff1278 100644
--- a/lib/Regexp/Parser/Handlers.pm
+++ b/lib/Regexp/Parser/Handlers.pm
@@ -221,15 +221,31 @@ sub init {
     $S->error($S->RPe_BRACES, 'g');
   });
 
-  # named (named character)
+  # \N (not newline) or \N{NAME} / \N{U+HHHH} (named character)
   $self->add_handler('\N' => sub {
     my ($S, $cc) = @_;
-    $S->error($S->RPe_BRACES, 'N') if ${&Rx} !~ m{ \G \{ }xgc;
-    $S->error($S->RPe_RBRACE, 'N') if ${&Rx} !~ m{ \G ([^\}]*) \} }xgc;
 
-    my $name = $1;
-    return $S->force_object(anyof_char => $S->nchar($name), "\\N{$name}") if $cc;
-    return $S->object(exact => $S->nchar($name), "\\N{$name}");
+    # Check for \N{...}
+    if (${&Rx} =~ m{ \G \{ }xgc) {
+      # Disambiguate \N{3,5} (quantifier) from \N{NAME} (named char):
+      # if contents look like a quantifier pattern, back up and treat as bare \N
+      if (!$cc && ${&Rx} =~ m{ \G (?= \d+,?\d*\} ) }xgc) {
+        --&RxPOS;  # un-consume the {
+        return $S->object(nonnewline =>);
+      }
+
+      $S->error($S->RPe_RBRACE, 'N') if ${&Rx} !~ m{ \G ([^\}]*) \} }xgc;
+
+      my $name = $1;
+      return $S->force_object(anyof_char => $S->nchar($name), "\\N{$name}") if $cc;
+      return $S->object(exact => $S->nchar($name), "\\N{$name}");
+    }
+
+    # bare \N = "not newline" (Perl 5.12+), not valid in character class
+    if ($cc) {
+      $S->error($S->RPe_BRACES, 'N');
+    }
+    return $S->object(nonnewline =>);
   });
 
   # \o{NNN} octal escape (Perl 5.14+)
diff --git a/lib/Regexp/Parser/Objects.pm b/lib/Regexp/Parser/Objects.pm
index cea754c..10f28cb 100644
--- a/lib/Regexp/Parser/Objects.pm
+++ b/lib/Regexp/Parser/Objects.pm
@@ -1626,6 +1626,25 @@
 }
 
 
+{
+  # \N (not newline, added in Perl 5.12)
+  package Regexp::Parser::nonnewline;
+  our @ISA = qw( Regexp::Parser::__object__ );
+
+  sub new {
+    my ($class, $rx) = @_;
+    my $self = bless {
+      rx => $rx,
+      flags => $rx->{flags}[-1],
+      family => 'nonnewline',
+      type => 'nonnewline',
+      vis => '\N',
+    }, $class;
+    return $self;
+  }
+}
+
+
 {
   # \R (generic linebreak, added in Perl 5.10)
   package Regexp::Parser::lnbreak;
@@ -2105,7 +2124,7 @@ character class's ender is an C<anyof_close> node.
 The general family of this object.  These are any of: alnum, anchor,
 anyof, anyof_char, anyof_class, anyof_range, assertion, branch,
 charclass_expr, close, clump, digit, exact, flags, group, groupp,
-grouppn, hspace, lnbreak, minmod, open, possessive, prop, quant,
+grouppn, hspace, lnbreak, minmod, nonnewline, open, possessive, prop, quant,
 recurse, ref, reg_any, verb, vspace.
 
 =item my $f = $obj->flags()
@@ -2671,6 +2690,17 @@ Neg: 1 if negated
 Vertical whitespace character class shorthand (Perl 5.10+).  Matches
 characters like newline, carriage return, form feed, and vertical tab.
 
+=head2 nonnewline
+
+Family: nonnewline
+
+Types: nonnewline (C<\N>)
+
+Not-newline assertion (Perl 5.12+).  Matches any character except C<\n>,
+regardless of the C</s> flag.  Unlike C<.>, which matches newlines under
+C</s>, C<\N> never matches a newline.  Not valid inside character classes
+(use C<\N{NAME}> or C<\N{U+HHHH}> there instead).
+
 =head2 lnbreak
 
 Family: lnbreak
diff --git a/t/10roundtrip.t b/t/10roundtrip.t
index d1167cf..eefd373 100644
--- a/t/10roundtrip.t
+++ b/t/10roundtrip.t
@@ -158,6 +158,14 @@ my @patterns = (
     "(?<foo>bar)\\k'foo'",
     '(?<foo>bar)\\k{foo}',
     '(?<x>a)(?P=x)',
+
+    # \N (not newline) and \N{NAME}
+    '\\N',
+    '\\N+',
+    '\\N{3,5}',
+    '\\N{SPACE}',
+    '\\N{U+0041}',
+    '\\N{LATIN SMALL LETTER A}',
 );
 
 plan tests => scalar(@patterns) * 2;
diff --git a/t/11errors.t b/t/11errors.t
index a8f5233..dc65500 100644
--- a/t/11errors.t
+++ b/t/11errors.t
@@ -145,7 +145,10 @@ fails_regex('\\',        ($r->RPe_ESLASH)[0],   'lone backslash');
 
 # Missing braces on \g, \N
 fails_regex('\\g',       ($r->RPe_BRACES)[0],   '\\g without braces');
-fails_regex('\\N',       ($r->RPe_BRACES)[0],   '\\N without braces');
+# bare \N is valid since Perl 5.12 (means "not newline")
+parses_ok('\\N',                                  '\\N bare (not newline)');
+# but bare \N inside character class is still an error
+fails_regex('[\\N]',    ($r->RPe_BRACES)[0],      '\\N without braces in char class');
 
 # Missing right brace
 fails_regex('\\x{abc',   ($r->RPe_RBRACE)[0],   '\\x{... missing right brace');
diff --git a/t/18nonnewline.t b/t/18nonnewline.t
new file mode 100644
index 0000000..8da76fa
--- /dev/null
+++ b/t/18nonnewline.t
@@ -0,0 +1,174 @@
+use strict;
+use warnings;
+use Test::More;
+use Regexp::Parser;
+
+# Tests for \N handling:
+#   - \N bare = "not newline" (Perl 5.12+)
+#   - \N{NAME} = named character
+#   - \N{U+HHHH} = Unicode code point
+
+my $r = Regexp::Parser->new;
+
+##
+## 1. Bare \N (not newline)
+##
+
+{
+    my $ok = $r->regex('\N');
+    ok($ok, '\\N bare parses successfully');
+    is($r->visual, '\N', '\\N visual round-trips');
+
+    # Check node type
+    my $w = $r->walker;
+    my $node = $w->();
+    ok($node, '\\N produces a node');
+    is($node->family, 'nonnewline', '\\N node family is nonnewline');
+    is($node->type, 'nonnewline', '\\N node type is nonnewline');
+    is($node->visual, '\N', '\\N node visual is \\N');
+}
+
+# \N in a larger pattern
+{
+    $r->regex('a\Nb');
+    is($r->visual, 'a\Nb', '\\N in context: a\\Nb');
+}
+
+# \N with quantifiers
+{
+    $r->regex('\N+');
+    is($r->visual, '\N+', '\\N+ (one or more non-newlines)');
+}
+
+{
+    $r->regex('\N*');
+    is($r->visual, '\N*', '\\N* (zero or more non-newlines)');
+}
+
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub { push @warnings, $_[0] };
+    $r->regex('\N{3,5}');
+    is($r->visual, '\N{3,5}', '\\N{3,5} (quantifier, not named char)');
+    is(scalar @warnings, 0, '\\N{3,5} produces no warnings');
+
+    # Verify structure: quantifier wrapping nonnewline
+    my $w = $r->walker;
+    my $node = $w->();
+    is($node->family, 'quant', '\\N{3,5} quantifier node present');
+    my $inner = $w->();
+    is($inner->family, 'nonnewline', '\\N{3,5} inner node is nonnewline');
+}
+
+# \N{3} is also a quantifier, not a named char
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub { push @warnings, $_[0] };
+    $r->regex('\N{3}');
+    is($r->visual, '\N{3}', '\\N{3} (exact quantifier)');
+    is(scalar @warnings, 0, '\\N{3} produces no warnings');
+}
+
+# Round-trip: parse -> visual -> re-parse -> visual
+{
+    $r->regex('\N');
+    my $v1 = $r->visual;
+    $r->regex($v1);
+    my $v2 = $r->visual;
+    is($v2, $v1, '\\N round-trips correctly');
+}
+
+{
+    $r->regex('^\N+$');
+    my $v1 = $r->visual;
+    $r->regex($v1);
+    my $v2 = $r->visual;
+    is($v2, $v1, '^\N+$ round-trips correctly');
+}
+
+##
+## 2. \N{NAME} (named character)
+##
+
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub { push @warnings, $_[0] };
+    $r->regex('\N{SPACE}');
+    is($r->visual, '\N{SPACE}', '\\N{SPACE} visual');
+    is(scalar @warnings, 0, '\\N{SPACE} produces no warnings');
+}
+
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub { push @warnings, $_[0] };
+    $r->regex('\N{LATIN SMALL LETTER A}');
+    is($r->visual, '\N{LATIN SMALL LETTER A}', '\\N{LATIN SMALL LETTER A} visual');
+    is(scalar @warnings, 0, '\\N{LATIN SMALL LETTER A} no warnings');
+}
+
+##
+## 3. \N{U+HHHH} (Unicode code point)
+##
+
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub { push @warnings, $_[0] };
+    $r->regex('\N{U+0041}');
+    is($r->visual, '\N{U+0041}', '\\N{U+0041} visual');
+    is(scalar @warnings, 0, '\\N{U+0041} produces no warnings');
+
+    # Check the node contains the right character
+    my $w = $r->walker;
+    my $node = $w->();
+    ok($node, '\\N{U+0041} produces a node');
+    is($node->visual, '\N{U+0041}', '\\N{U+0041} node visual');
+    is($node->data, 'A', '\\N{U+0041} resolves to "A"');
+}
+
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub { push @warnings, $_[0] };
+    $r->regex('\N{U+0020}');
+    is($r->visual, '\N{U+0020}', '\\N{U+0020} visual (space)');
+    is(scalar @warnings, 0, '\\N{U+0020} produces no warnings');
+
+    my $w = $r->walker;
+    my $node = $w->();
+    is($node->data, ' ', '\\N{U+0020} resolves to space');
+}
+
+# \N{U+HHHH} inside character class
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub { push @warnings, $_[0] };
+    $r->regex('[\N{U+0041}]');
+    is($r->visual, '[\N{U+0041}]', '\\N{U+0041} in character class');
+    is(scalar @warnings, 0, '\\N{U+0041} in char class no warnings');
+}
+
+# \N{U+HHHH} round-trip
+{
+    $r->regex('\N{U+0041}');
+    my $v1 = $r->visual;
+    $r->regex($v1);
+    my $v2 = $r->visual;
+    is($v2, $v1, '\\N{U+0041} round-trips correctly');
+}
+
+##
+## 4. Error cases preserved
+##
+
+# \N bare inside character class is still an error
+{
+    my $ok = $r->regex('[\N]');
+    ok(!$ok, '\\N bare inside character class is rejected');
+}
+
+# \N{ without closing brace
+{
+    my $ok = $r->regex('\N{SPACE');
+    ok(!$ok, '\\N{SPACE (unclosed) is rejected');
+}
+
+done_testing;