From 2f6926e698075de3eef0446f8ebdf3e348b46238 Mon Sep 17 00:00:00 2001 From: Wondr Date: Fri, 5 Jun 2026 03:14:15 +0100 Subject: [PATCH] grep: let dot match newline in null-data mode --- src/matcher.rs | 6 ++++++ tests/test_grep.rs | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/matcher.rs b/src/matcher.rs index 6d72b69..783d239 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -285,6 +285,12 @@ impl CompiledPattern { if config.ignore_case { options |= RegexOptions::REGEX_OPTION_IGNORECASE; } + // In GNU grep's Basic/Extended modes, `-z` makes newline ordinary data + // for `.`, but PCRE keeps its existing non-DOTALL behavior. The GNU + // `pcre-context` test documents this as current behavior until PCRE2. + if config.null_data && matches!(config.regex_mode, RegexMode::Basic | RegexMode::Extended) { + options |= RegexOptions::REGEX_OPTION_MULTILINE; + } fn compile_with(pattern: &str, syntax: &Syntax, options: RegexOptions) -> UResult { Regex::with_options_and_encoding(pattern, options, syntax).map_err(|err| { diff --git a/tests/test_grep.rs b/tests/test_grep.rs index 2c16db4..1dc8e89 100644 --- a/tests/test_grep.rs +++ b/tests/test_grep.rs @@ -1251,6 +1251,27 @@ fn null_data_mode_records() { .succeeds() .stdout_is_bytes(b"hello\0"); + // With NUL-delimited records, newline is ordinary data and `.` matches it. + let (_s, mut c) = ucmd(); + c.args(&["-z", "-o", "."]) + .pipe_in(&b"a\nb"[..]) + .succeeds() + .stdout_is_bytes(b"a\0\n\0b\0"); + + // GNU grep's PCRE path currently does not let `.*` consume the extra + // newline here under -z; this mirrors the GNU pcre-context test. + let (_s, mut c) = ucmd(); + c.args(&["-P", "-z", "-o", r"(?<=\n\n\n).*"]) + .pipe_in( + &b"NUL preceded by 0 empty lines.\0\ + \nNUL preceded by 1 empty line.\0\ + \n\nNUL preceded by 2 empty lines.\0\ + \n\n\nNUL preceded by 3 empty lines.\0\ + \n\n\n\nNUL preceded by 4 empty lines.\0\n"[..], + ) + .succeeds() + .stdout_is_bytes(b"NUL preceded by 3 empty lines.\0NUL preceded by 4 empty lines.\0"); + // Counting works under -z. let (_s, mut c) = ucmd(); c.args(&["-z", "-c", "hello"])