From f31923244088009ebccf887431c8dbdf5c0fec32 Mon Sep 17 00:00:00 2001
From: Wondr
Date: Fri, 5 Jun 2026 11:41:01 +0100
Subject: [PATCH] grep: honor whitespace escapes in BRE

When the pattern is compiled in RegexMode::Basic, enable Oniguruma's
SYNTAX_OPERATOR_ESC_S_WHITE_SPACE so that \s and \S are accepted as
whitespace shorthands. Add the bre_whitespace_shorthands test, which
covers \s, \S (with -c) and \s\+.

---
 src/matcher.rs     |  6 ++++++
 tests/test_grep.rs | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/src/matcher.rs b/src/matcher.rs
index 6da6d9b..de7f93d 100644
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -273,6 +273,12 @@ impl CompiledPattern {
             // GNU grep supports \` and \' as buffer anchors in BRE and ERE.
             syntax.enable_operators(SyntaxOperator::SYNTAX_OPERATOR_ESC_GNU_BUF_ANCHOR);
         }
+        if config.regex_mode == RegexMode::Basic {
+            // GNU grep accepts \s and \S as whitespace shorthands in BRE.
+            // Follow Oniguruma's operator directly here; remaining invalid
+            // UTF-8 differences are engine semantics, not local syntax gaps.
+            syntax.enable_operators(SyntaxOperator::SYNTAX_OPERATOR_ESC_S_WHITE_SPACE);
+        }
         if config.regex_mode == RegexMode::Perl {
             // GNU grep supports `(?P...)`.
             // Unfortunately, the onig crate defines the OP2 flag without the
diff --git a/tests/test_grep.rs b/tests/test_grep.rs
index 6a42175..b310808 100644
--- a/tests/test_grep.rs
+++ b/tests/test_grep.rs
@@ -100,6 +100,27 @@ fn gnu_buffer_anchors() {
         .stdout_only("cat\ntar\n");
 }
 
+#[test]
+fn bre_whitespace_shorthands() {
+    let (_s, mut c) = ucmd();
+    c.args(&[r"\s"])
+        .pipe_in("a b\nxy\n")
+        .succeeds()
+        .stdout_only("a b\n");
+
+    let (_s, mut c) = ucmd();
+    c.args(&["-c", r"\S"])
+        .pipe_in("aS b\n \nx\n")
+        .succeeds()
+        .stdout_only("2\n");
+
+    let (_s, mut c) = ucmd();
+    c.args(&[r"\s\+"])
+        .pipe_in("a b\nxy\n")
+        .succeeds()
+        .stdout_only("a b\n");
+}
+
 #[test]
 fn ere_metacharacters() {
     let cases: &[(&[&str], &str, &str)] = &[