diff --git a/src/matcher.rs b/src/matcher.rs
index 6da6d9b..2e65ac3 100644
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -11,6 +11,7 @@ use onig::{
 };
 use onig_sys::{OnigEncCtype_ONIGENC_CTYPE_WORD, OnigEncodingUTF8};
 use uucore::error::{UResult, USimpleError};
+use uucore::show_warning;
 
 pub struct Matcher<'a> {
     config: &'a Config<'a>,
@@ -273,6 +274,18 @@ impl CompiledPattern {
             // GNU grep supports \` and \' as buffer anchors in BRE and ERE.
             syntax.enable_operators(SyntaxOperator::SYNTAX_OPERATOR_ESC_GNU_BUF_ANCHOR);
         }
+
+        let mut normalized_pattern = None;
+        let pattern = if config.regex_mode == RegexMode::Extended {
+            if let Some((op, rest)) = strip_leading_repeat_operator(pattern) {
+                show_warning!("{op} at start of expression");
+                normalized_pattern = Some(rest.to_string());
+            }
+            normalized_pattern.as_deref().unwrap_or(pattern)
+        } else {
+            pattern
+        };
+
         if config.regex_mode == RegexMode::Perl {
             // GNU grep supports `(?P...)`.
             // Unfortunately, the onig crate defines the OP2 flag without the
@@ -354,6 +367,36 @@ impl CompiledPattern {
     }
 }
 
+/// Splits a repetition operator off the very start of `pattern`.
+///
+/// Returns the operator's display label (`?`, `*`, `+`, or `{...}` for an
+/// interval) together with the remainder of the pattern, or `None` when the
+/// pattern does not begin with such an operator.
+fn strip_leading_repeat_operator(pattern: &str) -> Option<(&'static str, &str)> {
+    match pattern.as_bytes().first()? {
+        b'?' => Some(("?", &pattern[1..])),
+        b'*' => Some(("*", &pattern[1..])),
+        b'+' => Some(("+", &pattern[1..])),
+        b'{' => strip_leading_interval_repeat(pattern).map(|rest| ("{...}", rest)),
+        _ => None,
+    }
+}
+
+/// Strips a leading ERE interval expression (`{n}`, `{n,}`, `{,m}` or `{n,m}`).
+///
+/// Returns the text after the closing brace, or `None` when `pattern` does not
+/// start with a well-formed interval: the braces must enclose only ASCII
+/// digits and at most one comma, with at least one digit present.
+fn strip_leading_interval_repeat(pattern: &str) -> Option<&str> {
+    let (body, rest) = pattern.strip_prefix('{')?.split_once('}')?;
+    // A single optional comma separates the bounds; anything left over in
+    // `max` (such as a second comma) fails the digit check below.
+    let (min, max) = body.split_once(',').unwrap_or((body, ""));
+    let is_count = |bound: &str| bound.bytes().all(|b| b.is_ascii_digit());
+    let has_digit = !(min.is_empty() && max.is_empty());
+    (has_digit && is_count(min) && is_count(max)).then_some(rest)
+}
+
 #[cfg(test)]
 mod tests {
     use super::plain_literal;
diff --git a/tests/test_grep.rs b/tests/test_grep.rs
index 6a42175..410c8e0 100644
--- a/tests/test_grep.rs
+++ b/tests/test_grep.rs
@@ -159,6 +159,32 @@ fn initial_tab_skips_empty_lines() {
         .stdout_is("in:\tx\nin:\t \n");
 }
 
+#[test]
+fn ere_leading_repeat_operators_warn_and_match_empty() {
+    let cases = [
+        ("?", "warning: ? at start of expression"),
+        ("*", "warning: * at start of expression"),
+        ("+", "warning: + at start of expression"),
+        ("{2}", "warning: {...} at start of expression"),
+        ("{,2}", "warning: {...} at start of expression"),
+    ];
+
+    for (pattern, warning) in cases {
+        let (_s, mut c) = ucmd();
+        c.args(&["-E", "-e", pattern])
+            .pipe_in("abc\n")
+            .succeeds()
+            .stdout_is("abc\n")
+            .stderr_contains(warning);
+    }
+
+    let (_s, mut c) = ucmd();
+    c.args(&["*foo"])
+        .pipe_in("*foo\nfoo\n")
+        .succeeds()
+        .stdout_only("*foo\n");
+}
+
 #[test]
 fn fixed_string_is_literal() {
     // Metacharacters are not interpreted.