From 28186e9ec3867c70cbdaa6988fbb9e24b95a2195 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Sun, 31 May 2026 10:06:09 +0200
Subject: [PATCH 1/2] perf: buffer-at-a-time search for literal patterns

Literal searches were ~50-70x slower than GNU grep because every line
paid per-line costs (terminator scan, NUL scan, dispatch) even when a
buffer held no match. Add a buffer-at-a-time driver that scans whole
chunks with a substring searcher and only locates line boundaries
around the matches it finds; a chunk with no match costs a single
vectorized sweep and no per-line work.

The driver activates only for plain ASCII, case-sensitive literal
patterns (any text under -F; otherwise no regex metacharacters) in the
simpler output modes: -c, -l, -L, -q, and plain line printing with
-n/-b/filename/-m. Anything needing match positions, context, inversion,
color, -w/-x, -z, or special binary handling falls back to the unchanged
line-at-a-time path. Output stays byte-identical to that path, including
binary/invalid-UTF-8 behavior.

- line_buffer: read_chunk() yields the largest span of complete lines.
- matcher: expose per-pattern memmem searchers when every pattern is a
  plain literal (plain_literal()).
- searcher: eligible_for_fast_path(), fast_locate(), fast_print().

All scanning rides on the memchr crate (SIMD memchr/memrchr/memmem).
Unit tests for read_chunk and plain_literal; integration tests for
prefixes, -m, and multi-chunk line-number correctness.

Benchmarks (31 MB corpus) vs prior release:
  -F (no match):  232ms -> 15ms  (15.9x; now faster than GNU)
  -c literal:     229ms -> 15ms  (15.2x)
  plain print:    248ms -> 18ms  (13.5x)
Regex and -i paths are unchanged (still the line-at-a-time engine).
---
 src/lib.rs         |   2 +-
 src/line_buffer.rs | 191 +++++++++++++++++++++++++++++++++++-
 src/matcher.rs     |  99 ++++++++++++++++++-
 src/searcher.rs    | 240 ++++++++++++++++++++++++++++++++++++++++++++-
 tests/test_grep.rs |  67 +++++++++++++
 5 files changed, 595 insertions(+), 4 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 0f2b63a..e34bd0a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -22,7 +22,7 @@ use std::io::{IsTerminal as _, Read};
 use std::path::Path;
 use uucore::error::{FromIo, UResult, USimpleError};
 
-#[derive(Clone, Copy, PartialEq, Eq)]
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
 #[doc(hidden)]
 pub enum RegexMode {
     Fixed,
diff --git a/src/line_buffer.rs b/src/line_buffer.rs
index 54e7057..51ee4da 100644
--- a/src/line_buffer.rs
+++ b/src/line_buffer.rs
@@ -3,7 +3,7 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
-use memchr::memchr;
+use memchr::{memchr, memrchr};
 use std::fs::File;
 use std::io::{self, Read as _};
 
@@ -111,4 +111,193 @@ impl LineBuffer {
             self.end += n;
         }
     }
+
+    /// Read the next run of *complete* lines as a single slice.
+    ///
+    /// Returns `Ok(None)` at end of input. Otherwise returns `Ok(Some((chunk,
+    /// chunk_start)))`, where `chunk` spans one or more whole lines (each ending
+    /// in the terminator) and `chunk_start` is the absolute byte offset of the
+    /// first byte of the chunk. The only exception is a final line lacking a
+    /// terminator, which is returned on its own as the last chunk.
+    ///
+    /// This hands back as much buffered data as ends on a line boundary, so a
+    /// caller can scan many lines with one pass instead of line by line.
+    pub fn read_chunk(&mut self, file: &mut File) -> io::Result<Option<(&[u8], u64)>> {
+        loop {
+            // Hand back everything up to and including the last terminator.
+            if self.end > self.beg
+                && let Some(off) = memrchr(self.line_terminator, &self.buffer[self.beg..self.end])
+            {
+                let beg = self.beg;
+                let lim = self.beg + off + 1;
+                let chunk_start = self.next_line_start;
+                self.next_line_start += (lim - beg) as u64;
+                self.beg = lim;
+                self.scan = lim;
+                return Ok(Some((&self.buffer[beg..lim], chunk_start)));
+            }
+
+            // No whole line buffered. At EOF, flush any unterminated remainder.
+            if self.eof {
+                if self.beg == self.end {
+                    return Ok(None);
+                }
+                let beg = self.beg;
+                let chunk_start = self.next_line_start;
+                self.next_line_start += (self.end - beg) as u64;
+                self.beg = self.end;
+                self.scan = self.end;
+                return Ok(Some((&self.buffer[beg..self.end], chunk_start)));
+            }
+
+            // Slide the partial tail to the front to maximize room for reading.
+            if self.beg > 0 {
+                self.buffer.copy_within(self.beg..self.end, 0);
+                self.end -= self.beg;
+                self.beg = 0;
+                self.scan = 0;
+            }
+            if self.end == self.buffer.len() {
+                // A single line is longer than the whole buffer; grow it.
+                self.buffer.resize(self.buffer.len() * 2, 0);
+            }
+
+            let n = loop {
+                match file.read(&mut self.buffer[self.end..]) {
+                    Ok(n) => break n,
+                    Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
+                    Err(e) => return Err(e),
+                }
+            };
+            if n == 0 {
+                self.eof = true;
+            } else {
+                self.end += n;
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::{Seek as _, SeekFrom, Write as _};
+    use std::sync::atomic::{AtomicU32, Ordering};
+
+    static COUNTER: AtomicU32 = AtomicU32::new(0);
+
+    /// A temp file pre-loaded with `content`, rewound to the start, and removed
+    /// from disk when dropped.
+    struct TempInput {
+        file: File,
+        path: std::path::PathBuf,
+    }
+
+    impl Drop for TempInput {
+        fn drop(&mut self) {
+            let _ = std::fs::remove_file(&self.path);
+        }
+    }
+
+    fn temp_input(content: &[u8]) -> TempInput {
+        let mut path = std::env::temp_dir();
+        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
+        path.push(format!("uu_grep_lb_{}_{n}.tmp", std::process::id()));
+        let mut file = std::fs::OpenOptions::new()
+            .read(true)
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .open(&path)
+            .unwrap();
+        file.write_all(content).unwrap();
+        file.seek(SeekFrom::Start(0)).unwrap();
+        TempInput { file, path }
+    }
+
+    /// Drain `read_chunk` into a list of (owned bytes, start offset) pairs.
+    fn chunks(term: u8, content: &[u8]) -> Vec<(Vec<u8>, u64)> {
+        let mut lb = LineBuffer::new(term);
+        let mut input = temp_input(content);
+        let mut out = Vec::new();
+        while let Some((chunk, start)) = lb.read_chunk(&mut input.file).unwrap() {
+            out.push((chunk.to_vec(), start));
+        }
+        out
+    }
+
+    #[test]
+    fn empty_input_yields_nothing() {
+        assert!(chunks(b'\n', b"").is_empty());
+    }
+
+    #[test]
+    fn whole_complete_lines_come_back_as_one_chunk() {
+        // Small input arrives in a single read, so everything up to the final
+        // terminator is one chunk starting at offset 0.
+        assert_eq!(
+            chunks(b'\n', b"a\nbb\nccc\n"),
+            vec![(b"a\nbb\nccc\n".to_vec(), 0)]
+        );
+    }
+
+    #[test]
+    fn unterminated_tail_is_a_final_chunk_with_its_own_offset() {
+        // "a\n" is the complete-line chunk; "bb" is flushed at EOF at offset 2.
+        assert_eq!(
+            chunks(b'\n', b"a\nbb"),
+            vec![(b"a\n".to_vec(), 0), (b"bb".to_vec(), 2)]
+        );
+    }
+
+    #[test]
+    fn input_without_any_terminator_is_one_chunk() {
+        assert_eq!(chunks(b'\n', b"abc"), vec![(b"abc".to_vec(), 0)]);
+    }
+
+    #[test]
+    fn honors_a_custom_terminator() {
+        assert_eq!(
+            chunks(b'\0', b"a\0bb\0c"),
+            vec![(b"a\0bb\0".to_vec(), 0), (b"c".to_vec(), 5)]
+        );
+    }
+
+    #[test]
+    fn reassembles_input_larger_than_the_buffer() {
+        // Force many reads and at least one chunk boundary mid-file.
+        let mut content = Vec::new();
+        for i in 0..50_000u32 {
+            content.extend_from_slice(format!("line number {i}\n").as_bytes());
+        }
+        assert!(content.len() > 128 * 1024);
+
+        let got = chunks(b'\n', &content);
+        assert!(got.len() > 1, "expected multiple chunks, got {}", got.len());
+
+        // Chunks must tile the input exactly, contiguously, each ending on a
+        // line boundary (the input ends with a terminator).
+        let mut expected_start = 0u64;
+        let mut joined = Vec::new();
+        for (bytes, start) in &got {
+            assert_eq!(*start, expected_start);
+            assert_eq!(*bytes.last().unwrap(), b'\n');
+            expected_start += bytes.len() as u64;
+            joined.extend_from_slice(bytes);
+        }
+        assert_eq!(joined, content);
+    }
+
+    #[test]
+    fn grows_to_hold_a_single_overlong_line() {
+        // One line far bigger than the initial 128 KiB buffer, then a short one.
+        let mut content = vec![b'x'; 300 * 1024];
+        content.push(b'\n');
+        content.extend_from_slice(b"tail\n");
+
+        let got = chunks(b'\n', &content);
+        let joined: Vec<u8> = got.iter().flat_map(|(b, _)| b.clone()).collect();
+        assert_eq!(joined, content);
+        assert_eq!(got[0].1, 0);
+    }
 }
diff --git a/src/matcher.rs b/src/matcher.rs
index d9cf846..604259c 100644
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -4,6 +4,7 @@
 // file that was distributed with this source code.
 
 use crate::{Config, RegexMode};
+use memchr::memmem;
 use onig::{
     EncodedBytes, Regex, RegexOptions, Region, SearchOptions, Syntax, SyntaxBehavior,
     SyntaxOperator,
@@ -14,6 +15,12 @@ use uucore::error::{UResult, USimpleError};
 pub struct Matcher<'a> {
     config: &'a Config<'a>,
     patterns: Vec<CompiledPattern>,
+    /// One substring searcher per pattern, present only when *every* pattern is
+    /// a plain literal that a raw byte search resolves exactly (see
+    /// [`plain_literal`]). When set, a caller can decide a line matches by
+    /// looking for any of these needles, bypassing the regex engine entirely.
+    /// `None` as soon as a single pattern needs real regex evaluation.
+    literal_searchers: Option<Vec<memmem::Finder<'static>>>,
 }
 
 impl<'a> Matcher<'a> {
@@ -22,7 +29,32 @@ impl<'a> Matcher<'a> {
         for raw in config.patterns {
             patterns.push(CompiledPattern::compile(raw, config)?);
         }
-        Ok(Self { config, patterns })
+
+        // If we can reduce the whole pattern set to literal needles, keep a
+        // searcher for each so the driver can take a bulk substring-scan path.
+        let needles: Option<Vec<Vec<u8>>> = config
+            .patterns
+            .iter()
+            .map(|p| plain_literal(p, config.ignore_case, config.regex_mode))
+            .collect();
+        let literal_searchers = needles.filter(|n| !n.is_empty()).map(|n| {
+            n.iter()
+                .map(|w| memmem::Finder::new(w).into_owned())
+                .collect()
+        });
+
+        Ok(Self {
+            config,
+            patterns,
+            literal_searchers,
+        })
+    }
+
+    /// Per-pattern substring searchers, present only when the pattern set is a
+    /// pure set of literals (no regex needed). Used by the searcher to scan a
+    /// whole buffer at once instead of testing line by line.
+    pub fn literal_searchers(&self) -> Option<&[memmem::Finder<'static>]> {
+        self.literal_searchers.as_deref()
     }
 
     /// Decide whether `line` matches and return the positions to highlight.
@@ -194,6 +226,25 @@ impl Cursor<'_> {
     }
 }
 
+/// Return the literal bytes of `pattern` when a raw byte-for-byte substring
+/// search is *exactly* equivalent to matching it, otherwise `None`.
+///
+/// We accept only ASCII, case-sensitive needles. That keeps the byte search in
+/// agreement with the regex engine on every possible input, including bytes that
+/// are not valid UTF-8: an ASCII byte can never be part of a multi-byte sequence,
+/// so its presence is unambiguous. In the regex modes we also require that no
+/// byte could ever act as a metacharacter; under `-F` the text is literal as-is.
+fn plain_literal(pattern: &str, ignore_case: bool, mode: RegexMode) -> Option<Vec<u8>> {
+    if ignore_case || pattern.is_empty() || !pattern.is_ascii() {
+        return None;
+    }
+    // Every byte that carries special meaning in any of our regex syntaxes.
+    // A needle without these reads the same as a literal in Basic/Extended/Perl.
+    const SPECIAL: &[u8] = b".*[]^$\\+?{}()|";
+    let plain = mode == RegexMode::Fixed || !pattern.bytes().any(|b| SPECIAL.contains(&b));
+    plain.then(|| pattern.as_bytes().to_vec())
+}
+
 struct CompiledPattern {
     /// Default semantics. It's decently fast and used for searching.
     leftmost: Regex,
@@ -289,3 +340,49 @@ impl CompiledPattern {
             .is_some()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::plain_literal;
+    use crate::RegexMode;
+
+    fn lit(p: &str, ic: bool, mode: RegexMode) -> Option<Vec<u8>> {
+        plain_literal(p, ic, mode)
+    }
+
+    #[test]
+    fn fixed_mode_takes_any_ascii_verbatim() {
+        // Under -F every byte is literal, even regex metacharacters.
+        assert_eq!(lit("abc", false, RegexMode::Fixed), Some(b"abc".to_vec()));
+        assert_eq!(lit("a.*b", false, RegexMode::Fixed), Some(b"a.*b".to_vec()));
+        assert_eq!(lit("a+b", false, RegexMode::Fixed), Some(b"a+b".to_vec()));
+    }
+
+    #[test]
+    fn regex_modes_accept_metacharacter_free_literals() {
+        for mode in [RegexMode::Basic, RegexMode::Extended, RegexMode::Perl] {
+            assert_eq!(lit("ing", false, mode), Some(b"ing".to_vec()));
+            assert_eq!(lit("Hello123", false, mode), Some(b"Hello123".to_vec()));
+        }
+    }
+
+    #[test]
+    fn regex_modes_reject_anything_with_a_metacharacter() {
+        for mode in [RegexMode::Basic, RegexMode::Extended, RegexMode::Perl] {
+            for p in [
+                "a.b", "a*", "[ab]", "^a", "a$", "a\\b", "a+", "a?", "(a)", "a|b", "a{2}",
+            ] {
+                assert_eq!(lit(p, false, mode), None, "pattern {p:?} in {mode:?}");
+            }
+        }
+    }
+
+    #[test]
+    fn rejects_empty_case_insensitive_and_non_ascii() {
+        assert_eq!(lit("", false, RegexMode::Fixed), None);
+        assert_eq!(lit("abc", true, RegexMode::Fixed), None); // -i
+        assert_eq!(lit("abc", true, RegexMode::Basic), None);
+        assert_eq!(lit("café", false, RegexMode::Fixed), None); // non-ASCII
+        assert_eq!(lit("naïve", false, RegexMode::Basic), None);
+    }
+}
diff --git a/src/searcher.rs b/src/searcher.rs
index c63c826..5da5936 100644
--- a/src/searcher.rs
+++ b/src/searcher.rs
@@ -8,7 +8,8 @@ use crate::line_buffer::LineBuffer;
 use crate::matcher::Matcher;
 use crate::output::OutputWriter;
 use crate::{BinaryMode, Config, DeviceMode, DirectoryMode};
-use memchr::memchr;
+use memchr::memmem::Finder;
+use memchr::{memchr, memchr_iter, memrchr};
 use std::ffi::OsStr;
 use std::fs::File;
 use std::io;
@@ -248,12 +249,221 @@ impl<'a> Searcher<'a> {
         self.binary_notice_enabled && self.session_binary_detected && self.session_any_match()
     }
 
+    /// Whether the current configuration can use the buffer-at-a-time fast
+    /// path. It applies only to pure-literal patterns and the simpler output
+    /// modes — anything needing match positions, context, inversion, or special
+    /// binary handling falls back to the line-at-a-time [`Self::session_run`].
+    fn eligible_for_fast_path(&self) -> bool {
+        // On Windows the line-at-a-time path strips a trailing CR before
+        // matching; the fast path mirrors that only for printed output, so a
+        // literal needle still behaves the same. Nothing else differs.
+        self.matcher.literal_searchers().is_some()
+            && !self.config.invert_match
+            && !self.config.word_regexp
+            && !self.config.line_regexp
+            && !self.config.only_matching
+            && !self.config.use_color
+            // `has_context` also covers `-C 0`, which still emits `--` separators.
+            && !self.config.has_context
+            && !self.config.null_data
+            && self.config.binary_mode != BinaryMode::WithoutMatch
+    }
+
+    /// Buffer-at-a-time driver for literal patterns. Instead of testing every
+    /// line, it scans whole chunks with a substring searcher and only locates
+    /// line boundaries around the matches it finds.
+    fn session_run_fast(
+        &mut self,
+        lb: &mut LineBuffer,
+        path: &Path,
+        reader: &mut File,
+    ) -> io::Result<bool> {
+        lb.reset();
+        if self.config.quiet
+            || self.config.files_with_matches
+            || self.config.files_without_match
+            || self.config.count
+        {
+            self.fast_locate(lb, path, reader)
+        } else {
+            self.fast_print(lb, path, reader)
+        }
+    }
+
+    /// Fast path for modes that only need *whether* / *how many* lines match:
+    /// `-c`, `-l`, `-L`, `-q`. Nothing is rendered per line, so no line numbers,
+    /// byte offsets, or binary bookkeeping are needed (binary detection does not
+    /// change the count, nor what `-l`/`-L` list or the `-q` status). Scanning
+    /// stops at the first match for `-l`/`-L`/`-q` and once `-m` is satisfied.
+    fn fast_locate(
+        &mut self,
+        lb: &mut LineBuffer,
+        path: &Path,
+        reader: &mut File,
+    ) -> io::Result<bool> {
+        let finders = self
+            .matcher
+            .literal_searchers()
+            .expect("eligibility guarantees literal searchers");
+        let max = self.config.max_count;
+        // Existence is enough for these three; only `-c` needs the full tally.
+        let stop_at_first =
+            self.config.quiet || self.config.files_with_matches || self.config.files_without_match;
+
+        let mut count: u64 = 0;
+        let mut matched = false;
+        'outer: while let Some((chunk, _)) = lb.read_chunk(reader)? {
+            let mut p = 0;
+            while p < chunk.len() {
+                if max.is_some_and(|mx| count >= mx) {
+                    break 'outer; // `-m` cap reached: skip the search entirely.
+                }
+                let Some(rel) = leftmost_match(finders, &chunk[p..]) else {
+                    break;
+                };
+                let (_, line_end) = line_bounds(chunk, p + rel);
+                count += 1;
+                matched = true;
+                if stop_at_first {
+                    break 'outer;
+                }
+                // Each line counts once: resume past this line's terminator.
+                p = line_end + 1;
+            }
+        }
+
+        // `-l`/`-L` take precedence over `-c`, matching the line-at-a-time path.
+        if self.config.quiet {
+            // Exit status only.
+        } else if self.config.files_with_matches {
+            if matched {
+                self.writer.write_filename(path)?;
+            }
+        } else if self.config.files_without_match {
+            if !matched {
+                self.writer.write_filename(path)?;
+            }
+        } else if self.config.count {
+            self.writer.write_count(count, path)?;
+        }
+        Ok(matched)
+    }
+
+    /// Fast path that prints whole matching lines (optionally with `-n`, `-b`,
+    /// filename prefixes, `-m`). Binary files are detected per chunk and reported
+    /// with the usual notice instead of dumping their lines.
+    fn fast_print(
+        &mut self,
+        lb: &mut LineBuffer,
+        path: &Path,
+        reader: &mut File,
+    ) -> io::Result<bool> {
+        let finders = self
+            .matcher
+            .literal_searchers()
+            .expect("eligibility guarantees literal searchers");
+        let max = self.config.max_count;
+        let want_lineno = self.config.line_number;
+        let detect_binary = self.config.binary_mode != BinaryMode::Text;
+        let notice_enabled = self.binary_notice_enabled;
+
+        let mut count: u64 = 0;
+        let mut matched = false;
+        let mut binary = false;
+        // Number of terminators in all previously consumed chunks (for `-n`).
+        let mut base_lines: u64 = 0;
+
+        'outer: while let Some((chunk, chunk_off)) = lb.read_chunk(reader)? {
+            let mut p = 0;
+            // NULs scanned up to `nul_scanned`; terminators counted up to `nl_cursor`.
+            let mut nul_scanned = 0;
+            let mut nl_cursor = 0;
+            let mut nl_before = 0u64;
+
+            while p < chunk.len() {
+                let Some(rel) = leftmost_match(finders, &chunk[p..]) else {
+                    break;
+                };
+                if max.is_some_and(|mx| count >= mx) {
+                    break 'outer;
+                }
+                let (line_beg, line_end) = line_bounds(chunk, p + rel);
+
+                // A NUL anywhere up to this line marks the file binary, as does
+                // an invalid-UTF-8 matching line.
+                if detect_binary && !binary {
+                    if memchr(0, &chunk[nul_scanned..line_end]).is_some() {
+                        binary = true;
+                    }
+                    nul_scanned = line_end;
+                }
+
+                let line = &chunk[line_beg..line_end];
+                #[cfg(windows)]
+                let line = if self.config.strip_cr && line.last() == Some(&b'\r') {
+                    &line[..line.len() - 1]
+                } else {
+                    line
+                };
+
+                if detect_binary && !binary && std::str::from_utf8(line).is_err() {
+                    binary = true;
+                }
+
+                if binary {
+                    // First match in a binary file: stop and emit the notice
+                    // once at the end instead of dumping the line.
+                    matched = true;
+                    break 'outer;
+                }
+
+                let line_number = if want_lineno {
+                    nl_before += count_terminators(&chunk[nl_cursor..line_beg]);
+                    nl_cursor = line_beg;
+                    base_lines + nl_before + 1
+                } else {
+                    0
+                };
+                self.writer.write_line(
+                    &LineView {
+                        line,
+                        line_number,
+                        byte_offset: chunk_off + line_beg as u64,
+                        is_match: true,
+                        match_positions: &[],
+                    },
+                    path,
+                )?;
+                count += 1;
+                matched = true;
+                p = line_end + 1;
+            }
+
+            // Carry NUL detection and the line tally across the chunk boundary.
+            if detect_binary && !binary && memchr(0, &chunk[nul_scanned..]).is_some() {
+                binary = true;
+            }
+            if want_lineno {
+                base_lines += nl_before + count_terminators(&chunk[nl_cursor..]);
+            }
+        }
+
+        if binary && notice_enabled && matched {
+            self.writer.report_binary_match(path);
+        }
+        Ok(matched)
+    }
+
     fn session_run(
         &mut self,
         lb: &mut LineBuffer,
         path: &Path,
         reader: &mut File,
     ) -> io::Result<bool> {
+        if self.eligible_for_fast_path() {
+            return self.session_run_fast(lb, path, reader);
+        }
+
         // Reset all session (per-file) state.
         self.session_context_buf.clear();
         self.session_match_count = 0;
@@ -470,3 +680,31 @@ impl<'a> Searcher<'a> {
         }
     }
 }
+
+/// Offset of the earliest occurrence of any needle in `hay`, or `None`.
+fn leftmost_match(finders: &[Finder<'static>], hay: &[u8]) -> Option<usize> {
+    let mut best: Option<usize> = None;
+    for finder in finders {
+        // Only a start before `best` can win, so stop each scan just short of it.
+        let cap = best.map_or(hay.len(), |b| hay.len().min(b + finder.needle().len() - 1));
+        best = finder.find(&hay[..cap]).or(best);
+        if best == Some(0) {
+            break; // Can't start any earlier.
+        }
+    }
+    best
+}
+
+/// Count line terminators in `bytes`.
+fn count_terminators(bytes: &[u8]) -> u64 {
+    memchr_iter(b'\n', bytes).count() as u64
+}
+
+/// Byte range `[start, end)` of the line containing `pos` in `buf`, excluding
+/// the trailing terminator. `start` follows the previous terminator (or 0);
+/// `end` is the next terminator (or end of buffer).
+fn line_bounds(buf: &[u8], pos: usize) -> (usize, usize) {
+    let start = memrchr(b'\n', &buf[..pos]).map_or(0, |i| i + 1);
+    let end = memchr(b'\n', &buf[pos..]).map_or(buf.len(), |i| pos + i);
+    (start, end)
+}
diff --git a/tests/test_grep.rs b/tests/test_grep.rs
index d061918..87bc292 100644
--- a/tests/test_grep.rs
+++ b/tests/test_grep.rs
@@ -1272,3 +1272,70 @@ fn repeated_options_are_accepted() {
         .succeeds()
         .stdout_only("a\nb\n");
 }
+
+#[test]
+fn literal_buffer_path_prefixes_and_max() {
+    // Plain literals are served by the buffer-at-a-time engine; the line/byte
+    // prefixes and -m must still be byte-identical to the line-at-a-time path.
+
+    // -n and -b together: "lineno:byteoffset:line".
+    let (_s, mut c) = ucmd();
+    c.args(&["-nb", "foo"])
+        .pipe_in("foo\nbar\nfoobar\n")
+        .succeeds()
+        .stdout_only("1:0:foo\n3:8:foobar\n");
+
+    // A line containing several matches is still counted once.
+    let (_s, mut c) = ucmd();
+    c.args(&["-c", "oo"])
+        .pipe_in("oooo\nbar\noo\n")
+        .succeeds()
+        .stdout_only("2\n");
+
+    // -m caps printed matches.
+    let (_s, mut c) = ucmd();
+    c.args(&["-m", "2", "x"])
+        .pipe_in("x\ny\nx\nz\nx\n")
+        .succeeds()
+        .stdout_only("x\nx\n");
+
+    // Final line without a trailing terminator still matches and is printed
+    // with an added newline.
+    let (_s, mut c) = ucmd();
+    c.args(&["foo"])
+        .pipe_in("bar\nfoo")
+        .succeeds()
+        .stdout_only("foo\n");
+}
+
+#[test]
+fn literal_buffer_path_spans_many_chunks() {
+    // Build an input far larger than the read buffer so the buffer-at-a-time
+    // engine crosses several chunk boundaries, and check that line numbers and
+    // counts stay correct across them.
+    let mut input = String::new();
+    let mut expected_n = String::new();
+    let mut count = 0u32;
+    for i in 1..=100_000u32 {
+        if i % 7 == 0 {
+            input.push_str("needle\n");
+            expected_n.push_str(&format!("{i}:needle\n"));
+            count += 1;
+        } else {
+            input.push_str("some filler text\n");
+        }
+    }
+    assert!(input.len() > 512 * 1024, "input must exceed several chunks");
+
+    let (_s, mut c) = ucmd();
+    c.args(&["-c", "needle"])
+        .pipe_in(input.clone())
+        .succeeds()
+        .stdout_only(format!("{count}\n"));
+
+    let (_s, mut c) = ucmd();
+    c.args(&["-n", "needle"])
+        .pipe_in(input)
+        .succeeds()
+        .stdout_only(expected_n);
+}

From 56d774f576bd8e3a04027d555b9cdfc471cb923f Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Sun, 31 May 2026 11:41:16 +0200
Subject: [PATCH 2/2] test: cover slow-path modes that literal tests no longer
 reach

The buffer-at-a-time fast path now serves the literal patterns that the
existing -l/-L/-q and binary tests used, leaving the line-at-a-time
engine's equivalents uncovered. Add bracket-class (non-literal) tests
for -l/-L/-q and binary handling (notice, -a text, without-match bail,
and the finalize-time notice), plus a fast-path test for a NUL that is
only discovered after a line was already printed.

No dead code was found: the remaining uncovered lines are writer I/O
error-propagation arms and pre-existing filesystem error handlers.
---
 tests/test_grep.rs | 100 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/tests/test_grep.rs b/tests/test_grep.rs
index 87bc292..560dc12 100644
--- a/tests/test_grep.rs
+++ b/tests/test_grep.rs
@@ -1339,3 +1339,103 @@ fn literal_buffer_path_spans_many_chunks() {
         .succeeds()
         .stdout_only(expected_n);
 }
+
+// Plain literals run on the buffer-at-a-time fast path, so the following tests
+// use bracket-class patterns (non-literal) to keep the line-at-a-time engine's
+// `-l` / `-L` / `-q` and binary-handling paths exercised too.
+
+#[test]
+fn slow_path_list_and_quiet_modes() {
+    let (scene, _) = ucmd();
+    scene.fixtures.write("hit", "yes\n");
+    scene.fixtures.write("miss", "no\n");
+
+    // -l: list matching files.
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["-l", "[y]es", "hit", "miss"])
+        .succeeds()
+        .stdout_is("hit\n");
+
+    // -L with a match in one file: only the non-matching file is listed.
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["-L", "[y]es", "hit", "miss"])
+        .succeeds()
+        .stdout_is("miss\n");
+
+    // -L with no match anywhere: both files listed, exit 1.
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["-L", "[z]z", "hit", "miss"])
+        .fails_with_code(1)
+        .stdout_is("hit\nmiss\n");
+
+    // -q stops at the first match (exit 0) or reports no match (exit 1).
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["-q", "[y]es", "hit"])
+        .succeeds()
+        .no_output();
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["-q", "[z]z", "hit"])
+        .fails_with_code(1)
+        .no_output();
+}
+
+#[test]
+fn slow_path_binary_handling() {
+    let (scene, _) = ucmd();
+    // NOTE: avoid the name "nul" here — it's a reserved device name on Windows,
+    // so writing/reading it hits the null device instead of a real file.
+    scene.fixtures.write_bytes("nulbin", b"hit\0\n");
+    scene.fixtures.write_bytes("bad", b"a\x9d\n");
+
+    // Binary notice on the line-at-a-time engine (regex pattern).
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["[h]it", "nulbin"])
+        .succeeds()
+        .no_stdout()
+        .stderr_contains("binary file matches");
+
+    // -a forces text mode: the NUL line is printed verbatim.
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["-a", "[h]it", "nulbin"])
+        .succeeds()
+        .stdout_is_bytes(b"hit\0\n");
+
+    // --binary-files=without-match bails out on an invalid-UTF-8 match.
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["--binary-files=without-match", "[a]", "bad"])
+        .fails_with_code(1)
+        .no_output();
+
+    // A NUL after the matched line means binariness is discovered at EOF, so
+    // the line is printed first and the notice is emitted during finalization.
+    scene.fixtures.write_bytes("late", b"hit\nno\0\n");
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["[h]it", "late"])
+        .succeeds()
+        .stdout_is("hit\n")
+        .stderr_contains("binary file matches");
+}
+
+#[test]
+fn fast_path_binary_detected_after_a_printed_line() {
+    // A NUL that appears only after the last match in the buffer marks the file
+    // binary on the fast path *after* an earlier match was already printed: the
+    // printed line stays and the trailing notice is still emitted.
+    let (scene, _) = ucmd();
+    scene.fixtures.write_bytes("b", b"hit\nno\0\n");
+    scene
+        .cmd(env!("CARGO_BIN_EXE_grep"))
+        .args(&["hit", "b"])
+        .succeeds()
+        .stdout_is("hit\n")
+        .stderr_contains("binary file matches");
+}