From 740b3022dc21fc0ad728cbb1222aaa29d31ce789 Mon Sep 17 00:00:00 2001 From: Wondr Date: Fri, 5 Jun 2026 11:20:01 +0100 Subject: [PATCH] grep: allow wasm32-wasip1 cargo check --- .github/workflows/ci.yml | 14 +++++++++ Cargo.toml | 6 ++-- src/matcher.rs | 63 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9551318..58e8d46 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,6 +26,20 @@ jobs: - name: Run tests run: cargo test --verbose + wasm: + name: Check wasm32-wasip1 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-wasip1 + + - name: Check + run: cargo check --target wasm32-wasip1 + coverage: name: Code coverage (${{ matrix.os }}) runs-on: ${{ matrix.os }} diff --git a/Cargo.toml b/Cargo.toml index 7170309..443cd97 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,11 +22,13 @@ path = "src/lib.rs" clap = { version = "4.5", features = ["wrap_help", "cargo", "color"] } glob = "0.3.1" memchr = "2.7.2" -onig = { version = "~6.5.1", default-features = false } -onig_sys = { version = "*", default-features = false } uucore = "0.8.0" walkdir = "2.5" +[target.'cfg(not(all(target_family = "wasm", target_os = "wasi")))'.dependencies] +onig = { version = "~6.5.1", default-features = false } +onig_sys = { version = "*", default-features = false } + [[bench]] name = "grep_bench" harness = false diff --git a/src/matcher.rs b/src/matcher.rs index 6d72b69..b88dcef 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -5,10 +5,12 @@ use crate::{Config, RegexMode}; use memchr::memmem; +#[cfg(not(all(target_family = "wasm", target_os = "wasi")))] use onig::{ EncodedBytes, Regex, RegexOptions, Region, SearchOptions, Syntax, SyntaxBehavior, SyntaxOperator, }; +#[cfg(not(all(target_family = "wasm", target_os = "wasi")))] use onig_sys::{OnigEncCtype_ONIGENC_CTYPE_WORD, OnigEncodingUTF8}; use uucore::error::{UResult, USimpleError}; @@ -111,6 +113,7 @@ impl<'a> Matcher<'a> { /// Word-boundary check `-w`. /// NOTE that `-w` does not check both sides, unlike `\b` in a regex. /// Start/End-of-line count as non-words. + #[cfg(not(all(target_family = "wasm", target_os = "wasi")))] fn is_word_match(line: &[u8], start: usize, end: usize) -> bool { // SAFETY: This code uses OnigEncodingType such that it can support other types of encodings in the future. unsafe { @@ -137,6 +140,23 @@ impl<'a> Matcher<'a> { true } } + + /// WASI builds do not link Oniguruma, so this is only a conservative ASCII + /// fallback for literal-only matches. + #[cfg(all(target_family = "wasm", target_os = "wasi"))] + fn is_word_match(line: &[u8], start: usize, end: usize) -> bool { + fn is_ascii_word(byte: u8) -> bool { + byte.is_ascii_alphanumeric() || byte == b'_' + } + + if end < line.len() && is_ascii_word(line[end]) { + return false; + } + if start > 0 && is_ascii_word(line[start - 1]) { + return false; + } + true + } } /// Streaming k-way merge over compiled patterns @@ -248,6 +268,7 @@ fn plain_literal(pattern: &str, ignore_case: bool, mode: RegexMode) -> Option UResult { let mut syntax = *match config.regex_mode { @@ -344,6 +366,47 @@ impl CompiledPattern { } } +#[cfg(all(target_family = "wasm", target_os = "wasi"))] +struct CompiledPattern { + needle: Vec, + finder: memmem::Finder<'static>, +} + +#[cfg(all(target_family = "wasm", target_os = "wasi"))] +impl CompiledPattern { + fn compile(pattern: &str, config: &Config) -> UResult { + let Some(needle) = plain_literal(pattern, config.ignore_case, config.regex_mode) else { + return Err(USimpleError::new( + 2, + "wasm32-wasip1 builds support ASCII literal patterns only; full regex matching requires Oniguruma and a C WASI sysroot".to_string(), + )); + }; + let finder = memmem::Finder::new(&needle).into_owned(); + Ok(Self { needle, finder }) + } + + /// Find the leftmost match starting at or after `offset`. + fn search_leftmost(&self, line: &[u8], offset: usize) -> Option<(usize, usize)> { + self.finder.find(&line[offset..]).map(|relative| { + let start = offset + relative; + (start, start + self.needle.len()) + }) + } + + /// Given a known leftmost start `start`, return the longest extent + /// of a match anchored exactly there. + fn longest_end_at(&self, line: &[u8], start: usize) -> Option { + line.get(start..start + self.needle.len()) + .is_some_and(|bytes| bytes == self.needle.as_slice()) + .then_some(start + self.needle.len()) + } + + /// True if any match exists in `line`. + fn is_match(&self, line: &[u8]) -> bool { + self.finder.find(line).is_some() + } +} + #[cfg(test)] mod tests { use super::plain_literal;