Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,20 @@ jobs:
- name: Run tests
run: cargo test --verbose

# CI job: verifies that the crate still type-checks for the wasm32-wasip1 target.
wasm:
name: Check wasm32-wasip1
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Install Rust
uses: dtolnay/rust-toolchain@stable
with:
# Adds the WASI target to the stable toolchain installed above.
targets: wasm32-wasip1

- name: Check
# Only `cargo check` is run for this target; the test suite is run by a separate step.
run: cargo check --target wasm32-wasip1

coverage:
name: Code coverage (${{ matrix.os }})
runs-on: ${{ matrix.os }}
Expand Down
6 changes: 4 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ path = "src/lib.rs"
clap = { version = "4.5", features = ["wrap_help", "cargo", "color"] }
glob = "0.3.1"
memchr = "2.7.2"
onig = { version = "~6.5.1", default-features = false }
onig_sys = { version = "*", default-features = false }
uucore = "0.8.0"
walkdir = "2.5"

# Oniguruma bindings are only pulled in when the target is NOT wasm+wasi;
# src/matcher.rs gates its `onig`/`onig_sys` imports with the same cfg expression.
[target.'cfg(not(all(target_family = "wasm", target_os = "wasi")))'.dependencies]
onig = { version = "~6.5.1", default-features = false }
onig_sys = { version = "*", default-features = false }

[[bench]]
name = "grep_bench"
harness = false
Expand Down
63 changes: 63 additions & 0 deletions src/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

use crate::{Config, RegexMode};
use memchr::memmem;
#[cfg(not(all(target_family = "wasm", target_os = "wasi")))]
use onig::{
EncodedBytes, Regex, RegexOptions, Region, SearchOptions, Syntax, SyntaxBehavior,
SyntaxOperator,
};
#[cfg(not(all(target_family = "wasm", target_os = "wasi")))]
use onig_sys::{OnigEncCtype_ONIGENC_CTYPE_WORD, OnigEncodingUTF8};
use uucore::error::{UResult, USimpleError};

Expand Down Expand Up @@ -111,6 +113,7 @@ impl<'a> Matcher<'a> {
/// Word-boundary check `-w`.
/// NOTE that `-w` does not check both sides, unlike `\b` in a regex.
/// Start/End-of-line count as non-words.
#[cfg(not(all(target_family = "wasm", target_os = "wasi")))]
fn is_word_match(line: &[u8], start: usize, end: usize) -> bool {
// SAFETY: This code uses OnigEncodingType such that it can support other types of encodings in the future.
unsafe {
Expand All @@ -137,6 +140,23 @@ impl<'a> Matcher<'a> {
true
}
}

/// WASI builds do not link Oniguruma, so this is only a conservative ASCII
/// fallback for literal-only matches.
#[cfg(all(target_family = "wasm", target_os = "wasi"))]
fn is_word_match(line: &[u8], start: usize, end: usize) -> bool {
    // A "word" byte here is an ASCII letter, an ASCII digit or an underscore.
    let is_word_byte = |b: u8| matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_');

    // Byte immediately after the match; end-of-line (`None`) counts as a non-word.
    let trailing_is_word = line.get(end).is_some_and(|&b| is_word_byte(b));
    // Byte immediately before the match; a match at index 0 has nothing before it.
    // Kept lazy so it is only inspected when the trailing side did not already reject.
    let leading_is_word = || start > 0 && is_word_byte(line[start - 1]);

    !(trailing_is_word || leading_is_word())
}
}

/// Streaming k-way merge over compiled patterns
Expand Down Expand Up @@ -248,6 +268,7 @@ fn plain_literal(pattern: &str, ignore_case: bool, mode: RegexMode) -> Option<Ve
plain.then(|| pattern.as_bytes().to_vec())
}

#[cfg(not(all(target_family = "wasm", target_os = "wasi")))]
struct CompiledPattern {
/// Default semantics. It's decently fast and used for searching.
leftmost: Regex,
Expand All @@ -257,6 +278,7 @@ struct CompiledPattern {
longest_anchored: Regex,
}

#[cfg(not(all(target_family = "wasm", target_os = "wasi")))]
impl CompiledPattern {
fn compile(pattern: &str, config: &Config) -> UResult<Self> {
let mut syntax = *match config.regex_mode {
Expand Down Expand Up @@ -344,6 +366,47 @@ impl CompiledPattern {
}
}

/// WASI variant of the compiled pattern: a plain byte-string searcher used
/// in place of the Oniguruma-backed regexes, which are not built for this target.
#[cfg(all(target_family = "wasm", target_os = "wasi"))]
struct CompiledPattern {
/// The literal bytes being searched for; its length gives the match length.
needle: Vec<u8>,
/// Owned `memmem` searcher built from `needle` at compile time.
finder: memmem::Finder<'static>,
}

#[cfg(all(target_family = "wasm", target_os = "wasi"))]
impl CompiledPattern {
    /// Build the literal-only matcher used on WASI targets.
    ///
    /// # Errors
    ///
    /// Returns an error with exit code 2 when `plain_literal` does not accept
    /// `pattern` under the given case-sensitivity and regex mode, because no
    /// regex engine is available on this target.
    fn compile(pattern: &str, config: &Config) -> UResult<Self> {
        let Some(needle) = plain_literal(pattern, config.ignore_case, config.regex_mode) else {
            return Err(USimpleError::new(
                2,
                "wasm32-wasip1 builds support ASCII literal patterns only; full regex matching requires Oniguruma and a C WASI sysroot".to_string(),
            ));
        };
        // `into_owned` detaches the searcher from the borrow of `needle`, so
        // both can be stored side by side in the struct.
        let finder = memmem::Finder::new(&needle).into_owned();
        Ok(Self { needle, finder })
    }

    /// Find the leftmost match starting at or after `offset`.
    ///
    /// Returns the `(start, end)` byte range of the match within `line`, or
    /// `None` when there is no match or `offset` lies past the end of `line`.
    fn search_leftmost(&self, line: &[u8], offset: usize) -> Option<(usize, usize)> {
        // Checked slicing: an out-of-range offset means "no match", not a panic.
        let haystack = line.get(offset..)?;
        let start = offset + self.finder.find(haystack)?;
        Some((start, start + self.needle.len()))
    }

    /// Given a known leftmost start `start`, return the longest extent
    /// of a match anchored exactly there.
    ///
    /// A literal has exactly one possible length, so this is `start` plus the
    /// needle length when `line` continues with the needle at `start`, and
    /// `None` otherwise (including when `start` is past the end of `line`).
    fn longest_end_at(&self, line: &[u8], start: usize) -> Option<usize> {
        // Slice first, add afterwards: the sum is only computed once the
        // needle is known to fit inside `line`, so it cannot overflow.
        line.get(start..)?
            .starts_with(&self.needle)
            .then(|| start + self.needle.len())
    }

    /// True if any match exists in `line`.
    fn is_match(&self, line: &[u8]) -> bool {
        self.finder.find(line).is_some()
    }
}

#[cfg(test)]
mod tests {
use super::plain_literal;
Expand Down
Loading