Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions regex-automata/src/dfa/onepass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2215,11 +2215,16 @@ impl DFA {
// We *also* need to set any explicit slots that are active as part of
// the path to the match state.
if self.explicit_slot_start < slots.len() {
// NOTE: The 'cache.explicit_slots()' slice is setup at the
// beginning of every search such that it is guaranteed to return a
// slice of length equivalent to 'slots[explicit_slot_start..]'.
slots[self.explicit_slot_start..]
.copy_from_slice(cache.explicit_slots());
// Copy only the slots that exist in the cache. When a regex has
// capture groups with zero repetition (e.g., (abc){0}), the cache
// may have fewer explicit slots than what the caller provided.
let cache_slots = cache.explicit_slots();
let available = core::cmp::min(
cache_slots.len(),
slots.len().saturating_sub(self.explicit_slot_start),
);
slots[self.explicit_slot_start..self.explicit_slot_start + available]
.copy_from_slice(&cache_slots[..available]);
epsilons.slots().apply(at, &mut slots[self.explicit_slot_start..]);
}
*matched_pid = Some(pid);
Expand Down Expand Up @@ -2577,7 +2582,7 @@ impl Cache {
}

/// Records how many explicit capture slots the upcoming search may use.
///
/// The requested length is clamped to the number of slots actually held in
/// `self.explicit_slots`, so the recorded length never exceeds what this
/// cache stores.
fn setup_search(&mut self, explicit_slot_len: usize) {
    // Store the clamped value only; a preceding unclamped assignment would
    // be a dead store because it is overwritten right away.
    self.explicit_slot_len =
        core::cmp::min(explicit_slot_len, self.explicit_slots.len());
}
}

Expand Down
1 change: 1 addition & 0 deletions regex-automata/tests/dfa/onepass/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
#[cfg(not(miri))]
mod suite;
mod regression;
51 changes: 51 additions & 0 deletions regex-automata/tests/dfa/onepass/regression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Regression test: a capture group under a zero repetition (`{0}`) used to
// make a one-pass DFA search panic.
// See: https://github.com/rust-lang/regex/issues/XXX
#[test]
fn zero_repetition_capture_group() {
    use regex_automata::{
        dfa::onepass::DFA, util::primitives::NonMaxUsize, Anchored, Input,
        PatternID,
    };

    // Builds the slot value that records offset `n`.
    let at = |n: usize| Some(NonMaxUsize::new(n).unwrap());
    // Every search below is expected to report the first (only) pattern.
    let matched = Some(PatternID::must(0));

    let zero_rep = DFA::new(r"(abc)(ABC){0}").unwrap();
    let haystack = "abcABC";
    let input =
        Input::new(haystack).span(0..haystack.len()).anchored(Anchored::Yes);

    // Four slots: the slot array is sized for the pattern.
    let mut zero_rep_cache = zero_rep.create_cache();
    let mut exact: Vec<Option<NonMaxUsize>> = vec![None; 4];
    let pid = zero_rep
        .try_search_slots(&mut zero_rep_cache, &input, &mut exact)
        .unwrap();
    assert_eq!(pid, matched);
    assert_eq!(exact[..], [at(0), at(3), at(0), at(3)]);

    // Six slots: more than the four used above, searched with the same cache.
    let mut oversized: Vec<Option<NonMaxUsize>> = vec![None; 6];
    let pid = zero_rep
        .try_search_slots(&mut zero_rep_cache, &input, &mut oversized)
        .unwrap();
    assert_eq!(pid, matched);
    // The first capture group reports its span ...
    assert_eq!(oversized[2..4], [at(0), at(3)]);
    // ... while the slots for the `{0}` group stay unset.
    assert!(oversized[4..6].iter().all(Option::is_none));

    // A second regex whose second group really participates, searched with
    // its own cache.
    let both_groups = DFA::new(r"(abc)(ABC)").unwrap();
    let mut both_groups_cache = both_groups.create_cache();
    let mut full: Vec<Option<NonMaxUsize>> = vec![None; 6];
    let pid = both_groups
        .try_search_slots(&mut both_groups_cache, &input, &mut full)
        .unwrap();
    assert_eq!(pid, matched);
    assert_eq!(full[4..6], [at(3), at(6)]);

    // Back to the first regex and its cache - this previously caused a panic.
    let mut again: Vec<Option<NonMaxUsize>> = vec![None; 6];
    let pid = zero_rep
        .try_search_slots(&mut zero_rep_cache, &input, &mut again)
        .unwrap();
    assert_eq!(pid, matched);
    assert!(again[4..6].iter().all(Option::is_none));
}