Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ impl ParseState {
let mut esc_regions = Region::new();
if entry
.regex
.search(line, start, line.len(), Some(&mut esc_regions))
.search(line, start, line.len(), Some(&mut esc_regions), true)
{
let (esc_start, _esc_end) = esc_regions.pos(0).unwrap();
if esc_start < search_end {
Expand Down Expand Up @@ -714,7 +714,11 @@ impl ParseState {
_ => (match_pat.regex(), true),
};
// print!(" executing regex: {:?} at pos {} on line {}", regex.regex_str(), start, line);
let matched = regex.search(line, start, search_end, Some(regions));
// Only None-operation patterns should avoid zero-length matches. All other operations
// (Push, Set, Pop, Embed, Branch, Fail) legitimately need to match zero-length input
// (e.g. lookaheads used with Branch/Fail, empty patterns used with Pop/Set).
let allow_empty = !matches!(match_pat.operation, MatchOperation::None);
let matched = regex.search(line, start, search_end, Some(regions), allow_empty);

if matched {
let (match_start, match_end) = regions.pos(0).unwrap();
Expand Down
62 changes: 60 additions & 2 deletions src/parsing/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ use std::error::Error;
pub struct Regex {
regex_str: String,
regex: OnceCell<regex_impl::Regex>,
/// Lazily-compiled variant that won't match zero-length strings (for use with
/// match patterns whose operation does not modify the parser context stack).
regex_not_empty: OnceCell<regex_impl::Regex>,
}

/// A region contains text positions for capture groups in a match result.
Expand All @@ -29,6 +32,7 @@ impl Regex {
Self {
regex_str,
regex: OnceCell::new(),
regex_not_empty: OnceCell::new(),
}
}

Expand All @@ -53,30 +57,61 @@ impl Regex {
/// the [`Region`] to be reused between searches, which makes a significant performance
/// difference.
///
/// When `allow_empty` is `false`, zero-length matches are not considered. This should be used
/// only for match patterns that have no operation (i.e. that do not push, set, pop, embed,
/// branch or fail), to prevent the parser from stalling at the same position.
///
/// [`Region`]: struct.Region.html
pub fn search(
&self,
text: &str,
begin: usize,
end: usize,
region: Option<&mut Region>,
allow_empty: bool,
) -> bool {
self.regex()
.search(text, begin, end, region.map(|r| &mut r.region))
if allow_empty {
return self
.regex()
.search(text, begin, end, region.map(|r| &mut r.region));
}
// For Oniguruma, the not_empty_regex is compiled with FIND_NOT_EMPTY which
// natively avoids empty matches. For fancy-regex, which lacks a compile-time
// equivalent option, we additionally filter out any zero-length match below.
match region {
Some(region) => {
let matched =
self.not_empty_regex()
.search(text, begin, end, Some(&mut region.region));
if matched && region.pos(0).map_or(false, |(ms, me)| ms == me) {
return false;
}
matched
}
None => self.not_empty_regex().search(text, begin, end, None),
}
}

/// Returns the compiled regex, compiling it from `regex_str` on first use.
///
/// # Panics
///
/// Panics if `regex_str` does not compile.
fn regex(&self) -> &regex_impl::Regex {
    let compile = || {
        regex_impl::Regex::new(&self.regex_str).expect("regex string should be pre-tested")
    };
    self.regex.get_or_init(compile)
}

/// Returns the variant of the regex built via `new_find_not_empty`, compiling it from
/// `regex_str` on first use.
///
/// # Panics
///
/// Panics if `regex_str` does not compile.
fn not_empty_regex(&self) -> &regex_impl::Regex {
    let compile = || {
        regex_impl::Regex::new_find_not_empty(&self.regex_str)
            .expect("regex string should be pre-tested")
    };
    self.regex_not_empty.get_or_init(compile)
}
}

impl Clone for Regex {
    /// Clones only the pattern string; both compiled-regex cells start out empty in the
    /// copy and are filled again lazily on first use.
    fn clone(&self) -> Self {
        let regex_str = self.regex_str.clone();
        Self {
            regex_str,
            regex: OnceCell::new(),
            regex_not_empty: OnceCell::new(),
        }
    }
}
Expand Down Expand Up @@ -158,6 +193,21 @@ mod regex_impl {
}
}

/// Compiles `regex_str` with capture groups enabled and the `FIND_NOT_EMPTY` option set.
///
/// # Errors
///
/// Returns the boxed Oniguruma error if the pattern fails to compile.
pub fn new_find_not_empty(
    regex_str: &str,
) -> Result<Regex, Box<dyn Error + Send + Sync + 'static>> {
    let options =
        RegexOptions::REGEX_OPTION_CAPTURE_GROUP | RegexOptions::REGEX_OPTION_FIND_NOT_EMPTY;
    onig::Regex::with_options(regex_str, options, Syntax::default())
        .map(|regex| Regex { regex })
        .map_err(|error| Box::new(error) as Box<dyn Error + Send + Sync + 'static>)
}

pub fn is_match(&self, text: &str) -> bool {
self.regex
.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None)
Expand Down Expand Up @@ -220,6 +270,14 @@ mod regex_impl {
}
}

/// Builds the regex used for searches that must not yield zero-length matches.
///
/// This simply delegates to `Self::new`, so the result is compiled exactly like a
/// regular regex; the not-empty guarantee is not enforced at this level.
///
/// # Errors
///
/// Returns whatever error `Self::new` returns for `regex_str`.
pub fn new_find_not_empty(
regex_str: &str,
) -> Result<Regex, Box<dyn Error + Send + Sync + 'static>> {
// fancy-regex doesn't support a compile-time FIND_NOT_EMPTY option; empty matches are
// filtered out at search time via a wrapper in the outer Regex::search method.
Self::new(regex_str)
}

pub fn is_match(&self, text: &str) -> bool {
// Errors are treated as non-matches
self.regex.is_match(text).unwrap_or(false)
Expand Down
2 changes: 1 addition & 1 deletion src/parsing/syntax_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ mod tests {
let r = Regex::new(r"(\\\[\]\(\))(b)(c)(d)(e)".into());
let s = r"\[]()bcde";
let mut region = Region::new();
let matched = r.search(s, 0, s.len(), Some(&mut region));
let matched = r.search(s, 0, s.len(), Some(&mut region), true);
assert!(matched);

let regex_with_refs = pat.regex_with_refs(&region, s);
Expand Down
2 changes: 1 addition & 1 deletion src/parsing/syntax_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ impl SyntaxSet {
let s = s.strip_prefix("\u{feff}").unwrap_or(s); // Strip UTF-8 BOM
let cache = self.first_line_cache();
for &(ref reg, i) in cache.regexes.iter().rev() {
if reg.search(s, 0, s.len(), None) {
if reg.search(s, 0, s.len(), None, true) {
return Some(&self.syntaxes[i]);
}
}
Expand Down
26 changes: 16 additions & 10 deletions src/parsing/yaml_load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ impl SyntaxDefinition {
// Thanks @wbond for letting me know this is the correct way to check for captures
has_captures = state
.backref_regex
.search(&regex_str, 0, regex_str.len(), None);
.search(&regex_str, 0, regex_str.len(), None, true);
MatchOperation::Pop(y as usize)
} else if let Ok(y) = get_key(map, "push", Some) {
MatchOperation::Push(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
Expand All @@ -375,10 +375,13 @@ impl SyntaxDefinition {
if let Ok(v) = get_key(map, "escape", Some) {
let escape_raw = v.as_str().ok_or(ParseSyntaxError::TypeMismatch)?;
let escape_regex_str = Self::parse_regex(escape_raw, state)?;
let escape_has_captures =
state
.backref_regex
.search(&escape_regex_str, 0, escape_regex_str.len(), None);
let escape_has_captures = state.backref_regex.search(
&escape_regex_str,
0,
escape_regex_str.len(),
None,
true,
);

let escape_captures =
if let Ok(cap_map) = get_key(map, "escape_captures", |x| x.as_hash()) {
Expand Down Expand Up @@ -515,10 +518,13 @@ impl SyntaxDefinition {
let mut result = String::new();
let mut index = 0;
let mut region = Region::new();
while state
.variable_regex
.search(raw_regex, index, raw_regex.len(), Some(&mut region))
{
while state.variable_regex.search(
raw_regex,
index,
raw_regex.len(),
Some(&mut region),
true,
) {
let (begin, end) = region.pos(0).unwrap();

result.push_str(&raw_regex[index..begin]);
Expand Down Expand Up @@ -660,7 +666,7 @@ fn re_resolve_variables(raw_regex: &str, state: &ReResolveState<'_>) -> String {
let mut region = Region::new();
while state
.variable_regex
.search(raw_regex, index, raw_regex.len(), Some(&mut region))
.search(raw_regex, index, raw_regex.len(), Some(&mut region), true)
{
let (begin, end) = region.pos(0).unwrap();
result.push_str(&raw_regex[index..begin]);
Expand Down
5 changes: 1 addition & 4 deletions testdata/known_syntest_failures.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
loading syntax definitions from testdata/Packages
FAILED testdata/Packages/C#/tests/syntax_test_Strings.cs: 38
FAILED testdata/Packages/LaTeX/syntax_test_latex.tex: 1
FAILED testdata/Packages/Makefile/syntax_test_makefile.mak: 6
exiting with code 1
exiting with code 0
2 changes: 0 additions & 2 deletions testdata/known_syntest_failures_fancy.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
loading syntax definitions from testdata/Packages
FAILED testdata/Packages/C#/tests/syntax_test_Strings.cs: 38
FAILED testdata/Packages/LaTeX/syntax_test_latex.tex: 1
FAILED testdata/Packages/Markdown/syntax_test_markdown.md: 11
exiting with code 1
2 changes: 1 addition & 1 deletion tests/snapshots/public-api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ impl syntect::parsing::Regex
pub fn syntect::parsing::Regex::is_match(&self, text: &str) -> bool
pub fn syntect::parsing::Regex::new(regex_str: alloc::string::String) -> Self
pub fn syntect::parsing::Regex::regex_str(&self) -> &str
pub fn syntect::parsing::Regex::search(&self, text: &str, begin: usize, end: usize, region: core::option::Option<&mut syntect::parsing::Region>) -> bool
pub fn syntect::parsing::Regex::search(&self, text: &str, begin: usize, end: usize, region: core::option::Option<&mut syntect::parsing::Region>, allow_empty: bool) -> bool
pub fn syntect::parsing::Regex::try_compile(regex_str: &str) -> core::option::Option<alloc::boxed::Box<(dyn core::error::Error + core::marker::Send + core::marker::Sync + 'static)>>
impl core::clone::Clone for syntect::parsing::Regex
pub fn syntect::parsing::Regex::clone(&self) -> Self
Expand Down
Loading