From 1d4e848342eec7561325283ea1cb00d4c7937f45 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Thu, 28 May 2026 16:36:34 +0200 Subject: [PATCH 1/2] docs: elaborate on fast scan mode documentation The documentation for the `fast_scan` method has been significantly expanded. It now includes a detailed explanation of how the mode works internally (pattern classification, stopping after the first match) and a comprehensive code example demonstrating its behavior and implications. --- lib/src/models.rs | 4 ++++ lib/src/scanner/mod.rs | 47 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/lib/src/models.rs b/lib/src/models.rs index 44a8820ec..db5f9d22a 100644 --- a/lib/src/models.rs +++ b/lib/src/models.rs @@ -374,6 +374,10 @@ impl<'a, 'r> Pattern<'a, 'r> { } /// Returns the matches found for this pattern. + /// + /// The returned matches are affected by [`crate::Scanner::fast_scan`]. + /// If fast scan mode is enabled, not all matches are guaranteed to be + /// returned. pub fn matches(&self) -> Matches<'a, 'r> { Matches { ctx: self.ctx, diff --git a/lib/src/scanner/mod.rs b/lib/src/scanner/mod.rs index a46aa7df4..9a2cbce42 100644 --- a/lib/src/scanner/mod.rs +++ b/lib/src/scanner/mod.rs @@ -229,14 +229,55 @@ impl<'r> Scanner<'r> { /// Enables or disables fast scan mode. /// - /// In fast scan mode, the scanner avoids tracking matches for patterns - /// when it is not necessary (e.g. when a rule condition only performs a - /// simple boolean check `$a`). + /// During rule compilation, the compiler analyzes rule conditions to + /// identify patterns that are only ever used in simple boolean existence + /// checks (e.g., `$a` in YARA). If a pattern is never queried for its match + /// count (`#a`), specific match offset (`@a`), match length (`!a`), or + /// evaluated inside a loop, it is classified as a fast-scan pattern. 
+ /// + /// In fast scan mode, the scanner optimizes scans by stopping the search + /// and match tracking for these fast-scan patterns once their **very first + /// match** is found. Subsequent occurrences in the input data are ignored, + /// preventing redundant Aho-Corasick scans, regex evaluations, and match + /// memory allocations. /// /// Note that using fast scan mode implies that not all matches will be /// reported. For instance, when iterating matches using [`ScanResults`], /// you won't get all occurrences of the pattern in the file, only the first /// one. + /// + /// ### Example + /// + /// ``` + /// # use yara_x::{Compiler, Scanner}; + /// let mut compiler = Compiler::new(); + /// compiler.add_source(r#" + /// rule test { + /// strings: + /// $a = "abc" + /// condition: + /// $a + /// } + /// "#).unwrap(); + /// + /// let rules = compiler.build(); + /// let mut scanner = Scanner::new(&rules); + /// + /// // Enable fast scan mode. + /// scanner.fast_scan(true); + /// + /// // The haystack contains two matches of "abc". + /// let results = scanner.scan(b"abc...abc").unwrap(); + /// + /// // Find the matching rule. + /// let matching_rule = results.matching_rules().next().unwrap(); + /// + /// // Only a single match is returned for pattern $a. + /// let pattern = matching_rule.patterns().next().unwrap(); + /// let mut matches = pattern.matches(); + /// assert_eq!(matches.next().unwrap().range().start, 0); // The first match + /// assert!(matches.next().is_none()); // No other matches are returned + /// ``` pub fn fast_scan(&mut self, yes: bool) -> &mut Self { self.scan_context_mut().tracker.fast_scan = yes; self From 58cc128c1d06874aaeba18f788f0c50405ff1032 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Thu, 28 May 2026 16:37:01 +0200 Subject: [PATCH 2/2] docs: small improvements to the documentation. 
--- capi/include/yara_x.h | 2 +- capi/src/scanner.rs | 2 +- lib/src/models.rs | 2 +- lib/src/teddy/mod.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/capi/include/yara_x.h b/capi/include/yara_x.h index 75355452f..0ef3769a5 100644 --- a/capi/include/yara_x.h +++ b/capi/include/yara_x.h @@ -710,7 +710,7 @@ enum YRX_RESULT yrx_scanner_set_timeout(struct YRX_SCANNER *scanner, // check `$a`). // // Note that using fast scan mode implies that not all matches will be -// reported. For instance, when iterating matches using [`ScanResults`], +// reported. For instance, when iterating matches using [`yara_x::ScanResults`], // you won't get all occurrences of the pattern in the file, only the first // one. enum YRX_RESULT yrx_scanner_fast_scan(struct YRX_SCANNER *scanner, diff --git a/capi/src/scanner.rs b/capi/src/scanner.rs index d52a821a7..89897839e 100644 --- a/capi/src/scanner.rs +++ b/capi/src/scanner.rs @@ -173,7 +173,7 @@ pub unsafe extern "C" fn yrx_scanner_set_timeout( /// check `$a`). /// /// Note that using fast scan mode implies that not all matches will be -/// reported. For instance, when iterating matches using [`ScanResults`], +/// reported. For instance, when iterating matches using [`yara_x::ScanResults`], /// you won't get all occurrences of the pattern in the file, only the first /// one. #[unsafe(no_mangle)] diff --git a/lib/src/models.rs b/lib/src/models.rs index db5f9d22a..7bf583e81 100644 --- a/lib/src/models.rs +++ b/lib/src/models.rs @@ -455,7 +455,7 @@ impl<'a> Match<'a, '_> { /// and some extra bytes at its left and right. The returned range indicates /// the portion of the slice that corresponds to the match itself. /// - /// Calling this function only makes sense if [`Scanner::match_context_size`] + /// Calling this function only makes sense if [`crate::Scanner::match_context_size`] /// is used for indicating how many bytes at the left and right of each /// match are desired. 
Otherwise, this function will return the same result /// as [`Match::data`]. diff --git a/lib/src/teddy/mod.rs b/lib/src/teddy/mod.rs index 78e55538f..2eaf48994 100644 --- a/lib/src/teddy/mod.rs +++ b/lib/src/teddy/mod.rs @@ -1,6 +1,6 @@ /*! Teddy is a SIMD accelerated multiple substring matching algorithm. -This implementation was taken from https://github.com/BurntSushi/aho-corasick +This implementation was taken from <https://github.com/BurntSushi/aho-corasick> with minor modifications. */