Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions capi/include/yara_x.h
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,19 @@ void yrx_scanner_destroy(struct YRX_SCANNER *scanner);
enum YRX_RESULT yrx_scanner_set_timeout(struct YRX_SCANNER *scanner,
uint64_t timeout);

// Enables or disables fast scan mode for the scanner.
//
// In fast scan mode, the scanner avoids tracking matches for patterns when it
// is not necessary (e.g. when a rule condition only performs a simple boolean
// check `$a`).
//
// Note that using fast scan mode implies that not all matches will be
// reported. For instance, when iterating over the matches of a pattern, you
// won't get all occurrences of the pattern in the file, only the first one.
//
// Returns YRX_INVALID_ARGUMENT if `scanner` is null, and YRX_SUCCESS
// otherwise.
enum YRX_RESULT yrx_scanner_fast_scan(struct YRX_SCANNER *scanner,
bool yes);

// Scans a data buffer.
//
// `data` can be null as long as `len` is 0. In such cases it's handled as
Expand Down
38 changes: 38 additions & 0 deletions capi/src/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ impl<'r> InnerScanner<'r> {
self
}

fn fast_scan(&mut self, yes: bool) -> &mut Self {
match self {
InnerScanner::SingleBlock(s) => {
s.fast_scan(yes);
}
InnerScanner::MultiBlock(s) => {
s.fast_scan(yes);
}
InnerScanner::None => unreachable!(),
}
self
}

fn make_multi_block(&mut self) -> &mut yara_x::blocks::Scanner<'r> {
// Already a multi-block scanner, nothing else to do.
if let Self::MultiBlock(s) = self {
Expand Down Expand Up @@ -153,6 +166,31 @@ pub unsafe extern "C" fn yrx_scanner_set_timeout(
YRX_RESULT::YRX_SUCCESS
}

/// Enables or disables fast scan mode for the scanner.
///
/// In fast scan mode, the scanner avoids tracking matches for patterns when it
/// is not necessary (e.g. when a rule condition only performs a simple boolean
/// check `$a`).
///
/// Note that using fast scan mode implies that not all matches will be
/// reported. For instance, when iterating over the matches of a pattern, you
/// won't get all occurrences of the pattern in the file, only the first one.
///
/// Returns `YRX_INVALID_ARGUMENT` if `scanner` is null, and `YRX_SUCCESS`
/// otherwise.
///
/// The caller must pass either a null pointer or a pointer to a live scanner
/// (e.g. one produced by `yrx_scanner_create`) that is not being accessed
/// concurrently, because the pointer is turned into a mutable reference.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn yrx_scanner_fast_scan(
    scanner: *mut YRX_SCANNER,
    yes: bool,
) -> YRX_RESULT {
    // A null scanner is reported as an error instead of being dereferenced.
    let Some(scanner) = scanner.as_mut() else {
        return YRX_RESULT::YRX_INVALID_ARGUMENT;
    };

    scanner.inner.fast_scan(yes);

    YRX_RESULT::YRX_SUCCESS
}

/// Scans a data buffer.
///
/// `data` can be null as long as `len` is 0. In such cases it's handled as
Expand Down
48 changes: 42 additions & 6 deletions capi/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ use crate::{
yrx_rule_iter_metadata, yrx_rule_iter_patterns, yrx_rule_iter_tags,
yrx_rule_namespace, yrx_rules_deserialize, yrx_rules_destroy,
yrx_rules_iter, yrx_rules_iter_imports, yrx_rules_serialize,
yrx_scanner_create, yrx_scanner_destroy, yrx_scanner_finish,
yrx_scanner_on_console_log, yrx_scanner_on_matching_rule,
yrx_scanner_scan, yrx_scanner_scan_block, yrx_scanner_set_global_bool,
yrx_scanner_set_global_float, yrx_scanner_set_global_int,
yrx_scanner_set_global_json, yrx_scanner_set_global_str,
yrx_scanner_set_module_data, yrx_scanner_set_timeout,
yrx_scanner_create, yrx_scanner_destroy, yrx_scanner_fast_scan,
yrx_scanner_finish, yrx_scanner_on_console_log,
yrx_scanner_on_matching_rule, yrx_scanner_scan, yrx_scanner_scan_block,
yrx_scanner_set_global_bool, yrx_scanner_set_global_float,
yrx_scanner_set_global_int, yrx_scanner_set_global_json,
yrx_scanner_set_global_str, yrx_scanner_set_module_data,
yrx_scanner_set_timeout,
};

use std::ffi::{CStr, c_char, c_void};
Expand Down Expand Up @@ -457,3 +458,38 @@ fn capi_errors() {
yrx_compiler_destroy(compiler);
}
}

// Exercises `yrx_scanner_fast_scan` through the C API: compiles a single
// rule, enables fast scan mode on a scanner, scans a buffer that contains
// the pattern three times, and checks how many rules were reported.
#[test]
fn capi_fast_scan() {
unsafe {
let mut compiler = std::ptr::null_mut();
yrx_compiler_create(0, &mut compiler);

// The condition is a plain boolean check on `$a`, the case that fast scan
// mode is documented to optimize.
let src = c"rule test { strings: $a = \"foo\" condition: $a }";
yrx_compiler_add_source(compiler, src.as_ptr());

// The compiler is destroyed right after building; the rules are used on
// their own from here on.
let rules = yrx_compiler_build(compiler);
yrx_compiler_destroy(compiler);

let mut scanner = std::ptr::null_mut();
yrx_scanner_create(rules, &mut scanner);

// Enable fast scan mode
yrx_scanner_fast_scan(scanner, true);

// `matches` is handed to the callback as opaque user data; presumably
// `on_rule_match_increase_counter` increments it once per matching rule
// (the callback is defined elsewhere in this file).
let mut matches = 0;
yrx_scanner_on_matching_rule(
scanner,
on_rule_match_increase_counter,
&mut matches as *mut i32 as *mut c_void,
);

// "foo" occurs three times in the scanned data.
let data = b"foofoofoo";
yrx_scanner_scan(scanner, data.as_ptr(), data.len());

// NOTE(review): this counts matching rules, not pattern occurrences, so it
// looks like it would also hold with fast scan disabled — confirm intent.
assert_eq!(matches, 1);

yrx_scanner_destroy(scanner);
yrx_rules_destroy(rules);
}
}
14 changes: 14 additions & 0 deletions go/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,20 @@ func (s *Scanner) SetTimeout(timeout time.Duration) {
runtime.KeepAlive(s)
}

// FastScan enables or disables fast scan mode.
//
// In fast scan mode, the scanner avoids tracking matches for patterns when it
// is not necessary (e.g. when a rule condition only performs a simple boolean
// check `$a`).
//
// Note that using fast scan mode implies that not all matches will be
// reported. For instance, when iterating matches, you won't get all occurrences
// of the pattern in the file, only the first one.
//
// The status code returned by the underlying C function is discarded.
func (s *Scanner) FastScan(yes bool) {
C.yrx_scanner_fast_scan(s.cScanner, C.bool(yes))
// Keep s reachable until the C call above has returned; presumably this
// prevents a finalizer from releasing s.cScanner mid-call.
runtime.KeepAlive(s)
}

var ErrTimeout = errors.New("timeout")

// SetGlobal sets the value of a global variable.
Expand Down
18 changes: 18 additions & 0 deletions go/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,21 @@ func BenchmarkScan(b *testing.B) {
}
}
}

// TestScannerFastScan checks that, with fast scan mode enabled, a rule whose
// condition is a plain `$a` check reports a single match for the pattern even
// though the pattern occurs three times in the scanned data.
func TestScannerFastScan(t *testing.T) {
	r, err := Compile(`
rule t {
strings:
$a = "foo"
condition:
$a
}`)
	// Stop early on failure: continuing with nil rules would turn a clear
	// compile error into a crash further down.
	if !assert.NoError(t, err) {
		return
	}

	s := NewScanner(r)
	s.FastScan(true)

	scanResults, err := s.Scan([]byte("foofoofoo"))
	if !assert.NoError(t, err) {
		return
	}

	// Each length check guards the indexing that follows it, so a wrong
	// count is reported as a test failure instead of an index panic.
	matchingRules := scanResults.MatchingRules()
	if !assert.Len(t, matchingRules, 1) {
		return
	}

	patterns := matchingRules[0].Patterns()
	if !assert.Len(t, patterns, 1) {
		return
	}

	assert.Len(t, patterns[0].Matches(), 1)
}
65 changes: 59 additions & 6 deletions lib/src/compiler/ir/ast2ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ pub(in crate::compiler) fn text_pattern_from_ast<'src>(
identifier: pattern.identifier.clone(),
in_use: false,
span: pattern.span(),
fast_scan_allowed: true,
pattern: Pattern::Text(LiteralPattern {
flags,
text,
Expand Down Expand Up @@ -305,6 +306,7 @@ pub(in crate::compiler) fn hex_pattern_from_ast<'src>(
identifier: pattern.identifier.clone(),
in_use: false,
span: pattern.span(),
fast_scan_allowed: true,
pattern: Pattern::Hex(RegexpPattern {
hir,
flags: PatternFlags::Ascii,
Expand Down Expand Up @@ -440,6 +442,7 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>(
identifier: pattern.identifier.clone(),
in_use: false,
span: pattern.span(),
fast_scan_allowed: true,
pattern: Pattern::Regexp(RegexpPattern {
flags,
hir,
Expand Down Expand Up @@ -772,6 +775,10 @@ fn expr_from_ast(
pattern.make_non_anchorable();
}

if !matches!(anchor, MatchAnchor::None) {
pattern.disallow_fast_scan();
}

ctx.ir.pattern_match(pattern_idx, anchor)
}
}
Expand Down Expand Up @@ -805,13 +812,19 @@ fn expr_from_ast(
let range = range_from_ast(ctx, range)?;
let (pattern_idx, pattern) =
ctx.get_pattern_mut(&p.identifier)?;
pattern.make_non_anchorable().mark_as_used();
pattern
.make_non_anchorable()
.mark_as_used()
.disallow_fast_scan();
ctx.ir.pattern_count(pattern_idx, Some(range))
}
(_, None) => {
let (pattern_idx, pattern) =
ctx.get_pattern_mut(&p.identifier)?;
pattern.make_non_anchorable().mark_as_used();
pattern
.make_non_anchorable()
.mark_as_used()
.disallow_fast_scan();
ctx.ir.pattern_count(pattern_idx, None)
}
}
Expand Down Expand Up @@ -847,13 +860,19 @@ fn expr_from_ast(
integer_in_range_from_ast(ctx, index, 1..=i64::MAX)?;
let (pattern_idx, pattern) =
ctx.get_pattern_mut(&p.identifier)?;
pattern.make_non_anchorable().mark_as_used();
pattern
.make_non_anchorable()
.mark_as_used()
.disallow_fast_scan();
ctx.ir.pattern_offset(pattern_idx, Some(range))
}
(_, None) => {
let (pattern_idx, pattern) =
ctx.get_pattern_mut(&p.identifier)?;
pattern.make_non_anchorable().mark_as_used();
pattern
.make_non_anchorable()
.mark_as_used()
.disallow_fast_scan();
ctx.ir.pattern_offset(pattern_idx, None)
}
}
Expand Down Expand Up @@ -889,13 +908,19 @@ fn expr_from_ast(
integer_in_range_from_ast(ctx, index, 1..=i64::MAX)?;
let (pattern_idx, pattern) =
ctx.get_pattern_mut(&p.identifier)?;
pattern.make_non_anchorable().mark_as_used();
pattern
.make_non_anchorable()
.mark_as_used()
.disallow_fast_scan();
ctx.ir.pattern_length(pattern_idx, Some(index))
}
(_, None) => {
let (pattern_idx, pattern) =
ctx.get_pattern_mut(&p.identifier)?;
pattern.make_non_anchorable().mark_as_used();
pattern
.make_non_anchorable()
.mark_as_used()
.disallow_fast_scan();
ctx.ir.pattern_length(pattern_idx, None)
}
}
Expand Down Expand Up @@ -1247,6 +1272,34 @@ fn for_of_expr_from_ast(

let body = bool_expr_from_ast(ctx, &for_of.body)?;

let mut allow_fast_scan = true;

for event in ctx.ir.dfs_iter(body) {
if let dfs::Event::Enter((_, expr, _)) = event
&& (matches!(
expr,
Expr::PatternCountVar { .. }
| Expr::PatternOffsetVar { .. }
| Expr::PatternLengthVar { .. }
) || (match expr {
Expr::PatternMatchVar { anchor, .. } => {
!matches!(anchor, MatchAnchor::None)
}
_ => false,
}))
{
allow_fast_scan = false;
break;
}
}

if !allow_fast_scan {
for &pattern_idx in &pattern_set {
ctx.current_rule_patterns[pattern_idx.as_usize()]
.disallow_fast_scan();
}
}

ctx.for_of_depth -= 1;
ctx.symbol_table.pop();
ctx.vars.unwind(&stack_frame);
Expand Down
24 changes: 23 additions & 1 deletion lib/src/compiler/ir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ pub(crate) struct PatternInRule<'src> {
pattern: Pattern,
span: Span,
in_use: bool,
fast_scan_allowed: bool,
}

impl<'src> PatternInRule<'src> {
Expand Down Expand Up @@ -183,6 +184,27 @@ impl<'src> PatternInRule<'src> {
self.in_use = true;
self
}

/// Returns true if this pattern can be fast-scanned.
///
/// A pattern can be fast-scanned if its occurrences are only evaluated
/// as simple boolean checks (e.g. `$a`), meaning the scanner can stop
/// tracking matches for it once the first match has been found.
///
/// Patterns are created with this flag set to `true`; it becomes `false`
/// once [`PatternInRule::disallow_fast_scan`] has been called.
#[inline]
pub fn fast_scan_allowed(&self) -> bool {
self.fast_scan_allowed
}

/// Disallows fast-scanning for this pattern.
///
/// This is called when the pattern is used in a context that requires
/// tracking all matches (such as count `#a`, offset `@a`, length `!a`,
/// anchored checks, or loop equivalents).
///
/// Returns `self` so the call can be chained with other builder-style
/// methods such as `make_non_anchorable` and `mark_as_used`. Calling it
/// more than once has no further effect.
#[inline]
pub fn disallow_fast_scan(&mut self) -> &mut Self {
self.fast_scan_allowed = false;
self
}
}

/// Represents a pattern in YARA.
Expand Down Expand Up @@ -1560,7 +1582,7 @@ impl IR {
pub fn matches_regex_set(
&mut self,
lhs: ExprId,
regex_set: crate::compiler::RegexSetId,
regex_set: RegexSetId,
) -> ExprId {
let expr_id = ExprId::from(self.nodes.len());
self.parents[lhs.0 as usize] = expr_id;
Expand Down
Loading
Loading