Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for --module-data option in scan command #183

Merged
merged 11 commits into from
Sep 5, 2024
19 changes: 10 additions & 9 deletions cli/src/commands/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::cmp::min;
use std::fs::File;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::sync::Mutex;
use std::time::{Duration, Instant};

use anyhow::{bail, Context, Error};
Expand All @@ -15,7 +15,7 @@ use superconsole::{Component, Line, Lines, Span};
use yansi::Color::{Cyan, Red, Yellow};
use yansi::Paint;
use yara_x::errors::ScanError;
use yara_x::{MetaValue, Rule, Rules, ScanResults, Scanner};
use yara_x::{MetaValue, Rule, Rules, ScanOptions, ScanResults, Scanner};

use crate::commands::{
compile_rules, external_var_parser, truncate_with_ellipsis,
Expand Down Expand Up @@ -271,9 +271,7 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
for (module_full_name, metadata_path) in metadata {
let meta = std::fs::read(Path::new(metadata_path))?;

let arcd_meta = Arc::<[u8]>::from(meta);

all_metadata.push((module_full_name.to_string(), arcd_meta));
all_metadata.push((module_full_name.to_string(), meta));
}
all_metadata
};
Expand Down Expand Up @@ -321,12 +319,15 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
.unwrap()
.push((file_path.clone(), now));

for (module_full_name, meta) in all_metadata.iter() {
scanner.set_module_meta(module_full_name, Some(meta));
}
let scan_options = all_metadata.iter().fold(
ScanOptions::new(),
|acc, (module_name, meta)| {
acc.set_module_metadata(module_name, meta)
},
);

let scan_results = scanner
.scan_file(file_path.as_path())
.scan_file_with_options(file_path.as_path(), scan_options)
.with_context(|| format!("scanning {:?}", &file_path));

state
Expand Down
1 change: 1 addition & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ pub use scanner::Pattern;
pub use scanner::Patterns;
pub use scanner::Rule;
pub use scanner::ScanError;
pub use scanner::ScanOptions;
pub use scanner::ScanResults;
pub use scanner::Scanner;
pub use variables::Variable;
Expand Down
13 changes: 0 additions & 13 deletions lib/src/scanner/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::ops::{Range, RangeInclusive};
use std::ptr::NonNull;
use std::rc::Rc;
use std::sync::atomic::Ordering;
use std::sync::Arc;

#[cfg(feature = "logging")]
use log::*;
Expand Down Expand Up @@ -84,18 +83,6 @@ pub(crate) struct ScanContext<'r> {
/// operation. Keys are the fully qualified protobuf message names, and
/// values are the protobuf messages set with [`Scanner::set_module_output`].
pub user_provided_module_outputs: FxHashMap<String, Box<dyn MessageDyn>>,
/// Hash map that contains the metadata for modules for *the next scan*.
/// The reasoning behind having an `Arc<[u8]>` instead of a `Vec<u8>` is
/// the fact that we need to set this hashmap way too often for batch scans
/// (metadata must be set for each file). The overhead of cloning the
/// vector alternative could be non-trivial (`O(1)` vs `O(n)`) for a single
/// iteration/file.
///
/// Chose `Arc<_>` over `Rc<_>` for `Arc<_>` compatibility with `Send` and
/// `Sync` traits.
///
/// Avoided `&[u8]` because of lifetimes
pub module_meta: FxHashMap<String, Arc<[u8]>>,
/// Hash map that tracks the matches occurred during a scan. The keys
/// are the PatternId of the matching pattern, and values are a list
/// of matches.
Expand Down
86 changes: 55 additions & 31 deletions lib/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The scanner takes the rules produced by the compiler and scans data with them.
*/

use std::cell::RefCell;
use std::collections::hash_map;
use std::collections::{hash_map, HashMap};
use std::io::Read;
use std::ops::{Deref, Range};
use std::path::{Path, PathBuf};
Expand All @@ -13,7 +13,7 @@ use std::ptr::{null, NonNull};
use std::rc::Rc;
use std::slice::Iter;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Once};
use std::sync::Once;
use std::time::Duration;
use std::{cmp, fs, thread};

Expand Down Expand Up @@ -110,6 +110,32 @@ impl<'a> AsRef<[u8]> for ScannedData<'a> {
}
}

/// Optional, per-scan information handed to a scan operation.
///
/// The options only borrow their contents: both the module names and the
/// metadata byte slices must outlive `'a`. Nothing is copied when an entry
/// is added.
#[derive(Debug, Default)]
pub struct ScanOptions<'a> {
    /// Metadata blobs keyed by the module name the scanner looks them up by.
    module_metadata: HashMap<&'a str, &'a [u8]>,
}

impl<'a> ScanOptions<'a> {
    /// Creates a new instance of `ScanOptions` with no additional information
    /// for the scan operation.
    ///
    /// Use the builder-style methods to add additional information.
    pub fn new() -> Self {
        Self::default()
    }

    /// Adds metadata for a YARA module and returns the updated options.
    ///
    /// `module_name` is the key under which `metadata` is stored. Setting
    /// metadata twice for the same name keeps only the most recent slice.
    ///
    /// This method consumes `self`, so the returned value must be used;
    /// dropping it discards the metadata that was just added.
    #[must_use]
    pub fn set_module_metadata(
        mut self,
        module_name: &'a str,
        metadata: &'a [u8],
    ) -> Self {
        self.module_metadata.insert(module_name, metadata);
        self
    }
}

/// Scans data with already compiled YARA rules.
///
/// The scanner receives a set of compiled [`Rules`] and scans data with those
Expand Down Expand Up @@ -157,7 +183,6 @@ impl<'r> Scanner<'r> {
main_memory: None,
module_outputs: FxHashMap::default(),
user_provided_module_outputs: FxHashMap::default(),
module_meta: FxHashMap::default(),
pattern_matches: PatternMatches::new(),
unconfirmed_matches: FxHashMap::default(),
deadline: 0,
Expand Down Expand Up @@ -341,6 +366,20 @@ impl<'r> Scanner<'r> {
Ok(data)
}

/// Scans a file like [`Scanner::scan_file`], additionally taking the
/// [`ScanOptions`] to use for this scan.
///
/// The file is loaded first; if loading fails, that error is returned and
/// no scan is started.
pub fn scan_file_with_options<'a, 'opts, P>(
    &'a mut self,
    target: P,
    scan_options: ScanOptions<'opts>,
) -> Result<ScanResults<'a, 'r>, ScanError>
where
    P: AsRef<Path>,
{
    let path: &Path = target.as_ref();
    let file_data = Self::load_file(path)?;
    self.scan_with_options_impl(file_data, scan_options)
}

/// Scans a file.
pub fn scan_file<'a, P>(
&'a mut self,
Expand All @@ -349,17 +388,24 @@ impl<'r> Scanner<'r> {
where
P: AsRef<Path>,
{
let target = Self::load_file(target.as_ref())?;
self.scan_file_with_options(target, ScanOptions::new())
chudicek marked this conversation as resolved.
Show resolved Hide resolved
}

self.scan_impl(target)
/// Scans in-memory data like [`Scanner::scan`], additionally taking the
/// [`ScanOptions`] to use for this scan.
pub fn scan_with_options<'a, 'opts>(
    &'a mut self,
    data: &'a [u8],
    scan_options: ScanOptions<'opts>,
) -> Result<ScanResults<'a, 'r>, ScanError> {
    // Wrap the borrowed slice so it goes through the shared scan routine.
    let scanned = ScannedData::Slice(data);
    self.scan_with_options_impl(scanned, scan_options)
}

/// Scans in-memory data.
pub fn scan<'a>(
&'a mut self,
data: &'a [u8],
) -> Result<ScanResults<'a, 'r>, ScanError> {
self.scan_impl(ScannedData::Slice(data))
self.scan_with_options(data, ScanOptions::new())
chudicek marked this conversation as resolved.
Show resolved Hide resolved
}

/// Sets the value of a global variable.
Expand Down Expand Up @@ -462,26 +508,6 @@ impl<'r> Scanner<'r> {
Ok(())
}

/// Sets or clears the metadata stored for a module, identified by its
/// fully-qualified name.
///
/// With `Some(meta)` the entry for `module_name` is inserted (replacing any
/// previous one); with `None` the entry is removed.
///
/// The metadata is shared through an `Arc<[u8]>`, so storing it only clones
/// the reference-counted handle. See the `module_meta` field of
/// `ScanContext` for the reasoning behind choosing `Arc<_>`.
pub fn set_module_meta(
    &mut self,
    module_name: &str,
    meta: Option<&Arc<[u8]>>,
) {
    let module_meta = &mut self.wasm_store.data_mut().module_meta;

    match meta {
        Some(bytes) => {
            module_meta.insert(module_name.to_string(), Arc::clone(bytes));
        }
        None => {
            module_meta.remove(module_name);
        }
    }
}

/// Similar to [`Scanner::set_module_output`], but receives a module name
/// and the protobuf message as raw data.
///
Expand Down Expand Up @@ -527,9 +553,10 @@ impl<'r> Scanner<'r> {
}

impl<'r> Scanner<'r> {
fn scan_impl<'a>(
fn scan_with_options_impl<'a, 'opts>(
&'a mut self,
data: ScannedData<'a>,
scan_options: ScanOptions<'opts>, // todo use
chudicek marked this conversation as resolved.
Show resolved Hide resolved
) -> Result<ScanResults<'a, 'r>, ScanError> {
// Clear information about matches found in a previous scan, if any.
self.reset();
Expand Down Expand Up @@ -615,7 +642,7 @@ impl<'r> Scanner<'r> {
Some(output)
} else {
let meta =
ctx.module_meta.get(module_name).map(|data| &**data);
scan_options.module_metadata.get(module_name).copied();

let data = data.as_ref();

Expand Down Expand Up @@ -722,9 +749,6 @@ impl<'r> Scanner<'r> {
}
}

// clear the metadata for all modules
self.wasm_store.data_mut().module_meta.clear();

match func_result {
Ok(0) => Ok(ScanResults::new(self.wasm_store.data(), data)),
Ok(1) => Err(ScanError::Timeout),
Expand Down
8 changes: 5 additions & 3 deletions lib/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,14 @@ macro_rules! test_rule {

let mut scanner = crate::scanner::Scanner::new(&rules);

for (module_name, module_data) in $metadata {
scanner.set_module_meta(module_name, Some(module_data));
let mut scan_options = crate::ScanOptions::new();

for (module_name, meta) in $metadata {
scan_options = scan_options.set_module_metadata(module_name, meta);
}

let num_matching_rules = scanner
.scan($data)
.scan_with_options($data, scan_options)
.expect("scan should not fail")
.matching_rules()
.len();
Expand Down