diff --git a/Cargo.lock b/Cargo.lock index 931e1ba79..8b7770561 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1385,6 +1385,12 @@ dependencies = [ "quote", ] +[[package]] +name = "indent" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9f1a0777d972970f204fdf8ef319f1f4f8459131636d7e3c96c5d59570d0fa6" + [[package]] name = "indenter" version = "0.3.3" @@ -4079,8 +4085,10 @@ dependencies = [ "enable-ansi-support", "env_logger", "globwalk", + "indent", "log", "pprof", + "protobuf", "serde_json", "superconsole", "walkdir", diff --git a/yara-x-cli/Cargo.toml b/yara-x-cli/Cargo.toml index 29e227b91..5e64c545d 100644 --- a/yara-x-cli/Cargo.toml +++ b/yara-x-cli/Cargo.toml @@ -40,6 +40,7 @@ clap = { workspace = true, features=["cargo"] } enable-ansi-support = { workspace = true } env_logger = { workspace = true , optional = true } log = { workspace = true, optional = true } +protobuf = { workspace = true } serde_json = { workspace = true } yansi = { workspace = true } yara-x = { workspace = true } @@ -49,6 +50,7 @@ yara-x-fmt = { workspace = true } crossbeam = "0.8.2" crossterm = "0.27.0" globwalk = "0.8.1" +indent = "0.1.1" pprof = { version = "0.12.1", features = ["flamegraph"], optional=true } superconsole = "0.2.0" walkdir = "2.3.2" diff --git a/yara-x-cli/src/commands/scan.rs b/yara-x-cli/src/commands/scan.rs index e2814822a..692e3bbc8 100644 --- a/yara-x-cli/src/commands/scan.rs +++ b/yara-x-cli/src/commands/scan.rs @@ -9,9 +9,10 @@ use std::time::{Duration, Instant}; use anyhow::{bail, Context, Error}; use clap::{arg, value_parser, Arg, ArgAction, ArgMatches, Command}; use crossbeam::channel::Sender; +use indent::indent_all_by; use superconsole::style::Stylize; use superconsole::{Component, Line, Lines, Span}; -use yansi::Color::{Cyan, Red}; +use yansi::Color::{Cyan, Red, Yellow}; use yansi::Paint; use yara_x::{Rule, Rules, ScanError, Scanner}; @@ -47,6 +48,10 @@ pub fn scan() -> Command { .help("Print matching 
patterns, limited to the first N bytes") .value_parser(value_parser!(usize)) ) + .arg( + arg!(-D --"dump-module-output") + .help("Dumps the data produced by modules") + ) .arg( arg!(-n - -"negate") .help("Print non-satisfied rules only") @@ -98,6 +103,7 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> { let path_as_namespace = args.get_flag("path-as-namespace"); let skip_larger = args.get_one::("skip-larger"); let negate = args.get_flag("negate"); + let dump_module_output = args.get_flag("dump-module-output"); let timeout = args.get_one::("timeout"); let mut external_vars: Option> = args @@ -226,6 +232,24 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> { ); }; + if dump_module_output { + for (mod_name, mod_output) in scan_results.module_outputs() { + output + .send(Message::Info(format!( + ">>> {} {}\n{}<<<", + Yellow.paint(mod_name).bold(), + file_path.display(), + indent_all_by( + 4, + protobuf::text_format::print_to_string_pretty( + mod_output, + ) + ), + ))) + .unwrap(); + } + } + state.num_scanned_files.fetch_add(1, Ordering::Relaxed); Ok(()) diff --git a/yara-x/src/lib.rs b/yara-x/src/lib.rs index c81256409..1f54fcd85 100644 --- a/yara-x/src/lib.rs +++ b/yara-x/src/lib.rs @@ -53,6 +53,7 @@ pub use compiler::SerializationError; pub use scanner::Match; pub use scanner::Matches; pub use scanner::MatchingRules; +pub use scanner::ModuleOutputs; pub use scanner::NonMatchingRules; pub use scanner::Pattern; pub use scanner::Patterns; diff --git a/yara-x/src/scanner/mod.rs b/yara-x/src/scanner/mod.rs index aff9ff30e..484d2f814 100644 --- a/yara-x/src/scanner/mod.rs +++ b/yara-x/src/scanner/mod.rs @@ -4,6 +4,7 @@ The scanner takes the rules produces by the compiler and scans data with them. 
*/ use std::cell::RefCell; +use std::collections::hash_map; use std::io::Read; use std::ops::{Deref, Range}; use std::path::{Path, PathBuf}; @@ -27,7 +28,7 @@ use wasmtime::{ }; use crate::compiler::{IdentId, PatternId, RuleId, RuleInfo, Rules}; -use crate::modules::BUILTIN_MODULES; +use crate::modules::{Module, BUILTIN_MODULES}; use crate::string_pool::BStringPool; use crate::types::{Struct, TypeValue}; use crate::variables::VariableError; @@ -59,7 +60,8 @@ pub enum ScanError { /// Global counter that gets incremented every 1 second by a dedicated thread. /// -/// This counter is used for determining the when a scan operation has timed out. +/// This counter is used for determining when a scan operation has timed +/// out. static HEARTBEAT_COUNTER: AtomicU64 = AtomicU64::new(0); /// Used for spawning the thread that increments `HEARTBEAT_COUNTER`. @@ -84,8 +86,9 @@ impl<'a> AsRef<[u8]> for ScannedData<'a> { /// Scans data with already compiled YARA rules. /// /// The scanner receives a set of compiled [`Rules`] and scans data with those -/// rules. The same scanner can be used for scanning multiple files or in-memory -/// data sequentially, but you need multiple scanners for scanning in parallel. +/// rules. The same scanner can be used for scanning multiple files or +/// in-memory data sequentially, but you need multiple scanners for scanning in +/// parallel. pub struct Scanner<'r> { wasm_store: Pin>>>, wasm_main_func: TypedFunc<(), i32>, @@ -107,10 +110,10 @@ impl<'r> Scanner<'r> { // for accessing the WASM memory from code that only has a reference // to ScanContext. This kind of circular data structures are not // natural to Rust, and they can be achieved either by using unsafe - // pointers, or by using Rc::Weak. In this case we are storing a pointer - // to the store in ScanContext. The store is put into a pinned box in - // order to make sure that it doesn't move from its original memory - // address and the pointer remains valid. 
+ // pointers, or by using Rc::Weak. In this case we are storing a + // pointer to the store in ScanContext. The store is put into a + // pinned box in order to make sure that it doesn't move from + // its original memory address and the pointer remains valid. let mut wasm_store = Box::pin(Store::new( &crate::wasm::ENGINE, ScanContext { @@ -323,8 +326,8 @@ impl<'r> Scanner<'r> { /// Sets the value of a global variable. /// /// The variable must has been previously defined by calling - /// [`crate::Compiler::define_global`], and the type it has during the definition - /// must match the type of the new value (`T`). + /// [`crate::Compiler::define_global`], and the type it has during the + /// definition must match the type of the new value (`T`). /// /// The variable will retain the new value in subsequent scans, unless this /// function is called again for setting a new value. @@ -465,8 +468,9 @@ impl<'r> Scanner<'r> { ); // Make sure that the module is returning a protobuf message where - // all required fields are initialized. This only applies to proto2, - // proto3 doesn't have "required" fields, all fields are optional. + // all required fields are initialized. This only applies to + // proto2, proto3 doesn't have "required" fields, all + // fields are optional. debug_assert!( module_output.is_initialized_dyn(), "module `{}` returned a protobuf `{}` where some required fields are not initialized ", @@ -515,7 +519,8 @@ impl<'r> Scanner<'r> { // This will return Err(ScanError::Timeout), when the scan timeout is // reached while WASM code is being executed. If the timeout occurs // while ScanContext::search_for_patterns is being executed, the result - // will be Ok(1). If the scan completes successfully the result is Ok(0).` + // will be Ok(1). 
If the scan completes successfully the result is + // Ok(0). let func_result = self.wasm_main_func.call(self.wasm_store.as_context_mut(), ()); @@ -593,9 +598,10 @@ impl<'r> Scanner<'r> { .unwrap() .data_mut(self.wasm_store.as_context_mut()); - // Starting at MATCHING_RULES_BITMAP in main memory there's a bitmap - // were the N-th bit indicates if the rule with ID = N matched or not, - // If some rule matched in a previous call the bitmap will contain some + // Starting at MATCHING_RULES_BITMAP in main memory there's a + // bitmap where the N-th bit indicates if the rule with + // ID = N matched or not. If some rule matched in a + // previous call the bitmap will contain some // bits set to 1 and need to be cleared. let base = MATCHING_RULES_BITMAP_BASE as usize; let bitmap = BitSlice::<_, Lsb0>::from_slice_mut( @@ -628,7 +634,8 @@ impl<'a, 'r> ScanResults<'a, 'r> { MatchingRules::new(self.ctx, &self.data) } - /// Returns an iterator that yields the non-matching rules in arbitrary order. + /// Returns an iterator that yields the non-matching rules in arbitrary + /// order. pub fn non_matching_rules(&'a self) -> NonMatchingRules<'a, 'r> { NonMatchingRules::new(self.ctx, &self.data) } @@ -639,7 +646,7 @@ impl<'a, 'r> ScanResults<'a, 'r> { /// The result will be `None` if the module doesn't exist or didn't /// produce any output. pub fn module_output( - &'a self, + &self, module_name: &str, ) -> Option<&'a dyn MessageDyn> { let module = BUILTIN_MODULES.get(module_name)?; @@ -650,6 +657,14 @@ impl<'a, 'r> ScanResults<'a, 'r> { .as_ref(); Some(module_output) } + + /// Returns an iterator that yields tuples composed of a YARA module name + /// and the protobuf produced by that module. + /// + /// Only returns the modules that produced some output. + pub fn module_outputs(&self) -> ModuleOutputs<'a, 'r> { + ModuleOutputs::new(self.ctx) + } } /// Iterator that yields the rules that matched during a scan. 
@@ -716,7 +731,8 @@ impl<'a, 'r> NonMatchingRules<'a, 'r> { data, iterator: matching_rules_bitmap.iter_zeros(), // The number of non-matching rules is the total number of rules - // minus the number of matching rules, both private and non-private. + // minus the number of matching rules, both private and + // non-private. len: ctx.compiled_rules.rules().len() - ctx.private_matching_rules.len() - ctx.non_private_matching_rules.len(), @@ -754,6 +770,35 @@ impl<'a, 'r> ExactSizeIterator for NonMatchingRules<'a, 'r> { } } +/// Iterator that returns the outputs produced by YARA modules. +pub struct ModuleOutputs<'a, 'r> { + ctx: &'a ScanContext<'r>, + iterator: hash_map::Iter<'a, &'a str, Module>, +} + +impl<'a, 'r> ModuleOutputs<'a, 'r> { + fn new(ctx: &'a ScanContext<'r>) -> Self { + Self { ctx, iterator: BUILTIN_MODULES.iter() } + } +} + +impl<'a, 'r> Iterator for ModuleOutputs<'a, 'r> { + type Item = (&'a str, &'a dyn MessageDyn); + + fn next(&mut self) -> Option { + loop { + let (name, module) = self.iterator.next()?; + if let Some(module_output) = self + .ctx + .module_outputs + .get(module.root_struct_descriptor.full_name()) + { + return Some((*name, module_output.as_ref())); + } + } + } +} + /// A structure that describes a rule. 
pub struct Rule<'a, 'r> { ctx: &'a ScanContext<'r>, diff --git a/yara-x/src/scanner/tests.rs b/yara-x/src/scanner/tests.rs index 46e21e3a9..9bb620a57 100644 --- a/yara-x/src/scanner/tests.rs +++ b/yara-x/src/scanner/tests.rs @@ -178,6 +178,38 @@ fn module_output() { assert_eq!(output.int32_one, Some(1_i32)); } +#[cfg(feature = "test_proto2-module")] +#[test] +fn module_outputs() { + let rules = crate::compile( + r#" + import "test_proto2" + rule test { + condition: + test_proto2.file_size == 3 + } + "#, + ) + .unwrap(); + + let mut scanner = Scanner::new(&rules); + let scan_results = scanner.scan(b"").expect("scan should not fail"); + + let mut outputs = scan_results.module_outputs(); + + let (name, output) = outputs + .next() + .expect("module outputs iterator should produce at least one item"); + + assert_eq!(name, "test_proto2"); + + let output: &crate::modules::protos::test_proto2::TestProto2 = + ::downcast_ref(output).unwrap(); + + assert_eq!(output.int32_one, Some(1_i32)); + assert!(outputs.next().is_none()); +} + #[test] fn variables_1() { let mut compiler = crate::Compiler::new();