Skip to content

Commit

Permalink
feat(cli): implement the --dump-module-output command-line flag
Browse files Browse the repository at this point in the history
This option prints the outputs produced by YARA modules.  Also implements the `ScanResults::module_outputs` API that returns the outputs produced by all modules.
  • Loading branch information
plusvic committed Oct 4, 2023
1 parent 107275c commit ae3881b
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 20 deletions.
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions yara-x-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ clap = { workspace = true, features=["cargo"] }
enable-ansi-support = { workspace = true }
env_logger = { workspace = true , optional = true }
log = { workspace = true, optional = true }
protobuf = { workspace = true }
serde_json = { workspace = true }
yansi = { workspace = true }
yara-x = { workspace = true }
Expand All @@ -49,6 +50,7 @@ yara-x-fmt = { workspace = true }
crossbeam = "0.8.2"
crossterm = "0.27.0"
globwalk = "0.8.1"
indent = "0.1.1"
pprof = { version = "0.12.1", features = ["flamegraph"], optional=true }
superconsole = "0.2.0"
walkdir = "2.3.2"
Expand Down
26 changes: 25 additions & 1 deletion yara-x-cli/src/commands/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ use std::time::{Duration, Instant};
use anyhow::{bail, Context, Error};
use clap::{arg, value_parser, Arg, ArgAction, ArgMatches, Command};
use crossbeam::channel::Sender;
use indent::indent_all_by;
use superconsole::style::Stylize;
use superconsole::{Component, Line, Lines, Span};
use yansi::Color::{Cyan, Red};
use yansi::Color::{Cyan, Red, Yellow};
use yansi::Paint;
use yara_x::{Rule, Rules, ScanError, Scanner};

Expand Down Expand Up @@ -47,6 +48,10 @@ pub fn scan() -> Command {
.help("Print matching patterns, limited to the first N bytes")
.value_parser(value_parser!(usize))
)
.arg(
arg!(-D --"dump-module-output")
.help("Dumps the data produced by modules")
)
.arg(
arg!(-n - -"negate")
.help("Print non-satisfied rules only")
Expand Down Expand Up @@ -98,6 +103,7 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
let path_as_namespace = args.get_flag("path-as-namespace");
let skip_larger = args.get_one::<u64>("skip-larger");
let negate = args.get_flag("negate");
let dump_module_output = args.get_flag("dump-module-output");
let timeout = args.get_one::<u64>("timeout");

let mut external_vars: Option<Vec<(String, serde_json::Value)>> = args
Expand Down Expand Up @@ -226,6 +232,24 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
);
};

if dump_module_output {
for (mod_name, mod_output) in scan_results.module_outputs() {
output
.send(Message::Info(format!(
">>> {} {}\n{}<<<",
Yellow.paint(mod_name).bold(),
file_path.display(),
indent_all_by(
4,
protobuf::text_format::print_to_string_pretty(
mod_output,
)
),
)))
.unwrap();
}
}

state.num_scanned_files.fetch_add(1, Ordering::Relaxed);

Ok(())
Expand Down
1 change: 1 addition & 0 deletions yara-x/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ pub use compiler::SerializationError;
pub use scanner::Match;
pub use scanner::Matches;
pub use scanner::MatchingRules;
pub use scanner::ModuleOutputs;
pub use scanner::NonMatchingRules;
pub use scanner::Pattern;
pub use scanner::Patterns;
Expand Down
83 changes: 64 additions & 19 deletions yara-x/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ The scanner takes the rules produced by the compiler and scans data with them.
*/

use std::cell::RefCell;
use std::collections::hash_map;
use std::io::Read;
use std::ops::{Deref, Range};
use std::path::{Path, PathBuf};
Expand All @@ -27,7 +28,7 @@ use wasmtime::{
};

use crate::compiler::{IdentId, PatternId, RuleId, RuleInfo, Rules};
use crate::modules::BUILTIN_MODULES;
use crate::modules::{Module, BUILTIN_MODULES};
use crate::string_pool::BStringPool;
use crate::types::{Struct, TypeValue};
use crate::variables::VariableError;
Expand Down Expand Up @@ -59,7 +60,8 @@ pub enum ScanError {

/// Global counter that gets incremented every 1 second by a dedicated thread.
///
/// This counter is used for determining when a scan operation has timed out.
/// This counter is used for determining when a scan operation has timed
/// out.
static HEARTBEAT_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Used for spawning the thread that increments `HEARTBEAT_COUNTER`.
Expand All @@ -84,8 +86,9 @@ impl<'a> AsRef<[u8]> for ScannedData<'a> {
/// Scans data with already compiled YARA rules.
///
/// The scanner receives a set of compiled [`Rules`] and scans data with those
/// rules. The same scanner can be used for scanning multiple files or in-memory
/// data sequentially, but you need multiple scanners for scanning in parallel.
/// rules. The same scanner can be used for scanning multiple files or
/// in-memory data sequentially, but you need multiple scanners for scanning in
/// parallel.
pub struct Scanner<'r> {
wasm_store: Pin<Box<Store<ScanContext<'r>>>>,
wasm_main_func: TypedFunc<(), i32>,
Expand All @@ -107,10 +110,10 @@ impl<'r> Scanner<'r> {
// for accessing the WASM memory from code that only has a reference
// to ScanContext. These kinds of circular data structures are not
// natural to Rust, and they can be achieved either by using unsafe
// pointers, or by using Rc::Weak. In this case we are storing a pointer
// to the store in ScanContext. The store is put into a pinned box in
// order to make sure that it doesn't move from its original memory
// address and the pointer remains valid.
// pointers, or by using Rc::Weak. In this case we are storing a
// pointer to the store in ScanContext. The store is put into a
// pinned box in order to make sure that it doesn't move from
// its original memory address and the pointer remains valid.
let mut wasm_store = Box::pin(Store::new(
&crate::wasm::ENGINE,
ScanContext {
Expand Down Expand Up @@ -323,8 +326,8 @@ impl<'r> Scanner<'r> {
/// Sets the value of a global variable.
///
/// The variable must have been previously defined by calling
/// [`crate::Compiler::define_global`], and the type it has during the definition
/// must match the type of the new value (`T`).
/// [`crate::Compiler::define_global`], and the type it has during the
/// definition must match the type of the new value (`T`).
///
/// The variable will retain the new value in subsequent scans, unless this
/// function is called again for setting a new value.
Expand Down Expand Up @@ -465,8 +468,9 @@ impl<'r> Scanner<'r> {
);

// Make sure that the module is returning a protobuf message where
// all required fields are initialized. This only applies to proto2,
// proto3 doesn't have "required" fields, all fields are optional.
// all required fields are initialized. This only applies to
// proto2, proto3 doesn't have "required" fields, all
// fields are optional.
debug_assert!(
module_output.is_initialized_dyn(),
"module `{}` returned a protobuf `{}` where some required fields are not initialized ",
Expand Down Expand Up @@ -515,7 +519,8 @@ impl<'r> Scanner<'r> {
// This will return Err(ScanError::Timeout), when the scan timeout is
// reached while WASM code is being executed. If the timeout occurs
// while ScanContext::search_for_patterns is being executed, the result
// will be Ok(1). If the scan completes successfully the result is Ok(0).
// will be Ok(1). If the scan completes successfully the result is
// Ok(0).
let func_result =
self.wasm_main_func.call(self.wasm_store.as_context_mut(), ());

Expand Down Expand Up @@ -593,9 +598,10 @@ impl<'r> Scanner<'r> {
.unwrap()
.data_mut(self.wasm_store.as_context_mut());

// Starting at MATCHING_RULES_BITMAP in main memory there's a bitmap
// where the N-th bit indicates if the rule with ID = N matched or not.
// If some rule matched in a previous call the bitmap will contain some
// Starting at MATCHING_RULES_BITMAP in main memory there's a
// bitmap where the N-th bit indicates if the rule with
// ID = N matched or not. If some rule matched in a
// previous call the bitmap will contain some
// bits set to 1 and need to be cleared.
let base = MATCHING_RULES_BITMAP_BASE as usize;
let bitmap = BitSlice::<_, Lsb0>::from_slice_mut(
Expand Down Expand Up @@ -628,7 +634,8 @@ impl<'a, 'r> ScanResults<'a, 'r> {
MatchingRules::new(self.ctx, &self.data)
}

/// Returns an iterator that yields the non-matching rules in arbitrary order.
/// Returns an iterator that yields the non-matching rules in arbitrary
/// order.
pub fn non_matching_rules(&'a self) -> NonMatchingRules<'a, 'r> {
NonMatchingRules::new(self.ctx, &self.data)
}
Expand All @@ -639,7 +646,7 @@ impl<'a, 'r> ScanResults<'a, 'r> {
/// The result will be `None` if the module doesn't exist or didn't
/// produce any output.
pub fn module_output(
&'a self,
&self,
module_name: &str,
) -> Option<&'a dyn MessageDyn> {
let module = BUILTIN_MODULES.get(module_name)?;
Expand All @@ -650,6 +657,14 @@ impl<'a, 'r> ScanResults<'a, 'r> {
.as_ref();
Some(module_output)
}

/// Returns an iterator over the outputs produced by YARA modules.
///
/// Every item is a tuple made of a module name and the protobuf message
/// associated to that module. Modules that didn't produce any output are
/// not included in the iteration.
pub fn module_outputs(&self) -> ModuleOutputs<'a, 'r> {
    // The context reference is `Copy`, so the returned iterator borrows the
    // scan context for `'a` and not this `ScanResults` value.
    let ctx = self.ctx;
    ModuleOutputs::new(ctx)
}
}

/// Iterator that yields the rules that matched during a scan.
Expand Down Expand Up @@ -716,7 +731,8 @@ impl<'a, 'r> NonMatchingRules<'a, 'r> {
data,
iterator: matching_rules_bitmap.iter_zeros(),
// The number of non-matching rules is the total number of rules
// minus the number of matching rules, both private and non-private.
// minus the number of matching rules, both private and
// non-private.
len: ctx.compiled_rules.rules().len()
- ctx.private_matching_rules.len()
- ctx.non_private_matching_rules.len(),
Expand Down Expand Up @@ -754,6 +770,35 @@ impl<'a, 'r> ExactSizeIterator for NonMatchingRules<'a, 'r> {
}
}

/// Iterator that returns the outputs produced by YARA modules.
///
/// Each item is a `(module name, module output)` tuple. Built-in modules
/// that have no entry in the scan context's `module_outputs` map are
/// skipped.
pub struct ModuleOutputs<'a, 'r> {
// Scan context whose `module_outputs` map is queried for every module.
ctx: &'a ScanContext<'r>,
// Iterator over `BUILTIN_MODULES`, yielding (name, module) pairs.
iterator: hash_map::Iter<'a, &'a str, Module>,
}

impl<'a, 'r> ModuleOutputs<'a, 'r> {
    /// Creates an iterator that walks over all the built-in modules and
    /// looks up their outputs in the given scan context.
    fn new(ctx: &'a ScanContext<'r>) -> Self {
        let iterator = BUILTIN_MODULES.iter();
        Self { ctx, iterator }
    }
}

impl<'a, 'r> Iterator for ModuleOutputs<'a, 'r> {
    type Item = (&'a str, &'a dyn MessageDyn);

    /// Advances to the next built-in module that has an output stored in
    /// the scan context, returning its name together with that output.
    ///
    /// Returns `None` once the underlying module iterator is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // Copy the context reference out of `self`, so that the closure
        // below doesn't need to borrow `self` while `self.iterator` is
        // mutably borrowed.
        let ctx = self.ctx;
        self.iterator.find_map(|(name, module)| {
            // Outputs are keyed by the full name of the module's root
            // protobuf message. Modules without an entry are skipped.
            let key = module.root_struct_descriptor.full_name();
            let module_output = ctx.module_outputs.get(key)?;
            Some((*name, module_output.as_ref()))
        })
    }
}

/// A structure that describes a rule.
pub struct Rule<'a, 'r> {
ctx: &'a ScanContext<'r>,
Expand Down
32 changes: 32 additions & 0 deletions yara-x/src/scanner/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,38 @@ fn module_output() {
assert_eq!(output.int32_one, Some(1_i32));
}

#[cfg(feature = "test_proto2-module")]
#[test]
fn module_outputs() {
    // A rule that imports `test_proto2`, the module whose output is
    // inspected below.
    let source = r#"
import "test_proto2"
rule test {
condition:
test_proto2.file_size == 3
}
"#;

    let rules = crate::compile(source).unwrap();
    let mut scanner = Scanner::new(&rules);
    let results = scanner.scan(b"").expect("scan should not fail");

    let mut module_iter = results.module_outputs();

    // The first item yielded must be the output of `test_proto2`.
    let (module_name, message) = module_iter
        .next()
        .expect("module outputs iterator should produce at least one item");

    assert_eq!(module_name, "test_proto2");

    // Downcast the dynamic message to the concrete protobuf type in order
    // to check one of its fields.
    let test_proto: &crate::modules::protos::test_proto2::TestProto2 =
        <dyn MessageDyn>::downcast_ref(message).unwrap();

    assert_eq!(test_proto.int32_one, Some(1_i32));

    // No other module output is expected.
    assert!(module_iter.next().is_none());
}

#[test]
fn variables_1() {
let mut compiler = crate::Compiler::new();
Expand Down

0 comments on commit ae3881b

Please sign in to comment.