From 2117e147895e187271932d5177eed44be14c88f2 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 26 Nov 2024 11:25:14 +0100 Subject: [PATCH 01/21] chore: bump version to 0.11.0 --- Cargo.lock | 18 +++++++++--------- Cargo.toml | 12 ++++++------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b1db82a6..69b4329b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4358,7 +4358,7 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yara-x" -version = "0.10.0" +version = "0.11.0" dependencies = [ "aho-corasick", "annotate-snippets", @@ -4429,7 +4429,7 @@ dependencies = [ [[package]] name = "yara-x-capi" -version = "0.10.0" +version = "0.11.0" dependencies = [ "cbindgen", "serde_json", @@ -4438,7 +4438,7 @@ dependencies = [ [[package]] name = "yara-x-cli" -version = "0.10.0" +version = "0.11.0" dependencies = [ "anyhow", "ascii_tree", @@ -4472,7 +4472,7 @@ dependencies = [ [[package]] name = "yara-x-fmt" -version = "0.10.0" +version = "0.11.0" dependencies = [ "bitmask", "bstr", @@ -4487,7 +4487,7 @@ dependencies = [ [[package]] name = "yara-x-macros" -version = "0.10.0" +version = "0.11.0" dependencies = [ "darling", "proc-macro2", @@ -4497,7 +4497,7 @@ dependencies = [ [[package]] name = "yara-x-parser" -version = "0.10.0" +version = "0.11.0" dependencies = [ "anyhow", "ascii_tree", @@ -4521,7 +4521,7 @@ dependencies = [ [[package]] name = "yara-x-proto" -version = "0.10.0" +version = "0.11.0" dependencies = [ "protobuf", "protobuf-codegen", @@ -4530,7 +4530,7 @@ dependencies = [ [[package]] name = "yara-x-proto-yaml" -version = "0.10.0" +version = "0.11.0" dependencies = [ "chrono", "globwalk", @@ -4545,7 +4545,7 @@ dependencies = [ [[package]] name = "yara-x-py" -version = "0.10.0" +version = "0.11.0" dependencies = [ "protobuf-json-mapping", "pyo3", diff --git a/Cargo.toml b/Cargo.toml index 2446e2ca..600d51df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ 
[workspace.package] -version = "0.10.0" +version = "0.11.0" authors = ["Victor M. Alvarez "] edition = "2021" homepage = "https://virustotal.github.io/yara-x" @@ -105,11 +105,11 @@ x509-parser = "0.16.0" yaml-rust = "0.4.5" yansi = "1.0.1" yara-x = { path = "lib" } -yara-x-fmt = { path = "fmt", version = "0.10.0" } -yara-x-macros = { path = "macros", version = "0.10.0" } -yara-x-parser = { path = "parser", version = "0.10.0" } -yara-x-proto = { path = "proto", version = "0.10.0" } -yara-x-proto-yaml = { path = "proto-yaml", version = "0.10.0" } +yara-x-fmt = { path = "fmt", version = "0.11.0" } +yara-x-macros = { path = "macros", version = "0.11.0" } +yara-x-parser = { path = "parser", version = "0.11.0" } +yara-x-proto = { path = "proto", version = "0.11.0" } +yara-x-proto-yaml = { path = "proto-yaml", version = "0.11.0" } zip = "2.1.1" # Special profile that builds a release binary with link-time optimization. From b0d699d0ac087770264bf8039f4723c1da07104c Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 26 Nov 2024 11:46:06 +0100 Subject: [PATCH 02/21] ci: prepare for cibuildwheel 3 cibuildwheel 3 will require Python 3.11+ and PyPy packages will be disabled by default. 
--- .github/workflows/release.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 3a93e307..4ba2e2de 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -138,7 +138,7 @@ jobs: - name: Install Python uses: actions/setup-python@v5 with: - python-version: '3.9' + python-version: '3.11' - run: rustup target add aarch64-apple-darwin x86_64-apple-darwin if: matrix.build == 'macos' @@ -155,6 +155,7 @@ jobs: - name: Build ${{ matrix.platform || matrix.os }} binaries run: cibuildwheel --output-dir wheelhouse py env: + CIBW_ENABLE: 'pypy' CIBW_BUILD: '${{ matrix.python-version }}-*' # wasmtime doesn't support i686 CIBW_SKIP: '*_i686 *-musllinux* *-win32' From ffcdd0d218746b6d35b789c024d649473c316ae6 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 26 Nov 2024 12:41:13 +0100 Subject: [PATCH 03/21] docs: add blog entry about rules profiling --- site/content/blog/rules-profiling/index.md | 91 ++++++++++++++++++++++ site/hugo_stats.json | 6 ++ 2 files changed, 97 insertions(+) create mode 100644 site/content/blog/rules-profiling/index.md diff --git a/site/content/blog/rules-profiling/index.md b/site/content/blog/rules-profiling/index.md new file mode 100644 index 00000000..1c45b984 --- /dev/null +++ b/site/content/blog/rules-profiling/index.md @@ -0,0 +1,91 @@ +--- +title: "Profiling your YARA rules" +description: "How to obtain information about the performance of your YARA rules" +summary: "" +date: 2024-11-26T00:00:00+01:00 +lastmod: 2024-11-26T00:00:00+01:00 +draft: false +weight: 50 +categories: [ ] +tags: [ ] +contributors: [ "Victor M. 
Alvarez" ] +pinned: false +homepage: false +seo: + title: "Rules profiling" # custom title (optional) + description: "Describes the new rules profiling feature introduced in YARA-X 0.11.0" # custom description (recommended) + canonical: "" # custom canonical URL (optional) + noindex: false # false (default) or true +--- + +Not all YARA rules perform equally; some can significantly slow down scanning +throughput. When working with a large set of rules, identifying which ones are +causing performance bottlenecks can be challenging, especially without the right +tools. + +To address this, YARA-X 0.11.0 introduces a new feature designed to streamline +the process of identifying slow rules: the `--profiling` option for +the `yr scan` +command. + +## Enabling rules profiling + +Because this feature incurs a slight performance overhead, it is disabled by +default. To use it, you must build YARA-X with profiling support enabled. This +can be done using the following command: + +```shell +cargo build --release --features=rules-profiling +``` + +Once built with profiling support, you can activate the feature by adding the +`--profiling` flag to the scan command. For example: + +```shell +yr scan --profiling my_rules.yar target_file +``` + +## How it works + +When the `--profiling` option is used, the `scan` command will operate as usual +while also collecting performance data for your rules. After the scan is +complete, +the profiling results will be displayed, highlighting the slowest rules and +their execution times. 
A sample output is shown below: + +``` +«««««««««««« PROFILING INFORMATION »»»»»»»»»»»» + +Slowest rules: + +* rule : some_slow_rule + namespace : default + pattern matching : 21.433µs + condition evaluation : 2.429054588s + TOTAL : 2.429076021s + +* rule : another_slow-rule + namespace : default + pattern matching : 5.790941033s + condition evaluation : 10.329µs + TOTAL : 5.790963123s +``` + +The profiling output lists the slowest-performing rules, ordered by total +execution time in descending order (the slowest rule appears first). Each +rule's performance is broken down into two components: + +* Pattern matching time: The time spent searching for patterns specified in the + rule. +* Condition evaluation time: The time spent evaluating the rule's conditions. + +By reporting these metrics separately, the profiling feature helps you determine +whether a rule's slowness is due to inefficient pattern matching or complex +condition evaluation. + +Rules with a total execution time below 100ms are excluded from the profiling +report to keep the output concise. If no rules meet the threshold, the profiling +section will remain empty, indicating that your rules are efficiently optimized. + +This new feature empowers users to fine-tune their rule sets by identifying and +addressing performance bottlenecks with ease. I hope you find it useful. 
\ No newline at end of file diff --git a/site/hugo_stats.json b/site/hugo_stats.json index fd89f49f..ccca8bba 100644 --- a/site/hugo_stats.json +++ b/site/hugo_stats.json @@ -387,6 +387,9 @@ "dyn", "dyntype", "dysymtab", + "enabling-rules-profiling", + "enabling-the-profiling-feature", + "enabling-the-rules-profiling", "entitlement_hash", "entropyoffset-size", "entropystring", @@ -398,6 +401,7 @@ "example-10", "example-11", "example-12", + "example-13", "example-2", "example-3", "example-4", @@ -446,6 +450,7 @@ "hexinteger", "hexmessage-integer", "higher-overall-performance", + "how-it-works", "identifier", "identifier-1", "imphash", @@ -579,6 +584,7 @@ "stricter-escaped-characters-in-regular-expressions", "subsystem", "sym", + "sym_hash", "symbind", "symtab", "symtype", From 6be66ba88d0f5c2db8065dc97ed79754abcfdb00 Mon Sep 17 00:00:00 2001 From: chudicek <125287497+chudicek@users.noreply.github.com> Date: Wed, 27 Nov 2024 17:20:36 +0100 Subject: [PATCH 04/21] feat(cuckoo-update): data processed via serde schema, module_export fns return option (#253) --- lib/src/modules/cuckoo/mod.rs | 441 +++++++++++++------------------ lib/src/modules/cuckoo/schema.rs | 196 ++++++++++++++ 2 files changed, 384 insertions(+), 253 deletions(-) create mode 100644 lib/src/modules/cuckoo/schema.rs diff --git a/lib/src/modules/cuckoo/mod.rs b/lib/src/modules/cuckoo/mod.rs index 741ed303..4c024086 100644 --- a/lib/src/modules/cuckoo/mod.rs +++ b/lib/src/modules/cuckoo/mod.rs @@ -1,312 +1,247 @@ -use std::cell::RefCell; - #[cfg(feature = "logging")] use log::error; -use serde_json::{Map, Value}; use crate::compiler::RegexpId; use crate::modules::prelude::*; use crate::modules::protos::cuckoo::*; +mod schema; #[cfg(test)] mod tests; +use std::cell::RefCell; +use std::rc::Rc; thread_local! 
{ - static CUCKOO_REPORT: RefCell>> = const { RefCell::new(None) }; + static LOCAL_DATA: RefCell>> = const { RefCell::new(None) }; } -#[module_main] -fn main(_data: &[u8], meta: Option<&[u8]>) -> Cuckoo { - if let Some(meta) = meta { - match serde_json::from_slice::(meta) { - Ok(Value::Object(json)) => CUCKOO_REPORT.set(Some(json)), - Ok(_) => { - #[cfg(feature = "logging")] - error!("cuckoo report is not a valid JSON") - } - #[cfg(feature = "logging")] - Err(err) => error!("can't parse cuckoo report: {}", err), - #[cfg(not(feature = "logging"))] - Err(_) => {} - } - } - Cuckoo::new() +fn get_local() -> Option> { + LOCAL_DATA.with(|data| data.borrow().clone()) } -#[module_export(name = "network.dns_lookup")] -fn network_dns_lookup(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - CUCKOO_REPORT.with_borrow(|report| { - let find_match = |objects: &Vec, field_name: &str| { - objects.iter().any(|object| { - object - .get(field_name) - .and_then(|val| val.as_str()) - .map(|val| ctx.regexp_matches(regexp_id, val.as_bytes())) - .unwrap_or(false) - }) - }; +fn set_local(value: schema::CuckooJson) { + LOCAL_DATA.with(|data| { + *data.borrow_mut() = Some(Rc::new(value)); + }); +} - // The top-level object contains a "network" key that contains - // network-related information. - let network = report.as_ref().and_then(|report| report.get("network")); +#[module_main] +fn main(_data: &[u8], meta: Option<&[u8]>) -> Cuckoo { + let parsed = + serde_json::from_slice::(meta.unwrap_or_default()); - // Recent versions of Cuckoo generate domain resolution information with - // this format: - // - // "domains": [ - // { - // "ip": "192.168.0.1", - // "domain": "foo.bar.com" - // } - // ] - // - // But older versions with this other format: - // - // "dns": [ - // { - // "ip": "192.168.0.1", - // "hostname": "foo.bar.com" - // } - // ] - // - // Additionally, the newer versions also have a "dns" field. 
So, let's try - // to locate the "domains" field first, if not found fall back to the older - // format. - if network - .and_then(|report| report.get("domains")) - .and_then(|domains| domains.as_array()) - .map(|domains| find_match(domains, "domain")) - .unwrap_or(false) - { - return true; + match parsed { + Ok(parsed) => { + set_local(parsed); } - - if network - .and_then(|report| report.get("dns")) - .and_then(|dns| dns.as_array()) - .map(|dns| find_match(dns, "hostname")) - .unwrap_or(false) - { - return true; + Err(e) => { + #[cfg(feature = "logging")] + error!("can't parse cuckoo report: {}", e); } + }; - false - }) -} - -enum RequestType { - Get, - Post, - Both, + Cuckoo::new() } -fn http_request( +#[module_export(name = "network.dns_lookup")] +fn network_dns_lookup_r( ctx: &ScanContext, regexp_id: RegexpId, - request_type: RequestType, -) -> bool { - CUCKOO_REPORT.with_borrow(|report| { - report - .as_ref() - .and_then(|report| report.get("network")) - .and_then(|network| network.get("http")) - .and_then(|http| http.as_array()) - .map(|http| { - http.iter().any(|request| { - let req_method = match request - .get("method") - .and_then(|req_method| req_method.as_str()) - { - Some(req_method) => req_method, - None => return false, - }; - - let req_uri = match request - .get("uri") - .and_then(|req_uri| req_uri.as_str()) - { - Some(req_uri) => req_uri, - None => return false, - }; - - match request_type { - RequestType::Get => { - if !req_method.eq_ignore_ascii_case("get") { - return false; - } - } - RequestType::Post => { - if !req_method.eq_ignore_ascii_case("post") { - return false; - } - } - RequestType::Both => { - if !req_method.eq_ignore_ascii_case("get") - && !req_method.eq_ignore_ascii_case("post") - { - return false; - } - } - } - - return ctx.regexp_matches(regexp_id, req_uri.as_bytes()); - }) +) -> Option { + Some( + get_local()? 
+ .network + .domains + .iter() + .flatten() + .filter(|domain| { + ctx.regexp_matches(regexp_id, domain.domain.as_bytes()) }) - .unwrap_or(false) - }) + .count() as _, + ) } #[module_export(name = "network.http_request")] -fn network_http_request(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - http_request(ctx, regexp_id, RequestType::Both) +fn network_http_request_r( + ctx: &ScanContext, + regexp_id: RegexpId, +) -> Option { + Some( + get_local()? + .network + .http + .iter() + .flatten() + .filter(|http| { + http.method.is_some() // ~> is request (is not response) + && ctx.regexp_matches(regexp_id, http.uri.as_bytes()) + }) + .count() as _, + ) } #[module_export(name = "network.http_get")] -fn network_http_get(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - http_request(ctx, regexp_id, RequestType::Get) +fn network_http_get_r(ctx: &ScanContext, regexp_id: RegexpId) -> Option { + Some( + get_local()? + .network + .http + .iter() + .flatten() + .filter(|http| { + http.method + .as_ref() + .map(|method| method.eq_ignore_ascii_case("get")) + .unwrap_or(false) + && ctx.regexp_matches(regexp_id, http.uri.as_bytes()) + }) + .count() as _, + ) } #[module_export(name = "network.http_post")] -fn network_http_post(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - http_request(ctx, regexp_id, RequestType::Post) -} - -#[module_export(name = "network.http_user_agent")] -fn network_http_user_agent(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - CUCKOO_REPORT.with_borrow(|report| { - report - .as_ref() - .and_then(|report| report.get("network")) - .and_then(|network| network.get("http")) - .and_then(|http| http.as_array()) - .map(|http| { - http.iter() - .filter_map(|request| request.get("user-agent")) - .filter_map(|ua| ua.as_str()) - .any(|ua| ctx.regexp_matches(regexp_id, ua.as_bytes())) +fn network_http_post_r(ctx: &ScanContext, regexp_id: RegexpId) -> Option { + Some( + get_local()? 
+ .network + .http + .iter() + .flatten() + .filter(|http| { + http.method + .as_ref() + .map(|method| method.eq_ignore_ascii_case("post")) + .unwrap_or(false) + && ctx.regexp_matches(regexp_id, http.uri.as_bytes()) }) - .unwrap_or(false) - }) + .count() as _, + ) } -fn network_conn( +#[module_export(name = "network.http_user_agent")] +fn network_http_user_agent_r( ctx: &ScanContext, regexp_id: RegexpId, - conn: &str, - port: i64, -) -> bool { - CUCKOO_REPORT.with_borrow(|report| { - report - .as_ref() - .and_then(|report| report.get("network")) - .and_then(|network| network.get(conn)) - .and_then(|connections| connections.as_array()) - .map(|connections| { - connections.iter().any(|conn| { - let dst_port = match conn - .get("dport") - .and_then(|dst_port| dst_port.as_i64()) - { - Some(dst_port) => dst_port, - None => return false, - }; - - let dst_addr = match conn - .get("dst") - .and_then(|dst_addr| dst_addr.as_str()) - { - Some(dst_addr) => dst_addr, - None => return false, - }; - - dst_port == port - && ctx.regexp_matches(regexp_id, dst_addr.as_bytes()) - }) +) -> Option { + Some( + get_local()? + .network + .http + .iter() + .flatten() + .flat_map(|http| http.user_agent.iter()) + .filter(|user_agent| { + ctx.regexp_matches(regexp_id, user_agent.as_bytes()) }) - .unwrap_or(false) - }) + .count() as _, + ) } #[module_export(name = "network.tcp")] -fn network_tcp(ctx: &ScanContext, regexp_id: RegexpId, port: i64) -> bool { - network_conn(ctx, regexp_id, "tcp", port) +fn network_tcp_ri( + ctx: &ScanContext, + dst_re: RegexpId, + port: i64, +) -> Option { + Some( + get_local()? 
+ .network + .tcp + .iter() + .flatten() + .filter(|tcp| { + tcp.dport == port as u64 + && tcp + .dst + .iter() + .chain(tcp.dst_domain.iter()) + .any(|dst| ctx.regexp_matches(dst_re, dst.as_bytes())) + }) + .count() as _, + ) } #[module_export(name = "network.udp")] -fn network_udp(ctx: &ScanContext, regexp_id: RegexpId, port: i64) -> bool { - network_conn(ctx, regexp_id, "udp", port) +fn network_udp_ri( + ctx: &ScanContext, + dst_re: RegexpId, + port: i64, +) -> Option { + Some( + get_local()? + .network + .udp + .iter() + .flatten() + .filter(|udp| { + udp.dport == port as u64 + && udp + .dst + .iter() + .chain(udp.dst_domain.iter()) + .any(|dst| ctx.regexp_matches(dst_re, dst.as_bytes())) + }) + .count() as _, + ) } #[module_export(name = "network.host")] -fn network_host(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - CUCKOO_REPORT.with_borrow(|report| { - report - .as_ref() - .and_then(|report| report.get("network")) - .and_then(|network| network.get("hosts")) - .and_then(|hosts| hosts.as_array()) - .map(|hosts| { - hosts - .iter() - .filter_map(|host| host.as_str()) - .any(|host| ctx.regexp_matches(regexp_id, host.as_bytes())) - }) - .unwrap_or(false) - }) +fn network_host_r(ctx: &ScanContext, re: RegexpId) -> Option { + Some( + get_local()? + .network + .hosts + .iter() + .flatten() + .filter(|host| ctx.regexp_matches(re, host.as_bytes())) + .count() as _, + ) } #[module_export(name = "sync.mutex")] -fn sync_mutex(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - CUCKOO_REPORT.with_borrow(|report| { - report - .as_ref() - .and_then(|report| report.get("behavior")) - .and_then(|behaviour| behaviour.get("summary")) - .and_then(|summary| summary.get("mutexes")) - .and_then(|mutexes| mutexes.as_array()) - .map(|mutexes| { - mutexes - .iter() - .filter_map(|m| m.as_str()) - .any(|m| ctx.regexp_matches(regexp_id, m.as_bytes())) - }) - .unwrap_or(false) - }) +fn sync_mutex_r(ctx: &ScanContext, mutex_re: RegexpId) -> Option { + Some( + get_local()? 
+ .behavior + .summary + .mutexes + .iter() + .flatten() + .filter(|mutex| ctx.regexp_matches(mutex_re, mutex.as_bytes())) + .count() as _, + ) } #[module_export(name = "filesystem.file_access")] -fn filesystem_file_access(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - CUCKOO_REPORT.with_borrow(|report| { - report - .as_ref() - .and_then(|report| report.get("behavior")) - .and_then(|behaviour| behaviour.get("summary")) - .and_then(|summary| summary.get("files")) - .and_then(|files| files.as_array()) - .map(|files| { - files - .iter() - .filter_map(|file| file.as_str()) - .any(|file| ctx.regexp_matches(regexp_id, file.as_bytes())) - }) - .unwrap_or(false) - }) +fn filesystem_file_access_r( + ctx: &ScanContext, + regexp_id: RegexpId, +) -> Option { + Some( + get_local()? + .behavior + .summary + .files + .iter() + .flatten() + .filter(|file| ctx.regexp_matches(regexp_id, file.as_bytes())) + .count() as _, + ) } #[module_export(name = "registry.key_access")] -fn registry_key_access(ctx: &ScanContext, regexp_id: RegexpId) -> bool { - CUCKOO_REPORT.with_borrow(|report| { - report - .as_ref() - .and_then(|report| report.get("behavior")) - .and_then(|behaviour| behaviour.get("summary")) - .and_then(|summary| summary.get("keys")) - .and_then(|keys| keys.as_array()) - .map(|keys| { - keys.iter() - .filter_map(|key| key.as_str()) - .any(|key| ctx.regexp_matches(regexp_id, key.as_bytes())) - }) - .unwrap_or(false) - }) +fn registry_key_access_r( + ctx: &ScanContext, + regexp_id: RegexpId, +) -> Option { + Some( + get_local()? 
+ .behavior + .summary + .keys + .iter() + .flatten() + .filter(|key| ctx.regexp_matches(regexp_id, key.as_bytes())) + .count() as _, + ) } diff --git a/lib/src/modules/cuckoo/schema.rs b/lib/src/modules/cuckoo/schema.rs new file mode 100644 index 00000000..dd96ab9a --- /dev/null +++ b/lib/src/modules/cuckoo/schema.rs @@ -0,0 +1,196 @@ +use std::fmt; + +use serde::{de::Visitor, Deserialize, Deserializer}; + +#[derive(serde::Deserialize, Debug)] +pub(super) struct DomainJson { + pub domain: String, +} + +#[derive(serde::Deserialize, Debug)] +pub(super) struct HttpJson { + #[serde(rename = "user-agent")] + pub user_agent: Option, + pub method: Option, // string ftw + pub uri: String, +} + +#[derive(serde::Deserialize, Debug)] +pub(super) struct TcpJson { + pub dst: Option, + pub dst_domain: Option, + pub dport: u64, +} + +#[derive(serde::Deserialize, Debug)] +pub(super) struct UdpJson { + pub dst: Option, + pub dst_domain: Option, + pub dport: u64, +} + +#[derive(/* serde::Deserialize, - custom */ Debug)] +pub(super) struct NetworkJson { + pub domains: Option>, + pub http: Option>, + pub tcp: Option>, + pub udp: Option>, + pub hosts: Option>, +} + +#[derive(serde::Deserialize, Debug)] +pub(super) struct SummaryJson { + pub mutexes: Option>, + pub files: Option>, + pub keys: Option>, +} + +#[derive(serde::Deserialize, Debug)] +pub(super) struct BehaviorJson { + pub summary: SummaryJson, +} + +#[derive(serde::Deserialize, Debug)] +pub(super) struct CuckooJson { + pub network: NetworkJson, + pub behavior: BehaviorJson, +} + +impl<'de> Deserialize<'de> for NetworkJson { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct MyVisitor; + + impl<'de> Visitor<'de> for MyVisitor { + type Value = NetworkJson; + + fn expecting(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.write_str("string or object") + } + + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + // must not parse `old_domains` 
before the whole map is searched + // if there is a `domains` field, then the value for the key `old_domains` should be ignored + // - specifically, it is okay if the `old_domains` does not have the expected structure if `domains` is present + let mut old_domains = None::; + let mut domains = None::; + + let mut http = None::>; + let mut tcp = None::>; + let mut udp = None::>; + let mut hosts = None::>; + + while let Some((key, val)) = + map.next_entry::()? + { + match key.as_str() { + "domains" => { + domains = Some(val); + } + "dns" => { + if domains.is_some() { + continue; // prefer "domains" over "dns" + } + + old_domains = Some(val); + } + "http" => { + http = Some( + match serde::Deserialize::deserialize(val) { + Ok(v) => v, + Err(e) => { + return Err(serde::de::Error::custom( + e, + )); + } + }, + ); + } + "tcp" => { + tcp = Some(match serde::Deserialize::deserialize( + val, + ) { + Ok(v) => v, + Err(e) => { + return Err(serde::de::Error::custom(e)); + } + }); + } + "udp" => { + udp = Some(match serde::Deserialize::deserialize( + val, + ) { + Ok(v) => v, + Err(e) => { + return Err(serde::de::Error::custom(e)); + } + }); + } + "hosts" => { + hosts = Some( + match serde::Deserialize::deserialize(val) { + Ok(v) => v, + Err(e) => { + return Err(serde::de::Error::custom( + e, + )); + } + }, + ); + } + _ => {} + } + } + + #[derive(serde::Deserialize, Debug)] + struct OldDomainJson { + pub hostname: String, + } + + let domains: Option> = + match (domains, old_domains) { + (Some(domains), _) => { + match serde::Deserialize::deserialize(domains) { + Ok(v) => Some(v), + Err(e) => { + return Err(serde::de::Error::custom(e)); + } + } + } + (None, Some(old_domains)) => { + let old_domains: Vec = + match serde::Deserialize::deserialize( + old_domains, + ) { + Ok(v) => v, + Err(e) => { + return Err(serde::de::Error::custom( + e, + )); + } + }; + + Some( + old_domains + .into_iter() + .map(|old| DomainJson { + domain: old.hostname, + }) + .collect(), + ) + } + (None, None) 
=> None, // domains field is optional + }; + + Ok(NetworkJson { domains, http, tcp, udp, hosts }) + } + } + + deserializer.deserialize_any(MyVisitor) + } +} From f85d2a132bceefa32f0b2bf1aba6bdd2a0158c0c Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Wed, 27 Nov 2024 17:44:22 +0100 Subject: [PATCH 05/21] style: limit code to 80 columns and simplify code in `cuckoo` module. --- lib/src/modules/cuckoo/schema.rs | 74 ++++++++++---------------------- 1 file changed, 22 insertions(+), 52 deletions(-) diff --git a/lib/src/modules/cuckoo/schema.rs b/lib/src/modules/cuckoo/schema.rs index dd96ab9a..0502731c 100644 --- a/lib/src/modules/cuckoo/schema.rs +++ b/lib/src/modules/cuckoo/schema.rs @@ -1,5 +1,6 @@ use std::fmt; +use serde::de::Error; use serde::{de::Visitor, Deserialize, Deserializer}; #[derive(serde::Deserialize, Debug)] @@ -74,9 +75,11 @@ impl<'de> Deserialize<'de> for NetworkJson { where A: serde::de::MapAccess<'de>, { - // must not parse `old_domains` before the whole map is searched - // if there is a `domains` field, then the value for the key `old_domains` should be ignored - // - specifically, it is okay if the `old_domains` does not have the expected structure if `domains` is present + // Must not parse `old_domains` before the whole map is + // searched if there is a `domains` field, then the value for + // the key `old_domains` should be ignored - specifically, it + // is okay if the `old_domains` does not have the expected + // structure if `domains` is present. 
let mut old_domains = None::; let mut domains = None::; @@ -96,51 +99,30 @@ impl<'de> Deserialize<'de> for NetworkJson { if domains.is_some() { continue; // prefer "domains" over "dns" } - old_domains = Some(val); } "http" => { http = Some( - match serde::Deserialize::deserialize(val) { - Ok(v) => v, - Err(e) => { - return Err(serde::de::Error::custom( - e, - )); - } - }, + Deserialize::deserialize(val) + .map_err(Error::custom)?, ); } "tcp" => { - tcp = Some(match serde::Deserialize::deserialize( - val, - ) { - Ok(v) => v, - Err(e) => { - return Err(serde::de::Error::custom(e)); - } - }); + tcp = Some( + Deserialize::deserialize(val) + .map_err(Error::custom)?, + ); } "udp" => { - udp = Some(match serde::Deserialize::deserialize( - val, - ) { - Ok(v) => v, - Err(e) => { - return Err(serde::de::Error::custom(e)); - } - }); + udp = Some( + Deserialize::deserialize(val) + .map_err(Error::custom)?, + ); } "hosts" => { hosts = Some( - match serde::Deserialize::deserialize(val) { - Ok(v) => v, - Err(e) => { - return Err(serde::de::Error::custom( - e, - )); - } - }, + Deserialize::deserialize(val) + .map_err(Error::custom)?, ); } _ => {} @@ -155,25 +137,13 @@ impl<'de> Deserialize<'de> for NetworkJson { let domains: Option> = match (domains, old_domains) { (Some(domains), _) => { - match serde::Deserialize::deserialize(domains) { - Ok(v) => Some(v), - Err(e) => { - return Err(serde::de::Error::custom(e)); - } - } + Deserialize::deserialize(domains) + .map_err(Error::custom)? } (None, Some(old_domains)) => { let old_domains: Vec = - match serde::Deserialize::deserialize( - old_domains, - ) { - Ok(v) => v, - Err(e) => { - return Err(serde::de::Error::custom( - e, - )); - } - }; + Deserialize::deserialize(old_domains) + .map_err(Error::custom)?; Some( old_domains From f3ea4f36f05528d9f42b5882c30888754a207e1c Mon Sep 17 00:00:00 2001 From: "Victor M. 
Alvarez" Date: Thu, 28 Nov 2024 11:14:52 +0100 Subject: [PATCH 06/21] fix: large memory consumption while parsing corrupted PE file. Parsing file 9bddb45c44d9c25a4f97ef800cb110de5e6a15349bac05d389c8bda37902f25a was causing a large memory consumption due to the high number of (malformed) import entries. We were limiting the number of functions per DLL, but not the overall number of functions, which in this case was very high. --- lib/src/modules/pe/parser.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/src/modules/pe/parser.rs b/lib/src/modules/pe/parser.rs index b0585504..f268ddc9 100644 --- a/lib/src/modules/pe/parser.rs +++ b/lib/src/modules/pe/parser.rs @@ -1752,9 +1752,9 @@ impl<'a> PE<'a> { /// arrays equivalent to the INT and IAT. /// /// Another differences between ordinal and delayed imports is that in - /// in delayed imports the INT and IAT can contain virtual addresses - /// instead of relative virtual address (RVAs). Whether they contain one - /// or the other depends on a bit in the `attributes` field in the + /// delayed imports the INT and IAT can contain virtual addresses instead + /// of relative virtual address (RVAs). Whether they contain one or the + /// other depends on a bit in the `attributes` field in the /// IMAGE_DELAYLOAD_DESCRIPTOR structure. fn parse_import_impl

( &self, @@ -1789,6 +1789,8 @@ impl<'a> PE<'a> { }), ); + let mut num_imported_funcs = 0; + for mut descriptor in import_descriptors.take(Self::MAX_PE_IMPORTS) { // If the values in the descriptor are virtual addresses, convert // them to relative virtual addresses (RVAs) by subtracting the @@ -1900,10 +1902,11 @@ impl<'a> PE<'a> { } if !funcs.is_empty() { + num_imported_funcs += funcs.len(); imported_funcs.push((dll_name, funcs)); } - if imported_funcs.len() >= Self::MAX_PE_IMPORTS { + if num_imported_funcs >= Self::MAX_PE_IMPORTS { break; } } From 850745c09bb7481989f2184d5b9511dee900bfa4 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Thu, 28 Nov 2024 15:28:24 +0100 Subject: [PATCH 07/21] style: fix clippy warning --- lib/src/modules/cuckoo/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/src/modules/cuckoo/mod.rs b/lib/src/modules/cuckoo/mod.rs index 4c024086..a086384c 100644 --- a/lib/src/modules/cuckoo/mod.rs +++ b/lib/src/modules/cuckoo/mod.rs @@ -34,10 +34,12 @@ fn main(_data: &[u8], meta: Option<&[u8]>) -> Cuckoo { Ok(parsed) => { set_local(parsed); } + #[cfg(feature = "logging")] Err(e) => { - #[cfg(feature = "logging")] error!("can't parse cuckoo report: {}", e); } + #[cfg(not(feature = "logging"))] + Err(_) => {} }; Cuckoo::new() From 07e91260049d8cc74bfc2a9650632844173ce01e Mon Sep 17 00:00:00 2001 From: "Victor M. 
Alvarez" Date: Fri, 29 Nov 2024 13:02:32 +0100 Subject: [PATCH 08/21] chore: upgrade `serde` to version 1.0.215 --- Cargo.lock | 8 ++++---- Cargo.toml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 69b4329b..009f6650 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2981,9 +2981,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.210" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] @@ -3001,9 +3001,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 600d51df..c6694c66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,8 +89,8 @@ regex-automata = "0.4.7" roxmltree = "0.20.0" rsa = "0.9.6" rustc-hash = "2.0.0" -serde = "=1.0.210" -serde_json = "1.0.132" +serde = "1.0.215" +serde_json = "1.0.133" sha1 = "0.10.6" sha2 = "0.10.8" smallvec = "1.13.2" From ed0f985aa3b379a1c11d3b68513195e01d531480 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 13:06:43 +0100 Subject: [PATCH 09/21] chore: bump version to 0.11.1 --- Cargo.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c6694c66..c15b9e38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace.package] -version = "0.11.0" +version = "0.11.1" authors = ["Victor M. 
Alvarez "] edition = "2021" homepage = "https://virustotal.github.io/yara-x" @@ -105,11 +105,11 @@ x509-parser = "0.16.0" yaml-rust = "0.4.5" yansi = "1.0.1" yara-x = { path = "lib" } -yara-x-fmt = { path = "fmt", version = "0.11.0" } -yara-x-macros = { path = "macros", version = "0.11.0" } -yara-x-parser = { path = "parser", version = "0.11.0" } -yara-x-proto = { path = "proto", version = "0.11.0" } -yara-x-proto-yaml = { path = "proto-yaml", version = "0.11.0" } +yara-x-fmt = { path = "fmt", version = "0.11.1" } +yara-x-macros = { path = "macros", version = "0.11.1" } +yara-x-parser = { path = "parser", version = "0.11.1" } +yara-x-proto = { path = "proto", version = "0.11.1" } +yara-x-proto-yaml = { path = "proto-yaml", version = "0.11.1" } zip = "2.1.1" # Special profile that builds a release binary with link-time optimization. From faed391d1dd11fe6e56aabc1514747d058459a01 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 16:45:51 +0100 Subject: [PATCH 10/21] ci: use specific OS versions while building Python extensions (#255) The creation of Python packages started failing with the following error: ``` yara_x.pypy38-pp73-darwin.so has a minimum target of 10.12 Set the environment variable 'MACOSX_DEPLOYMENT_TARGET=10.12' to update minimum supported macOS for this wheel. ``` --- .github/workflows/release.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 4ba2e2de..0e022e0b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -107,14 +107,14 @@ jobs: - windows include: - build: linux - os: ubuntu-latest + os: ubuntu-22.04 - build: macos - os: macos-latest + os: macos-13 arch: 'arm64 x86_64' - build: windows - os: windows-latest + os: windows-2022 arch: 'x86 AMD64' steps: From fef1c63b1fe0e449eac547bb5eaccbb620fee88f Mon Sep 17 00:00:00 2001 From: "Victor M. 
Alvarez" Date: Fri, 29 Nov 2024 16:54:18 +0100 Subject: [PATCH 11/21] chore: update Cargo.lock --- Cargo.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 009f6650..bba491ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4358,7 +4358,7 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yara-x" -version = "0.11.0" +version = "0.11.1" dependencies = [ "aho-corasick", "annotate-snippets", @@ -4429,7 +4429,7 @@ dependencies = [ [[package]] name = "yara-x-capi" -version = "0.11.0" +version = "0.11.1" dependencies = [ "cbindgen", "serde_json", @@ -4438,7 +4438,7 @@ dependencies = [ [[package]] name = "yara-x-cli" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "ascii_tree", @@ -4472,7 +4472,7 @@ dependencies = [ [[package]] name = "yara-x-fmt" -version = "0.11.0" +version = "0.11.1" dependencies = [ "bitmask", "bstr", @@ -4487,7 +4487,7 @@ dependencies = [ [[package]] name = "yara-x-macros" -version = "0.11.0" +version = "0.11.1" dependencies = [ "darling", "proc-macro2", @@ -4497,7 +4497,7 @@ dependencies = [ [[package]] name = "yara-x-parser" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "ascii_tree", @@ -4521,7 +4521,7 @@ dependencies = [ [[package]] name = "yara-x-proto" -version = "0.11.0" +version = "0.11.1" dependencies = [ "protobuf", "protobuf-codegen", @@ -4530,7 +4530,7 @@ dependencies = [ [[package]] name = "yara-x-proto-yaml" -version = "0.11.0" +version = "0.11.1" dependencies = [ "chrono", "globwalk", @@ -4545,7 +4545,7 @@ dependencies = [ [[package]] name = "yara-x-py" -version = "0.11.0" +version = "0.11.1" dependencies = [ "protobuf-json-mapping", "pyo3", From 595a2bd5c5d5c8fe1198e65ffad2c70d45f4df5b Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 17:01:22 +0100 Subject: [PATCH 12/21] chore(capi+go): don't enable rules profiling by default. 
Also improves the profiling-related documentation in the Go library. --- capi/Cargo.toml | 4 +-- capi/include/yara_x.h | 5 ++- capi/src/lib.rs | 3 ++ capi/src/scanner.rs | 77 ++++++++++++++++++++++--------------------- go/scanner.go | 23 +++++++++++-- go/scanner_test.go | 12 ------- 6 files changed, 70 insertions(+), 54 deletions(-) diff --git a/capi/Cargo.toml b/capi/Cargo.toml index b130b6a4..afec7f4f 100644 --- a/capi/Cargo.toml +++ b/capi/Cargo.toml @@ -12,7 +12,7 @@ homepage.workspace = true [features] # The `capi` feature is required by `cargo-c`. -default = ["capi", "rules-profiling"] +default = ["capi"] capi = [] # When enabled, the serialization of compiled rules include native code for @@ -29,7 +29,7 @@ native-code-serialization = ["yara-x/native-code-serialization"] # Enables rules profiling. # -# This feature is enabled by default. +# This feature is disabled by default. rules-profiling = ["yara-x/rules-profiling"] diff --git a/capi/include/yara_x.h b/capi/include/yara_x.h index 65289970..91913788 100644 --- a/capi/include/yara_x.h +++ b/capi/include/yara_x.h @@ -80,6 +80,9 @@ typedef enum YRX_RESULT { SERIALIZATION_ERROR, // An error returned when a rule doesn't have any metadata. NO_METADATA, + // An error returned in cases where some API is not supported because the + // library was not built with the required features. + NOT_SUPPORTED, } YRX_RESULT; // A compiler that takes YARA source code and produces compiled rules. @@ -708,7 +711,7 @@ enum YRX_RESULT yrx_scanner_set_global_float(struct YRX_SCANNER *scanner, // Iterates over the top N most expensive rules, calling the callback for // each rule. // -// Requires the `rules-profiling` feature. +// Requires the `rules-profiling` feature, otherwise returns `NOT_SUPPORTED`. // // See [`YRX_MOST_EXPENSIVE_RULES_CALLBACK`] for more details.
enum YRX_RESULT yrx_scanner_iter_most_expensive_rules(struct YRX_SCANNER *scanner, diff --git a/capi/src/lib.rs b/capi/src/lib.rs index 1442820d..c52d28fe 100644 --- a/capi/src/lib.rs +++ b/capi/src/lib.rs @@ -152,6 +152,9 @@ pub enum YRX_RESULT { SERIALIZATION_ERROR, /// An error returned when a rule doesn't have any metadata. NO_METADATA, + /// An error returned in cases where some API is not supported because the + /// library was not built with the required features. + NOT_SUPPORTED, } /// Returns the error message for the most recent function in this API diff --git a/capi/src/scanner.rs b/capi/src/scanner.rs index f6aacedd..704c196a 100644 --- a/capi/src/scanner.rs +++ b/capi/src/scanner.rs @@ -61,11 +61,11 @@ pub unsafe extern "C" fn yrx_scanner_set_timeout( scanner: *mut YRX_SCANNER, timeout: u64, ) -> YRX_RESULT { - if scanner.is_null() { - return YRX_RESULT::INVALID_ARGUMENT; - } + let scanner = match scanner.as_mut() { + Some(s) => s, + None => return YRX_RESULT::INVALID_ARGUMENT, + }; - let scanner = scanner.as_mut().unwrap(); scanner.inner.set_timeout(Duration::from_secs(timeout)); YRX_RESULT::SUCCESS @@ -84,16 +84,16 @@ pub unsafe extern "C" fn yrx_scanner_scan( ) -> YRX_RESULT { _yrx_set_last_error::(None); - if scanner.is_null() { - return YRX_RESULT::INVALID_ARGUMENT; - } + let scanner = match scanner.as_mut() { + Some(s) => s, + None => return YRX_RESULT::INVALID_ARGUMENT, + }; let data = match slice_from_ptr_and_len(data, len) { Some(data) => data, None => return YRX_RESULT::INVALID_ARGUMENT, }; - let scanner = scanner.as_mut().unwrap(); let scan_results = scanner.inner.scan(data); if let Err(err) = scan_results { @@ -178,9 +178,10 @@ pub unsafe extern "C" fn yrx_scanner_set_module_output( data: *const u8, len: usize, ) -> YRX_RESULT { - if scanner.is_null() { - return YRX_RESULT::INVALID_ARGUMENT; - } + let scanner = match scanner.as_mut() { + Some(s) => s, + None => return YRX_RESULT::INVALID_ARGUMENT, + }; let module_name = match 
CStr::from_ptr(name).to_str() { Ok(name) => name, @@ -195,8 +196,6 @@ pub unsafe extern "C" fn yrx_scanner_set_module_output( None => return YRX_RESULT::INVALID_ARGUMENT, }; - let scanner = scanner.as_mut().unwrap(); - match scanner.inner.set_module_output_raw(module_name, data) { Ok(_) => { _yrx_set_last_error::(None); @@ -216,9 +215,10 @@ unsafe extern "C" fn yrx_scanner_set_global< ident: *const c_char, value: T, ) -> YRX_RESULT { - if scanner.is_null() { - return YRX_RESULT::INVALID_ARGUMENT; - } + let scanner = match scanner.as_mut() { + Some(s) => s, + None => return YRX_RESULT::INVALID_ARGUMENT, + }; let ident = match CStr::from_ptr(ident).to_str() { Ok(ident) => ident, @@ -228,8 +228,6 @@ unsafe extern "C" fn yrx_scanner_set_global< } }; - let scanner = scanner.as_mut().unwrap(); - match scanner.inner.set_global(ident, value) { Ok(_) => { _yrx_set_last_error::(None); @@ -327,35 +325,40 @@ pub type YRX_MOST_EXPENSIVE_RULES_CALLBACK = extern "C" fn( /// Iterates over the top N most expensive rules, calling the callback for /// each rule. /// -/// Requires the `rules-profiling` feature. +/// Requires the `rules-profiling` feature, otherwise returns `NOT_SUPPORTED`. /// /// See [`YRX_MOST_EXPENSIVE_RULES_CALLBACK`] for more details.
-#[cfg(feature = "rules-profiling")] #[no_mangle] +#[allow(unused_variables)] pub unsafe extern "C" fn yrx_scanner_iter_most_expensive_rules( scanner: *mut YRX_SCANNER, n: usize, callback: YRX_MOST_EXPENSIVE_RULES_CALLBACK, user_data: *mut c_void, ) -> YRX_RESULT { - if scanner.is_null() { - return YRX_RESULT::INVALID_ARGUMENT; - } - - let scanner = scanner.as_ref().unwrap(); + #[cfg(not(feature = "rules-profiling"))] + return YRX_RESULT::NOT_SUPPORTED; + + #[cfg(feature = "rules-profiling")] + { + let scanner = match scanner.as_ref() { + Some(s) => s, + None => return YRX_RESULT::INVALID_ARGUMENT, + }; - for profiling_info in scanner.inner.most_expensive_rules(n) { - let namespace = CString::new(profiling_info.namespace).unwrap(); - let rule = CString::new(profiling_info.rule).unwrap(); + for profiling_info in scanner.inner.most_expensive_rules(n) { + let namespace = CString::new(profiling_info.namespace).unwrap(); + let rule = CString::new(profiling_info.rule).unwrap(); + + callback( + namespace.as_ptr(), + rule.as_ptr(), + profiling_info.pattern_matching_time.as_secs_f64(), + profiling_info.condition_exec_time.as_secs_f64(), + user_data, + ); + } - callback( - namespace.as_ptr(), - rule.as_ptr(), - profiling_info.pattern_matching_time.as_secs_f64(), - profiling_info.condition_exec_time.as_secs_f64(), - user_data, - ); + YRX_RESULT::SUCCESS } - - YRX_RESULT::SUCCESS } diff --git a/go/scanner.go b/go/scanner.go index 60be3a79..e765012d 100644 --- a/go/scanner.go +++ b/go/scanner.go @@ -247,6 +247,13 @@ func (s *Scanner) Scan(buf []byte) (*ScanResults, error) { return scanResults, err } +// ProfilingInfo contains profiling information about a YARA rule. +// +// For each rule it contains: the rule's namespace, the rule's name, +// the time spent in matching patterns declared by the rule, and the time +// spent evaluating the rule's condition. +// +// See [Scanner.MostExpensiveRules]. 
type ProfilingInfo struct { Namespace string Rule string @@ -276,16 +283,28 @@ func mostExpensiveRulesCallback( }) } +// MostExpensiveRules returns information about the slowest rules and how much +// time they spent matching patterns and executing their conditions. +// +// In order to use this function the YARA-X C library must be built with +// support for rules profiling, which is done by enabling the `rules-profiling` +// feature. Otherwise, calling this function will cause a panic. func (s *Scanner) MostExpensiveRules(n int) []ProfilingInfo { profilingInfo := make([]ProfilingInfo, 0) mostExpensiveRules := cgo.NewHandle(&profilingInfo) defer mostExpensiveRules.Delete() - if C._yrx_scanner_iter_most_expensive_rules( + result := C._yrx_scanner_iter_most_expensive_rules( s.cScanner, C.size_t(n), C.YRX_MOST_EXPENSIVE_RULES_CALLBACK(C.mostExpensiveRulesCallback), - C.uintptr_t(mostExpensiveRules)) != C.SUCCESS { + C.uintptr_t(mostExpensiveRules)) + + if result == C.NOT_SUPPORTED { + panic("MostExpensiveRules requires that the YARA-X C library is built with the `rules-profiling` feature") + } + + if result != C.SUCCESS { panic("yrx_scanner_iter_most_expensive_rules failed") } diff --git a/go/scanner_test.go b/go/scanner_test.go index 25937a63..bd59e288 100644 --- a/go/scanner_test.go +++ b/go/scanner_test.go @@ -91,18 +91,6 @@ func TestScannerTimeout(t *testing.T) { assert.ErrorIs(t, err, ErrTimeout) } -func TestScannerMostExpensiveRules(t *testing.T) { - r, _ := Compile("rule t { strings: $a = /a(.*)*a/ condition: $a }") - s := NewScanner(r) - _, err := s.Scan(bytes.Repeat([]byte("a"), 5000)) - assert.NoError(t, err) - profilingInfo := s.MostExpensiveRules(1) - assert.Equal(t, "t", profilingInfo[0].Rule) - assert.Equal(t, "default", profilingInfo[0].Namespace) - assert.Greater(t, profilingInfo[0].PatternMatchingTime, time.Duration(0)) - assert.Greater(t, profilingInfo[0].ConditionExecTime, time.Duration(0)) -} - func TestScannerMetadata(t *testing.T) { r, _ := 
Compile(`rule t { meta: From c816fb3af973ab6e569b5bf9efc20943ea5ac3ed Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 17:10:16 +0100 Subject: [PATCH 13/21] ci: go back to the latest version of each OS for building the Python library The issue mentioned in #255 is not related to the OS used. Also stop building the Python library with Python 3.8 and 3.9 which are pretty old anyways. --- .github/workflows/release.yaml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 0e022e0b..a5e4c983 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -97,9 +97,7 @@ jobs: fail-fast: false matrix: python-version: - - cp39 - - pp38 - - pp39 + - cp310 - pp310 build: - linux @@ -107,14 +105,14 @@ jobs: - windows include: - build: linux - os: ubuntu-22.04 + os: ubuntu-latest - build: macos - os: macos-13 + os: macos-latest arch: 'arm64 x86_64' - build: windows - os: windows-2022 + os: windows-latest arch: 'x86 AMD64' steps: From c4b48b82ddd10eefa34fa00c1bff65a2e2322c4c Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 17:25:30 +0100 Subject: [PATCH 14/21] ci: allow running the release workflow manually. Also add the `MACOSX_DEPLOYMENT_TARGET` environment variable while building the Python library. 
--- .github/workflows/release.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a5e4c983..04bb2488 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -3,6 +3,8 @@ on: create: tags: - 'v*' + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: jobs: build: if: ${{ startsWith(github.ref, 'refs/tags/v') }} @@ -172,6 +174,8 @@ jobs: CIBW_TEST_SKIP: '*-macosx_arm64 *-macosx_universal2:arm64' CIBW_BUILD_VERBOSITY: 1 + MACOSX_DEPLOYMENT_TARGET: '10.12' + - name: Upload artifacts uses: actions/upload-artifact@v4 with: From 5990c84704f0929dc28fd6b9b033e29fed6852d6 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 17:43:50 +0100 Subject: [PATCH 15/21] ci: build packages for older Python versions again. --- .github/workflows/release.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 04bb2488..466857a5 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -99,7 +99,9 @@ jobs: fail-fast: false matrix: python-version: - - cp310 + - cp39 + - pp38 + - pp39 - pp310 build: - linux From 15867283089e83bbeb94b7a4a097030231e5c999 Mon Sep 17 00:00:00 2001 From: "Victor M. 
Alvarez" Date: Fri, 29 Nov 2024 18:13:40 +0100 Subject: [PATCH 16/21] ci: bump maturin to version 1.7.6 --- .github/workflows/release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 466857a5..f33b11f4 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -151,7 +151,7 @@ jobs: - name: Build sdist if: matrix.build == 'linux' && matrix.python-version == 'cp39' run: | - pip install maturin==1.7.1 + pip install maturin==1.7.6 maturin sdist --manifest-path py/Cargo.toml -o wheelhouse - name: Build ${{ matrix.platform || matrix.os }} binaries From 3503d72e81d25102422f376da16621f30a42ff7f Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 18:35:09 +0100 Subject: [PATCH 17/21] chore: drop support for Python 3.8 --- .github/workflows/python.yaml | 2 +- .github/workflows/release.yaml | 3 +-- py/pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 3e78e3fa..3fbae814 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] os: [ ubuntu-latest, macos-latest, windows-latest ] runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index f33b11f4..01bbb9fb 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -100,7 +100,6 @@ jobs: matrix: python-version: - cp39 - - pp38 - pp39 - pp310 build: @@ -140,7 +139,7 @@ jobs: - name: Install Python uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: '3.12' - run: rustup target add aarch64-apple-darwin x86_64-apple-darwin if: matrix.build == 'macos' diff --git a/py/pyproject.toml b/py/pyproject.toml 
index 6065eb58..ae647710 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "yara-x" description = "Python bindings for YARA-X" -requires-python = ">=3.8" +requires-python = ">=3.9" readme = "README.md" keywords = ["pattern-matching", "cybersecurity", "forensics", "malware", "yara"] classifiers = [ From 508a2433f07f22c249f1fdc53ab261abc92e9448 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 19:00:16 +0100 Subject: [PATCH 18/21] ci: upgrade `gh-action-pypi-publish` to version 1.12 --- .github/workflows/release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 01bbb9fb..0d213087 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -248,7 +248,7 @@ jobs: path: dist - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@release/v1.12 with: #repository-url: https://test.pypi.org/legacy/ skip-existing: true From 0b3dc9bdeb16599830a3d94a4fe86bcb0a5dc8b5 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 19:23:08 +0100 Subject: [PATCH 19/21] ci: pin to a specific version of maturin This was the version that built a wheel package with Metadata-Version 2.3, which seems to be the highest version accepted by `pypa/gh-action-pypi-publish`; newer versions of `maturin` seem to be producing packages with Metadata-Version 2.4. --- py/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/pyproject.toml b/py/pyproject.toml index ae647710..36b8fd16 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=1.0,<2.0"] +requires = ["maturin==1.7.4"] build-backend = "maturin" [project] From ee65c741941c9675da075334fce6b4fe90091639 Mon Sep 17 00:00:00 2001 From: "Victor M.
Alvarez" Date: Fri, 29 Nov 2024 19:35:47 +0100 Subject: [PATCH 20/21] ci: pin to maturin 1.7.4 while creating the source package. See: 0b3dc9bdeb16599830a3d94a4fe86bcb0a5dc8b5 --- .github/workflows/release.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 0d213087..23267219 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -150,7 +150,7 @@ jobs: - name: Build sdist if: matrix.build == 'linux' && matrix.python-version == 'cp39' run: | - pip install maturin==1.7.6 + pip install maturin==1.7.4 maturin sdist --manifest-path py/Cargo.toml -o wheelhouse - name: Build ${{ matrix.platform || matrix.os }} binaries From 8cb1ac57fedabb0cd7e39b62dab6142cff189e77 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Nov 2024 19:42:25 +0100 Subject: [PATCH 21/21] docs: update documentation about minimum supported version of Python. Python 3.8 has reached its end-of-life and is not supported anymore. --- py/README.md | 2 +- site/content/docs/api/python.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/py/README.md b/py/README.md index 3114bb60..17a32f7e 100644 --- a/py/README.md +++ b/py/README.md @@ -6,7 +6,7 @@ ![GitHub Repo stars](https://img.shields.io/github/stars/VirusTotal/yara-x) The official Python library for [YARA-X](https://virustotal.github.io/yara-x). -Supports Python 3.8+ in Linux, MacOS and Windows. +Supports Python 3.9+ in Linux, MacOS and Windows. ```python import yara_x diff --git a/site/content/docs/api/python.md b/site/content/docs/api/python.md index 99f0f58e..80a8c087 100644 --- a/site/content/docs/api/python.md +++ b/site/content/docs/api/python.md @@ -22,7 +22,7 @@ Python is a popular language among YARA users. They use Python for all kinds of automation tasks, and the YARA-X ecosystem wouldn't be complete without the possibility of using it from Python programs. 
-YARA-X offers support for Python 3.8 or later, in Linux, MacOS and Windows. +YARA-X offers support for Python 3.9 or later, in Linux, MacOS and Windows. ## Installation