diff --git a/cli/src/commands/dump.rs b/cli/src/commands/dump.rs index c150b7b3..cd0794ef 100644 --- a/cli/src/commands/dump.rs +++ b/cli/src/commands/dump.rs @@ -20,6 +20,8 @@ enum SupportedModules { Elf, Pe, Dotnet, + Olecf, + Vba } #[derive(Debug, Clone, ValueEnum)] @@ -111,6 +113,12 @@ pub fn exec_dump(args: &ArgMatches) -> anyhow::Result<()> { if !requested_modules.contains(&&SupportedModules::Pe) { module_output.pe = MessageField::none() } + if !requested_modules.contains(&&SupportedModules::Olecf) { + module_output.olecf = MessageField::none() + } + if !requested_modules.contains(&&SupportedModules::Vba) { + module_output.vba = MessageField::none() + } } else { // Module was not specified, only show those that produced meaningful // results, the rest are cleared out. @@ -131,6 +139,12 @@ pub fn exec_dump(args: &ArgMatches) -> anyhow::Result<()> { if !module_output.pe.is_pe() { module_output.pe = MessageField::none() } + if !module_output.olecf.is_olecf() { + module_output.olecf = MessageField::none() + } + if !module_output.vba.has_macros() { + module_output.vba = MessageField::none() + } } match output_format { diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 4e112e8c..a9260313 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -146,6 +146,9 @@ magic-module = [ # The `math` module. math-module = [] +# The `olecf` module +olecf-module = [] + # The `pe` module parses PE files. pe-module = [ "dep:const-oid", @@ -182,6 +185,9 @@ text-module = [ # conditions of a rule to check against other epoch time. time-module = [] +# The `vba` module +vba-module = [] + # Features that are enabled by default. 
default = [ "constant-folding", @@ -194,10 +200,12 @@ default = [ "macho-module", "math-module", "hash-module", + "olecf-module", "pe-module", "string-module", "time-module", "lnk-module", + "vba-module", "test_proto2-module", "test_proto3-module", ] @@ -260,6 +268,7 @@ x509-parser = { workspace = true, optional = true } yansi = { workspace = true } yara-x-macros = { workspace = true } yara-x-parser = { workspace = true, features = ["serde"] } +zip = { workspace = true } lingua = { version = "1.6.2", optional = true, default-features = false, features = ["english", "german", "french", "spanish"] } diff --git a/lib/src/modules/add_modules.rs b/lib/src/modules/add_modules.rs index a9a52808..9d9fdd56 100644 --- a/lib/src/modules/add_modules.rs +++ b/lib/src/modules/add_modules.rs @@ -18,6 +18,8 @@ add_module!(modules, "macho", macho, "macho.Macho", Some("macho"), Some(macho::_ add_module!(modules, "magic", magic, "magic.Magic", Some("magic"), Some(magic::__main__ as MainFn)); #[cfg(feature = "math-module")] add_module!(modules, "math", math, "math.Math", Some("math"), Some(math::__main__ as MainFn)); +#[cfg(feature = "olecf-module")] +add_module!(modules, "olecf", olecf, "olecf.Olecf", Some("olecf"), Some(olecf::__main__ as MainFn)); #[cfg(feature = "pe-module")] add_module!(modules, "pe", pe, "pe.PE", Some("pe"), Some(pe::__main__ as MainFn)); #[cfg(feature = "string-module")] @@ -30,4 +32,6 @@ add_module!(modules, "test_proto3", test_proto3, "test_proto3.TestProto3", Some( add_module!(modules, "text", text, "text.Text", Some("text"), Some(text::__main__ as MainFn)); #[cfg(feature = "time-module")] add_module!(modules, "time", time, "time.Time", Some("time"), Some(time::__main__ as MainFn)); +#[cfg(feature = "vba-module")] +add_module!(modules, "vba", vba, "vba.Vba", Some("vba"), Some(vba::__main__ as MainFn)); } \ No newline at end of file diff --git a/lib/src/modules/mod.rs b/lib/src/modules/mod.rs index 156175e1..91b0afed 100644 --- a/lib/src/modules/mod.rs +++ 
b/lib/src/modules/mod.rs @@ -174,6 +174,24 @@ pub mod mods { /// Data structure returned by the `macho` module. pub use super::protos::macho::Macho; + /// Data structures defined by the `olecf` module. + /// + /// The main structure produced by the module is [`olecf::Olecf`]. The rest + /// of them are used by one or more fields in the main structure. + /// + pub use super::protos::olecf; + /// Data structure returned by the `olecf` module. + pub use super::protos::olecf::Olecf; + + /// Data structures defined by the `vba` module. + /// + /// The main structure produced by the module is [`vba::Vba`]. The rest + /// of them are used by one or more fields in the main structure. + /// + pub use super::protos::vba; + /// Data structure returned by the `vba` module. + pub use super::protos::vba::Vba; + /// Data structures defined by the `pe` module. /// /// The main structure produced by the module is [`pe::PE`]. The rest @@ -268,6 +286,8 @@ pub mod mods { info.dotnet = protobuf::MessageField(invoke::<Dotnet>(data)); info.macho = protobuf::MessageField(invoke::<Macho>(data)); info.lnk = protobuf::MessageField(invoke::<Lnk>(data)); + info.olecf = protobuf::MessageField(invoke::<Olecf>(data)); + info.vba = protobuf::MessageField(invoke::<Vba>(data)); info } diff --git a/lib/src/modules/modules.rs b/lib/src/modules/modules.rs index 7113eeaa..e75a4c52 100644 --- a/lib/src/modules/modules.rs +++ b/lib/src/modules/modules.rs @@ -17,6 +17,8 @@ mod macho; mod magic; #[cfg(feature = "math-module")] mod math; +#[cfg(feature = "olecf-module")] +mod olecf; #[cfg(feature = "pe-module")] mod pe; #[cfg(feature = "string-module")] @@ -28,4 +30,6 @@ mod test_proto3; #[cfg(feature = "text-module")] mod text; #[cfg(feature = "time-module")] -mod time; \ No newline at end of file +mod time; +#[cfg(feature = "vba-module")] +mod vba; \ No newline at end of file diff --git a/lib/src/modules/olecf/mod.rs b/lib/src/modules/olecf/mod.rs new file mode 100644 index 00000000..3f19e3e2 --- /dev/null +++ 
b/lib/src/modules/olecf/mod.rs @@ -0,0 +1,50 @@ +/*! YARA module that parses OLE Compound File Binary Format files. + +The OLE CF format (also known as Compound File Binary Format or CFBF) is a +container format used by many Microsoft file formats including DOC, XLS, PPT, +and MSI. This module specializes in parsing OLE CF files and extracting +metadata about their structure and contents. +*/ + +use crate::modules::prelude::*; +use crate::modules::protos::olecf::*; +pub mod parser; + +#[module_main] +fn main(data: &[u8], _meta: Option<&[u8]>) -> Olecf { + + match parser::OLECFParser::new(data) { + Ok(parser) => { + let mut olecf = Olecf::new(); + + // Check and set is_olecf + let is_valid = parser.is_valid_header(); + olecf.is_olecf = Some(is_valid); + + // Get stream names and sizes + match parser.get_stream_names() { + Ok(names) => { + // Get sizes for each stream + olecf.stream_sizes = names.iter() + .filter_map(|name| { + parser.get_stream_size(name) + .ok() + .map(|size| size as i64) + }) + .collect(); + + // Assign names last after we're done using them + olecf.stream_names = names; + }, + Err(_) => (), + } + + olecf + }, + Err(_) => { + let mut olecf = Olecf::new(); + olecf.is_olecf = Some(false); + olecf + } + } +} \ No newline at end of file diff --git a/lib/src/modules/olecf/parser.rs b/lib/src/modules/olecf/parser.rs new file mode 100644 index 00000000..8c4cc4e0 --- /dev/null +++ b/lib/src/modules/olecf/parser.rs @@ -0,0 +1,371 @@ +use std::collections::HashMap; + +const OLECF_SIGNATURE: &[u8] = &[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; +const SECTOR_SHIFT: u16 = 9; +const MINI_SECTOR_SHIFT: u16 = 6; +const DIRECTORY_ENTRY_SIZE: u64 = 128; + +// Directory Entry Types +const STORAGE_TYPE: u8 = 1; +const STREAM_TYPE: u8 = 2; +const ROOT_STORAGE_TYPE: u8 = 5; + +// Special sectors +const ENDOFCHAIN: u32 = 0xFFFFFFFE; +const FREESECT: u32 = 0xFFFFFFFF; +const MAX_REGULAR_SECTOR: u32 = 0xFFFFFFFA; + +pub struct OLECFParser<'a> { + data: &'a [u8], + 
sector_size: usize, + mini_sector_size: usize, + fat_sectors: Vec, + directory_sectors: Vec, + mini_fat_sectors: Vec, + dir_entries: HashMap, + mini_stream_start: u32, + mini_stream_size: u64, +} + +struct DirectoryEntry { + name: String, + size: u64, + start_sector: u32, + stream_type: u8, +} + +impl<'a> OLECFParser<'a> { + pub fn new(data: &'a [u8]) -> Result { + let mut parser = OLECFParser { + data, + sector_size: 1 << SECTOR_SHIFT, + mini_sector_size: 1 << MINI_SECTOR_SHIFT, + fat_sectors: Vec::new(), + directory_sectors: Vec::new(), + mini_fat_sectors: Vec::new(), + dir_entries: HashMap::new(), + mini_stream_start: 0, + mini_stream_size: 0, + }; + + parser.parse_header()?; + parser.parse_directory()?; + + if parser.mini_stream_size > 0 && parser.mini_stream_start >= MAX_REGULAR_SECTOR { + return Err("Invalid mini stream start sector"); + } + + Ok(parser) + } + + fn read_u16(data: &[u8], offset: usize) -> Result { + if offset + 2 > data.len() { + return Err("Buffer too small for u16"); + } + Ok(u16::from_le_bytes([data[offset], data[offset + 1]])) + } + + fn read_u32(data: &[u8], offset: usize) -> Result { + if offset + 4 > data.len() { + return Err("Buffer too small for u32"); + } + Ok(u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ])) + } + + pub fn is_valid_header(&self) -> bool { + self.data.len() >= OLECF_SIGNATURE.len() && &self.data[..OLECF_SIGNATURE.len()] == OLECF_SIGNATURE + } + + fn parse_header(&mut self) -> Result<(), &'static str> { + if !self.is_valid_header() { + return Err("Invalid OLECF signature"); + } + + let byte_order = Self::read_u16(self.data, 28)?; + if byte_order != 0xFFFE { + return Err("Invalid byte order mark"); + } + + let num_fat_sectors = Self::read_u32(self.data, 44)?; + let first_dir_sector = Self::read_u32(self.data, 48)?; + let first_mini_fat = Self::read_u32(self.data, 60)?; + let mini_fat_count = Self::read_u32(self.data, 64)?; + let first_difat_sector = 
Self::read_u32(self.data, 68)?; + let difat_count = Self::read_u32(self.data, 72)?; + + // DIFAT entries in header + let mut difat_entries = Vec::new(); + let mut offset = 76; + for _ in 0..109 { + if offset + 4 > self.data.len() { + break; + } + let sector = Self::read_u32(self.data, offset)?; + if sector < MAX_REGULAR_SECTOR { + difat_entries.push(sector); + } + offset += 4; + } + + // Follow DIFAT chain + let mut current_difat = first_difat_sector; + for _ in 0..difat_count { + if current_difat >= MAX_REGULAR_SECTOR { + break; + } + let difat_data = self.read_sector(current_difat)?; + let entries_per_difat = (self.sector_size / 4) - 1; + for i in 0..entries_per_difat { + let s = Self::read_u32(difat_data, i * 4)?; + if s < MAX_REGULAR_SECTOR { + difat_entries.push(s); + } + } + current_difat = Self::read_u32(difat_data, self.sector_size - 4)?; + } + + if difat_entries.is_empty() && num_fat_sectors > 0 { + return Err("No FAT sectors found despite nonzero FAT count"); + } + self.fat_sectors = difat_entries; + + // Directory chain + if first_dir_sector < MAX_REGULAR_SECTOR { + self.directory_sectors = self.follow_chain(first_dir_sector); + } else { + return Err("No valid directory start sector"); + } + + // MiniFAT chain + if mini_fat_count > 0 && first_mini_fat < MAX_REGULAR_SECTOR { + self.mini_fat_sectors = self.follow_chain(first_mini_fat); + } + + Ok(()) + } + + fn sector_to_offset(&self, sector: u32) -> usize { + 512 + (sector as usize * self.sector_size) + } + + fn read_sector(&self, sector: u32) -> Result<&[u8], &'static str> { + let offset = self.sector_to_offset(sector); + if offset + self.sector_size > self.data.len() { + return Err("Sector read out of bounds"); + } + Ok(&self.data[offset..offset + self.sector_size]) + } + + fn get_fat_entry(&self, sector: u32) -> Result { + let entry_index = sector as usize; + let entries_per_sector = self.sector_size / 4; + let fat_sector_index = entry_index / entries_per_sector; + if fat_sector_index >= 
self.fat_sectors.len() { + return Err("FAT entry sector index out of range"); + } + let fat_sector = self.fat_sectors[fat_sector_index]; + let fat = self.read_sector(fat_sector)?; + let fat_entry_offset = (entry_index % entries_per_sector) * 4; + Self::read_u32(fat, fat_entry_offset) + } + + fn follow_chain(&self, start_sector: u32) -> Vec { + let mut chain = Vec::new(); + if start_sector >= MAX_REGULAR_SECTOR { + return chain; + } + + let mut current = start_sector; + while current < MAX_REGULAR_SECTOR { + chain.push(current); + let next = match self.get_fat_entry(current) { + Ok(n) => n, + Err(_) => break, + }; + if next >= MAX_REGULAR_SECTOR || next == FREESECT || next == ENDOFCHAIN { + break; + } + current = next; + } + chain + } + + fn read_directory_entry(&self, offset: usize) -> Result { + if offset + 128 > self.data.len() { + return Err("Incomplete directory entry"); + } + + let name_len = Self::read_u16(self.data, offset + 64)? as usize; + if name_len < 2 || name_len > 64 { + return Err("Invalid name length"); + } + + let name_bytes = &self.data[offset..offset + name_len]; + let filtered: Vec = name_bytes.iter().copied().filter(|&b| b != 0).collect(); + let name = String::from_utf8_lossy(&filtered).to_string(); + + let stream_type = self.data[offset + 66]; + let start_sector = Self::read_u32(self.data, offset + 116)?; // start sector + let size_32 = Self::read_u32(self.data, offset + 120)?; // size is 4 bytes, read as u32 + let size = size_32 as u64; + + Ok(DirectoryEntry { + name, + size, + start_sector, + stream_type, + }) + } + + fn parse_directory(&mut self) -> Result<(), &'static str> { + if self.directory_sectors.is_empty() { + return Err("No directory sectors found"); + } + + for §or in &self.directory_sectors { + let mut entry_offset = 0; + + while entry_offset + DIRECTORY_ENTRY_SIZE as usize <= self.sector_size { + let abs_offset = self.sector_to_offset(sector) + entry_offset; + if abs_offset + DIRECTORY_ENTRY_SIZE as usize > self.data.len() { + 
break; + } + match self.read_directory_entry(abs_offset) { + Ok(entry) => { + if entry.stream_type == ROOT_STORAGE_TYPE { + self.mini_stream_start = entry.start_sector; + self.mini_stream_size = entry.size; + } + if entry.stream_type == STORAGE_TYPE + || entry.stream_type == STREAM_TYPE + || entry.stream_type == ROOT_STORAGE_TYPE + { + self.dir_entries.insert(entry.name.clone(), entry); + } + } + Err(_) => {} + } + entry_offset += DIRECTORY_ENTRY_SIZE as usize; + } + } + + Ok(()) + } + + pub fn get_stream_names(&self) -> Result, &'static str> { + if self.dir_entries.is_empty() { + return Err("No streams found"); + } + Ok(self.dir_entries.keys().cloned().collect()) + } + + pub fn get_stream_size(&self, stream_name: &str) -> Result { + self.dir_entries.get(stream_name).map(|e| e.size).ok_or("Stream not found") + } + + pub fn get_stream_data(&self, stream_name: &str) -> Result, &'static str> { + let entry = self.dir_entries.get(stream_name) + .ok_or("Stream not found")?; + + if entry.size < 4096 && entry.stream_type != ROOT_STORAGE_TYPE { + self.get_mini_stream_data(entry.start_sector, entry.size) + } else { + self.get_regular_stream_data(entry.start_sector, entry.size) + } + } + + fn get_regular_stream_data(&self, start_sector: u32, size: u64) -> Result, &'static str> { + let mut data = Vec::with_capacity(size as usize); + let mut current_sector = start_sector; + let mut total_read = 0; + + while current_sector < MAX_REGULAR_SECTOR && total_read < size as usize { + let sector_data = self.read_sector(current_sector)?; + let bytes_to_read = std::cmp::min(self.sector_size, size as usize - total_read); + + data.extend_from_slice(§or_data[..bytes_to_read]); + total_read += bytes_to_read; + + if total_read < size as usize { + let next = self.get_fat_entry(current_sector)?; + if next == ENDOFCHAIN || next >= MAX_REGULAR_SECTOR { + break; + } + current_sector = next; + } + } + + if data.len() != size as usize { + return Err("Incomplete stream data"); + } + + Ok(data) + } + + 
fn get_root_mini_stream_data(&self) -> Result, &'static str> { + // The mini stream is stored as a regular FAT-based stream + self.get_regular_stream_data(self.mini_stream_start, self.mini_stream_size) + } + + fn get_minifat_entry(&self, mini_sector: u32) -> Result { + if self.mini_fat_sectors.is_empty() { + return Ok(ENDOFCHAIN); + } + + let entry_index = mini_sector as usize; + let entries_per_sector = self.sector_size / 4; + let fat_sector_index = entry_index / entries_per_sector; + if fat_sector_index >= self.mini_fat_sectors.len() { + return Ok(ENDOFCHAIN); + } + let sector = self.mini_fat_sectors[fat_sector_index]; + let fat = self.read_sector(sector)?; + let offset = (entry_index % entries_per_sector) * 4; + Self::read_u32(fat, offset) + } + + fn get_mini_stream_data(&self, start_mini_sector: u32, size: u64) -> Result, &'static str> { + if self.mini_stream_size == 0 { + return Err("No mini stream present"); + } + + let mini_stream_data = self.get_root_mini_stream_data()?; + let mini_data_len = mini_stream_data.len(); + + let mut data = Vec::with_capacity(size as usize); + let mut current = start_mini_sector; + + while current < MAX_REGULAR_SECTOR && data.len() < size as usize { + let mini_offset = current as usize * self.mini_sector_size; + if mini_offset >= mini_data_len { + return Err("Mini stream offset out of range"); + } + + let bytes_to_read = std::cmp::min(self.mini_sector_size, size as usize - data.len()); + if mini_offset + bytes_to_read > mini_data_len { + return Err("Mini stream extends beyond available data"); + } + + data.extend_from_slice(&mini_stream_data[mini_offset..mini_offset + bytes_to_read]); + + if data.len() < size as usize { + let next = self.get_minifat_entry(current)?; + if next == ENDOFCHAIN || next >= MAX_REGULAR_SECTOR { + break; + } + current = next; + } + } + + if data.len() != size as usize { + return Err("Incomplete mini stream data"); + } + + Ok(data) + } +} diff --git 
a/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out b/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out new file mode 100644 index 00000000..e1947b31 --- /dev/null +++ b/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out @@ -0,0 +1,16 @@ +olecf: + is_olecf: true + stream_names: + - "CompObj" + - "1Table" + - "SummaryInformation" + - "Root Entry" + - "WordDocument" + - "DocumentSummaryInformation" + stream_sizes: + - 114 + - 7273 + - 4096 + - 128 + - 4096 + - 4096 \ No newline at end of file diff --git a/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip b/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip new file mode 100644 index 00000000..2236f51f Binary files /dev/null and b/lib/src/modules/olecf/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip differ diff --git a/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.out b/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.out new file mode 100644 index 00000000..8867215a --- /dev/null +++ b/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.out @@ -0,0 +1,2 @@ +olecf: + is_olecf: true \ No newline at end of file diff --git a/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.zip b/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.zip new file mode 100644 index 00000000..2e6f897d Binary files /dev/null and b/lib/src/modules/olecf/tests/testdata/cc354533e3a8190985784e476d6e16cc04f43f53935a885c99c21148c975a705.zip differ diff --git 
a/lib/src/modules/protos/mods.proto b/lib/src/modules/protos/mods.proto index 486d8d96..94bdd4a9 100644 --- a/lib/src/modules/protos/mods.proto +++ b/lib/src/modules/protos/mods.proto @@ -6,6 +6,8 @@ import "elf.proto"; import "pe.proto"; import "lnk.proto"; import "macho.proto"; +import "olecf.proto"; +import "vba.proto"; package mods; @@ -16,4 +18,6 @@ message Modules { optional dotnet.Dotnet dotnet = 3; optional macho.Macho macho = 4; optional lnk.Lnk lnk = 5; + optional olecf.Olecf olecf = 6; + optional vba.Vba vba = 7; } \ No newline at end of file diff --git a/lib/src/modules/protos/olecf.proto b/lib/src/modules/protos/olecf.proto new file mode 100644 index 00000000..6b42b3a3 --- /dev/null +++ b/lib/src/modules/protos/olecf.proto @@ -0,0 +1,22 @@ +syntax = "proto2"; +import "yara.proto"; + +package olecf; + +option (yara.module_options) = { + name : "olecf" + root_message: "olecf.Olecf" + rust_module: "olecf" + cargo_feature: "olecf-module" +}; + +message Olecf { + // Check if file is an OLE CF file + required bool is_olecf = 1; + + // Get array of stream names + repeated string stream_names = 2; + + // Get size of a specific stream by name + repeated int64 stream_sizes = 3; +} \ No newline at end of file diff --git a/lib/src/modules/protos/vba.proto b/lib/src/modules/protos/vba.proto new file mode 100644 index 00000000..b28ed21a --- /dev/null +++ b/lib/src/modules/protos/vba.proto @@ -0,0 +1,37 @@ +syntax = "proto2"; +import "yara.proto"; + +package vba; + +option (yara.module_options) = { + name: "vba" + root_message: "vba.Vba" + rust_module: "vba" + cargo_feature: "vba-module" +}; + +message Vba { + // True if VBA macros are present + optional bool has_macros = 1; + + // Names of VBA macro modules found + repeated string module_names = 2; + + // Type of each module (standard, class, form) + repeated string module_types = 3; + + // The actual VBA code for each module + repeated string module_code = 4; + + // Project metadata + message ProjectInfo { + 
optional string name = 1; + optional string version = 2; + repeated string references = 3; + + // Additional metadata + optional int32 module_count = 4; + optional bool is_compressed = 5; + } + optional ProjectInfo project_info = 5; +} \ No newline at end of file diff --git a/lib/src/modules/vba/mod.rs b/lib/src/modules/vba/mod.rs new file mode 100644 index 00000000..4dd21dfb --- /dev/null +++ b/lib/src/modules/vba/mod.rs @@ -0,0 +1,205 @@ +/*! YARA module that extracts VBA (Visual Basic for Applications) macros from Office documents. */ + +use crate::modules::prelude::*; +use crate::modules::protos::vba::*; +use crate::modules::protos::vba::vba::ProjectInfo; +use protobuf::MessageField; +use std::collections::HashMap; +use std::io::Read; +use std::io::Cursor; +use zip::ZipArchive; + +mod parser; +use parser::{VbaProject, ModuleType}; + +#[derive(Debug)] +struct VbaExtractor { + data: Vec<u8>, +} + +impl VbaExtractor { + fn new(data: &[u8]) -> Self { + Self { + data: data.to_vec(), + } + } + + fn is_zip(&self) -> bool { + let result = self.data.starts_with(&[0x50, 0x4B, 0x03, 0x04]); + result + } + + fn read_stream(&self, ole_parser: &crate::modules::olecf::parser::OLECFParser, name: &str) -> Result<Vec<u8>, &'static str> { + let size = ole_parser.get_stream_size(name)? as usize; + + // Skip empty streams + if size == 0 { + return Err("Stream is empty"); + } + + let data = ole_parser.get_stream_data(name)?; + + Ok(data) + } + + fn extract_from_ole(&self) -> Result<VbaProject, &'static str> { + let ole_parser = crate::modules::olecf::parser::OLECFParser::new(&self.data)?; + let stream_names = ole_parser.get_stream_names()?; + + let mut vba_dir = None; + let mut modules = HashMap::new(); + let mut project_streams = Vec::new(); + + // First process the dir stream + if let Some(dir_name) = stream_names.iter().find(|n| n.to_lowercase().trim() == "dir") { + match self.read_stream(&ole_parser, dir_name) { + Ok(data) => { + vba_dir = Some(data); + }, + Err(_) => (), + } + } + + // Then process other streams + for name in &stream_names { + let lowercase_name = name.to_lowercase(); + + if lowercase_name != "dir" { + if lowercase_name.contains("module") || + lowercase_name.contains("thisdocument") || + lowercase_name.ends_with(".bas") || + lowercase_name.ends_with(".cls") || + lowercase_name.ends_with(".frm") { + if let Ok(data) = self.read_stream(&ole_parser, name) { + if !data.is_empty() { + modules.insert(name.clone(), data); + } + } + } else if lowercase_name.contains("project") && !lowercase_name.contains("_vba_project") { + if let Ok(data) = self.read_stream(&ole_parser, name) { + project_streams.push(data); + } + } + } + } + + // Always try the dir stream first if we found it + if let Some(dir_data) = vba_dir { + parser::VbaProject::parse(&dir_data, modules) + } else { + Err("No VBA directory stream found") + } + } + + fn extract_from_zip(&self) -> Result<VbaProject, &'static str> { + let reader = Cursor::new(&self.data); + let mut archive = ZipArchive::new(reader) + .map_err(|_| "Failed to read ZIP archive")?; + + // Search for potential VBA project files + let vba_project_names = [ + "word/vbaProject.bin", + "xl/vbaProject.bin", + "ppt/vbaProject.bin", + "vbaProject.bin" + ]; + + for name in &vba_project_names { + match archive.by_name(name) {
Ok(mut file) => { + let mut contents = Vec::new(); + file.read_to_end(&mut contents) + .map_err(|_| "Failed to read vbaProject.bin")?; + + // Parse as OLE + let ole_parser = crate::modules::olecf::parser::OLECFParser::new(&contents)?; + let stream_names = ole_parser.get_stream_names()?; + + let mut vba_dir = None; + let mut modules = HashMap::new(); + + for stream_name in &stream_names { + let _stream_size = ole_parser.get_stream_size(stream_name)?; + + if stream_name.starts_with("dir") { + match self.read_stream(&ole_parser, stream_name) { + Ok(data) => { + if !data.is_empty() { + vba_dir = Some(data); + } + }, + Err(_) => (), + } + } + } + + // Process other streams + for name in &stream_names { + if let Ok(data) = self.read_stream(&ole_parser, name) { + if !data.is_empty() { + modules.insert(name.clone(), data); + } + } + } + + // Use dir stream if found, otherwise fail + if let Some(dir_data) = vba_dir { + return parser::VbaProject::parse(&dir_data, modules); + } + }, + Err(_) => continue, + } + } + + Err("No VBA project found in ZIP") + } +} + +#[module_main] +fn main(data: &[u8], _meta: Option<&[u8]>) -> Vba { + let mut vba = Vba::new(); + vba.has_macros = Some(false); + + let extractor = VbaExtractor::new(data); + + let project_result = if extractor.is_zip() { + extractor.extract_from_zip() + } else { + extractor.extract_from_ole() + }; + + match project_result { + Ok(project) => { + vba.has_macros = Some(true); + + let mut project_info = ProjectInfo::new(); + project_info.name = Some(project.info.name.clone()); + project_info.version = Some(project.info.version.clone()); + project_info.references = project.info.references.clone(); + + // Add metadata + let module_count = project.modules.len() as i32; + project_info.module_count = Some(module_count); + project_info.is_compressed = Some(true); + + vba.project_info = MessageField::some(project_info); + + // Process modules + for module in project.modules.values() { + vba.module_names.push(module.name.clone()); + vba.module_types.push(match module.module_type { + ModuleType::Standard => "Standard".to_string(), + ModuleType::Class => "Class".to_string(), + ModuleType::Unknown => "Unknown".to_string(), + }); + vba.module_code.push(module.code.clone()); + } + }, + Err(_) => { + vba.has_macros = Some(false); + } + } + + vba +} \ No newline at end of file diff --git a/lib/src/modules/vba/parser.rs b/lib/src/modules/vba/parser.rs new file mode 100644 index 00000000..276d66c5 --- /dev/null +++ b/lib/src/modules/vba/parser.rs @@ -0,0 +1,602 @@ +use std::collections::HashMap; + +pub enum ModuleType { + Standard, + Class, + Unknown, +} + +pub struct ProjectInfo { + pub name: String, + pub version: String, + pub references: Vec<String>, +} + +pub struct VbaModule { + pub name: String, + pub code: String, + pub module_type: ModuleType, +} + +pub struct VbaProject { + pub modules: HashMap<String, VbaModule>, + pub info: ProjectInfo, +} + +impl VbaProject { + fn copytoken_help(difference: usize) -> (u16, u16, u32, u16) { + let bit_count = (difference as f64).log2().ceil() as u32; + let bit_count = bit_count.max(4); + let length_mask = 0xFFFF >> bit_count; + let offset_mask = !length_mask; + let maximum_length = (0xFFFF >> bit_count) + 3; + + (length_mask, offset_mask, bit_count, maximum_length) + } + + pub fn decompress_stream(compressed: &[u8]) -> Result<Vec<u8>, &'static str> { + if compressed.is_empty() { + return Err("Empty input buffer"); + } + + if compressed[0] != 0x01 { + return Err("Invalid signature byte"); + } + + let mut decompressed = Vec::new(); + let mut current = 1; // Skip signature byte + + while current < compressed.len() { + // Ensure we have enough bytes for the chunk header + if current + 2 > compressed.len() { + return Err("Incomplete chunk header"); + } + + // Read chunk header + let chunk_header = u16::from_le_bytes([compressed[current], compressed[current + 1]]); + let chunk_size = (chunk_header & 0x0FFF) as usize + 3; + let chunk_is_compressed = (chunk_header & 0x8000) != 0; + + current += 2;
+ + // Validate chunk size + if chunk_is_compressed && chunk_size > 4095 { + return Err("CompressedChunkSize > 4095 but CompressedChunkFlag == 1"); + } + if !chunk_is_compressed && chunk_size != 4095 { + return Err("CompressedChunkSize != 4095 but CompressedChunkFlag == 0"); + } + + let chunk_end = std::cmp::min(compressed.len(), current + chunk_size); + + if !chunk_is_compressed { + if current + 4096 > compressed.len() { + return Err("Incomplete uncompressed chunk"); + } + decompressed.extend_from_slice(&compressed[current..current + 4096]); + current += 4096; + continue; + } + + let decompressed_chunk_start = decompressed.len(); + + while current < chunk_end { + // Read flag byte + let flag_byte = compressed[current]; + current += 1; + + // Process each bit in the flag byte + for bit_index in 0..8 { + if current >= chunk_end { + break; + } + + if (flag_byte & (1 << bit_index)) == 0 { + // Literal token + decompressed.push(compressed[current]); + current += 1; + } else { + // Copy token + if current + 2 > compressed.len() { + return Err("Incomplete copy token"); + } + + let copy_token = u16::from_le_bytes([compressed[current], compressed[current + 1]]); + let (length_mask, offset_mask, bit_count, _) = + Self::copytoken_help(decompressed.len() - decompressed_chunk_start); + + let length = (copy_token & length_mask) + 3; + let temp1 = copy_token & offset_mask; + let temp2 = 16 - bit_count; + let offset = u16::try_from((temp1 >> temp2) + 1) + .map_err(|_| "Offset calculation overflow")?; + + if offset as usize > decompressed.len() { + return Err("Invalid copy token offset"); + } + + let copy_source = decompressed.len() - offset as usize; + for i in 0..length { + let source_idx = copy_source + i as usize; + if source_idx >= decompressed.len() { + return Err("Copy token source out of bounds"); + } + decompressed.push(decompressed[source_idx]); + } + current += 2; + } + } + } + } + + Ok(decompressed) + } + + + pub fn parse(compressed_dir_stream: &[u8], module_streams: 
HashMap>) -> Result { + let dir_stream = Self::decompress_stream(compressed_dir_stream)?; + + let mut pos = 0; + let mut modules = HashMap::new(); + let mut references = Vec::new(); + let project_name; + let version_major; + let version_minor; + + // Parse PROJECTSYSKIND Record + let syskind_id = read_u16(&dir_stream, &mut pos)?; + if syskind_id != 0x0001 { + return Err("Invalid SYSKIND_ID"); + } + let syskind_size = read_u32(&dir_stream, &mut pos)?; + if syskind_size != 0x0004 { + return Err("Invalid SYSKIND_SIZE"); + } + let _syskind = read_u32(&dir_stream, &mut pos)?; + + // Parse PROJECTLCID Record + let lcid_id = read_u16(&dir_stream, &mut pos)?; + if lcid_id != 0x0002 { + return Err("Invalid LCID_ID"); + } + let lcid_size = read_u32(&dir_stream, &mut pos)?; + if lcid_size != 0x0004 { + return Err("Invalid LCID_SIZE"); + } + let lcid = read_u32(&dir_stream, &mut pos)?; + if lcid != 0x409 { + return Err("Invalid LCID"); + } + + // Parse PROJECTLCIDINVOKE Record + let lcid_invoke_id = read_u16(&dir_stream, &mut pos)?; + if lcid_invoke_id != 0x0014 { + return Err("Invalid LCIDINVOKE_ID"); + } + let lcid_invoke_size = read_u32(&dir_stream, &mut pos)?; + if lcid_invoke_size != 0x0004 { + return Err("Invalid LCIDINVOKE_SIZE"); + } + let lcid_invoke = read_u32(&dir_stream, &mut pos)?; + if lcid_invoke != 0x409 { + return Err("Invalid LCIDINVOKE"); + } + + // Parse PROJECTCODEPAGE Record + let codepage_id = read_u16(&dir_stream, &mut pos)?; + if codepage_id != 0x0003 { + return Err("Invalid CODEPAGE_ID"); + } + let codepage_size = read_u32(&dir_stream, &mut pos)?; + if codepage_size != 0x0002 { + return Err("Invalid CODEPAGE_SIZE"); + } + let _codepage = read_u16(&dir_stream, &mut pos)?; + + // Parse PROJECTNAME Record + let name_id = read_u16(&dir_stream, &mut pos)?; + if name_id != 0x0004 { + return Err("Invalid NAME_ID"); + } + let name_size = read_u32(&dir_stream, &mut pos)? 
as usize; + if name_size < 1 || name_size > 128 { + return Err("Project name not in valid range"); + } + let name_bytes = read_bytes(&dir_stream, &mut pos, name_size)?; + project_name = String::from_utf8_lossy(&name_bytes).to_string(); + + // Parse PROJECTDOCSTRING Record + let doc_id = read_u16(&dir_stream, &mut pos)?; + if doc_id != 0x0005 { + return Err("Invalid DOCSTRING_ID"); + } + let doc_size = read_u32(&dir_stream, &mut pos)? as usize; + let _doc_string = read_bytes(&dir_stream, &mut pos, doc_size)?; + let doc_reserved = read_u16(&dir_stream, &mut pos)?; + if doc_reserved != 0x0040 { + return Err("Invalid DOCSTRING_Reserved"); + } + let doc_unicode_size = read_u32(&dir_stream, &mut pos)? as usize; + if doc_unicode_size % 2 != 0 { + return Err("DOCSTRING_Unicode size not even"); + } + let _doc_unicode = read_bytes(&dir_stream, &mut pos, doc_unicode_size)?; + + // Parse PROJECTHELPFILEPATH Record + let helpfile_id = read_u16(&dir_stream, &mut pos)?; + if helpfile_id != 0x0006 { + return Err("Invalid HELPFILEPATH_ID"); + } + let helpfile_size1 = read_u32(&dir_stream, &mut pos)? as usize; + if helpfile_size1 > 260 { + return Err("Help file path 1 too long"); + } + let helpfile1 = read_bytes(&dir_stream, &mut pos, helpfile_size1)?; + let helpfile_reserved = read_u16(&dir_stream, &mut pos)?; + if helpfile_reserved != 0x003D { + return Err("Invalid HELPFILEPATH_Reserved"); + } + let helpfile_size2 = read_u32(&dir_stream, &mut pos)? 
as usize; + if helpfile_size2 != helpfile_size1 { + return Err("Help file sizes don't match"); + } + let helpfile2 = read_bytes(&dir_stream, &mut pos, helpfile_size2)?; + if helpfile1 != helpfile2 { + return Err("Help files don't match"); + } + + // Parse PROJECTHELPCONTEXT Record + let helpcontext_id = read_u16(&dir_stream, &mut pos)?; + if helpcontext_id != 0x0007 { + return Err("Invalid HELPCONTEXT_ID"); + } + let helpcontext_size = read_u32(&dir_stream, &mut pos)?; + if helpcontext_size != 0x0004 { + return Err("Invalid HELPCONTEXT_SIZE"); + } + let _helpcontext = read_u32(&dir_stream, &mut pos)?; + + // Parse PROJECTLIBFLAGS Record + let libflags_id = read_u16(&dir_stream, &mut pos)?; + if libflags_id != 0x0008 { + return Err("Invalid LIBFLAGS_ID"); + } + let libflags_size = read_u32(&dir_stream, &mut pos)?; + if libflags_size != 0x0004 { + return Err("Invalid LIBFLAGS_SIZE"); + } + let libflags = read_u32(&dir_stream, &mut pos)?; + if libflags != 0x0000 { + return Err("Invalid LIBFLAGS"); + } + + // Parse PROJECTVERSION Record + let version_id = read_u16(&dir_stream, &mut pos)?; + if version_id != 0x0009 { + return Err("Invalid VERSION_ID"); + } + let version_reserved = read_u32(&dir_stream, &mut pos)?; + if version_reserved != 0x0004 { + return Err("Invalid VERSION_Reserved"); + } + version_major = read_u32(&dir_stream, &mut pos)?; + version_minor = read_u16(&dir_stream, &mut pos)?; + + // Parse PROJECTCONSTANTS Record + let constants_id = read_u16(&dir_stream, &mut pos)?; + if constants_id != 0x000C { + return Err("Invalid CONSTANTS_ID"); + } + let constants_size = read_u32(&dir_stream, &mut pos)? as usize; + if constants_size > 1015 { + return Err("Constants size too large"); + } + let _constants = read_bytes(&dir_stream, &mut pos, constants_size)?; + let constants_reserved = read_u16(&dir_stream, &mut pos)?; + if constants_reserved != 0x003C { + return Err("Invalid CONSTANTS_Reserved"); + } + let constants_unicode_size = read_u32(&dir_stream, &mut pos)? 
as usize; + if constants_unicode_size % 2 != 0 { + return Err("Constants unicode size not even"); + } + let _constants_unicode = read_bytes(&dir_stream, &mut pos, constants_unicode_size)?; + + // Parse References + let mut last_check; + loop { + let check = read_u16(&dir_stream, &mut pos)?; + last_check = check; // Save the check value + if check == 0x000F { + break; + } + + match check { + 0x0016 => { + // REFERENCENAME + let name_size = read_u32(&dir_stream, &mut pos)? as usize; + let name_bytes = read_bytes(&dir_stream, &mut pos, name_size)?; + let name = String::from_utf8_lossy(&name_bytes).to_string(); + references.push(name); + + let reserved = read_u16(&dir_stream, &mut pos)?; + if reserved != 0x003E { + return Err("Invalid REFERENCE_Reserved"); + } + let unicode_size = read_u32(&dir_stream, &mut pos)? as usize; + let _name_unicode = read_bytes(&dir_stream, &mut pos, unicode_size)?; + }, + 0x0033 => { + // REFERENCEORIGINAL + let _size = read_u32(&dir_stream, &mut pos)? as usize; + let _libid = read_bytes(&dir_stream, &mut pos, _size)?; + }, + 0x002F => { + // REFERENCECONTROL + let size_twiddled = read_u32(&dir_stream, &mut pos)? as usize; + let _twiddled = read_bytes(&dir_stream, &mut pos, size_twiddled)?; + + let reserved1 = read_u32(&dir_stream, &mut pos)?; + if reserved1 != 0x0000 { + return Err("Invalid REFERENCECONTROL_Reserved1"); + } + + let reserved2 = read_u16(&dir_stream, &mut pos)?; + if reserved2 != 0x0000 { + return Err("Invalid REFERENCECONTROL_Reserved2"); + } + + // Check for optional name record + let check2 = read_u16(&dir_stream, &mut pos)?; + if check2 == 0x0016 { + let name_size = read_u32(&dir_stream, &mut pos)? as usize; + let _name = read_bytes(&dir_stream, &mut pos, name_size)?; + + let reserved = read_u16(&dir_stream, &mut pos)?; + if reserved != 0x003E { + return Err("Invalid REFERENCECONTROL_NameRecord_Reserved"); + } + + let unicode_size = read_u32(&dir_stream, &mut pos)? 
as usize; + let _name_unicode = read_bytes(&dir_stream, &mut pos, unicode_size)?; + } + + let reserved3 = if check2 == 0x0016 { + read_u16(&dir_stream, &mut pos)? + } else { + check2 + }; + if reserved3 != 0x0030 { + return Err("Invalid REFERENCECONTROL_Reserved3"); + } + + let _size_extended = read_u32(&dir_stream, &mut pos)?; + let size_libid = read_u32(&dir_stream, &mut pos)? as usize; + let _libid = read_bytes(&dir_stream, &mut pos, size_libid)?; + let _reserved4 = read_u32(&dir_stream, &mut pos)?; + let _reserved5 = read_u16(&dir_stream, &mut pos)?; + let _original_typelib = read_bytes(&dir_stream, &mut pos, 16)?; + let _cookie = read_u32(&dir_stream, &mut pos)?; + }, + 0x000D => { + // REFERENCEREGISTERED + let _size = read_u32(&dir_stream, &mut pos)?; + + let libid_size = read_u32(&dir_stream, &mut pos)? as usize; + let _libid = read_bytes(&dir_stream, &mut pos, libid_size)?; + let reserved1 = read_u32(&dir_stream, &mut pos)?; + if reserved1 != 0x0000 { + return Err("Invalid REFERENCEREGISTERED_Reserved1"); + } + + let reserved2 = read_u16(&dir_stream, &mut pos)?; + if reserved2 != 0x0000 { + return Err("Invalid REFERENCEREGISTERED_Reserved2"); + } + }, + 0x000E => { + // REFERENCEPROJECT + let _size = read_u32(&dir_stream, &mut pos)?; + let libid_abs_size = read_u32(&dir_stream, &mut pos)? as usize; + let _libid_abs = read_bytes(&dir_stream, &mut pos, libid_abs_size)?; + + let libid_rel_size = read_u32(&dir_stream, &mut pos)? 
as usize; + let _libid_rel = read_bytes(&dir_stream, &mut pos, libid_rel_size)?; + + let _major = read_u32(&dir_stream, &mut pos)?; + let _minor = read_u16(&dir_stream, &mut pos)?; + }, + _ => return Err("Invalid reference type"), + } + } + + if last_check != 0x000F { + return Err("Invalid PROJECTMODULES_Id"); + } + + let modules_size = read_u32(&dir_stream, &mut pos)?; + if modules_size != 0x0002 { + return Err("Invalid PROJECTMODULES_Size"); + } + + let modules_count = read_u16(&dir_stream, &mut pos)?; + + let cookie_id = read_u16(&dir_stream, &mut pos)?; + if cookie_id != 0x0013 { + return Err("Invalid ProjectCookie_Id"); + } + + let cookie_size = read_u32(&dir_stream, &mut pos)?; + if cookie_size != 0x0002 { + return Err("Invalid ProjectCookie_Size"); + } + + let _cookie = read_u16(&dir_stream, &mut pos)?; + + // Parse each module + for _ in 0..modules_count { + // Parse MODULENAME record + let module_id = read_u16(&dir_stream, &mut pos)?; + if module_id != 0x0019 { + return Err("Invalid MODULENAME_Id"); + } + + let module_name_size = read_u32(&dir_stream, &mut pos)? as usize; + let name_bytes = read_bytes(&dir_stream, &mut pos, module_name_size)?; + let module_name = String::from_utf8_lossy(&name_bytes).to_string(); + + let mut module_type = ModuleType::Unknown; + let mut stream_name = String::new(); + let mut module_offset = 0u32; + + // Parse optional sections + loop { + let section_id = read_u16(&dir_stream, &mut pos)?; + match section_id { + 0x0047 => { + // MODULENAMEUNICODE + let unicode_size = read_u32(&dir_stream, &mut pos)? as usize; + let _unicode_name = read_bytes(&dir_stream, &mut pos, unicode_size)?; + }, + 0x001A => { + // MODULESTREAMNAME + let stream_size = read_u32(&dir_stream, &mut pos)? 
as usize; + let stream_bytes = read_bytes(&dir_stream, &mut pos, stream_size)?; + stream_name = String::from_utf8_lossy(&stream_bytes).to_string(); + + let reserved = read_u16(&dir_stream, &mut pos)?; + if reserved != 0x0032 { + return Err("Invalid STREAMNAME_Reserved"); + } + + let unicode_size = read_u32(&dir_stream, &mut pos)? as usize; + let _unicode_name = read_bytes(&dir_stream, &mut pos, unicode_size)?; + }, + 0x001C => { + // MODULEDOCSTRING + let doc_size = read_u32(&dir_stream, &mut pos)? as usize; + let _doc_string = read_bytes(&dir_stream, &mut pos, doc_size)?; + + let reserved = read_u16(&dir_stream, &mut pos)?; + if reserved != 0x0048 { + return Err("Invalid DOCSTRING_Reserved"); + } + + let unicode_size = read_u32(&dir_stream, &mut pos)? as usize; + let _unicode_doc = read_bytes(&dir_stream, &mut pos, unicode_size)?; + }, + 0x0031 => { + // MODULEOFFSET + let offset_size = read_u32(&dir_stream, &mut pos)?; + if offset_size != 0x0004 { + return Err("Invalid OFFSET_Size"); + } + module_offset = read_u32(&dir_stream, &mut pos)?; + }, + 0x001E => { + // MODULEHELPCONTEXT + let help_size = read_u32(&dir_stream, &mut pos)?; + if help_size != 0x0004 { + return Err("Invalid HELPCONTEXT_Size"); + } + let _help_context = read_u32(&dir_stream, &mut pos)?; + }, + 0x002C => { + // MODULECOOKIE + let cookie_size = read_u32(&dir_stream, &mut pos)?; + if cookie_size != 0x0002 { + return Err("Invalid COOKIE_Size"); + } + let _cookie = read_u16(&dir_stream, &mut pos)?; + }, + 0x0021 => { + module_type = ModuleType::Standard; + let _reserved = read_u32(&dir_stream, &mut pos)?; + }, + 0x0022 => { + module_type = ModuleType::Class; + let _reserved = read_u32(&dir_stream, &mut pos)?; + }, + 0x0025 => { + // MODULEREADONLY + let reserved = read_u32(&dir_stream, &mut pos)?; + if reserved != 0x0000 { + return Err("Invalid READONLY_Reserved"); + } + }, + 0x0028 => { + // MODULEPRIVATE + let reserved = read_u32(&dir_stream, &mut pos)?; + if reserved != 0x0000 { + return 
Err("Invalid PRIVATE_Reserved"); + } + }, + 0x002B => { + // TERMINATOR + let reserved = read_u32(&dir_stream, &mut pos)?; + if reserved != 0x0000 { + return Err("Invalid MODULE_Reserved"); + } + break; + }, + _ => return Err("Invalid module section ID"), + } + } + + // Get module code + if let Some(module_data) = module_streams.get(&stream_name) { + let code_data = if module_offset as usize >= module_data.len() { + return Err("Invalid module offset"); + } else { + &module_data[module_offset as usize..] + }; + + if !code_data.is_empty() { + let decompressed = Self::decompress_stream(code_data)?; + let code = String::from_utf8_lossy(&decompressed).to_string(); + modules.insert(module_name.clone(), VbaModule { + name: module_name, + code, + module_type, + }); + } + } + } + + Ok(VbaProject { + modules, + info: ProjectInfo { + name: project_name, + version: format!("{}.{}", version_major, version_minor), + references, + }, + }) + } +} + +fn read_u16(data: &[u8], pos: &mut usize) -> Result { + if *pos + 2 > data.len() { + return Err("Not enough bytes to read u16"); + } + let value = u16::from_le_bytes([data[*pos], data[*pos + 1]]); + *pos += 2; + Ok(value) +} + +fn read_u32(data: &[u8], pos: &mut usize) -> Result { + if *pos + 4 > data.len() { + return Err("Not enough bytes to read u32"); + } + let value = u32::from_le_bytes([data[*pos], data[*pos + 1], data[*pos + 2], data[*pos + 3]]); + *pos += 4; + Ok(value) +} + +fn read_bytes(data: &[u8], pos: &mut usize, len: usize) -> Result, &'static str> { + if *pos + len > data.len() { + return Err("Not enough bytes to read"); + } + let bytes = data[*pos..*pos + len].to_vec(); + *pos += len; + Ok(bytes) +} diff --git a/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.out b/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.out new file mode 100644 index 00000000..408dca38 --- /dev/null +++ 
b/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.out @@ -0,0 +1,17 @@ +vba: + has_macros: true + module_names: + - "ThisDocument" + module_types: + - "Class" + module_code: + - "Attribute VB_Name = \"ThisDocument\"\r\nAttribute VB_Base = \"1Normal.ThisDocument\"\r\nAttribute VB_GlobalNameSpace = False\r\nAttribute VB_Creatable = False\r\nAttribute VB_PredeclaredId = True\r\nAttribute VB_Exposed = True\r\nAttribute VB_TemplateDerived = True\r\nAttribute VB_Customizable = True\r\n\r\nPrivate Sub Document_New()\r\n MsgBox \"Hello, world!\"\r\nEnd Sub\r\n" + project_info: + name: "Project" + version: "1769106437.10" + references: + - "stdole" + - "Normal" + - "Office" + module_count: 1 + is_compressed: true \ No newline at end of file diff --git a/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.zip b/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.zip new file mode 100644 index 00000000..d12d5ee9 Binary files /dev/null and b/lib/src/modules/vba/tests/testdata/643d1e3b68c1e31aef5779eb28ac3b0aaa284c91c47c26cfc2dbb3bc7f569103.zip differ diff --git a/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out b/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out new file mode 100644 index 00000000..a1340cbc --- /dev/null +++ b/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.out @@ -0,0 +1,2 @@ +vba: + has_macros: false \ No newline at end of file diff --git a/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip b/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip new file mode 100644 index 00000000..2236f51f Binary files /dev/null and 
b/lib/src/modules/vba/tests/testdata/8de0e0bba84e2f80c2e2b58b66224f0d3a780f44fbb04fcf7caae34b973eb766.zip differ diff --git a/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.out b/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.out new file mode 100644 index 00000000..408dca38 --- /dev/null +++ b/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.out @@ -0,0 +1,17 @@ +vba: + has_macros: true + module_names: + - "ThisDocument" + module_types: + - "Class" + module_code: + - "Attribute VB_Name = \"ThisDocument\"\r\nAttribute VB_Base = \"1Normal.ThisDocument\"\r\nAttribute VB_GlobalNameSpace = False\r\nAttribute VB_Creatable = False\r\nAttribute VB_PredeclaredId = True\r\nAttribute VB_Exposed = True\r\nAttribute VB_TemplateDerived = True\r\nAttribute VB_Customizable = True\r\n\r\nPrivate Sub Document_New()\r\n MsgBox \"Hello, world!\"\r\nEnd Sub\r\n" + project_info: + name: "Project" + version: "1769106437.10" + references: + - "stdole" + - "Normal" + - "Office" + module_count: 1 + is_compressed: true \ No newline at end of file diff --git a/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.zip b/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.zip new file mode 100644 index 00000000..0d146833 Binary files /dev/null and b/lib/src/modules/vba/tests/testdata/c62c12501055319db152f092e263f65da037c4a6f7ec0112832b95916ac8a1fb.zip differ