Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement dylib parsing #43

Merged
merged 9 commits into from
Oct 7, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rustfmt.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
max_width = 79
use_small_heuristics = "max"
use_small_heuristics = "Max"
comment_width = 79
wrap_comments = true
156 changes: 155 additions & 1 deletion yara-x/src/modules/macho/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@ use log::*;

use arrayref::array_ref;
use byteorder::{BigEndian, ByteOrder};
use nom::{bytes::complete::take, multi::count, number::complete::*, IResult};
use nom::{
bytes::complete::{tag, take, take_till},
combinator::map_res,
multi::count,
number::complete::*,
sequence::tuple,
IResult,
};
use thiserror::Error;

/// Mach-O file needs to have at least header of size 28 to be considered
Expand Down Expand Up @@ -50,7 +57,11 @@ const CPU_TYPE_POWERPC64: u32 = 0x01000012;
/// Define Mach-O load commands
// These are the numeric identifiers compared against a load command's `cmd`
// value when deciding which handler to run.
// LC_LOAD_DYLIB, LC_ID_DYLIB, LC_LOAD_WEAK_DYLIB and LC_REEXPORT_DYLIB are
// all dispatched to `handle_dylib_command`; LC_MAIN goes to `handle_main`.
const LC_SEGMENT: u32 = 0x00000001;
const LC_UNIXTHREAD: u32 = 0x00000005;
const LC_LOAD_DYLIB: u32 = 0x0000000c;
const LC_ID_DYLIB: u32 = 0x0000000d;
const LC_LOAD_WEAK_DYLIB: u32 = 0x80000018;
const LC_SEGMENT_64: u32 = 0x00000019;
const LC_REEXPORT_DYLIB: u32 = 0x8000001f;
const LC_MAIN: u32 = 0x80000028;

/// Enum that provides strongly-typed error system used in code
Expand Down Expand Up @@ -153,6 +164,27 @@ struct LoadCommand {
cmdsize: u32,
}

/// `DylibObject`: Represents a dylib struct in the Mach-O file.
/// Fields: name, timestamp, current_version, compatibility_version
///
/// Instances are produced by `parse_dylib`, which decodes the numeric fields
/// as little-endian; `swap_dylib` reverses their byte order afterwards when
/// the file requires it.
///
/// NOTE(review): because `name` is a heap-backed `Vec<u8>`,
/// `std::mem::size_of::<DylibObject>()` is the in-memory size of this Rust
/// struct and is unrelated to the number of bytes the record occupies in the
/// file.
#[repr(C)]
#[derive(Debug, Default, Clone)]
struct DylibObject {
// Bytes of the library name, excluding the terminating NUL byte.
name: Vec<u8>,
// Second 32-bit word of the record (the first word, an offset, is skipped).
timestamp: u32,
// Third 32-bit word of the record.
current_version: u32,
// Fourth 32-bit word of the record.
compatibility_version: u32,
}

/// `DylibCommand`: Represents a dylib command in the Mach-O file.
/// Fields: cmd, cmdsize, dylib
///
/// Built by `parse_dylib_command`: two 32-bit words (`cmd`, `cmdsize`)
/// followed by the embedded `DylibObject`.
///
/// NOTE(review): the nested `DylibObject` owns a `Vec<u8>`, so
/// `std::mem::size_of::<DylibCommand>()` does not describe the on-disk size
/// of the command.
#[repr(C)]
#[derive(Debug, Default, Clone)]
struct DylibCommand {
// Load command identifier (first 32-bit word of the command).
cmd: u32,
// Size value stored in the second 32-bit word of the command.
cmdsize: u32,
// The dylib record that follows `cmd` and `cmdsize`.
dylib: DylibObject,
}

/// `SegmentCommand32`: Represents a 32-bit segment command in the Mach-O file.
/// Fields: cmd, cmdsize, segname, vmaddr, vmsize, fileoff, filesize, maxprot,
/// initprot, nsects, flags
Expand Down Expand Up @@ -609,6 +641,31 @@ fn swap_load_command(command: &mut LoadCommand) {
command.cmdsize = BigEndian::read_u32(&command.cmdsize.to_le_bytes());
}

/// Swaps the endianness of fields within a Mach-O dylib from BigEndian
/// to LittleEndian in-place.
///
/// # Arguments
///
/// * `dylib`: A mutable reference to the Mach-O dylib.
fn swap_dylib(dylib: &mut DylibObject) {
dylib.timestamp = BigEndian::read_u32(&dylib.timestamp.to_le_bytes());
dylib.compatibility_version =
BigEndian::read_u32(&dylib.compatibility_version.to_le_bytes());
dylib.current_version =
BigEndian::read_u32(&dylib.current_version.to_le_bytes());
}

/// Swaps the endianness of fields within a Mach-O dylib command from
/// BigEndian to LittleEndian in-place.
///
/// # Arguments
///
/// * `command`: A mutable reference to the Mach-O dylib command.
fn swap_dylib_command(command: &mut DylibCommand) {
command.cmd = BigEndian::read_u32(&command.cmd.to_le_bytes());
command.cmdsize = BigEndian::read_u32(&command.cmdsize.to_le_bytes());
}

/// Swaps the endianness of fields within a 32-bit Mach-O segment command from
/// BigEndian to LittleEndian in-place.
///
Expand Down Expand Up @@ -839,6 +896,37 @@ fn parse_load_command(input: &[u8]) -> IResult<&[u8], LoadCommand> {
Ok((input, LoadCommand { cmd, cmdsize }))
}

fn parse_dylib(input: &[u8]) -> IResult<&[u8], DylibObject> {
latonis marked this conversation as resolved.
Show resolved Hide resolved
// offset but we don't need it
let (input, _) = le_u32(input)?;
let (input, timestamp) = le_u32(input)?;
let (input, current_version) = le_u32(input)?;
let (input, compatibility_version) = le_u32(input)?;

let (input, name) = map_res(
tuple((take_till(|b| b == b'\x00'), tag(b"\x00"))),
|(s, _)| std::str::from_utf8(s),
)(input)?;

Ok((
input,
DylibObject {
name: name.into(),
timestamp,
compatibility_version,
current_version,
},
))
}

fn parse_dylib_command(input: &[u8]) -> IResult<&[u8], DylibCommand> {
latonis marked this conversation as resolved.
Show resolved Hide resolved
let (input, cmd) = le_u32(input)?;
let (input, cmdsize) = le_u32(input)?;
let (input, dylib) = parse_dylib(input)?;

Ok((input, DylibCommand { cmd, cmdsize, dylib }))
}

/// Parse the 32-bit segment command of a Mach-O file, offering a structured
/// view of its content.
///
Expand Down Expand Up @@ -1385,6 +1473,67 @@ fn parse_ppc_thread_state64(input: &[u8]) -> IResult<&[u8], PPCThreadState64> {
Ok((input, PPCThreadState64 { srr0, srr1, r, cr, xer, lr, ctr, vrsave }))
}

/// Handles the LC_LOAD_DYLIB, LC_ID_DYLIB, LC_LOAD_WEAK_DYLIB, and
/// LC_REEXPORT_DYLIB commands for Mach-O files, parsing the data
/// and populating a protobuf representation of the dylib.
///
/// # Arguments
///
/// * `command_data`: The raw byte data of the dylib command.
/// * `size`: The size of the dylib command data.
/// * `macho_file`: Mutable reference to the protobuf representation of the
///   Mach-O file.
///
/// # Returns
///
/// Returns a `Result<(), MachoError>` indicating the success or failure of the
/// operation. On success one `Dylib` entry has been appended to
/// `macho_file.dylibs`.
///
/// # Errors
///
/// * `MachoError::FileSectionTooSmall`: Returned when `size` is smaller than
///   the fixed-size part of a dylib command as it is stored in the file
///   (six 32-bit words, 24 bytes).
/// * `MachoError::ParsingError`: Returned when there is an error parsing the
///   dylib command data.
/// * `MachoError::MissingHeaderValue`: Returned when the "magic" header value
///   is missing, needed for determining if bytes should be swapped.
fn handle_dylib_command(
    command_data: &[u8],
    size: usize,
    macho_file: &mut File,
) -> Result<(), MachoError> {
    // Bytes the parsers read before the name: `cmd`, `cmdsize`, the name
    // offset, `timestamp`, `current_version` and `compatibility_version`.
    //
    // `size_of::<DylibCommand>()` must not be used for this check: the
    // struct embeds a `Vec<u8>`, so its in-memory size is platform dependent
    // and larger than the on-disk header, which made valid commands with
    // short library names fail with `FileSectionTooSmall`.
    const DYLIB_COMMAND_FIXED_SIZE: usize = 6 * std::mem::size_of::<u32>();

    if size < DYLIB_COMMAND_FIXED_SIZE {
        return Err(MachoError::FileSectionTooSmall(
            "DylibCommand".to_string(),
        ));
    }

    let (_, mut dy) = parse_dylib_command(command_data)
        .map_err(|e| MachoError::ParsingError(format!("{:?}", e)))?;

    // Build the error lazily so the common path does not allocate.
    let magic = macho_file
        .magic
        .ok_or_else(|| MachoError::MissingHeaderValue("magic".to_string()))?;

    // The parsers always decode little-endian; fix the values up when the
    // file's magic says the opposite byte order is in use.
    if should_swap_bytes(magic) {
        swap_dylib_command(&mut dy);
        swap_dylib(&mut dy.dylib);
    }

    let dylib = Dylib {
        name: Some(
            std::str::from_utf8(&dy.dylib.name)
                .unwrap_or_default()
                .to_string(),
        ),
        timestamp: Some(dy.dylib.timestamp),
        compatibility_version: Some(dy.dylib.compatibility_version),
        current_version: Some(dy.dylib.current_version),
        ..Default::default()
    };
    macho_file.dylibs.push(dylib);
    Ok(())
}

/// Handles the LC_SEGMENT command for 32-bit Mach-O files, parsing the data
/// and populating a protobuf representation of the segment and its associated
/// file sections.
Expand Down Expand Up @@ -1902,6 +2051,10 @@ fn handle_command(
LC_MAIN => {
handle_main(command_data, cmdsize, macho_file)?;
}
LC_LOAD_DYLIB | LC_ID_DYLIB | LC_LOAD_WEAK_DYLIB
| LC_REEXPORT_DYLIB => {
handle_dylib_command(command_data, cmdsize, macho_file)?;
}
_ => {}
}
}
Expand Down Expand Up @@ -2411,6 +2564,7 @@ fn main(ctx: &ScanContext) -> Macho {
macho_proto.reserved = file_data.reserved;
macho_proto.number_of_segments = file_data.number_of_segments;
macho_proto.segments = file_data.segments;
macho_proto.dylibs = file_data.dylibs;
macho_proto.entry_point = file_data.entry_point;
macho_proto.stack_size = file_data.stack_size;
}
Expand Down
30 changes: 30 additions & 0 deletions yara-x/src/modules/macho/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,36 @@ fn test_swap_load_command() {
assert_eq!(command.cmdsize, 0x88776655);
}

/// `swap_dylib` must reverse the four bytes of each numeric field.
#[test]
fn test_swap_dylib() {
    let mut dylib = DylibObject {
        timestamp: 0x11223344,
        compatibility_version: 0x55667788,
        current_version: 0x99AABBCC,
        ..Default::default()
    };

    swap_dylib(&mut dylib);

    let swapped = (
        dylib.timestamp,
        dylib.compatibility_version,
        dylib.current_version,
    );
    assert_eq!(swapped, (0x44332211, 0x88776655, 0xCCBBAA99));
}

/// `swap_dylib_command` must reverse the bytes of `cmd` and `cmdsize`.
#[test]
fn test_swap_dylib_command() {
    let mut dylib_command = DylibCommand {
        cmd: 0x11223344,
        cmdsize: 0x55667788,
        ..Default::default()
    };

    swap_dylib_command(&mut dylib_command);

    let swapped = (dylib_command.cmd, dylib_command.cmdsize);
    assert_eq!(swapped, (0x44332211, 0x88776655));
}

#[test]
fn test_swap_segment_command() {
let mut segment = SegmentCommand32 {
Expand Down
16 changes: 8 additions & 8 deletions yara-x/src/modules/modules.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
// File generated automatically by build.rs. Do not edit.
#[cfg(feature = "string-module")]
pub mod string;
#[cfg(feature = "macho-module")]
pub mod macho;
#[cfg(feature = "text-module")]
pub mod text;
#[cfg(feature = "hash-module")]
pub mod hash;
#[cfg(feature = "test_proto2-module")]
pub mod test_proto2;
#[cfg(feature = "hash-module")]
pub mod hash;
#[cfg(feature = "test_proto3-module")]
pub mod test_proto3;
#[cfg(feature = "macho-module")]
pub mod macho;
#[cfg(feature = "time-module")]
pub mod time;
#[cfg(feature = "test_proto3-module")]
pub mod test_proto3;
#[cfg(feature = "string-module")]
pub mod string;
25 changes: 17 additions & 8 deletions yara-x/src/modules/protos/macho.proto
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ option (yara.module_options) = {
rust_module: "macho"
};

// One dynamic-library load command found in a Mach-O file. The Rust module
// appends one entry per LC_LOAD_DYLIB, LC_ID_DYLIB, LC_LOAD_WEAK_DYLIB or
// LC_REEXPORT_DYLIB command it handles.
message Dylib {
// Library name taken from the command, decoded as UTF-8.
optional string name = 1;
// Raw 32-bit timestamp value stored in the command.
optional uint32 timestamp = 2;
// Raw 32-bit compatibility version value stored in the command.
optional uint32 compatibility_version = 3;
// Raw 32-bit current version value stored in the command.
optional uint32 current_version = 4;
}

message Section {
optional string segname = 1;
optional string sectname = 2;
Expand Down Expand Up @@ -58,8 +65,9 @@ message File {
optional uint32 reserved = 8;
optional uint64 number_of_segments = 9;
repeated Segment segments = 10;
optional uint64 entry_point = 11;
optional uint64 stack_size = 12;
repeated Dylib dylibs = 11;
optional uint64 entry_point = 12;
optional uint64 stack_size = 13;
}

message Macho {
Expand All @@ -74,16 +82,17 @@ message Macho {
optional uint32 reserved = 8;
optional uint64 number_of_segments = 9;
repeated Segment segments = 10;
optional uint64 entry_point = 11;
optional uint64 stack_size = 12;
repeated Dylib dylibs = 11;
optional uint64 entry_point = 12;
optional uint64 stack_size = 13;

// Add fields for Mach-O fat binary header
optional uint32 fat_magic = 13;
optional uint32 nfat_arch = 14;
repeated FatArch fat_arch = 15;
optional uint32 fat_magic = 14;
optional uint32 nfat_arch = 15;
repeated FatArch fat_arch = 16;

// Nested Mach-O files
repeated File file = 16;
repeated File file = 17;
}

enum HEADER {
Expand Down
Loading