Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement dylib parsing #43

Merged
merged 9 commits into from
Oct 7, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rustfmt.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
max_width = 79
use_small_heuristics = "max"
use_small_heuristics = "Max"
comment_width = 79
wrap_comments = true
186 changes: 185 additions & 1 deletion yara-x/src/modules/macho/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@ use log::*;

use arrayref::array_ref;
use byteorder::{BigEndian, ByteOrder};
use nom::{bytes::complete::take, multi::count, number::complete::*, IResult};
use nom::{
bytes::complete::{tag, take, take_till},
combinator::map_res,
multi::count,
number::complete::*,
sequence::tuple,
IResult,
};
use thiserror::Error;

/// Mach-O file needs to have at least header of size 28 to be considered
Expand Down Expand Up @@ -50,7 +57,11 @@ const CPU_TYPE_POWERPC64: u32 = 0x01000012;
/// Define Mach-O load commands
const LC_SEGMENT: u32 = 0x00000001;
const LC_UNIXTHREAD: u32 = 0x00000005;
const LC_LOAD_DYLIB: u32 = 0x0000000c;
const LC_ID_DYLIB: u32 = 0x0000000d;
const LC_LOAD_WEAK_DYLIB: u32 = 0x80000018;
const LC_SEGMENT_64: u32 = 0x00000019;
const LC_REEXPORT_DYLIB: u32 = 0x8000001f;
const LC_MAIN: u32 = 0x80000028;

/// Enum that provides strongly-typed error system used in code
Expand Down Expand Up @@ -153,6 +164,27 @@ struct LoadCommand {
cmdsize: u32,
}

/// `DylibObject`: Represents a dylib struct in the Mach-O file.
/// Fields: name, timestamp, current_version, compatibility_version
///
/// This is the in-memory form produced by `parse_dylib`. The record in the
/// file starts with a 32-bit offset that `parse_dylib` reads and discards, so
/// that value has no field here.
///
/// NOTE: `name` is a heap-backed `Vec<u8>`, so `std::mem::size_of` of this
/// type describes the Rust layout and not the size of the record in the file.
#[repr(C)]
#[derive(Debug, Default, Clone)]
struct DylibObject {
// Bytes of the library name, without the terminating NUL byte.
name: Vec<u8>,
// 32-bit timestamp value, stored exactly as read from the file.
timestamp: u32,
// 32-bit current version value, stored exactly as read from the file.
current_version: u32,
// 32-bit compatibility version value, stored exactly as read from the file.
compatibility_version: u32,
}

/// `DylibCommand`: Represents a dylib command in the Mach-O file.
/// Fields: cmd, cmdsize, dylib
///
/// This is the in-memory form produced by `parse_dylib_command`, which reads
/// two 32-bit values (`cmd`, `cmdsize`) and then delegates to `parse_dylib`
/// for the embedded record.
///
/// NOTE: because `dylib` contains a `Vec<u8>`, `std::mem::size_of` of this
/// type describes the Rust layout and not the size of the command in the
/// file.
#[repr(C)]
#[derive(Debug, Default, Clone)]
struct DylibCommand {
// First 32-bit value of the command (the load command type).
cmd: u32,
// Second 32-bit value of the command: its size.
// NOTE(review): presumably in bytes and including the name - confirm.
cmdsize: u32,
// The dylib record that follows the two values above.
dylib: DylibObject,
}

/// `SegmentCommand32`: Represents a 32-bit segment command in the Mach-O file.
/// Fields: cmd, cmdsize, segname, vmaddr, vmsize, fileoff, filesize, maxprot,
/// initprot, nsects, flags
Expand Down Expand Up @@ -609,6 +641,31 @@ fn swap_load_command(command: &mut LoadCommand) {
command.cmdsize = BigEndian::read_u32(&command.cmdsize.to_le_bytes());
}

/// Swaps the endianness of fields within a Mach-O dylib from BigEndian
/// to LittleEndian in-place.
///
/// # Arguments
///
/// * `dylib`: A mutable reference to the Mach-O dylib.
fn swap_dylib(dylib: &mut DylibObject) {
dylib.timestamp = BigEndian::read_u32(&dylib.timestamp.to_le_bytes());
dylib.compatibility_version =
BigEndian::read_u32(&dylib.compatibility_version.to_le_bytes());
dylib.current_version =
BigEndian::read_u32(&dylib.current_version.to_le_bytes());
}

/// Swaps the endianness of fields within a Mach-O dylib command from
/// BigEndian to LittleEndian in-place.
///
/// # Arguments
///
/// * `command`: A mutable reference to the Mach-O dylib command.
fn swap_dylib_command(command: &mut DylibCommand) {
command.cmd = BigEndian::read_u32(&command.cmd.to_le_bytes());
command.cmdsize = BigEndian::read_u32(&command.cmdsize.to_le_bytes());
}

/// Swaps the endianness of fields within a 32-bit Mach-O segment command from
/// BigEndian to LittleEndian in-place.
///
Expand Down Expand Up @@ -839,6 +896,67 @@ fn parse_load_command(input: &[u8]) -> IResult<&[u8], LoadCommand> {
Ok((input, LoadCommand { cmd, cmdsize }))
}

/// Parse a Mach-O Dylib object, transforming raw bytes into a structured
/// format.
///
/// # Arguments
///
/// * `input`: A slice of bytes containing the raw dylib object data.
///
/// # Returns
///
/// A `nom` IResult containing the remaining unparsed input and the parsed
/// dylib structure, or a `nom` error if the parsing fails.
///
/// # Errors
///
/// Returns a `nom` error if the input data is insufficient or malformed.
fn parse_dylib(input: &[u8]) -> IResult<&[u8], DylibObject> {
latonis marked this conversation as resolved.
Show resolved Hide resolved
// offset but we don't need it
let (input, _) = le_u32(input)?;
let (input, timestamp) = le_u32(input)?;
let (input, current_version) = le_u32(input)?;
let (input, compatibility_version) = le_u32(input)?;

let (input, name) = map_res(
tuple((take_till(|b| b == b'\x00'), tag(b"\x00"))),
|(s, _)| std::str::from_utf8(s),
)(input)?;

Ok((
input,
DylibObject {
name: name.into(),
timestamp,
compatibility_version,
current_version,
},
))
}

/// Parse a Mach-O DylibCommand, transforming raw bytes into a structured
/// format.
///
/// # Arguments
///
/// * `input`: A slice of bytes containing the raw DylibCommand data.
///
/// # Returns
///
/// A `nom` IResult containing the remaining unparsed input and the parsed
/// DylibCommand structure, or a `nom` error if the parsing fails.
///
/// # Errors
///
/// Returns a `nom` error if the input data is insufficient or malformed.
fn parse_dylib_command(input: &[u8]) -> IResult<&[u8], DylibCommand> {
latonis marked this conversation as resolved.
Show resolved Hide resolved
let (input, cmd) = le_u32(input)?;
let (input, cmdsize) = le_u32(input)?;
let (input, dylib) = parse_dylib(input)?;

Ok((input, DylibCommand { cmd, cmdsize, dylib }))
}

/// Parse the 32-bit segment command of a Mach-O file, offering a structured
/// view of its content.
///
Expand Down Expand Up @@ -1385,6 +1503,67 @@ fn parse_ppc_thread_state64(input: &[u8]) -> IResult<&[u8], PPCThreadState64> {
Ok((input, PPCThreadState64 { srr0, srr1, r, cr, xer, lr, ctr, vrsave }))
}

/// Handles the LC_LOAD_DYLIB, LC_ID_DYLIB, LC_LOAD_WEAK_DYLIB, and
/// LC_REEXPORT_DYLIB commands for Mach-O files, parsing the data
/// and populating a protobuf representation of the dylib.
///
/// # Arguments
///
/// * `command_data`: The raw byte data of the dylib command.
/// * `size`: The size of the dylib command data.
/// * `macho_file`: Mutable reference to the protobuf representation of the
/// Mach-O file.
///
/// # Returns
///
/// Returns a `Result<(), MachoError>` indicating the success or failure of the
/// operation.
///
/// # Errors
///
/// * `MachoError::FileSectionTooSmall`: Returned when the segment size is
/// smaller than the expected DylibCommand struct size.
/// * `MachoError::ParsingError`: Returned when there is an error parsing the
/// dylib command data.
/// * `MachoError::MissingHeaderValue`: Returned when the "magic" header value
/// is missing, needed for determining if bytes should be swapped.
fn handle_dylib_command(
command_data: &[u8],
size: usize,
macho_file: &mut File,
) -> Result<(), MachoError> {
if size < std::mem::size_of::<DylibCommand>() {
return Err(MachoError::FileSectionTooSmall(
"DylibCommand".to_string(),
));
}

let (_, mut dy) = parse_dylib_command(command_data)
.map_err(|e| MachoError::ParsingError(format!("{:?}", e)))?;
if should_swap_bytes(
macho_file
.magic
.ok_or(MachoError::MissingHeaderValue("magic".to_string()))?,
) {
swap_dylib_command(&mut dy);
swap_dylib(&mut dy.dylib);
}

let dylib = Dylib {
name: Some(
std::str::from_utf8(&dy.dylib.name)
.unwrap_or_default()
.to_string(),
),
timestamp: Some(dy.dylib.timestamp),
compatibility_version: Some(dy.dylib.compatibility_version),
current_version: Some(dy.dylib.current_version),
..Default::default()
};
macho_file.dylibs.push(dylib);
Ok(())
}

/// Handles the LC_SEGMENT command for 32-bit Mach-O files, parsing the data
/// and populating a protobuf representation of the segment and its associated
/// file sections.
Expand Down Expand Up @@ -1912,6 +2091,10 @@ fn handle_command(
LC_MAIN => {
handle_main(command_data, cmdsize, macho_file)?;
}
LC_LOAD_DYLIB | LC_ID_DYLIB | LC_LOAD_WEAK_DYLIB
| LC_REEXPORT_DYLIB => {
handle_dylib_command(command_data, cmdsize, macho_file)?;
}
_ => {}
}
}
Expand Down Expand Up @@ -2421,6 +2604,7 @@ fn main(ctx: &ScanContext) -> Macho {
macho_proto.reserved = file_data.reserved;
macho_proto.number_of_segments = file_data.number_of_segments;
macho_proto.segments = file_data.segments;
macho_proto.dylibs = file_data.dylibs;
macho_proto.entry_point = file_data.entry_point;
macho_proto.stack_size = file_data.stack_size;
}
Expand Down
30 changes: 30 additions & 0 deletions yara-x/src/modules/macho/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,36 @@ fn test_swap_load_command() {
assert_eq!(command.cmdsize, 0x88776655);
}

/// `swap_dylib` must reverse the bytes of each numeric field of the dylib.
#[test]
fn test_swap_dylib() {
    let mut dylib = DylibObject {
        timestamp: 0x11223344,
        compatibility_version: 0x55667788,
        current_version: 0x99AABBCC,
        ..Default::default()
    };

    swap_dylib(&mut dylib);

    let swapped = (
        dylib.timestamp,
        dylib.compatibility_version,
        dylib.current_version,
    );
    assert_eq!(swapped, (0x44332211, 0x88776655, 0xCCBBAA99));
}

/// `swap_dylib_command` must reverse the bytes of `cmd` and `cmdsize`.
#[test]
fn test_swap_dylib_command() {
    let mut dylib_command = DylibCommand {
        cmd: 0x11223344,
        cmdsize: 0x55667788,
        ..Default::default()
    };

    swap_dylib_command(&mut dylib_command);

    let swapped = (dylib_command.cmd, dylib_command.cmdsize);
    assert_eq!(swapped, (0x44332211, 0x88776655));
}

#[test]
fn test_swap_segment_command() {
let mut segment = SegmentCommand32 {
Expand Down
24 changes: 24 additions & 0 deletions yara-x/src/modules/macho/tests/output/macho_ppc_file.out
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,30 @@ Macho {
},
},
],
dylibs: [
Dylib {
name: Some(
"/usr/lib/libSystem.B.dylib",
),
timestamp: Some(
1111112572,
),
compatibility_version: Some(
65536,
),
current_version: Some(
4653313,
),
special_fields: SpecialFields {
unknown_fields: UnknownFields {
fields: None,
},
cached_size: CachedSize {
size: 0,
},
},
},
],
entry_point: Some(
3768,
),
Expand Down
46 changes: 46 additions & 0 deletions yara-x/src/modules/macho/tests/output/macho_x86_64_dylib_file.out
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,52 @@ Macho {
},
},
],
dylibs: [
Dylib {
name: Some(
"fact_x86_64.dylib",
),
timestamp: Some(
1,
),
compatibility_version: Some(
0,
),
current_version: Some(
0,
),
special_fields: SpecialFields {
unknown_fields: UnknownFields {
fields: None,
},
cached_size: CachedSize {
size: 0,
},
},
},
Dylib {
name: Some(
"/usr/lib/libSystem.B.dylib",
),
timestamp: Some(
2,
),
compatibility_version: Some(
65536,
),
current_version: Some(
79495168,
),
special_fields: SpecialFields {
unknown_fields: UnknownFields {
fields: None,
},
cached_size: CachedSize {
size: 0,
},
},
},
],
entry_point: None,
stack_size: None,
fat_magic: None,
Expand Down
Loading