diff --git a/Cargo.lock b/Cargo.lock index 81af2cb..c16870b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,7 +197,7 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "libsla" -version = "1.1.0" +version = "1.2.0" dependencies = [ "flate2", "libsla-sys", diff --git a/Cargo.toml b/Cargo.toml index bb4bb61..3c65279 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "libsla" description = "Rust bindings to Ghidra Sleigh library libsla" -version = "1.1.0" +version = "1.2.0" edition = "2024" license = "Apache-2.0" repository = "https://github.com/mnemonikr/libsla" diff --git a/README.md b/README.md index 37b99b9..72a43b9 100644 --- a/README.md +++ b/README.md @@ -29,20 +29,56 @@ using `make sleigh_opt`. # Example +Disassemble bytes into native assembly instructions. + ```rust -// Build Sleigh with configuration files from sleigh-config crate let sleigh = GhidraSleigh::builder() .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)? .build(sleigh_config::processor_x86::SLA_X86_64)?; -// The instruction reader is defined by the user and implements the InstructionLoader trait. -let instruction_reader = InstructionReader::new(); +// PUSH RBP instruction is the byte 0x55. +let instructions = InstructionBytes::new(vec![0x55]); -// Instruction to decode from the reader. -let instruction_offset = 0x800000; +// InstructionBytes is a simple byte loader that does not model multiple address spaces. +// However an address space is required, so for simplicity use the default code space. let address_space = sleigh.default_code_space(); -let instruction_address = Address::new(instruction_offset, address_space); -// Disassemble! -let pcode_disassembly = sleigh.disassemble_pcode(&instruction_reader, instruction_address)?; +// Start disassembly from the first byte (index 0) +let instruction_address = Address::new(address_space, 0); + +// Confirming this is indeed PUSH RBP. 
+let native_disassembly = sleigh.disassemble_native(&instructions, instruction_address)?; +assert_eq!(native_disassembly.instruction.mnemonic, "PUSH"); +assert_eq!(native_disassembly.instruction.body, "RBP"); +``` + +Disassemble bytes into pcode instructions. Pcode instructions can be used for program modeling. + +```rust +let sleigh = GhidraSleigh::builder() + .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)? + .build(sleigh_config::processor_x86::SLA_X86_64)?; + +// PUSH RBP +let instructions = InstructionBytes::new(vec![0x55]); +let instruction_address = Address::new(sleigh.default_code_space(), 0); +let pcode_disassembly = sleigh.disassemble_pcode(&instructions, instruction_address)?; +let pcode_instructions = pcode_disassembly.instructions; + +assert_eq!(pcode_instructions.len(), 3, "There should be 3 pcode instructions"); + +// Copy RBP into a temporary +let copy_destination = pcode_instructions[0].output.as_ref().unwrap(); +assert_eq!(pcode_instructions[0].op_code, OpCode::Copy); +assert_eq!(sleigh.register_name(&pcode_instructions[0].inputs[0]).unwrap(), "RBP"); + +// Subtract 8 bytes from RSP +assert_eq!(pcode_instructions[1].op_code, OpCode::Int(IntOp::Subtract)); +assert_eq!(sleigh.register_name(&pcode_instructions[1].inputs[0]).unwrap(), "RSP"); +assert_eq!(pcode_instructions[1].inputs[1].address.offset, 8); + +// Store temporary (RBP) into memory address pointed to by RSP +assert_eq!(pcode_instructions[2].op_code, OpCode::Store); +assert_eq!(sleigh.register_name(&pcode_instructions[2].inputs[1]).unwrap(), "RSP"); +assert_eq!(&pcode_instructions[2].inputs[2], copy_destination); ``` diff --git a/src/lib.rs b/src/lib.rs index 904c6ec..9e2812c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,83 @@ //! This crate includes Rust bindings to the //! [Ghidra](https://github.com/NationalSecurityAgency/ghidra) SLEIGH library libsla for translating -//! native code to p-code. +//! native code to p-code. 
This allows binary analysis programs to model p-code instead of needing +//! to model each processor architecture separately. //! -//! SLEIGH is a processor specification language developed for the Ghidra used to describe -//! microprocessors with enough detail to facilitate disassembly and decompilation. The -//! processor-specific instructions are translated to **p-code**, which captures the instruction -//! semantics independent of the specific processor. The details on how to perform this translation -//! are captured by the compiled SLEIGH specification for the processor. +//! ## Examples +//! +//! ### Native Disassembly +//! +//! This example disassembles the `PUSH RBP` x86-64 instruction (byte `0x55`). +//! +//! ``` +//! # use crate::libsla::*; +//! # use sleigh_config; +//! # fn main() -> crate::libsla::Result<()> { +//! let sleigh = GhidraSleigh::builder() +//! .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)? +//! .build(sleigh_config::processor_x86::SLA_X86_64)?; +//! +//! // PUSH RBP instruction is the byte 0x55. +//! let instructions = InstructionBytes::new(vec![0x55]); +//! +//! // InstructionBytes is a simple byte loader that does not model multiple address spaces. +//! // However an address space is required, so for simplicity use the default code space. +//! let address_space = sleigh.default_code_space(); +//! +//! // Start disassembly from the first byte (index 0) +//! let instruction_address = Address::new(address_space, 0); +//! +//! // Confirming this is indeed PUSH RBP. +//! let native_disassembly = sleigh.disassemble_native(&instructions, instruction_address)?; +//! assert_eq!(native_disassembly.instruction.mnemonic, "PUSH"); +//! assert_eq!(native_disassembly.instruction.body, "RBP"); +//! # Ok(()) +//! # } +//! ``` +//! ### Pcode Disassembly +//! +//! This example disassembles the `PUSH RBP` x86-64 instruction (`0x55`) into pcode. The pcode for +//! this instruction is +//! +//! 1. `COPY temp <- RBP` +//! 2. 
`SUBTRACT RSP <- RSP 0x8` +//! 3. `STORE [RSP] <- temp` +//! +//! +//! ``` +//! # use crate::libsla::*; +//! # use sleigh_config; +//! # fn main() -> crate::libsla::Result<()> { +//! let sleigh = GhidraSleigh::builder() +//! .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)? +//! .build(sleigh_config::processor_x86::SLA_X86_64)?; +//! +//! // PUSH RBP +//! let instructions = InstructionBytes::new(vec![0x55]); +//! let instruction_address = Address::new(sleigh.default_code_space(), 0); +//! let pcode_disassembly = sleigh.disassemble_pcode(&instructions, instruction_address)?; +//! let pcode_instructions = pcode_disassembly.instructions; +//! +//! assert_eq!(pcode_instructions.len(), 3, "There should be 3 pcode instructions"); +//! +//! // Copy RBP into a temporary +//! let copy_destination = pcode_instructions[0].output.as_ref().unwrap(); +//! assert_eq!(pcode_instructions[0].op_code, OpCode::Copy); +//! assert_eq!(sleigh.register_name(&pcode_instructions[0].inputs[0]).unwrap(), "RBP"); +//! +//! // Subtract 8 bytes from RSP +//! assert_eq!(pcode_instructions[1].op_code, OpCode::Int(IntOp::Subtract)); +//! assert_eq!(sleigh.register_name(&pcode_instructions[1].inputs[0]).unwrap(), "RSP"); +//! assert_eq!(pcode_instructions[1].inputs[1].address.offset, 8); +//! +//! // Store temporary (RBP) into memory address pointed to by RSP +//! assert_eq!(pcode_instructions[2].op_code, OpCode::Store); +//! assert_eq!(sleigh.register_name(&pcode_instructions[2].inputs[1]).unwrap(), "RSP"); +//! assert_eq!(&pcode_instructions[2].inputs[2], copy_destination); +//! +//! # Ok(()) +//! # } +//! ``` mod opcodes; mod sleigh; diff --git a/src/sleigh.rs b/src/sleigh.rs index 50df4c2..5b7fe6b 100644 --- a/src/sleigh.rs +++ b/src/sleigh.rs @@ -494,6 +494,43 @@ impl api::PcodeEmit for PcodeDisassemblyOutput { } } +/// A sequence of instruction bytes which can be used by Sleigh for disassembly. 
+pub struct InstructionBytes(Vec); + +impl InstructionBytes { + /// Create a new instance for the provided sequence of instruction bytes + pub fn new(bytes: Vec) -> Self { + Self(bytes) + } +} + +impl FromIterator for InstructionBytes { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl InstructionLoader for InstructionBytes { + fn load_instruction_bytes(&self, data: &VarnodeData) -> std::result::Result, String> { + let start = usize::try_from(data.address.offset) + .map_err(|err| format!("offset should convert to usize: {err:?}"))?; + if start >= self.0.len() { + return Err(format!( + "Offset {start} exceeds count of instruction bytes {len}", + len = self.0.len() + )); + } + + // Do not overflow + let end = start.saturating_add(data.size); + + // Do not exceed the capacity of the instruction byte vec + let end = usize::min(end, self.0.len()); + + Ok(self.0[start..end].to_vec()) + } +} + /// Wrapper around the public load image API so that it can be converted to the native API. /// This is required in order to pass a trait object reference down into the native API. 
struct InstructionLoaderWrapper<'a>(&'a dyn InstructionLoader); diff --git a/src/tests/sleigh.rs b/src/tests/sleigh.rs index a0ce0ed..f31b9b1 100644 --- a/src/tests/sleigh.rs +++ b/src/tests/sleigh.rs @@ -10,21 +10,6 @@ use sleigh_config::processor_x86::SLA_X86_64 as SLEIGH_SPEC; use crate::*; -struct LoadImageImpl(Vec); - -impl InstructionLoader for LoadImageImpl { - fn load_instruction_bytes(&self, data: &VarnodeData) -> std::result::Result, String> { - let start: usize = data.address.offset.try_into().expect("invalid offset"); - if start >= self.0.len() { - return Err("Requested fill outside image".to_string()); - } - - // Never exceed image - let end = usize::min(start + data.size, self.0.len()); - Ok(self.0[start..end].to_vec()) - } -} - #[test] pub fn addr_space_type() -> Result<()> { assert_eq!( @@ -140,8 +125,9 @@ fn build_raw_sla() -> Result<()> { #[test] fn test_pcode() -> Result<()> { const NUM_INSTRUCTIONS: usize = 7; - let load_image = - LoadImageImpl(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x0f\xaf\xc0\x5d\xc3".to_vec()); + let load_image = InstructionBytes::new( + b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x0f\xaf\xc0\x5d\xc3".to_vec(), + ); let sleigh = GhidraSleigh::builder() .processor_spec(PROCESSOR_SPEC)? .build(SLEIGH_SPEC)?; @@ -164,7 +150,7 @@ fn test_pcode() -> Result<()> { #[test] fn test_assembly() -> Result<()> { let load_image = - LoadImageImpl(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x01\xc0\x5d\xc3".to_vec()); + InstructionBytes::new(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x01\xc0\x5d\xc3".to_vec()); let sleigh = GhidraSleigh::builder() .processor_spec(PROCESSOR_SPEC)? .build(SLEIGH_SPEC)?; @@ -300,7 +286,7 @@ pub fn invalid_register_name() -> Result<()> { #[test] pub fn insufficient_data() -> Result<()> { - let load_image = LoadImageImpl(b"\x00".to_vec()); + let load_image = InstructionBytes::new(b"\x00".to_vec()); let sleigh = GhidraSleigh::builder() .processor_spec(PROCESSOR_SPEC)? 
.build(SLEIGH_SPEC)?; @@ -322,7 +308,7 @@ pub fn insufficient_data() -> Result<()> { #[test] pub fn invalid_instruction() -> Result<()> { - let load_image = LoadImageImpl(std::iter::repeat_n(0xFF, 16).collect()); + let load_image = InstructionBytes::new(std::iter::repeat_n(0xFF, 16).collect()); let sleigh = GhidraSleigh::builder() .processor_spec(PROCESSOR_SPEC)? .build(SLEIGH_SPEC)?; @@ -384,7 +370,7 @@ fn multiple_sleigh_data_sharing() -> Result<()> { fn verify_sleigh(sleigh: GhidraSleigh) { // 0x55 = PUSH RBP - let loader = LoadImageImpl(vec![0x55]); + let loader = InstructionBytes::new(vec![0x55]); let address = Address::new(sleigh.default_code_space(), 0); let disassembly = sleigh .disassemble_native(&loader, address)