From df5082a619e975e57336dd0baadd5e016fe1c328 Mon Sep 17 00:00:00 2001 From: mnemonikr <138624285+mnemonikr@users.noreply.github.com> Date: Sat, 15 Nov 2025 16:12:25 -0800 Subject: [PATCH 1/4] Added InstructionBytes for simple disassembly use case --- src/lib.rs | 82 +++++++++++++++++++++++++++++++++++++++++---- src/sleigh.rs | 37 ++++++++++++++++++++ src/tests/sleigh.rs | 28 ++++------------ 3 files changed, 120 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 904c6ec..f68ffdb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,82 @@ //! This crate includes Rust bindings to the //! [Ghidra](https://github.com/NationalSecurityAgency/ghidra) SLEIGH library libsla for translating -//! native code to p-code. +//! native code to p-code. This allows binary analysis programs to model p-code instead of needing +//! to model each processor architecture separately. //! -//! SLEIGH is a processor specification language developed for the Ghidra used to describe -//! microprocessors with enough detail to facilitate disassembly and decompilation. The -//! processor-specific instructions are translated to **p-code**, which captures the instruction -//! semantics independent of the specific processor. The details on how to perform this translation -//! are captured by the compiled SLEIGH specification for the processor. +//! ## Examples +//! +//! ### Native Disassembly +//! +//! This example disassembles the `PUSH RBP` x86-64 instruction (byte `0x55`). +//! +//! ``` +//! # use crate::libsla::*; +//! # use sleigh_config; +//! # fn main() -> crate::libsla::Result<()> { +//! let sleigh = GhidraSleigh::builder() +//! .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)? +//! .build(sleigh_config::processor_x86::SLA_X86_64)?; +//! +//! // PUSH RBP instruction is the byte 0x55. +//! let instructions = InstructionBytes::new(vec![0x55]); +//! +//! // InstructionBytes is a simple byte loader that does not model multiple address spaces. +//! 
// However an address space is required, so for simplicity use the default code space. +//! let address_space = sleigh.default_code_space(); +//! +//! // Start disassembly from the first byte (index 0) +//! let instruction_address = Address::new(address_space, 0); +//! +//! // Confirming this is indeed PUSH RBP. +//! let native_disassembly = sleigh.disassemble_native(&instructions, instruction_address.clone())?; +//! assert_eq!(native_disassembly.instruction.mnemonic, "PUSH"); +//! assert_eq!(native_disassembly.instruction.body, "RBP"); +//! # Ok(()) +//! # } +//! ``` +//! ### Pcode Disassembly +//! +//! This example disassembles the `PUSH RBP` x86-64 instruction (`0x55`) into pcode. The pcode for +//! this instruction is +//! +//! 1. `COPY temp <- RBP` +//! 2. `SUBTRACT RSP <- RSP 0x8` +//! 3. `STORE [RSP] <- temp` +//! +//! ### +//! ``` +//! # use crate::libsla::*; +//! # use sleigh_config; +//! # fn main() -> crate::libsla::Result<()> { +//! let sleigh = GhidraSleigh::builder() +//! .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)? +//! .build(sleigh_config::processor_x86::SLA_X86_64)?; +//! +//! // PUSH RBP +//! let instructions = InstructionBytes::new(vec![0x55]); +//! let instruction_address = Address::new(sleigh.default_code_space(), 0); +//! +//! let pcode_disassembly = sleigh.disassemble_pcode(&instructions, instruction_address)?; +//! let pcode_instructions = pcode_disassembly.instructions; +//! +//! assert_eq!(pcode_instructions.len(), 3, "There should be 3 pcode instructions"); +//! +//! // Copy RBP into a temporary +//! assert_eq!(pcode_instructions[0].op_code, OpCode::Copy); +//! assert_eq!(sleigh.register_name(&pcode_instructions[0].inputs[0]).unwrap(), "RBP"); +//! +//! // Subtract 8 bytes from RSP +//! assert_eq!(pcode_instructions[1].op_code, OpCode::Int(IntOp::Subtract)); +//! assert_eq!(sleigh.register_name(&pcode_instructions[1].inputs[0]).unwrap(), "RSP"); +//! assert_eq!(pcode_instructions[1].inputs[1].address.offset, 8); +//! +//! 
// Store temporary (RBP) into memory address pointed to by RSP +//! assert_eq!(pcode_instructions[2].op_code, OpCode::Store); +//! assert_eq!(sleigh.register_name(&pcode_instructions[2].inputs[1]).unwrap(), "RSP"); +//! +//! # Ok(()) +//! # } +//! ``` mod opcodes; mod sleigh; diff --git a/src/sleigh.rs b/src/sleigh.rs index 50df4c2..5b7fe6b 100644 --- a/src/sleigh.rs +++ b/src/sleigh.rs @@ -494,6 +494,43 @@ impl api::PcodeEmit for PcodeDisassemblyOutput { } } +/// A sequence of instruction bytes which can be used by Sleigh for disassembly. +pub struct InstructionBytes(Vec<u8>); + +impl InstructionBytes { + /// Create a new instance for the provided sequence of instruction bytes + pub fn new(bytes: Vec<u8>) -> Self { + Self(bytes) + } +} + +impl FromIterator<u8> for InstructionBytes { + fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl InstructionLoader for InstructionBytes { + fn load_instruction_bytes(&self, data: &VarnodeData) -> std::result::Result<Vec<u8>, String> { + let start = usize::try_from(data.address.offset) + .map_err(|err| format!("offset should convert to usize: {err:?}"))?; + if start >= self.0.len() { + return Err(format!( + "Offset {start} exceeds count of instruction bytes {len}", + len = self.0.len() + )); + } + + // Do not overflow + let end = start.saturating_add(data.size); + + // Do not exceed the length of the instruction byte vec + let end = usize::min(end, self.0.len()); + + Ok(self.0[start..end].to_vec()) + } +} + /// Wrapper around the public load image API so that it can be converted to the native API. /// This is required in order to pass a trait object reference down into the native API. 
struct InstructionLoaderWrapper<'a>(&'a dyn InstructionLoader); diff --git a/src/tests/sleigh.rs b/src/tests/sleigh.rs index a0ce0ed..f31b9b1 100644 --- a/src/tests/sleigh.rs +++ b/src/tests/sleigh.rs @@ -10,21 +10,6 @@ use sleigh_config::processor_x86::SLA_X86_64 as SLEIGH_SPEC; use crate::*; -struct LoadImageImpl(Vec<u8>); - -impl InstructionLoader for LoadImageImpl { - fn load_instruction_bytes(&self, data: &VarnodeData) -> std::result::Result<Vec<u8>, String> { - let start: usize = data.address.offset.try_into().expect("invalid offset"); - if start >= self.0.len() { - return Err("Requested fill outside image".to_string()); - } - - // Never exceed image - let end = usize::min(start + data.size, self.0.len()); - Ok(self.0[start..end].to_vec()) - } -} - #[test] pub fn addr_space_type() -> Result<()> { assert_eq!( @@ -140,8 +125,9 @@ fn build_raw_sla() -> Result<()> { #[test] fn test_pcode() -> Result<()> { const NUM_INSTRUCTIONS: usize = 7; - let load_image = - LoadImageImpl(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x0f\xaf\xc0\x5d\xc3".to_vec()); + let load_image = InstructionBytes::new( + b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x0f\xaf\xc0\x5d\xc3".to_vec(), + ); let sleigh = GhidraSleigh::builder() .processor_spec(PROCESSOR_SPEC)? .build(SLEIGH_SPEC)?; @@ -164,7 +150,7 @@ fn test_pcode() -> Result<()> { #[test] fn test_assembly() -> Result<()> { let load_image = - LoadImageImpl(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x01\xc0\x5d\xc3".to_vec()); + InstructionBytes::new(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x01\xc0\x5d\xc3".to_vec()); let sleigh = GhidraSleigh::builder() .processor_spec(PROCESSOR_SPEC)? .build(SLEIGH_SPEC)?; @@ -300,7 +286,7 @@ pub fn invalid_register_name() -> Result<()> { #[test] pub fn insufficient_data() -> Result<()> { - let load_image = LoadImageImpl(b"\x00".to_vec()); + let load_image = InstructionBytes::new(b"\x00".to_vec()); let sleigh = GhidraSleigh::builder() .processor_spec(PROCESSOR_SPEC)? 
.build(SLEIGH_SPEC)?; @@ -322,7 +308,7 @@ pub fn insufficient_data() -> Result<()> { #[test] pub fn invalid_instruction() -> Result<()> { - let load_image = LoadImageImpl(std::iter::repeat_n(0xFF, 16).collect()); + let load_image = InstructionBytes::new(std::iter::repeat_n(0xFF, 16).collect()); let sleigh = GhidraSleigh::builder() .processor_spec(PROCESSOR_SPEC)? .build(SLEIGH_SPEC)?; @@ -384,7 +370,7 @@ fn multiple_sleigh_data_sharing() -> Result<()> { fn verify_sleigh(sleigh: GhidraSleigh) { // 0x55 = PUSH RBP - let loader = LoadImageImpl(vec![0x55]); + let loader = InstructionBytes::new(vec![0x55]); let address = Address::new(sleigh.default_code_space(), 0); let disassembly = sleigh .disassemble_native(&loader, address) From ec420d2f3e6185bbc992d1e875059bca782d0b39 Mon Sep 17 00:00:00 2001 From: mnemonikr <138624285+mnemonikr@users.noreply.github.com> Date: Sat, 15 Nov 2025 16:24:05 -0800 Subject: [PATCH 2/4] Update README --- README.md | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- src/lib.rs | 5 +++-- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 37b99b9..c5ebdcd 100644 --- a/README.md +++ b/README.md @@ -29,20 +29,56 @@ using `make sleigh_opt`. # Example +Disassemble bytes into native assembly instructions + ```rust -// Build Sleigh with configuration files from sleigh-config crate let sleigh = GhidraSleigh::builder() .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)? .build(sleigh_config::processor_x86::SLA_X86_64)?; -// The instruction reader is defined by the user and implements the InstructionLoader trait. -let instruction_reader = InstructionReader::new(); +// PUSH RBP instruction is the byte 0x55. +let instructions = InstructionBytes::new(vec![0x55]); -// Instruction to decode from the reader. -let instruction_offset = 0x800000; +// InstructionBytes is a simple byte loader that does not model multiple address spaces. 
+// However an address space is required, so for simplicity use the default code space. let address_space = sleigh.default_code_space(); -let instruction_address = Address::new(instruction_offset, address_space); -// Disassemble! -let pcode_disassembly = sleigh.disassemble_pcode(&instruction_reader, instruction_address)?; +// Start disassembly from the first byte (index 0) +let instruction_address = Address::new(address_space, 0); + +// Confirming this is indeed PUSH RBP. +let native_disassembly = sleigh.disassemble_native(&instructions, instruction_address)?; +assert_eq!(native_disassembly.instruction.mnemonic, "PUSH"); +assert_eq!(native_disassembly.instruction.body, "RBP"); +``` + +Disassemble bytes into pcode instructions for program modeling. + +```rust +let sleigh = GhidraSleigh::builder() + .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)? + .build(sleigh_config::processor_x86::SLA_X86_64)?; + +// PUSH RBP +let instructions = InstructionBytes::new(vec![0x55]); +let instruction_address = Address::new(sleigh.default_code_space(), 0); +let pcode_disassembly = sleigh.disassemble_pcode(&instructions, instruction_address)?; +let pcode_instructions = pcode_disassembly.instructions; + +assert_eq!(pcode_instructions.len(), 3, "There should be 3 pcode instructions"); + +// Copy RBP into a temporary +let copy_destination = pcode_instructions[0].output.as_ref().unwrap(); +assert_eq!(pcode_instructions[0].op_code, OpCode::Copy); +assert_eq!(sleigh.register_name(&pcode_instructions[0].inputs[0]).unwrap(), "RBP"); + +// Subtract 8 bytes from RSP +assert_eq!(pcode_instructions[1].op_code, OpCode::Int(IntOp::Subtract)); +assert_eq!(sleigh.register_name(&pcode_instructions[1].inputs[0]).unwrap(), "RSP"); +assert_eq!(pcode_instructions[1].inputs[1].address.offset, 8); + +// Store temporary (RBP) into memory address pointed to by RSP +assert_eq!(pcode_instructions[2].op_code, OpCode::Store); +assert_eq!(sleigh.register_name(&pcode_instructions[2].inputs[1]).unwrap(), 
"RSP"); +assert_eq!(&pcode_instructions[2].inputs[2], copy_destination); ``` diff --git a/src/lib.rs b/src/lib.rs index f68ffdb..9e2812c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,7 +28,7 @@ //! let instruction_address = Address::new(address_space, 0); //! //! // Confirming this is indeed PUSH RBP. -//! let native_disassembly = sleigh.disassemble_native(&instructions, instruction_address.clone())?; +//! let native_disassembly = sleigh.disassemble_native(&instructions, instruction_address)?; //! assert_eq!(native_disassembly.instruction.mnemonic, "PUSH"); //! assert_eq!(native_disassembly.instruction.body, "RBP"); //! # Ok(()) @@ -55,13 +55,13 @@ //! // PUSH RBP //! let instructions = InstructionBytes::new(vec![0x55]); //! let instruction_address = Address::new(sleigh.default_code_space(), 0); -//! //! let pcode_disassembly = sleigh.disassemble_pcode(&instructions, instruction_address)?; //! let pcode_instructions = pcode_disassembly.instructions; //! //! assert_eq!(pcode_instructions.len(), 3, "There should be 3 pcode instructions"); //! //! // Copy RBP into a temporary +//! let copy_destination = pcode_instructions[0].output.as_ref().unwrap(); //! assert_eq!(pcode_instructions[0].op_code, OpCode::Copy); //! assert_eq!(sleigh.register_name(&pcode_instructions[0].inputs[0]).unwrap(), "RBP"); //! @@ -73,6 +73,7 @@ //! // Store temporary (RBP) into memory address pointed to by RSP //! assert_eq!(pcode_instructions[2].op_code, OpCode::Store); //! assert_eq!(sleigh.register_name(&pcode_instructions[2].inputs[1]).unwrap(), "RSP"); +//! assert_eq!(&pcode_instructions[2].inputs[2], copy_destination); //! //! # Ok(()) //! 
# } From a77a0f5b96ff5d39346ec79d5b02f7ba36084d46 Mon Sep 17 00:00:00 2001 From: mnemonikr <138624285+mnemonikr@users.noreply.github.com> Date: Sat, 15 Nov 2025 16:26:44 -0800 Subject: [PATCH 3/4] Minor update to README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c5ebdcd..72a43b9 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ using `make sleigh_opt`. # Example -Disassemble bytes into native assembly instructions +Disassemble bytes into native assembly instructions. ```rust let sleigh = GhidraSleigh::builder() @@ -52,7 +52,7 @@ assert_eq!(native_disassembly.instruction.mnemonic, "PUSH"); assert_eq!(native_disassembly.instruction.body, "RBP"); ``` -Disassemble bytes into pcode instructions for program modeling. +Disassemble bytes into pcode instructions. Pcode instructions can be used for program modeling. ```rust let sleigh = GhidraSleigh::builder() From f3f5b5e2f7f61d8d8093e9687b012ae29b9466c3 Mon Sep 17 00:00:00 2001 From: mnemonikr <138624285+mnemonikr@users.noreply.github.com> Date: Sat, 15 Nov 2025 16:30:41 -0800 Subject: [PATCH 4/4] Version bump to 1.2.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 81af2cb..c16870b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,7 +197,7 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "libsla" -version = "1.1.0" +version = "1.2.0" dependencies = [ "flate2", "libsla-sys", diff --git a/Cargo.toml b/Cargo.toml index bb4bb61..3c65279 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "libsla" description = "Rust bindings to Ghidra Sleigh library libsla" -version = "1.1.0" +version = "1.2.0" edition = "2024" license = "Apache-2.0" repository = "https://github.com/mnemonikr/libsla"