Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "libsla"
description = "Rust bindings to Ghidra Sleigh library libsla"
version = "1.1.0"
version = "1.2.0"
edition = "2024"
license = "Apache-2.0"
repository = "https://github.com/mnemonikr/libsla"
Expand Down
52 changes: 44 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,56 @@ using `make sleigh_opt`.

# Example

Disassemble bytes into native assembly instructions.

```rust
// Build Sleigh with configuration files from sleigh-config crate
let sleigh = GhidraSleigh::builder()
.processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)?
.build(sleigh_config::processor_x86::SLA_X86_64)?;

// The instruction reader is defined by the user and implements the InstructionLoader trait.
let instruction_reader = InstructionReader::new();
// PUSH RBP instruction is the byte 0x55.
let instructions = InstructionBytes::new(vec![0x55]);

// Instruction to decode from the reader.
let instruction_offset = 0x800000;
// InstructionBytes is a simple byte loader that does not model multiple address spaces.
// However, an address space is required, so for simplicity use the default code space.
let address_space = sleigh.default_code_space();
let instruction_address = Address::new(instruction_offset, address_space);

// Disassemble!
let pcode_disassembly = sleigh.disassemble_pcode(&instruction_reader, instruction_address)?;
// Start disassembly from the first byte (index 0)
let instruction_address = Address::new(address_space, 0);

// Confirming this is indeed PUSH RBP.
let native_disassembly = sleigh.disassemble_native(&instructions, instruction_address)?;
assert_eq!(native_disassembly.instruction.mnemonic, "PUSH");
assert_eq!(native_disassembly.instruction.body, "RBP");
```

Disassemble bytes into pcode instructions. Pcode instructions can be used for program modeling.

```rust
let sleigh = GhidraSleigh::builder()
.processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)?
.build(sleigh_config::processor_x86::SLA_X86_64)?;

// PUSH RBP
let instructions = InstructionBytes::new(vec![0x55]);
let instruction_address = Address::new(sleigh.default_code_space(), 0);
let pcode_disassembly = sleigh.disassemble_pcode(&instructions, instruction_address)?;
let pcode_instructions = pcode_disassembly.instructions;

assert_eq!(pcode_instructions.len(), 3, "There should be 3 pcode instructions");

// Copy RBP into a temporary
let copy_destination = pcode_instructions[0].output.as_ref().unwrap();
assert_eq!(pcode_instructions[0].op_code, OpCode::Copy);
assert_eq!(sleigh.register_name(&pcode_instructions[0].inputs[0]).unwrap(), "RBP");

// Subtract 8 bytes from RSP
assert_eq!(pcode_instructions[1].op_code, OpCode::Int(IntOp::Subtract));
assert_eq!(sleigh.register_name(&pcode_instructions[1].inputs[0]).unwrap(), "RSP");
assert_eq!(pcode_instructions[1].inputs[1].address.offset, 8);

// Store temporary (RBP) into memory address pointed to by RSP
assert_eq!(pcode_instructions[2].op_code, OpCode::Store);
assert_eq!(sleigh.register_name(&pcode_instructions[2].inputs[1]).unwrap(), "RSP");
assert_eq!(&pcode_instructions[2].inputs[2], copy_destination);
```
83 changes: 77 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,83 @@
//! This crate includes Rust bindings to the
//! [Ghidra](https://github.com/NationalSecurityAgency/ghidra) SLEIGH library libsla for translating
//! native code to p-code.
//! native code to p-code. This allows binary analysis programs to model p-code instead of needing
//! to model each processor architecture separately.
//!
//! SLEIGH is a processor specification language developed for Ghidra and used to describe
//! microprocessors with enough detail to facilitate disassembly and decompilation. The
//! processor-specific instructions are translated to **p-code**, which captures the instruction
//! semantics independent of the specific processor. The details on how to perform this translation
//! are captured by the compiled SLEIGH specification for the processor.
//! ## Examples
//!
//! ### Native Disassembly
//!
//! This example disassembles the `PUSH RBP` x86-64 instruction (byte `0x55`).
//!
//! ```
//! # use crate::libsla::*;
//! # use sleigh_config;
//! # fn main() -> crate::libsla::Result<()> {
//! let sleigh = GhidraSleigh::builder()
//! .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)?
//! .build(sleigh_config::processor_x86::SLA_X86_64)?;
//!
//! // PUSH RBP instruction is the byte 0x55.
//! let instructions = InstructionBytes::new(vec![0x55]);
//!
//! // InstructionBytes is a simple byte loader that does not model multiple address spaces.
//! // However, an address space is required, so for simplicity use the default code space.
//! let address_space = sleigh.default_code_space();
//!
//! // Start disassembly from the first byte (index 0)
//! let instruction_address = Address::new(address_space, 0);
//!
//! // Confirming this is indeed PUSH RBP.
//! let native_disassembly = sleigh.disassemble_native(&instructions, instruction_address)?;
//! assert_eq!(native_disassembly.instruction.mnemonic, "PUSH");
//! assert_eq!(native_disassembly.instruction.body, "RBP");
//! # Ok(())
//! # }
//! ```
//! ### Pcode Disassembly
//!
//! This example disassembles the `PUSH RBP` x86-64 instruction (`0x55`) into pcode. The pcode for
//! this instruction is
//!
//! 1. `COPY temp <- RBP`
//! 2. `SUBTRACT RSP <- RSP 0x8`
//! 3. `STORE [RSP] <- temp`
//!
//!
//! ```
//! # use crate::libsla::*;
//! # use sleigh_config;
//! # fn main() -> crate::libsla::Result<()> {
//! let sleigh = GhidraSleigh::builder()
//! .processor_spec(sleigh_config::processor_x86::PSPEC_X86_64)?
//! .build(sleigh_config::processor_x86::SLA_X86_64)?;
//!
//! // PUSH RBP
//! let instructions = InstructionBytes::new(vec![0x55]);
//! let instruction_address = Address::new(sleigh.default_code_space(), 0);
//! let pcode_disassembly = sleigh.disassemble_pcode(&instructions, instruction_address)?;
//! let pcode_instructions = pcode_disassembly.instructions;
//!
//! assert_eq!(pcode_instructions.len(), 3, "There should be 3 pcode instructions");
//!
//! // Copy RBP into a temporary
//! let copy_destination = pcode_instructions[0].output.as_ref().unwrap();
//! assert_eq!(pcode_instructions[0].op_code, OpCode::Copy);
//! assert_eq!(sleigh.register_name(&pcode_instructions[0].inputs[0]).unwrap(), "RBP");
//!
//! // Subtract 8 bytes from RSP
//! assert_eq!(pcode_instructions[1].op_code, OpCode::Int(IntOp::Subtract));
//! assert_eq!(sleigh.register_name(&pcode_instructions[1].inputs[0]).unwrap(), "RSP");
//! assert_eq!(pcode_instructions[1].inputs[1].address.offset, 8);
//!
//! // Store temporary (RBP) into memory address pointed to by RSP
//! assert_eq!(pcode_instructions[2].op_code, OpCode::Store);
//! assert_eq!(sleigh.register_name(&pcode_instructions[2].inputs[1]).unwrap(), "RSP");
//! assert_eq!(&pcode_instructions[2].inputs[2], copy_destination);
//!
//! # Ok(())
//! # }
//! ```

mod opcodes;
mod sleigh;
Expand Down
37 changes: 37 additions & 0 deletions src/sleigh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,43 @@ impl api::PcodeEmit for PcodeDisassemblyOutput {
}
}

/// A sequence of instruction bytes which can be used by Sleigh for disassembly.
///
/// The bytes are owned by this value. Build one with [`InstructionBytes::new`] or by collecting
/// an iterator of `u8` values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InstructionBytes(Vec<u8>);

impl InstructionBytes {
    /// Create a new instance for the provided sequence of instruction bytes.
    ///
    /// Ownership of `bytes` moves into the returned value; the contents are stored unchanged.
    /// Discarding the result makes the call pointless, hence `#[must_use]`.
    #[must_use]
    pub fn new(bytes: Vec<u8>) -> Self {
        Self(bytes)
    }
}

impl FromIterator<u8> for InstructionBytes {
fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}

impl InstructionLoader for InstructionBytes {
    /// Return the stored bytes beginning at the offset of `data`'s address.
    ///
    /// At most `data.size` bytes are returned. Fewer are returned when the request runs past the
    /// end of the stored bytes. Only the offset of the address is consulted.
    ///
    /// # Errors
    ///
    /// Returns an error message when the offset cannot be converted to `usize`, or when the
    /// offset is greater than or equal to the number of stored bytes.
    fn load_instruction_bytes(&self, data: &VarnodeData) -> std::result::Result<Vec<u8>, String> {
        let stored = &self.0;

        let start = match usize::try_from(data.address.offset) {
            Ok(index) => index,
            Err(err) => return Err(format!("offset should convert to usize: {err:?}")),
        };

        // An offset equal to the length has no bytes behind it, so it is rejected as well.
        let remaining = match stored.get(start..) {
            Some(tail) if !tail.is_empty() => tail,
            _ => {
                return Err(format!(
                    "Offset {start} exceeds count of instruction bytes {len}",
                    len = stored.len()
                ));
            }
        };

        // Serve a short read rather than failing when fewer bytes remain than were requested.
        let count = usize::min(data.size, remaining.len());
        Ok(remaining[..count].to_vec())
    }
}

/// Wrapper around the public load image API so that it can be converted to the native API.
/// This is required in order to pass a trait object reference down into the native API.
struct InstructionLoaderWrapper<'a>(&'a dyn InstructionLoader);
Expand Down
28 changes: 7 additions & 21 deletions src/tests/sleigh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,6 @@ use sleigh_config::processor_x86::SLA_X86_64 as SLEIGH_SPEC;

use crate::*;

/// Test helper that serves instruction bytes from an in-memory buffer.
struct LoadImageImpl(Vec<u8>);

impl InstructionLoader for LoadImageImpl {
    /// Return up to `data.size` bytes of the image starting at the offset of `data`'s address.
    ///
    /// # Errors
    ///
    /// Returns an error message when the offset cannot be represented as `usize` or when it is
    /// greater than or equal to the image length.
    fn load_instruction_bytes(&self, data: &VarnodeData) -> std::result::Result<Vec<u8>, String> {
        // Report an unrepresentable offset to the caller instead of panicking.
        let start = usize::try_from(data.address.offset)
            .map_err(|err| format!("invalid offset: {err:?}"))?;
        if start >= self.0.len() {
            return Err("Requested fill outside image".to_string());
        }

        // Never exceed image. The saturating add keeps a very large requested size from
        // overflowing before it is clamped to the image length.
        let end = usize::min(start.saturating_add(data.size), self.0.len());
        Ok(self.0[start..end].to_vec())
    }
}

#[test]
pub fn addr_space_type() -> Result<()> {
assert_eq!(
Expand Down Expand Up @@ -140,8 +125,9 @@ fn build_raw_sla() -> Result<()> {
#[test]
fn test_pcode() -> Result<()> {
const NUM_INSTRUCTIONS: usize = 7;
let load_image =
LoadImageImpl(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x0f\xaf\xc0\x5d\xc3".to_vec());
let load_image = InstructionBytes::new(
b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x0f\xaf\xc0\x5d\xc3".to_vec(),
);
let sleigh = GhidraSleigh::builder()
.processor_spec(PROCESSOR_SPEC)?
.build(SLEIGH_SPEC)?;
Expand All @@ -164,7 +150,7 @@ fn test_pcode() -> Result<()> {
#[test]
fn test_assembly() -> Result<()> {
let load_image =
LoadImageImpl(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x01\xc0\x5d\xc3".to_vec());
InstructionBytes::new(b"\x55\x48\x89\xe5\x89\x7d\xfc\x8b\x45\xfc\x01\xc0\x5d\xc3".to_vec());
let sleigh = GhidraSleigh::builder()
.processor_spec(PROCESSOR_SPEC)?
.build(SLEIGH_SPEC)?;
Expand Down Expand Up @@ -300,7 +286,7 @@ pub fn invalid_register_name() -> Result<()> {

#[test]
pub fn insufficient_data() -> Result<()> {
let load_image = LoadImageImpl(b"\x00".to_vec());
let load_image = InstructionBytes::new(b"\x00".to_vec());
let sleigh = GhidraSleigh::builder()
.processor_spec(PROCESSOR_SPEC)?
.build(SLEIGH_SPEC)?;
Expand All @@ -322,7 +308,7 @@ pub fn insufficient_data() -> Result<()> {

#[test]
pub fn invalid_instruction() -> Result<()> {
let load_image = LoadImageImpl(std::iter::repeat_n(0xFF, 16).collect());
let load_image = InstructionBytes::new(std::iter::repeat_n(0xFF, 16).collect());
let sleigh = GhidraSleigh::builder()
.processor_spec(PROCESSOR_SPEC)?
.build(SLEIGH_SPEC)?;
Expand Down Expand Up @@ -384,7 +370,7 @@ fn multiple_sleigh_data_sharing() -> Result<()> {

fn verify_sleigh(sleigh: GhidraSleigh) {
// 0x55 = PUSH RBP
let loader = LoadImageImpl(vec![0x55]);
let loader = InstructionBytes::new(vec![0x55]);
let address = Address::new(sleigh.default_code_space(), 0);
let disassembly = sleigh
.disassemble_native(&loader, address)
Expand Down