From 598a76ae59f24b122fc67eec1b5094d1865f3d49 Mon Sep 17 00:00:00 2001 From: mnemonikr <138624285+mnemonikr@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:05:44 -0700 Subject: [PATCH 1/2] Improve disassembly ergonomics --- src/sleigh.rs | 231 ++++++++++++++++++++++++-------------------- src/tests/sleigh.rs | 4 +- 2 files changed, 129 insertions(+), 106 deletions(-) diff --git a/src/sleigh.rs b/src/sleigh.rs index d14bf43..8fcda41 100644 --- a/src/sleigh.rs +++ b/src/sleigh.rs @@ -62,14 +62,14 @@ pub trait Sleigh { &self, loader: &dyn LoadImage, address: Address, - ) -> Result>; + ) -> Result; /// Disassemble the instructions at the given address into native assembly instructions. fn disassemble_native( &self, loader: &dyn LoadImage, address: Address, - ) -> Result>; + ) -> Result; /// Get the register name for a varnode targeting a register. This will return `None` if the /// target is not a valid register. @@ -355,10 +355,16 @@ impl std::fmt::Display for PcodeInstruction { } } +/// A disassembled native assembly instruction #[derive(Clone, Debug)] pub struct AssemblyInstruction { + /// The origin of the assembly instruction pub address: Address, + + /// The instruction mnemonic pub mnemonic: String, + + /// The body of the instruction pub body: String, } @@ -375,37 +381,27 @@ impl std::fmt::Display for AssemblyInstruction { } } -#[derive(Default)] -pub struct PcodeDisassemblyOutput { - instructions: Vec, -} - -#[derive(Default)] -pub struct NativeDisassemblyOutput { - instruction: Option, -} - -/// A disassembly of instructions originating from a [VarnodeData]. -#[derive(Debug, Clone)] -pub struct Disassembly { +/// Disassembly of an instruction into pcode +#[derive(Debug)] +pub struct PcodeDisassembly { /// The disassembled instructions - pub instructions: Vec, + pub instructions: Vec, /// The origin of the instructions pub origin: VarnodeData, } -impl Disassembly { - /// Create a new disassembly - pub fn new(instructions: Vec, origin: VarnodeData) -> Self { - Self { - instructions, - origin, - } - } +/// Disassembly of an instruction into its native assembly +#[derive(Debug)] +pub struct NativeDisassembly { + /// The disassembled instruction + pub instruction: AssemblyInstruction, + + /// The origin of the instructions + pub origin: VarnodeData, } -impl std::fmt::Display for Disassembly { +impl std::fmt::Display for PcodeDisassembly { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!( f, @@ -422,6 +418,11 @@ impl std::fmt::Display for Disassembly { } } +#[derive(Default)] +struct NativeDisassemblyOutput { + instruction: Option, +} + impl api::AssemblyEmit for NativeDisassemblyOutput { fn dump( &mut self, @@ -429,6 +430,11 @@ impl api::AssemblyEmit for NativeDisassemblyOutput { mnemonic: &libsla_sys::cxx::CxxString, body: &libsla_sys::cxx::CxxString, ) { + assert!( + self.instruction.is_none(), + "native disassembly should dump 1 instruction" + ); + self.instruction = Some(AssemblyInstruction { address: address.into(), mnemonic: mnemonic.to_string(), @@ -437,6 +443,11 @@ impl api::AssemblyEmit for NativeDisassemblyOutput { } } +#[derive(Default)] +struct PcodeDisassemblyOutput { + instructions: Vec, +} + impl api::PcodeEmit for PcodeDisassemblyOutput { fn dump( &mut self, @@ -494,11 +505,6 @@ pub trait LoadImage { fn instruction_bytes(&self, data: &VarnodeData) -> std::result::Result, String>; } -enum DisassemblyKind<'a> { - Native(&'a mut NativeDisassemblyOutput), - Pcode(&'a mut PcodeDisassemblyOutput), -} - /// The encoding of the compiled sleigh specification (.slaspec). #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)] pub enum SlaDataEncoding { @@ -615,6 +621,20 @@ impl GhidraSleigh { Default::default() } + /// Convert an address to a system address. Returns `None` if the provided address space cannot + /// be mapped to a system address space. + fn sys_address(&self, address: &Address) -> Option> { + // SAFETY: The provided address space has been verified to be safe + Some(unsafe { + sys::new_address( + self.sys_address_space(address.address_space.id)?, + address.offset, + ) + }) + } + + /// Creates address space using the given address space id. Returns `None` if the provided id + /// cannot be mapped to a system address space. fn sys_address_space(&self, space_id: AddressSpaceId) -> Option<*mut sys::AddrSpace> { let num_spaces = self.sleigh.num_spaces(); for i in 0..num_spaces { @@ -628,70 +648,6 @@ impl GhidraSleigh { None } - - fn disassemble( - &self, - loader: &dyn LoadImage, - address: Address, - kind: DisassemblyKind, - ) -> Result { - let address = unsafe { - sys::new_address( - self.sys_address_space(address.address_space.id) - .expect("invalid space id"), - address.offset, - ) - }; - - let loader = InstructionLoader(loader); - let rust_loader = rust::RustLoadImage(&loader); - - let response = match kind { - DisassemblyKind::Pcode(output) => { - let mut emitter = rust::RustPcodeEmit(output); - self.sleigh - .disassemble_pcode(&rust_loader, &mut emitter, address.as_ref().unwrap()) - } - DisassemblyKind::Native(output) => { - let mut emitter = rust::RustAssemblyEmit(output); - self.sleigh.disassemble_native( - &rust_loader, - &mut emitter, - address.as_ref().unwrap(), - ) - } - }; - - let bytes_consumed = response - .map_err(|err| Error::DependencyError { - message: Cow::Borrowed("failed to decode instruction"), - source: Box::new(err), - })? - .try_into() - .map_err(|err| { - Error::InternalError(format!("instruction origin size is too large: {err}")) - })?; - - let source = VarnodeData { - address: (&*address).into(), - size: bytes_consumed, - }; - - // Sleigh may attempt to read more bytes than are available to read. - // Unfortuantely the callback API does not provide any mechanism to - // inform the caller that only a subset of the requested bytes are valid. - // Since many ISAs are variable-length instructions, it is possible the - // valid subset will decode to a valid instruction, and the requested length - // was an over-estimation. - // - // This is a sanity check to determine if the bytes Sleigh used for decoding - // are all valid. - if !loader.readable(&source) { - return Err(Error::InsufficientData(source)); - } - - Ok(source) - } } impl Sleigh for GhidraSleigh { @@ -748,22 +704,46 @@ impl Sleigh for GhidraSleigh { &self, loader: &dyn LoadImage, address: Address, - ) -> Result> { - let mut output = Default::default(); - let origin = self.disassemble(loader, address, DisassemblyKind::Pcode(&mut output))?; - Ok(Disassembly::new(output.instructions, origin)) + ) -> Result { + let sys_address = self.sys_address(&address).expect("invalid address"); + let loader = InstructionLoader(loader); + let rust_loader = rust::RustLoadImage(&loader); + let mut output = PcodeDisassemblyOutput::default(); + let mut emitter = rust::RustPcodeEmit(&mut output); + let response = self.sleigh.disassemble_pcode( + &rust_loader, + &mut emitter, + sys_address.as_ref().unwrap(), + ); + + Ok(PcodeDisassembly { + origin: handle_disassembly_response(response, loader, address)?, + instructions: output.instructions, + }) } fn disassemble_native( &self, loader: &dyn LoadImage, address: Address, - ) -> Result> { - let mut output = Default::default(); - let origin = self.disassemble(loader, address, DisassemblyKind::Native(&mut output))?; - - // TODO Convert this into an object that holds just one instruction - Ok(Disassembly::new(vec![output.instruction.unwrap()], origin)) + ) -> Result { + let sys_address = self.sys_address(&address).expect("invalid address"); + let loader = InstructionLoader(loader); + let rust_loader = rust::RustLoadImage(&loader); + let mut output = NativeDisassemblyOutput::default(); + let mut emitter = rust::RustAssemblyEmit(&mut output); + let response = self.sleigh.disassemble_native( + &rust_loader, + &mut emitter, + sys_address.as_ref().unwrap(), + ); + + Ok(NativeDisassembly { + origin: handle_disassembly_response(response, loader, address)?, + instruction: output.instruction.ok_or_else(|| { + Error::InternalError("ghidra did not disassemble an instruction".to_owned()) + })?, + }) } fn register_name_map(&self) -> BTreeMap { @@ -774,3 +754,46 @@ impl Sleigh for GhidraSleigh { .collect() } } + +/// Construct the origin of the disassembly. This can fail if the disassembly is determined to +/// have originated from invalid data. +fn handle_disassembly_response( + response: std::result::Result, + loader: InstructionLoader, + address: Address, +) -> Result { + let source = VarnodeData { + address, + size: num_bytes_disassembled(response)?, + }; + + // Sleigh may attempt to read more bytes than are available to read. + // Unfortuantely the callback API does not provide any mechanism to + // inform the caller that only a subset of the requested bytes are valid. + // Since many ISAs are variable-length instructions, it is possible the + // valid subset will decode to a valid instruction, and the requested length + // was an over-estimation. + // + // This is a sanity check to determine if the bytes Sleigh used for decoding + // are all valid. + if !loader.readable(&source) { + return Err(Error::InsufficientData(source)); + } + + Ok(source) +} + +fn num_bytes_disassembled( + response: std::result::Result, +) -> Result { + let bytes_consumed = response + .map_err(|err| Error::DependencyError { + message: Cow::Borrowed("failed to decode instruction"), + source: Box::new(err), + })? + .try_into() + .map_err(|err| { + Error::InternalError(format!("instruction origin size is too large: {err}")) + })?; + Ok(bytes_consumed) +} diff --git a/src/tests/sleigh.rs b/src/tests/sleigh.rs index 51cd253..8823675 100644 --- a/src/tests/sleigh.rs +++ b/src/tests/sleigh.rs @@ -208,7 +208,7 @@ fn test_assembly() -> Result<()> { let response = sleigh .disassemble_native(&load_image, address) .expect("Failed to decode instruction"); - let instruction = &response.instructions[0]; + let instruction = &response.instruction; assert_eq!(instruction.address.address_space.name, expected_entry.0); assert_eq!(instruction.address.offset, expected_entry.1); assert_eq!(instruction.mnemonic, expected_entry.2); @@ -372,7 +372,7 @@ fn verify_sleigh(sleigh: GhidraSleigh) { .disassemble_native(&loader, address) .expect("disassembly should succeed"); - let instruction = &disassembly.instructions[0]; + let instruction = &disassembly.instruction; assert_eq!(instruction.mnemonic, "PUSH"); assert_eq!(instruction.body, "RBP"); } From 1e798d292721bf11c21c5645e0c9db310134476f Mon Sep 17 00:00:00 2001 From: mnemonikr <138624285+mnemonikr@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:15:37 -0700 Subject: [PATCH 2/2] Derives --- src/sleigh.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/sleigh.rs b/src/sleigh.rs index 8fcda41..d194891 100644 --- a/src/sleigh.rs +++ b/src/sleigh.rs @@ -382,7 +382,7 @@ impl std::fmt::Display for AssemblyInstruction { } /// Disassembly of an instruction into pcode -#[derive(Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct PcodeDisassembly { /// The disassembled instructions pub instructions: Vec, @@ -392,7 +392,7 @@ pub struct PcodeDisassembly { } /// Disassembly of an instruction into its native assembly -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct NativeDisassembly { /// The disassembled instruction pub instruction: AssemblyInstruction, @@ -401,6 +401,19 @@ pub struct NativeDisassembly { pub origin: VarnodeData, } +impl std::fmt::Display for NativeDisassembly { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!( + f, + "[{origin}]: {instruction}", + origin = self.origin, + instruction = self.instruction, + )?; + + Ok(()) + } +} + impl std::fmt::Display for PcodeDisassembly { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(