From 0592e5c3b823b636a456aeccbcf43acd65797187 Mon Sep 17 00:00:00 2001 From: soruh Date: Fri, 10 Oct 2025 14:57:24 +0200 Subject: [PATCH 1/5] add serde impl and add empty test --- .github/workflows/rust.yml | 4 +++- Cargo.lock | 1 + Cargo.toml | 2 ++ src/opcodes.rs | 7 +++++++ src/sleigh.rs | 10 ++++++++++ src/tests.rs | 3 +++ src/tests/serde.rs | 4 ++++ 7 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 src/tests/serde.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index afe5cd8..df4bd16 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -22,11 +22,13 @@ jobs: run: cargo fmt --all -- --check - name: Unit Tests run: cargo test --verbose + - name: Serde Tests + run: cargo test --verbose --features serde --tests serde - name: Coverage Report run: | rustup component add llvm-tools-preview PATH=$(rustup show home | xargs -I '{}' find {} -name 'llvm-profdata' | xargs -I '{}' dirname {}):$PATH - RUSTFLAGS="-C instrument-coverage" cargo test --lib + RUSTFLAGS="-C instrument-coverage" cargo test --lib --features serde find . 
-name '*.profraw' > profraw-files.txt llvm-profdata merge -sparse -f profraw-files.txt -o merged.profdata llvm-cov report \ diff --git a/Cargo.lock b/Cargo.lock index 08b27fc..5eaf040 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -195,6 +195,7 @@ version = "1.0.0" dependencies = [ "flate2", "libsla-sys", + "serde", "sleigh-config", "thiserror", ] diff --git a/Cargo.toml b/Cargo.toml index b8c846e..1f1d91b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,10 +8,12 @@ repository = "https://github.com/mnemonikr/libsla" [features] fuzzing = ["libsla-sys/fuzzing"] +serde = ["dep:serde"] [dependencies] thiserror = "2.0" libsla-sys = { version = "0.1.4" } +serde = { version = "1", features = ["derive"], optional = true } [dev-dependencies] flate2 = "1.1" diff --git a/src/opcodes.rs b/src/opcodes.rs index 87313f7..83e8ca9 100644 --- a/src/opcodes.rs +++ b/src/opcodes.rs @@ -6,6 +6,7 @@ use libsla_sys::sys; /// A representation of opcodes for p-code instructions. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum OpCode { /// Copy a sequence of bytes from one fixed location to another. Copy, @@ -69,6 +70,7 @@ pub enum OpCode { /// Operations for boolean, single-bit inputs. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum BoolOp { /// Negate a single bit: `!x`. Negate, @@ -87,6 +89,7 @@ pub enum BoolOp { /// operation does not include `IntSign` as an argument, then distinguishing between signed and /// unsigned is not applicable for the operation. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum IntSign { /// An integer where the most significant bit (msb) indicates the sign of the integer. The integer is /// positive if the msb is `0` and negative if the msb is `1`. 
Signed integers are represented @@ -99,6 +102,7 @@ pub enum IntSign { /// Operations on integers. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum IntOp { /// Add two integers: `x + y`. Add, @@ -155,6 +159,7 @@ pub enum IntOp { /// Operations on floating-point numbers. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum FloatOp { /// Check if two numbers are equal: `x == y`. Equal, @@ -213,6 +218,7 @@ pub enum FloatOp { /// Operations which represent black-box placeholders for some sequence of changes to the machine state. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum PseudoOp { /// A call that cannot be semantically represented in p-code. For example, a syscall. CallOther, @@ -229,6 +235,7 @@ pub enum PseudoOp { /// for use in processor specifications and therefore will never be emitted when directly /// translating machine instructions. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum AnalysisOp { /// Copies a sequence of bytes to a fixed location. There are multiple origins possible for the /// bytes. The selected origin depends on the execution path leading to this operation. diff --git a/src/sleigh.rs b/src/sleigh.rs index 6076c37..2f74d9c 100644 --- a/src/sleigh.rs +++ b/src/sleigh.rs @@ -81,6 +81,7 @@ pub trait Sleigh { /// An address is represented by an offset into an address space #[derive(Ord, PartialOrd, PartialEq, Eq, Clone)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Address { /// The standard interpretation of the offset is an index into the associated address space. 
/// However, when used in conjunction with the constant address space, the offset is the actual @@ -132,6 +133,7 @@ impl From<&sys::Address> for Address { /// A VarnodeData represents the address and size of data. #[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct VarnodeData { pub address: Address, pub size: usize, @@ -193,6 +195,7 @@ impl From<&sys::VarnodeData> for VarnodeData { /// guaranteed to be deterministically constructed. This means different instances of Sleigh may /// identify the same address space with _different_ identifiers. #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct AddressSpaceId(usize); impl std::fmt::Debug for AddressSpaceId { @@ -234,6 +237,7 @@ impl AddressSpaceId { /// Information about an address space #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct AddressSpace { pub id: AddressSpaceId, pub name: Cow<'static, str>, @@ -285,6 +289,7 @@ impl From<&sys::AddrSpace> for AddressSpaceId { /// Types for an [AddressSpace]. #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum AddressSpaceType { /// Special space to represent constants Constant, @@ -321,6 +326,7 @@ impl From for AddressSpaceType { /// cases. For example, the [OpCode::Load] operation encodes the [AddressSpace] using the /// [AddressSpaceId]. This identifier in particular may differ across Sleigh instances. #[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct PcodeInstruction { /// The originating address for this instruction. 
This information is necessary to include for /// the [OpCode::BranchIndirect] operation, which determines the destination address space from @@ -361,6 +367,7 @@ impl std::fmt::Display for PcodeInstruction { /// A disassembled native assembly instruction #[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct AssemblyInstruction { /// The origin of the assembly instruction pub address: Address, @@ -387,6 +394,7 @@ impl std::fmt::Display for AssemblyInstruction { /// Disassembly of an instruction into pcode #[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct PcodeDisassembly { /// The disassembled instructions pub instructions: Vec<PcodeInstruction>, @@ -397,6 +405,7 @@ pub struct PcodeDisassembly { /// Disassembly of an instruction into its native assembly #[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct NativeDisassembly { /// The disassembled instruction pub instruction: AssemblyInstruction, @@ -436,6 +445,7 @@ impl std::fmt::Display for PcodeDisassembly { } #[derive(Default)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] struct NativeDisassemblyOutput { instruction: Option<AssemblyInstruction>, } diff --git a/src/tests.rs b/src/tests.rs index 8af3a01..dc60c35 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,2 +1,5 @@ mod opcodes; mod sleigh; + +#[cfg(feature = "serde")] +mod serde; diff --git a/src/tests/serde.rs b/src/tests/serde.rs new file mode 100644 index 0000000..5418bba --- /dev/null +++ b/src/tests/serde.rs @@ -0,0 +1,4 @@ +#[test] +fn fails() { + panic!(); +} From 7418c3e808b81693888e7aac21e9bf1b3fcd2d83 Mon Sep 17 00:00:00 2001 From: soruh Date: Mon, 13 Oct 2025 17:05:58 +0200 Subject: [PATCH 2/5] add tests for serde support --- Cargo.lock | 32 ++++++ Cargo.toml | 1 + src/sleigh.rs | 4 +- src/tests/serde.rs | 261 ++++++++++++++++++++++++++++++++++++++++++++- 4 
files changed, 294 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5eaf040..d8b7db3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -189,6 +189,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + [[package]] name = "libsla" version = "1.0.0" @@ -196,6 +202,7 @@ dependencies = [ "flate2", "libsla-sys", "serde", + "serde_json", "sleigh-config", "thiserror", ] @@ -219,6 +226,12 @@ dependencies = [ "cc", ] +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -252,6 +265,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + [[package]] name = "scratch" version = "1.0.9" @@ -288,6 +307,19 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + [[package]] name = "shlex" version = "1.3.0" diff --git a/Cargo.toml b/Cargo.toml index 1f1d91b..b504095 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,3 +18,4 @@ serde = { version = "1", features = ["derive"], optional = true } [dev-dependencies] flate2 = "1.1" sleigh-config = { version = "1", features = ["x86"] } +serde_json = { version = "1.0.145" } diff --git a/src/sleigh.rs 
b/src/sleigh.rs index 2f74d9c..6083dca 100644 --- a/src/sleigh.rs +++ b/src/sleigh.rs @@ -366,7 +366,7 @@ impl std::fmt::Display for PcodeInstruction { } /// A disassembled native assembly instruction -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct AssemblyInstruction { /// The origin of the assembly instruction @@ -404,7 +404,7 @@ pub struct PcodeDisassembly { } /// Disassembly of an instruction into its native assembly -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct NativeDisassembly { /// The disassembled instruction diff --git a/src/tests/serde.rs b/src/tests/serde.rs index 5418bba..687a5a0 100644 --- a/src/tests/serde.rs +++ b/src/tests/serde.rs @@ -1,4 +1,261 @@ +use crate::*; + +fn test_round_trip<T>(value: &T) +where + T: serde::Serialize + std::fmt::Debug, + for<'de> T: serde::Deserialize<'de>, + for<'a> &'a T: PartialEq, +{ + let serialized = serde_json::to_value(value) + .unwrap_or_else(|err| panic!("failed to serialize value {value:?}: {err}")); + let deserialized = serde_json::from_value::<T>(serialized) + .unwrap_or_else(|err| panic!("failed to deserialize value {value:?}: {err}")); + + assert_eq!(value, &deserialized); +} + +#[test] +fn op_code() { + let values = [ + OpCode::Copy, + OpCode::Load, + OpCode::Store, + OpCode::Branch, + OpCode::BranchConditional, + OpCode::BranchIndirect, + OpCode::Call, + OpCode::CallIndirect, + OpCode::Return, + OpCode::Piece, + OpCode::Subpiece, + OpCode::Popcount, + OpCode::LzCount, + OpCode::Bool(BoolOp::Negate), + OpCode::Int(IntOp::Add), + OpCode::Float(FloatOp::Equal), + OpCode::Pseudo(PseudoOp::CallOther), + OpCode::Analysis(AnalysisOp::MultiEqual), + OpCode::Unknown(-42), + ]; + + for value in values { + test_round_trip(&value); + } +} + +#[test] +fn bool_op() { + let values = [BoolOp::Negate, BoolOp::And, 
BoolOp::Or, BoolOp::Xor]; + + for value in values { + test_round_trip(&value); + } +} + +#[test] +fn int_sign() { + let values = [IntSign::Signed, IntSign::Unsigned]; + + for value in values { + test_round_trip(&value); + } +} + +#[test] +fn int_op() { + let values = [ + IntOp::Add, + IntOp::Negate, + IntOp::Subtract, + IntOp::Multiply, + IntOp::Divide(IntSign::Unsigned), + IntOp::Remainder(IntSign::Unsigned), + IntOp::Equal, + IntOp::NotEqual, + IntOp::LessThan(IntSign::Unsigned), + IntOp::LessThanOrEqual(IntSign::Unsigned), + IntOp::Extension(IntSign::Unsigned), + IntOp::Carry(IntSign::Unsigned), + IntOp::Borrow, + IntOp::ShiftLeft, + IntOp::ShiftRight(IntSign::Unsigned), + IntOp::Bitwise(BoolOp::Negate), + ]; + + for value in values { + test_round_trip(&value); + } +} + +#[test] +fn float_op() { + let values = [ + FloatOp::NotEqual, + FloatOp::LessThan, + FloatOp::LessThanOrEqual, + FloatOp::IsNaN, + FloatOp::Add, + FloatOp::Subtract, + FloatOp::Multiply, + FloatOp::Divide, + FloatOp::Negate, + FloatOp::AbsoluteValue, + FloatOp::SquareRoot, + FloatOp::IntToFloat, + FloatOp::FloatToFloat, + FloatOp::Truncate, + FloatOp::Ceiling, + FloatOp::Floor, + FloatOp::Round, + ]; + + for value in values { + test_round_trip(&value); + } +} + +#[test] +fn pseudo_op() { + let values = [ + PseudoOp::CallOther, + PseudoOp::ConstantPoolRef, + PseudoOp::New, + ]; + + for value in values { + test_round_trip(&value); + } +} + +#[test] +fn analysis_op() { + let values = [ + AnalysisOp::MultiEqual, + AnalysisOp::CopyIndirect, + AnalysisOp::PointerAdd, + AnalysisOp::PointerSubcomponent, + AnalysisOp::Cast, + AnalysisOp::Insert, + AnalysisOp::Extract, + AnalysisOp::SegmentOp, + ]; + + for value in values { + test_round_trip(&value); + } +} + +#[test] +fn address_space_id() { + for id in [0, 1, 42, usize::MAX - 1, usize::MAX] { + let value = AddressSpaceId::new(id); + test_round_trip(&value); + } +} + +#[test] +fn address_space_type() { + let values = [ + AddressSpaceType::Constant, + 
AddressSpaceType::Processor, + AddressSpaceType::BaseRegister, + AddressSpaceType::Internal, + AddressSpaceType::FuncCallSpecs, + AddressSpaceType::PcodeOp, + AddressSpaceType::Join, + ]; + + for value in values { + test_round_trip(&value); + } +} + +fn constant_address_space(size: usize) -> AddressSpace { + AddressSpace { + id: AddressSpaceId::new(42), + name: "test-address-space".into(), + word_size: size, + address_size: size, + space_type: AddressSpaceType::Constant, + big_endian: false, + } +} + +#[test] +fn address() { + for offset in [u64::MIN, u64::MAX, 0, 1, 42] { + let value = Address::new(constant_address_space(32), offset); + test_round_trip(&value); + } +} + +fn test_address(addr: u64) -> Address { + let address_space = AddressSpace { + id: AddressSpaceId::new(42), + name: "test-address-space".into(), + word_size: 32, + address_size: 32, + space_type: AddressSpaceType::Processor, + big_endian: false, + }; + + Address::new(address_space, addr) +} + +#[test] +fn varnode_data() { + for (i, size) in [1, 2, 4, 8, 16, 32, 64].iter().enumerate() { + let value = VarnodeData::new(test_address(42_000 * i as u64), *size); + test_round_trip(&value); + } +} + +fn test_instruction() -> PcodeInstruction { + PcodeInstruction { + address: test_address(42), + op_code: OpCode::Load, + inputs: vec![VarnodeData::new( + Address::new(constant_address_space(32), 42), + 32, + )], + output: Some(VarnodeData::new(test_address(42_000), 32)), + } +} + +fn test_asm_instruction() -> AssemblyInstruction { + AssemblyInstruction { + address: test_address(42), + mnemonic: "TEST".into(), + body: "TEST foo 42".into(), + } +} + +#[test] +fn pcode_instruction() { + let value = test_instruction(); + test_round_trip(&value); +} + +#[test] +fn assembly_instruction() { + let value = test_asm_instruction(); + test_round_trip(&value); +} + +#[test] +fn pcode_disassembly() { + let value = PcodeDisassembly { + instructions: vec![test_instruction(), test_instruction()], + origin: 
VarnodeData::new(test_address(42_000), 42), + }; + test_round_trip(&value); +} + #[test] -fn fails() { - panic!(); +fn native_disassembly() { + let value = NativeDisassembly { + instruction: test_asm_instruction(), + origin: VarnodeData::new(test_address(42_000), 42), + }; + test_round_trip(&value); } From c4e87d8228ec77304de621ad58bce2235ac6509e Mon Sep 17 00:00:00 2001 From: soruh Date: Thu, 16 Oct 2025 21:17:58 +0200 Subject: [PATCH 3/5] remove erroneous serde derives for NativeDisassemblyOutput --- src/sleigh.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sleigh.rs b/src/sleigh.rs index 6083dca..50df4c2 100644 --- a/src/sleigh.rs +++ b/src/sleigh.rs @@ -445,7 +445,6 @@ impl std::fmt::Display for PcodeDisassembly { } #[derive(Default)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] struct NativeDisassemblyOutput { instruction: Option<AssemblyInstruction>, } From d5332099d81979add854eb4b0f91b6709fad8fb6 Mon Sep 17 00:00:00 2001 From: soruh Date: Thu, 16 Oct 2025 21:25:19 +0200 Subject: [PATCH 4/5] add changelog entry --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c21747..f5d8c90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## Unreleased +### Added + +* Support for serializing libsla structures using serde + ### Fixed * `AddressSpace` from another Sleigh instance is now properly recognized. From ea19ccaaf77b8c04924a5f1fbeaf3cd1aea5a64d Mon Sep 17 00:00:00 2001 From: soruh Date: Thu, 16 Oct 2025 21:26:12 +0200 Subject: [PATCH 5/5] fix highlighting in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5d8c90..de56a15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ### Added -* Support for serializing libsla structures using serde +* Support for serializing `libsla` structures using `serde` ### Fixed