Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e5e4e84
refactor(patch): ParseOpts for configurable parsing behavior
weihanglo Apr 12, 2026
1a83fe9
refactor(patch): make preamble skipping configurable
weihanglo Apr 12, 2026
d1ec236
refactor(patch): organize item order
weihanglo Apr 12, 2026
fe9b1b3
refactor(patch): wire up ParseOpts for byte parsing
weihanglo Apr 12, 2026
f4e6b2b
refactor(patch): deduplicate parse_one logic
weihanglo Apr 12, 2026
edf93e5
refactor(patch_set): extract `line_ending_len` helper
weihanglo Apr 12, 2026
90861e0
feat(patch_set): support git diff application
weihanglo Apr 12, 2026
95836ec
test(compat): add git apply compatibility tests
weihanglo Apr 11, 2026
ff392af
test(replay): add gitdiff mode support
weihanglo Apr 11, 2026
e5895d9
chore(ci): run gitdiff also in replay workflow
weihanglo Apr 12, 2026
f1eb3c6
feat(binary): binary patch types and parser
weihanglo Apr 12, 2026
1f7f50f
feat(patch_set): wire binary patch into gitdiff parser
weihanglo Apr 12, 2026
4b29938
refactor: clippy manual_div_ceil
weihanglo Feb 1, 2026
70eb62f
chore: dependency flate2 behind binary feature
weihanglo Apr 13, 2026
3540c16
feat(binary): base85/delta decode and patch application
weihanglo Apr 13, 2026
531e8d9
test: add binary patch to compat and replay tests
weihanglo Apr 13, 2026
c255bbf
refactor(patch_set): make free helpers generic over Text
weihanglo Apr 13, 2026
8f5c1d1
refactor(patch_set): make GitHeader::parse generic over Text
weihanglo Apr 14, 2026
79b3804
refactor(patch_set): extract `PatchSet` methods to free fns
weihanglo Apr 14, 2026
33b4e38
refactor: add `Text::as_str_prefix` method
weihanglo Apr 14, 2026
e50b8de
refactor(patch_set): make `PatchSet` internal generic over `T: Text`
weihanglo Apr 14, 2026
c189ab8
feat(patch_set): `PatchSet::parse_bytes` for raw byte input
weihanglo Apr 14, 2026
035b7d6
test(patch_set): add unit tests for `parse_bytes`
weihanglo Apr 14, 2026
095fa47
test(replay): switch to `PatchSet::parse_bytes`
weihanglo Apr 14, 2026
2edeadb
test(compat): switch to byte-aware API
weihanglo Apr 14, 2026
05cdc37
test(compat): add non-UTF8 hunk tests
weihanglo Apr 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
rust: [stable, beta, nightly, 1.70.0]
rust: [stable, beta, nightly, 1.75.0]

steps:
- uses: actions/checkout@v6
- run: rustup toolchain install ${{ matrix.rust }} --profile minimal
- run: cargo +${{ matrix.rust }} check --all-targets --all-features
- run: cargo +${{ matrix.rust }} test
- run: cargo +${{ matrix.rust }} test -F binary

lint:
runs-on: ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/replay.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
parse_mode: [unidiff]
parse_mode: [unidiff, gitdiff]
name: ${{ inputs.name && matrix.parse_mode || format('{0} ({1}, {2})', inputs.repo_url, matrix.parse_mode, inputs.commits) }}
steps:
- uses: actions/checkout@v6
Expand All @@ -67,7 +67,7 @@ jobs:
exit 1
fi
- run: rustup toolchain install stable --profile minimal
- run: cargo test --release --test replay -- --ignored --nocapture
- run: cargo test --release --test replay -F binary -- --ignored --nocapture
env:
DIFFY_TEST_REPO: ${{ inputs.repo_url == '' && '.' || 'target/test-repo' }}
DIFFY_TEST_COMMITS: ${{ inputs.commits }}
Expand Down
39 changes: 39 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ repository = "https://github.com/bmwill/diffy"
readme = "README.md"
keywords = ["diff", "patch", "merge"]
categories = ["text-processing"]
rust-version = "1.70.0"
rust-version = "1.75.0"
edition = "2021"

[features]
binary = ["dep:flate2"]
color = ["dep:anstyle"]

[dependencies]
anstyle = { version = "1.0.13", optional = true }
flate2 = { version = "1.1.9", optional = true, default-features = false, features = ["zlib-rs"] }

[dev-dependencies]
rayon = "1.10.0"
Expand All @@ -25,3 +27,11 @@ snapbox = { version = "0.6.24", features = ["dir"] }
[[example]]
name = "patch_formatter"
required-features = ["color"]

[[test]]
name = "compat"
required-features = ["binary"]

[[test]]
name = "replay"
required-features = ["binary"]
1 change: 1 addition & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ ignore = [
allow = [
"MIT",
"Apache-2.0",
"Zlib",
#"Apache-2.0 WITH LLVM-exception",
]
# The confidence threshold for detecting a license from license text.
Expand Down
229 changes: 229 additions & 0 deletions src/binary/base85.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
//! Base85 encoding and decoding using the character set defined in [RFC 1924].
//!
//! ## References
//!
//! * [RFC 1924]
//! * [Wikipedia: Ascii85 § RFC 1924 version](https://en.wikipedia.org/wiki/Ascii85#RFC_1924_version)
//!
//! [RFC 1924]: https://datatracker.ietf.org/doc/html/rfc1924

use std::fmt;

/// Base85 character set (RFC 1924).
///
/// The index of a byte in this array is the digit value it encodes
/// (`b'0'` is digit 0, `b'~'` is digit 84).
const ALPHABET: &[u8; 85] = b"0123456789\
    ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
    !#$%&()*+-;<=>?@^_`{|}~";

/// Pre-computed lookup table for Base85 decoding.
///
/// Maps a byte value → digit value, or `0xFF` for bytes that are not part of
/// [`ALPHABET`]. This provides O(1) lookup.
const TABLE: [u8; 256] = {
    let mut table = [0xFFu8; 256];
    // `for` loops are not available in const contexts, hence the manual counter.
    let mut i = 0usize;
    while i < 85 {
        table[ALPHABET[i] as usize] = i as u8;
        i += 1;
    }
    table
};

/// Error type for Base85 operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Base85Error {
    /// A character that is not valid at its position in the input.
    ///
    /// Either the character is not in the RFC 1924 alphabet, or it is the
    /// final character of a 5-character group whose decoded value would not
    /// fit in 32 bits.
    InvalidCharacter(char),
    /// Invalid input length for the operation.
    InvalidLength,
}

impl fmt::Display for Base85Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Base85Error::InvalidCharacter(c) => write!(f, "invalid base85 character: {:?}", c),
            Base85Error::InvalidLength => write!(f, "invalid input length"),
        }
    }
}

impl std::error::Error for Base85Error {}

/// Decodes a Base85 string to the provided output.
///
/// Every group of 5 input characters is decoded to one big-endian `u32`
/// and appended to `output` as 4 bytes.
///
/// ## Errors
///
/// * [`Base85Error::InvalidLength`] if the input length in bytes is not a
///   multiple of 5. Nothing is written to `output` in this case.
/// * [`Base85Error::InvalidCharacter`] if a character is outside the
///   RFC 1924 alphabet, or if a group encodes a value larger than
///   `u32::MAX` (the group's fifth character is reported, since it is the
///   digit that pushes the value out of range).
///
/// When an `InvalidCharacter` error is returned, `output` may already have
/// been extended with the bytes of the groups preceding the faulty one.
///
/// ## Limitations
///
/// The input length must be a multiple of 5.
///
/// This function does not handle padding for partial chunks.
/// When decoding data where the original byte count isn't a multiple of 4,
/// callers must handle truncation at a higher level.
/// For example, via a length indicator in Git binary patch.
pub fn decode_into(input: &str, output: &mut impl Extend<u8>) -> Result<(), Base85Error> {
    let bytes = input.as_bytes();

    if bytes.len() % 5 != 0 {
        return Err(Base85Error::InvalidLength);
    }

    // TODO: Use `as_chunks::<5>()` when MSRV >= 1.88
    for (group_idx, chunk) in bytes.chunks_exact(5).enumerate() {
        let mut value: u32 = 0;
        for (digit_idx, &byte) in chunk.iter().enumerate() {
            let digit = TABLE[usize::from(byte)];
            if digit == 0xFF {
                // Report the real character rather than a single UTF-8 byte
                // reinterpreted as Latin-1. The first rejected byte of a
                // multi-byte character is its lead byte, so the offset is a
                // char boundary; the fallback only guards against a panic.
                let offset = group_idx * 5 + digit_idx;
                let invalid = input
                    .get(offset..)
                    .and_then(|rest| rest.chars().next())
                    .unwrap_or(char::from(byte));
                return Err(Base85Error::InvalidCharacter(invalid));
            }

            // Five base85 digits can encode up to 85^5 - 1, which is larger
            // than `u32::MAX`. Reject such groups instead of panicking (debug)
            // or silently wrapping to wrong bytes (release).
            // NOTE(review): a dedicated error variant would describe this case
            // more precisely, but adding one changes the public enum.
            value = value
                .checked_mul(85)
                .and_then(|scaled| scaled.checked_add(u32::from(digit)))
                .ok_or(Base85Error::InvalidCharacter(char::from(byte)))?;
        }

        output.extend(value.to_be_bytes());
    }

    Ok(())
}

/// Encodes bytes in Base85 to the provided output.
///
/// Each group of 4 input bytes is read as one big-endian `u32` and appended
/// to `output` as 5 characters of the RFC 1924 alphabet, most significant
/// digit first.
///
/// ## Limitations
///
/// The input length must be a multiple of 4;
/// otherwise [`Base85Error::InvalidLength`] is returned and nothing is written.
///
/// This function does not handle padding for partial chunks.
/// Callers encoding data where the byte count isn't a multiple of 4
/// must handle padding at a higher level.
/// For example, via a length indicator in Git binary patch format.
#[allow(dead_code)] // will be used for patch formatting
pub fn encode_into(input: &[u8], output: &mut impl Extend<char>) -> Result<(), Base85Error> {
    // TODO: Use `as_chunks::<4>()` when MSRV >= 1.88
    let groups = input.chunks_exact(4);

    // A non-empty remainder means the length is not a multiple of 4.
    if !groups.remainder().is_empty() {
        return Err(Base85Error::InvalidLength);
    }

    for group in groups {
        // `chunks_exact(4)` guarantees exactly four bytes per group.
        let mut remaining = u32::from_be_bytes([group[0], group[1], group[2], group[3]]);

        // Digits come out least significant first, so fill the buffer
        // from the back to end up in most-significant-first order.
        let mut encoded = ['\0'; 5];
        for slot in encoded.iter_mut().rev() {
            *slot = char::from(ALPHABET[(remaining % 85) as usize]);
            remaining /= 85;
        }

        output.extend(encoded);
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `decode_into` against a fresh `Vec<u8>` and hands it back.
    fn decode(input: &str) -> Result<Vec<u8>, Base85Error> {
        let mut decoded = Vec::with_capacity((input.len() / 5) * 4);
        decode_into(input, &mut decoded).map(|()| decoded)
    }

    /// Runs `encode_into` against a fresh `String` and hands it back.
    fn encode(input: &[u8]) -> Result<String, Base85Error> {
        let mut encoded = String::with_capacity((input.len() / 4) * 5);
        encode_into(input, &mut encoded).map(|()| encoded)
    }

    /// Pairs of (raw bytes, Base85 text) used by the decode, encode and
    /// round-trip tests.
    const TEST_VECTORS: &[(&[u8], &str)] = &[
        (b"", ""),
        (&[0x00, 0x00, 0x00, 0x00], "00000"),
        (&[0xff, 0xff, 0xff, 0xff], "|NsC0"),
        // Rust ecosystem phrases
        (b"Rust", "Qgw55"),
        (b"Fearless concurrency", "MrC1gY-MwEAY*TCV|8+JWo~16"),
        (b"memory safe!", "ZDnn5a(N(gVP<6^"),
        (b"blazing fast", "Vr*f0X>MmAW?^%5"),
        (
            b"zero-cost abstraction!??",
            "dS!BNEn{zUbRc13b98cHV{~b6ZXrKE",
        ),
    ];

    #[test]
    fn table_covers_all_alphabet_chars() {
        for (index, symbol) in ALPHABET.iter().copied().enumerate() {
            assert_eq!(
                TABLE[usize::from(symbol)],
                index as u8,
                "mismatch for char '{}' at index {}",
                char::from(symbol),
                index
            );
        }
    }

    #[test]
    fn table_rejects_invalid_chars() {
        for symbol in b" \t\n\r\"'\\[],:".iter().copied() {
            assert_eq!(
                TABLE[usize::from(symbol)],
                0xFF,
                "char '{}' should be invalid",
                char::from(symbol)
            );
        }
    }

    #[test]
    fn decode_test_vectors() {
        for &(raw, text) in TEST_VECTORS {
            let decoded = decode(text).unwrap();
            assert_eq!(decoded, raw, "decode({:?}) failed", text);
        }
    }

    #[test]
    fn encode_test_vectors() {
        for &(raw, text) in TEST_VECTORS {
            let encoded = encode(raw).unwrap();
            assert_eq!(encoded, text, "encode({:?}) failed", raw);
        }
    }

    #[test]
    fn decode_invalid_length() {
        // Every length from 1 to 4 is short of a full 5-character group.
        for input in ["0000", "000", "00", "0"] {
            assert_eq!(decode(input), Err(Base85Error::InvalidLength));
        }
    }

    #[test]
    fn decode_invalid_character() {
        assert_eq!(
            decode("0000 "),
            Err(Base85Error::InvalidCharacter(' '))
        );
        assert_eq!(
            decode("0000\""),
            Err(Base85Error::InvalidCharacter('"'))
        );
    }

    #[test]
    fn encode_invalid_length() {
        // Lengths that are not a multiple of 4, on both sides of one full group.
        let zeros = [0u8; 5];
        for len in [1, 2, 3, 5] {
            assert_eq!(encode(&zeros[..len]), Err(Base85Error::InvalidLength));
        }
    }

    #[test]
    fn round_trip() {
        for &(raw, _) in TEST_VECTORS {
            let text = encode(raw).unwrap();
            let restored = decode(&text).unwrap();
            assert_eq!(restored, raw, "round-trip failed for {:?}", raw);
        }
    }
}
Loading
Loading