Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
rust: [stable, beta, nightly, 1.70.0]
rust: [stable, beta, nightly, 1.75.0]

steps:
- uses: actions/checkout@v6
- run: rustup toolchain install ${{ matrix.rust }} --profile minimal
- run: cargo +${{ matrix.rust }} check --all-targets --all-features
- run: cargo +${{ matrix.rust }} test
- run: cargo +${{ matrix.rust }} test -F binary

lint:
runs-on: ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/replay.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
parse_mode: [unidiff]
parse_mode: [unidiff, gitdiff]
name: ${{ inputs.name && matrix.parse_mode || format('{0} ({1}, {2})', inputs.repo_url, matrix.parse_mode, inputs.commits) }}
steps:
- uses: actions/checkout@v6
Expand All @@ -67,7 +67,7 @@ jobs:
exit 1
fi
- run: rustup toolchain install stable --profile minimal
- run: cargo test --release --test replay -- --ignored --nocapture
- run: cargo test --release --test replay -F binary -- --ignored --nocapture
env:
DIFFY_TEST_REPO: ${{ inputs.repo_url == '' && '.' || 'target/test-repo' }}
DIFFY_TEST_COMMITS: ${{ inputs.commits }}
Expand Down
39 changes: 39 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ repository = "https://github.com/bmwill/diffy"
readme = "README.md"
keywords = ["diff", "patch", "merge"]
categories = ["text-processing"]
rust-version = "1.70.0"
rust-version = "1.75.0"
edition = "2021"

[features]
binary = ["dep:flate2"]
color = ["dep:anstyle"]

[dependencies]
anstyle = { version = "1.0.13", optional = true }
flate2 = { version = "1.1.9", optional = true, default-features = false, features = ["zlib-rs"] }

[dev-dependencies]
rayon = "1.10.0"
Expand All @@ -25,3 +27,11 @@ snapbox = { version = "0.6.24", features = ["dir"] }
[[example]]
name = "patch_formatter"
required-features = ["color"]

[[test]]
name = "compat"
required-features = ["binary"]

[[test]]
name = "replay"
required-features = ["binary"]
1 change: 1 addition & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ ignore = [
allow = [
"MIT",
"Apache-2.0",
"Zlib",
#"Apache-2.0 WITH LLVM-exception",
]
# The confidence threshold for detecting a license from license text.
Expand Down
229 changes: 229 additions & 0 deletions src/binary/base85.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
//! Base85 encoding and decoding using the character set defined in [RFC 1924].
//!
//! ## References
//!
//! * [RFC 1924]
//! * [Wikipedia: Ascii85 § RFC 1924 version](https://en.wikipedia.org/wiki/Ascii85#RFC_1924_version)
//!
//! [RFC 1924]: https://datatracker.ietf.org/doc/html/rfc1924

use std::fmt;

/// The 85 symbols of the RFC 1924 Base85 alphabet, ordered by digit value:
/// `0-9` (0..=9), `A-Z` (10..=35), `a-z` (36..=61), then 23 punctuation
/// characters (62..=84).
const ALPHABET: &[u8; 85] = b"0123456789\
    ABCDEFGHIJKLMNOPQRSTUVWXYZ\
    abcdefghijklmnopqrstuvwxyz\
    !#$%&()*+-;<=>?@^_`{|}~";

/// Pre-computed lookup table for Base85 decoding.
///
/// Maps ASCII byte value → digit value or `0xFF` for invalid characters.
/// This provides O(1) lookup.
const TABLE: [u8; 256] = {
let mut table = [0xFFu8; 256];
let mut i = 0usize;
while i < 85 {
table[ALPHABET[i] as usize] = i as u8;
i += 1;
}
table
};

/// Failure modes of the Base85 encode and decode routines.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Base85Error {
    /// The input contains a character outside the RFC 1924 alphabet.
    ///
    /// Carries the offending character.
    InvalidCharacter(char),
    /// The input length is not valid for the requested operation.
    InvalidLength,
}

impl fmt::Display for Base85Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Base85Error::InvalidCharacter(c) => write!(f, "invalid base85 character: {:?}", c),
Base85Error::InvalidLength => write!(f, "invalid input length"),
}
}
}

// Empty body: the default `Error` methods are used as-is, relying on the
// `Debug` derive and the `Display` impl of `Base85Error`.
impl std::error::Error for Base85Error {}

/// Decodes a Base85 string, appending the decoded bytes to `output`.
///
/// Each group of 5 input characters is interpreted as a base-85 number
/// (most significant digit first) and emitted as 4 big-endian bytes.
///
/// ## Errors
///
/// * [`Base85Error::InvalidLength`] if the input length in bytes is not a
///   multiple of 5.
/// * [`Base85Error::InvalidCharacter`] if a character is not part of the
///   RFC 1924 alphabet. The full character is reported, including
///   multi-byte (non-ASCII) ones.
/// * [`Base85Error::InvalidCharacter`] if a group encodes a value larger than
///   `u32::MAX` (for example `"~~~~~"`). Such a group cannot be produced by a
///   valid encoder; the group's last character, the one that pushes the value
///   out of range, is reported.
///
/// Bytes decoded from groups preceding the failing one have already been
/// appended to `output` when an error is returned.
///
/// ## Limitations
///
/// The input length must be a multiple of 5.
///
/// This function does not handle padding for partial chunks.
/// When decoding data where the original byte count isn't a multiple of 4,
/// callers must handle truncation at a higher level.
/// For example, via a length indicator in Git binary patch.
pub fn decode_into(input: &str, output: &mut impl Extend<u8>) -> Result<(), Base85Error> {
    let bytes = input.as_bytes();

    if bytes.len() % 5 != 0 {
        return Err(Base85Error::InvalidLength);
    }

    // TODO: Use `as_chunks::<5>()` when MSRV >= 1.88
    for (group, chunk) in bytes.chunks_exact(5).enumerate() {
        let mut value: u32 = 0;
        for (offset, &byte) in chunk.iter().enumerate() {
            let digit = TABLE[usize::from(byte)];
            if digit == 0xFF {
                // Every byte before this one was a valid (ASCII) alphabet
                // character, so this index is a `char` boundary and the
                // complete character can be reported instead of a lone byte
                // of a multi-byte sequence. The fallback is never expected
                // to trigger; it merely keeps this path panic-free.
                let invalid = input
                    .get(group * 5 + offset..)
                    .and_then(|rest| rest.chars().next())
                    .unwrap_or(char::from(byte));
                return Err(Base85Error::InvalidCharacter(invalid));
            }

            // Five base-85 digits can encode up to 85^5 - 1, which exceeds
            // `u32::MAX`. Reject such groups instead of panicking (debug) or
            // silently wrapping to wrong output (release).
            value = value
                .checked_mul(85)
                .and_then(|shifted| shifted.checked_add(u32::from(digit)))
                .ok_or(Base85Error::InvalidCharacter(char::from(byte)))?;
        }

        output.extend(value.to_be_bytes());
    }

    Ok(())
}

/// Encodes `input` as Base85, appending the resulting characters to `output`.
///
/// Each group of 4 input bytes is read as a big-endian `u32` and written as
/// 5 characters of the RFC 1924 alphabet, most significant digit first.
///
/// ## Errors
///
/// Returns [`Base85Error::InvalidLength`] if the input length is not a
/// multiple of 4. Nothing is written to `output` in that case.
///
/// ## Limitations
///
/// This function does not handle padding for partial chunks.
/// Callers encoding data where the byte count isn't a multiple of 4
/// must handle padding at a higher level.
/// For example, via a length indicator in Git binary patch format.
#[allow(dead_code)] // will be used for patch formatting
pub fn encode_into(input: &[u8], output: &mut impl Extend<char>) -> Result<(), Base85Error> {
    if input.len() % 4 != 0 {
        return Err(Base85Error::InvalidLength);
    }

    // TODO: Use `as_chunks::<4>()` when MSRV >= 1.88
    for word in input.chunks_exact(4) {
        // `chunks_exact(4)` guarantees exactly four bytes per chunk.
        let mut remaining = u32::from_be_bytes([word[0], word[1], word[2], word[3]]);

        // Digits fall out least significant first, so fill the group from
        // its last slot backwards to end up in most-significant-first order.
        let mut group = ['0'; 5];
        for slot in group.iter_mut().rev() {
            *slot = char::from(ALPHABET[(remaining % 85) as usize]);
            remaining /= 85;
        }

        output.extend(group);
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Test helper: decodes `input` into a freshly allocated byte vector.
    fn decode(input: &str) -> Result<Vec<u8>, Base85Error> {
        let mut decoded = Vec::with_capacity((input.len() / 5) * 4);
        decode_into(input, &mut decoded).map(|()| decoded)
    }

    /// Test helper: encodes `input` into a freshly allocated string.
    fn encode(input: &[u8]) -> Result<String, Base85Error> {
        let mut encoded = String::with_capacity((input.len() / 4) * 5);
        encode_into(input, &mut encoded).map(|()| encoded)
    }

    /// Pairs of (raw bytes, expected Base85 text) shared by the tests below.
    const TEST_VECTORS: &[(&[u8], &str)] = &[
        (b"", ""),
        (&[0x00, 0x00, 0x00, 0x00], "00000"),
        (&[0xff, 0xff, 0xff, 0xff], "|NsC0"),
        // Rust ecosystem phrases
        (b"Rust", "Qgw55"),
        (b"Fearless concurrency", "MrC1gY-MwEAY*TCV|8+JWo~16"),
        (b"memory safe!", "ZDnn5a(N(gVP<6^"),
        (b"blazing fast", "Vr*f0X>MmAW?^%5"),
        (
            b"zero-cost abstraction!??",
            "dS!BNEn{zUbRc13b98cHV{~b6ZXrKE",
        ),
    ];

    /// Every alphabet character must map back to its own index.
    #[test]
    fn table_covers_all_alphabet_chars() {
        for (index, symbol) in ALPHABET.iter().copied().enumerate() {
            assert_eq!(
                TABLE[usize::from(symbol)],
                index as u8,
                "mismatch for char '{}' at index {}",
                char::from(symbol),
                index
            );
        }
    }

    /// A sample of characters outside the alphabet must map to the sentinel.
    #[test]
    fn table_rejects_invalid_chars() {
        for rejected in b" \t\n\r\"'\\[],:".iter().copied() {
            assert_eq!(
                TABLE[usize::from(rejected)],
                0xFF,
                "char '{}' should be invalid",
                char::from(rejected)
            );
        }
    }

    #[test]
    fn decode_test_vectors() {
        for &(expected, encoded) in TEST_VECTORS {
            let decoded = decode(encoded).unwrap();
            assert_eq!(decoded, expected, "decode({:?}) failed", encoded);
        }
    }

    #[test]
    fn encode_test_vectors() {
        for &(bytes, expected) in TEST_VECTORS {
            let encoded = encode(bytes).unwrap();
            assert_eq!(encoded, expected, "encode({:?}) failed", bytes);
        }
    }

    /// Inputs whose length is not a multiple of 5 are rejected.
    #[test]
    fn decode_invalid_length() {
        for input in ["0000", "000", "00", "0"] {
            assert!(matches!(decode(input), Err(Base85Error::InvalidLength)));
        }
    }

    /// Characters outside the alphabet are reported back in the error.
    #[test]
    fn decode_invalid_character() {
        let with_space = decode("0000 ");
        assert!(matches!(
            with_space,
            Err(Base85Error::InvalidCharacter(' '))
        ));

        let with_quote = decode("0000\"");
        assert!(matches!(
            with_quote,
            Err(Base85Error::InvalidCharacter('"'))
        ));
    }

    /// Inputs whose length is not a multiple of 4 are rejected.
    #[test]
    fn encode_invalid_length() {
        let inputs: [&[u8]; 4] = [&[0], &[0, 0], &[0, 0, 0], &[0, 0, 0, 0, 0]];
        for input in inputs {
            assert!(matches!(encode(input), Err(Base85Error::InvalidLength)));
        }
    }

    /// Encoding and then decoding yields the original bytes.
    #[test]
    fn round_trip() {
        for &(bytes, _) in TEST_VECTORS {
            let decoded = encode(bytes)
                .and_then(|encoded| decode(&encoded))
                .unwrap();
            assert_eq!(decoded, bytes, "round-trip failed for {:?}", bytes);
        }
    }
}
Loading
Loading