From c891a98f3e5250c7b084ad46e4531a393f7c43b9 Mon Sep 17 00:00:00 2001 From: Crauzer Date: Sat, 14 Feb 2026 22:45:41 +0100 Subject: [PATCH] feat: better diff command --- crates/wadtools/src/commands/diff.rs | 228 ++++++++++++++++++++++----- crates/wadtools/src/main.rs | 43 ++++- 2 files changed, 224 insertions(+), 47 deletions(-) diff --git a/crates/wadtools/src/commands/diff.rs b/crates/wadtools/src/commands/diff.rs index 44feb9e..aec0222 100644 --- a/crates/wadtools/src/commands/diff.rs +++ b/crates/wadtools/src/commands/diff.rs @@ -1,14 +1,18 @@ -use camino::Utf8Path; use std::{ fs::{File, OpenOptions}, io::{Read, Seek}, }; use colored::Colorize; +use fancy_regex::Regex; use league_toolkit::wad::{Wad, WadChunk}; use serde::Serialize; +use std::sync::Arc; -use crate::utils::{default_hashtable_dir, format_chunk_path_hash, WadHashtable}; +use crate::{ + extractor::{should_skip_hash, should_skip_pattern}, + utils::{format_chunk_path_hash, format_size, WadHashtable}, +}; /// A difference between two WAD chunks enum ChunkDiff { @@ -31,72 +35,179 @@ struct ChunkDiffCsvRecord { new_path: String, old_uncompressed_size: usize, new_uncompressed_size: usize, + old_compressed_size: usize, + new_compressed_size: usize, + old_compression_type: String, + new_compression_type: String, } pub struct DiffArgs { pub reference: String, pub target: String, - pub hashtable_path: Option, pub output: Option, - pub hashtable_dir: Option, + pub pattern: Option, + pub hash: Option>, + pub filter_invert: bool, } -pub fn diff(args: DiffArgs) -> eyre::Result<()> { +pub fn diff(args: DiffArgs, hashtable: &WadHashtable) -> eyre::Result<()> { let reference_wad_file = File::open(&args.reference)?; let target_wad_file = File::open(&args.target)?; - let mut hashtable = WadHashtable::new()?; - if let Some(dir_override) = &args.hashtable_dir { - hashtable.add_from_dir(Utf8Path::new(dir_override))?; - } else if let Some(dir) = default_hashtable_dir() { - hashtable.add_from_dir(dir)?; - } - if let Some(hashtable_path) = args.hashtable_path { - hashtable.add_from_file(&File::open(&hashtable_path)?)?; - } - let reference_wad = Wad::mount(reference_wad_file)?; let target_wad = Wad::mount(target_wad_file)?; tracing::info!("Collecting diffs..."); let diffs = collect_diffs(&reference_wad, &target_wad); + let filter_pattern = crate::utils::create_filter_pattern(args.pattern)?; + if let Some(output_path) = args.output { - write_diffs_to_csv(&diffs, &hashtable, &output_path)?; + write_diffs_to_csv( + &diffs, + hashtable, + &output_path, + filter_pattern.as_ref(), + args.hash.as_deref(), + args.filter_invert, + )?; } else { - print_diffs(&diffs, &hashtable); + print_diffs( + &diffs, + hashtable, + filter_pattern.as_ref(), + args.hash.as_deref(), + args.filter_invert, + ); } Ok(()) } -fn print_diffs(diffs: &[ChunkDiff], hashtable: &WadHashtable) { - for diff in diffs { +/// Returns the primary path hash for a diff entry (used for filtering). +fn diff_primary_path_hash(diff: &ChunkDiff) -> u64 { + match diff { + ChunkDiff::New(chunk) => chunk.path_hash, + ChunkDiff::Removed(chunk) => chunk.path_hash, + ChunkDiff::Modified { old, .. } => old.path_hash, + ChunkDiff::Renamed { new, .. } => new.path_hash, + } +} + +/// Returns the primary resolved path for a diff entry (used for sorting/filtering). +fn diff_primary_path(diff: &ChunkDiff, hashtable: &WadHashtable) -> Arc { + match diff { + ChunkDiff::New(chunk) => hashtable.resolve_path(chunk.path_hash), + ChunkDiff::Removed(chunk) => hashtable.resolve_path(chunk.path_hash), + ChunkDiff::Modified { old, .. } => hashtable.resolve_path(old.path_hash), + ChunkDiff::Renamed { new, .. } => hashtable.resolve_path(new.path_hash), + } +} + +/// Returns true if a diff entry should be skipped based on filters. +fn should_skip_diff( + diff: &ChunkDiff, + hashtable: &WadHashtable, + filter_pattern: Option<&Regex>, + hash_filter: Option<&[u64]>, + filter_invert: bool, +) -> bool { + let path_hash = diff_primary_path_hash(diff); + if should_skip_hash(path_hash, hash_filter, filter_invert) { + return true; + } + let path = diff_primary_path(diff, hashtable); + if should_skip_pattern(path.as_ref(), filter_pattern, filter_invert) { + return true; + } + false +} + +fn print_diffs( + diffs: &[ChunkDiff], + hashtable: &WadHashtable, + filter_pattern: Option<&Regex>, + hash_filter: Option<&[u64]>, + filter_invert: bool, +) { + // Sort by resolved path + let mut sorted_indices: Vec = (0..diffs.len()).collect(); + sorted_indices.sort_by(|&a, &b| { + let path_a = diff_primary_path(&diffs[a], hashtable); + let path_b = diff_primary_path(&diffs[b], hashtable); + path_a.cmp(&path_b) + }); + + let mut count_new: usize = 0; + let mut count_removed: usize = 0; + let mut count_modified: usize = 0; + let mut count_renamed: usize = 0; + + for &idx in &sorted_indices { + let diff = &diffs[idx]; + + if should_skip_diff(diff, hashtable, filter_pattern, hash_filter, filter_invert) { + continue; + } + match diff { ChunkDiff::New(chunk) => { let path = hashtable.resolve_path(chunk.path_hash); - - println!("+ {}", path.bright_green()); + println!("{} {}", "+".bright_green(), path.bright_green()); + count_new += 1; } ChunkDiff::Removed(chunk) => { let path = hashtable.resolve_path(chunk.path_hash); - - println!("- {}", path.bright_red()); + println!("{} {}", "-".bright_red(), path.bright_red()); + count_removed += 1; } - ChunkDiff::Modified { old, new: _ } => { + ChunkDiff::Modified { old, new } => { let path = hashtable.resolve_path(old.path_hash); - - // For modified chunks, we print the chunk path in yellow, and somehow also print the new file sizes - println!("! {}", path.bright_yellow()); + let old_size = format_size(old.uncompressed_size as u64); + let new_size = format_size(new.uncompressed_size as u64); + + let mut detail = format!("({} → {})", old_size, new_size); + + if old.compression_type != new.compression_type { + detail = format!( + "({} → {}, {} → {})", + old_size, new_size, old.compression_type, new.compression_type + ); + } + + println!( + "{} {} {}", + "~".bright_yellow(), + path.bright_yellow(), + detail.bright_black() + ); + count_modified += 1; } ChunkDiff::Renamed { old, new } => { let old_path = hashtable.resolve_path(old.path_hash); let new_path = hashtable.resolve_path(new.path_hash); - - println!("! {} -> {}", old_path.bright_blue(), new_path.bright_cyan()); + println!( + "{} {} -> {}", + ">".bright_cyan(), + old_path.bright_blue(), + new_path.bright_cyan() + ); + count_renamed += 1; } } } + + let total = count_new + count_removed + count_modified + count_renamed; + if total > 0 { + println!(); + println!( + "Summary: {} new, {} removed, {} modified, {} renamed", + format!("+{}", count_new).bright_green(), + format!("-{}", count_removed).bright_red(), + format!("~{}", count_modified).bright_yellow(), + format!(">{}", count_renamed).bright_cyan(), + ); + } } fn collect_diffs( @@ -128,6 +239,9 @@ where } } + // Collect renamed hashes so we can filter out stale Removed entries + let mut renamed_old_hashes = Vec::new(); + for target_chunk in target_wad.chunks() { let reference_chunk = reference_wad.chunks().get(target_chunk.path_hash); @@ -140,6 +254,7 @@ where .find(|chunk| chunk.checksum == target_chunk.checksum); if let Some(renamed_chunk) = renamed_chunk { + renamed_old_hashes.push(renamed_chunk.path_hash); diffs.push(ChunkDiff::Renamed { old: *renamed_chunk, new: *target_chunk, @@ -150,13 +265,16 @@ where } } - // Sort diffs by path_hash to ensure deterministic output regardless of HashMap iteration order - diffs.sort_by_key(|diff| match diff { - ChunkDiff::New(chunk) => chunk.path_hash, - ChunkDiff::Removed(chunk) => chunk.path_hash, - ChunkDiff::Modified { old, .. } => old.path_hash, - ChunkDiff::Renamed { old, .. } => old.path_hash, - }); + // Filter out Removed entries whose path_hash matches a Renamed entry's old.path_hash + if !renamed_old_hashes.is_empty() { + diffs.retain(|diff| { + if let ChunkDiff::Removed(chunk) = diff { + !renamed_old_hashes.contains(&chunk.path_hash) + } else { + true + } + }); + } diffs } @@ -165,6 +283,9 @@ fn write_diffs_to_csv( diffs: &[ChunkDiff], hashtable: &WadHashtable, output_path: &str, + filter_pattern: Option<&Regex>, + hash_filter: Option<&[u64]>, + filter_invert: bool, ) -> eyre::Result<()> { tracing::info!("Writing diffs to CSV file: {}", output_path.bright_cyan()); @@ -175,7 +296,8 @@ fn write_diffs_to_csv( .open(output_path)?; let mut writer = csv::Writer::from_writer(file); - let mut records = create_csv_records(diffs, hashtable); + let mut records = + create_csv_records(diffs, hashtable, filter_pattern, hash_filter, filter_invert); records.sort_by(|a, b| a.path.cmp(&b.path)); @@ -189,9 +311,19 @@ fn write_diffs_to_csv( Ok(()) } -fn create_csv_records(diffs: &[ChunkDiff], hashtable: &WadHashtable) -> Vec { +fn create_csv_records( + diffs: &[ChunkDiff], + hashtable: &WadHashtable, + filter_pattern: Option<&Regex>, + hash_filter: Option<&[u64]>, + filter_invert: bool, +) -> Vec { let mut records = Vec::::new(); for diff in diffs { + if should_skip_diff(diff, hashtable, filter_pattern, hash_filter, filter_invert) { + continue; + } + match diff { ChunkDiff::New(chunk) => { records.push(ChunkDiffCsvRecord { @@ -199,8 +331,12 @@ fn create_csv_records(diffs: &[ChunkDiff], hashtable: &WadHashtable) -> Vec { @@ -210,7 +346,11 @@ fn create_csv_records(diffs: &[ChunkDiff], hashtable: &WadHashtable) -> Vec { @@ -221,6 +361,10 @@ fn create_csv_records(diffs: &[ChunkDiff], hashtable: &WadHashtable) -> Vec { @@ -231,6 +375,10 @@ fn create_csv_records(diffs: &[ChunkDiff], hashtable: &WadHashtable) -> Vec, + + /// Only include diffs whose resolved path matches this regex + #[arg( + short = 'x', + long, + value_name = "REGEX", + help = "Only include diffs whose resolved path matches this regex (case-insensitive by default; use (?-i) to disable)" + )] + pattern: Option, + + /// Only include diffs whose path hash matches one of these 16-char hex values + #[arg(long, value_name = "HASH", num_args = 1..)] + hash: Option>, + + /// Invert the -x and --hash filters (exclude matching diffs instead of including them) + #[arg(short = 'v', long = "filter-invert")] + filter_invert: bool, }, /// Print the default hashtable directory #[command(visible_alias = "hd")] @@ -293,13 +310,25 @@ fn main() -> eyre::Result<()> { target, hashtable, output, - } => diff(DiffArgs { - reference, - target, - hashtable_path: hashtable, - output, - hashtable_dir: args.hashtable_dir.or_else(|| config.hashtable_dir.clone()), - }), + pattern, + hash, + filter_invert, + } => { + let hashtable_dir = args.hashtable_dir.or_else(|| config.hashtable_dir.clone()); + let ht = load_hashtable(hashtable_dir.as_deref(), hashtable.as_deref())?; + let hash_filter = parse_hashes(hash)?; + diff( + DiffArgs { + reference, + target, + output, + pattern, + hash: hash_filter, + filter_invert, + }, + &ht, + ) + } Commands::HashtableDir => { if let Some(dir) = default_hashtable_dir() { println!("{}", dir);