From e31d7b004a66dee59367d0c37906231908f39e62 Mon Sep 17 00:00:00 2001 From: Crauzer Date: Sun, 1 Feb 2026 14:51:14 +0100 Subject: [PATCH] feat: collect extract stats --- Cargo.lock | 23 ++++++++- crates/wadtools/Cargo.toml | 1 + crates/wadtools/src/commands/extract.rs | 51 ++++++++++++++++--- crates/wadtools/src/commands/list.rs | 18 +------ crates/wadtools/src/extractor.rs | 66 ++++++++++++++++++------- crates/wadtools/src/main.rs | 6 +++ crates/wadtools/src/utils/mod.rs | 16 ++++++ 7 files changed, 137 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a4240e7..71d0c96 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -364,6 +364,20 @@ dependencies = [ "memchr", ] +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "ddsfile" version = "0.5.2" @@ -564,6 +578,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "hashbrown" version = "0.16.1" @@ -717,7 +737,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", "serde", "serde_core", ] @@ -1849,6 +1869,7 @@ dependencies = [ "colored", "convert_case", "csv", + "dashmap", "directories-next", "dirs-next", "eyre", diff --git a/crates/wadtools/Cargo.toml b/crates/wadtools/Cargo.toml index 25d6580..f9508a1 100644 --- a/crates/wadtools/Cargo.toml +++ b/crates/wadtools/Cargo.toml @@ -28,3 +28,4 @@ camino = "1.1" convert_case = "0.9.0" ureq = "2.12" rayon = "1.10" +dashmap = "6" diff --git a/crates/wadtools/src/commands/extract.rs b/crates/wadtools/src/commands/extract.rs index a32b3a7..33687e5 100644 --- a/crates/wadtools/src/commands/extract.rs +++ b/crates/wadtools/src/commands/extract.rs @@ -6,7 +6,7 @@ use league_toolkit::{file::LeagueFileKind, wad::Wad}; use crate::{ extractor::Extractor, - utils::{create_filter_pattern, WadHashtable}, + utils::{create_filter_pattern, format_size, WadHashtable}, }; use convert_case::{Case, Casing}; @@ -18,6 +18,7 @@ pub struct ExtractArgs { pub hash: Option>, pub filter_invert: bool, pub overwrite: bool, + pub show_stats: bool, } pub fn extract(args: ExtractArgs, hashtable: &WadHashtable) -> eyre::Result<()> { @@ -42,17 +43,51 @@ pub fn extract(args: ExtractArgs, hashtable: &WadHashtable) -> eyre::Result<()> parent.join(stem) } }; - let (extracted_count, skipped_existing) = + let stats = extractor.extract_chunks(&output_dir, args.filter_type.as_deref(), args.overwrite)?; - if skipped_existing > 0 { - tracing::info!( - "extracted {} chunks, skipped {} existing :)", - extracted_count, - skipped_existing + if args.show_stats { + println!(); + println!( + "{}: {}", + "WAD".bright_cyan().bold(), + args.input.bright_white() + ); + println!( + "{}: {} chunks ({})", + "Extracted".bright_cyan().bold(), + stats.extracted_count.to_string().bright_green(), + format_size(stats.bytes_written).bright_white() ); + println!( + "{}: {} existing", + "Skipped".bright_cyan().bold(), + stats.skipped_existing.to_string().bright_yellow() + ); + if !stats.by_type.is_empty() { + println!(); + println!("{}:", "By type".bright_cyan().bold()); + let mut type_entries: Vec<_> = stats.by_type.iter().collect(); + type_entries.sort_by(|a, b| b.1.cmp(a.1)); + for (kind, count) in type_entries { + let name = format!("{:?}", kind).to_case(Case::Snake); + println!( + " {:24} {}", + name.bright_magenta(), + count.to_string().bright_white() + ); + } + } } else { - tracing::info!("extracted {} chunks :)", extracted_count); + if stats.skipped_existing > 0 { + tracing::info!( + "extracted {} chunks, skipped {} existing :)", + stats.extracted_count, + stats.skipped_existing + ); + } else { + tracing::info!("extracted {} chunks :)", stats.extracted_count); + } } Ok(()) diff --git a/crates/wadtools/src/commands/list.rs b/crates/wadtools/src/commands/list.rs index 2ab4854..a0223e4 100644 --- a/crates/wadtools/src/commands/list.rs +++ b/crates/wadtools/src/commands/list.rs @@ -6,7 +6,7 @@ use std::fs::File; use crate::{ extractor::{should_skip_hash, should_skip_pattern, should_skip_type}, - utils::{create_filter_pattern, format_chunk_path_hash, WadHashtable}, + utils::{create_filter_pattern, format_chunk_path_hash, format_size, WadHashtable}, }; #[derive(Debug, Clone, Copy, Default, clap::ValueEnum)] @@ -228,19 +228,3 @@ fn print_table(output: &ListOutput, show_stats: bool) { ); } } - -fn format_size(bytes: u64) -> String { - const KB: u64 = 1024; - const MB: u64 = KB * 1024; - const GB: u64 = MB * 1024; - - if bytes >= GB { - format!("{:.2} GB", bytes as f64 / GB as f64) - } else if bytes >= MB { - format!("{:.2} MB", bytes as f64 / MB as f64) - } else if bytes >= KB { - format!("{:.2} KB", bytes as f64 / KB as f64) - } else { - format!("{} B", bytes) - } -} diff --git a/crates/wadtools/src/extractor.rs b/crates/wadtools/src/extractor.rs index e4803ba..33ad5e3 100644 --- a/crates/wadtools/src/extractor.rs +++ b/crates/wadtools/src/extractor.rs @@ -1,6 +1,7 @@ use crate::utils::{is_hex_chunk_path, truncate_middle, WadHashtable}; use camino::{Utf8Path, Utf8PathBuf}; use color_eyre::eyre::{self, Ok}; +use dashmap::DashMap; use eyre::Context; use fancy_regex::Regex; use league_toolkit::{ @@ -8,10 +9,11 @@ use league_toolkit::{ wad::{decompress_raw, Wad, WadChunk}, }; use std::{ + collections::HashMap, fs::{self, File, OpenOptions}, io::{self, Write}, sync::{ - atomic::{AtomicUsize, Ordering}, + atomic::{AtomicU64, AtomicUsize, Ordering}, mpsc, }, }; @@ -20,8 +22,15 @@ use tracing_indicatif::style::ProgressStyle; const MAX_LOG_PATH_LEN: usize = 120; +pub struct ExtractStats { + pub extracted_count: usize, + pub skipped_existing: usize, + pub bytes_written: u64, + pub by_type: HashMap, +} + enum ChunkResult { - Extracted, + Extracted(LeagueFileKind, u64), SkippedFilter, SkippedExisting, } @@ -62,7 +71,7 @@ impl<'a> Extractor<'a> { extract_directory: impl AsRef, filter_type: Option<&[LeagueFileKind]>, overwrite: bool, - ) -> eyre::Result<(usize, usize)> { + ) -> eyre::Result { let extract_directory = extract_directory.as_ref().to_path_buf(); let chunks: Vec = self.wad.chunks().iter().copied().collect(); @@ -85,6 +94,8 @@ impl<'a> Extractor<'a> { let counter = AtomicUsize::new(0); let extracted_counter = AtomicUsize::new(0); let skipped_existing_counter = AtomicUsize::new(0); + let bytes_written_counter = AtomicU64::new(0); + let by_type: DashMap = DashMap::new(); let filter_invert = self.filter_invert; let extract_dir = &extract_directory; let err_holder: std::sync::Mutex> = std::sync::Mutex::new(None); @@ -97,6 +108,8 @@ impl<'a> Extractor<'a> { let counter = &counter; let extracted_counter = &extracted_counter; let skipped_existing_counter = &skipped_existing_counter; + let bytes_written_counter = &bytes_written_counter; + let by_type = &by_type; let err_holder = &err_holder; let progress_span = &span; @@ -112,8 +125,10 @@ impl<'a> Extractor<'a> { ); match result { - std::result::Result::Ok(ChunkResult::Extracted) => { + std::result::Result::Ok(ChunkResult::Extracted(kind, size)) => { extracted_counter.fetch_add(1, Ordering::Relaxed); + bytes_written_counter.fetch_add(size, Ordering::Relaxed); + *by_type.entry(kind).or_insert(0) += 1; } std::result::Result::Ok(ChunkResult::SkippedExisting) => { skipped_existing_counter.fetch_add(1, Ordering::Relaxed); @@ -185,10 +200,14 @@ impl<'a> Extractor<'a> { return Err(err); } - Ok(( - extracted_counter.load(Ordering::Relaxed), - skipped_existing_counter.load(Ordering::Relaxed), - )) + let by_type_map: HashMap = by_type.into_iter().collect(); + + Ok(ExtractStats { + extracted_count: extracted_counter.load(Ordering::Relaxed), + skipped_existing: skipped_existing_counter.load(Ordering::Relaxed), + bytes_written: bytes_written_counter.load(Ordering::Relaxed), + by_type: by_type_map, + }) } } @@ -220,8 +239,14 @@ fn process_chunk( fs::create_dir_all(parent.as_std_path())?; } + let size = chunk_data.len() as u64; match write_chunk_file(full_path.as_std_path(), &chunk_data, overwrite) { - std::result::Result::Ok(result) => return Ok(result), + std::result::Result::Ok(ChunkWriteResult::Written) => { + return Ok(ChunkResult::Extracted(chunk_kind, size)); + } + std::result::Result::Ok(ChunkWriteResult::SkippedExisting) => { + return Ok(ChunkResult::SkippedExisting); + } Err(error) if error.kind() == io::ErrorKind::InvalidFilename => { return write_long_filename_chunk( chunk, @@ -241,6 +266,11 @@ fn process_chunk( } } +enum ChunkWriteResult { + Written, + SkippedExisting, +} + /// Writes chunk data to a file. When `overwrite` is false, uses `create_new(true)` for an /// atomic existence check, returning `SkippedExisting` on `AlreadyExists`. This avoids the /// TOCTOU race of a separate exists() check followed by write(). @@ -248,20 +278,20 @@ fn write_chunk_file( path: &std::path::Path, data: &[u8], overwrite: bool, -) -> io::Result { +) -> io::Result { if overwrite { fs::write(path, data)?; - return std::result::Result::Ok(ChunkResult::Extracted); + return std::result::Result::Ok(ChunkWriteResult::Written); } match OpenOptions::new().write(true).create_new(true).open(path) { std::result::Result::Ok(mut file) => { file.write_all(data)?; - std::result::Result::Ok(ChunkResult::Extracted) + std::result::Result::Ok(ChunkWriteResult::Written) } Err(e) if e.kind() == io::ErrorKind::AlreadyExists => { tracing::debug!("skipping existing file: {}", path.display()); - std::result::Result::Ok(ChunkResult::SkippedExisting) + std::result::Result::Ok(ChunkWriteResult::SkippedExisting) } Err(e) => Err(e), } @@ -358,11 +388,11 @@ fn write_long_filename_chunk( &hashed_path ); - Ok(write_chunk_file( - full_path.as_std_path(), - chunk_data, - overwrite, - )?) + let size = chunk_data.len() as u64; + match write_chunk_file(full_path.as_std_path(), chunk_data, overwrite)? { + ChunkWriteResult::Written => Ok(ChunkResult::Extracted(chunk_kind, size)), + ChunkWriteResult::SkippedExisting => Ok(ChunkResult::SkippedExisting), + } } #[cfg(test)] diff --git a/crates/wadtools/src/main.rs b/crates/wadtools/src/main.rs index 9e2634f..d8e0fc8 100644 --- a/crates/wadtools/src/main.rs +++ b/crates/wadtools/src/main.rs @@ -130,6 +130,10 @@ pub enum Commands { /// Overwrite existing files (default: skip existing) #[arg(long)] overwrite: bool, + + /// Show summary statistics after extraction: true/false (default: true). Example: --stats=false + #[arg(short = 's', long, value_name = "true|false", default_missing_value = "true", num_args = 0..=1, default_value_t = true)] + stats: bool, }, /// Compare two wad files /// @@ -254,6 +258,7 @@ fn main() -> eyre::Result<()> { filter_invert, list_filters, overwrite, + stats, } => { if list_filters { print_supported_filters(); @@ -276,6 +281,7 @@ fn main() -> eyre::Result<()> { hash: hash_filter.clone(), filter_invert, overwrite, + show_stats: stats, }, &ht, )?; diff --git a/crates/wadtools/src/utils/mod.rs b/crates/wadtools/src/utils/mod.rs index f41c510..d43b511 100644 --- a/crates/wadtools/src/utils/mod.rs +++ b/crates/wadtools/src/utils/mod.rs @@ -93,6 +93,22 @@ pub fn default_hashtable_dir() -> Option { None } +pub fn format_size(bytes: u64) -> String { + const KB: u64 = 1024; + const MB: u64 = KB * 1024; + const GB: u64 = MB * 1024; + + if bytes >= GB { + format!("{:.2} GB", bytes as f64 / GB as f64) + } else if bytes >= MB { + format!("{:.2} MB", bytes as f64 / MB as f64) + } else if bytes >= KB { + format!("{:.2} KB", bytes as f64 / KB as f64) + } else { + format!("{} B", bytes) + } +} + #[cfg(test)] mod tests { use super::*;