diff --git a/.gitignore b/.gitignore index ea8c4bf..212de44 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +.DS_Store \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 43b35c8..eb516d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.66" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" +checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" [[package]] name = "atty" @@ -42,15 +42,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" [[package]] name = "cc" -version = "1.0.77" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" +checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" dependencies = [ "jobserver", ] @@ -78,9 +78,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.0.27" +version = "4.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0acbd8d28a0a60d7108d7ae850af6ba34cf2d1257fc646980e5f97ce14275966" +checksum = "4ec7a4128863c188deefe750ac1d1dfe66c236909f845af04beed823638dc1b2" dependencies = [ "bitflags", "clap_derive", @@ -93,9 +93,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.0.21" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" +checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" dependencies = [ "heck", "proc-macro-error", @@ -106,9 +106,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" +checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" dependencies = [ "os_str_bytes", ] @@ -125,16 +125,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.2" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c050367d967ced717c04b65d8c619d863ef9292ce0c5760028655a2fb298718c" +checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" dependencies = [ "encode_unicode", "lazy_static", "libc", - "terminal_size", "unicode-width", - "winapi", + "windows-sys", ] [[package]] @@ -188,9 +187,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.82" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a41a86530d0fe7f5d9ea779916b7cadd2d4f9add748b99c2c029cbbdfaf453" +checksum = "b61a7545f753a88bcbe0a70de1fcc0221e10bfc752f576754fa91e663db1622e" dependencies = [ "cc", "cxxbridge-flags", @@ -200,9 +199,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.82" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06416d667ff3e3ad2df1cd8cd8afae5da26cf9cec4d0825040f88b5ca659a2f0" +checksum = "f464457d494b5ed6905c63b0c4704842aba319084a0a3561cdc1359536b53200" dependencies = [ "cc", "codespan-reporting", @@ -215,15 +214,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.82" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "820a9a2af1669deeef27cb271f476ffd196a2c4b6731336011e0ba63e2c7cf71" +checksum = "43c7119ce3a3701ed81aca8410b9acf6fc399d2629d057b87e2efa4e63a3aaea" [[package]] name = "cxxbridge-macro" -version = "1.0.82" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08a6e2fcc370a089ad3b4aaf54db3b1b4cee38ddabce5896b33eb693275f470" +checksum = "65e07508b90551e610910fa648a1878991d367064997a596135b86df30daf07e" dependencies = [ "proc-macro2", "quote", @@ -290,9 +289,9 @@ dependencies = [ [[package]] name = "git2" -version = "0.15.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2994bee4a3a6a51eb90c218523be382fd7ea09b16380b9312e9dbe955ff7c7d1" +checksum = "ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc" dependencies = [ "bitflags", "libc", @@ -367,21 +366,22 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.2" +version = "0.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4295cbb7573c16d310e99e713cf9e75101eb190ab31fccd35f2d2691b4352b19" +checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" dependencies = [ "console", "number_prefix", "portable-atomic", + "unicode-segmentation", "unicode-width", ] [[package]] name = "io-lifetimes" -version = "1.0.2" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e394faa0efb47f9f227f1cd89978f854542b318a6f64fa695489c9c993056656" +checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" dependencies = [ "libc", "windows-sys", @@ -389,9 +389,9 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aae5bc6e2eb41c9def29a3e0f1306382807764b9b53112030eff57435667352d" +checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" dependencies = [ "hermit-abi 0.2.6", "io-lifetimes", @@ -425,15 +425,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.137" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "libgit2-sys" -version = "0.14.0+1.5.0" +version = "0.14.2+1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47a00859c70c8a4f7218e6d1cc32875c4b55f6799445b842b0d8ed5e4c3d959b" +checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4" dependencies = [ "cc", "libc", @@ -455,18 +455,18 @@ dependencies = [ [[package]] name = "link-cplusplus" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" dependencies = [ "cc", ] [[package]] name = "linux-raw-sys" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f9f08d8963a6c613f4b1a78f4f4a4dbfadf8e6545b2d72861731e4858b8b47f" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" [[package]] name = "log" @@ -507,11 +507,11 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ - "hermit-abi 0.1.19", + "hermit-abi 0.2.6", "libc", ] @@ -523,9 +523,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "once_cell" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" [[package]] name = "os_str_bytes" @@ -547,9 +547,9 @@ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "portable-atomic" -version = "0.3.15" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15eb2c6e362923af47e13c23ca5afb859e83d54452c55b0b9ac763b8f7c1ac16" +checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b" [[package]] name = "proc-macro-error" @@ -577,29 +577,28 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e060280438193c554f654141c9ea9417886713b7acd75974c85b18a69a88e0b" +checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" dependencies = [ - "crossbeam-deque", "either", "rayon-core", ] @@ -618,9 +617,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.3" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b1fbb4dfc4eb1d390c02df47760bb19a84bb80b301ecc947ab5406394d8223e" +checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" dependencies = [ "bitflags", "errno", @@ -638,9 +637,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" +checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" [[package]] name = "stderrlog" @@ -663,9 +662,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.103" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", @@ -681,16 +680,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "thread_local" version = "1.1.4" @@ -728,15 +717,15 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "unicode-bidi" -version = "0.3.8" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" [[package]] name = "unicode-ident" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "unicode-normalization" @@ -747,6 +736,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" + [[package]] name = "unicode-width" version = "0.1.10" @@ -884,42 +879,42 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" [[package]] name = "windows_aarch64_msvc" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" [[package]] name = "windows_i686_gnu" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" [[package]] name = "windows_i686_msvc" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" [[package]] name = "windows_x86_64_gnu" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" [[package]] name = "windows_x86_64_msvc" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" diff --git a/Cargo.toml b/Cargo.toml index c97ec75..6372e3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,9 +13,9 @@ anyhow = "1.0" clap = { version = "4.0", features = ["derive"] } log = "0.4" stderrlog = "0.5" -indicatif = "0.17" +indicatif = { version = "0.17", features = ["improved_unicode"] } rayon = "1.5" thread_local = "1.1" -git2 = { version = "0.15", default-features = false } +git2 = { version = "0.16", default-features = false } humantime = "2.1.0" chrono = "0.4.23" diff --git a/src/contributions.rs b/src/contributions.rs new file mode 100644 index 0000000..f7c5ba8 --- /dev/null +++ b/src/contributions.rs @@ -0,0 +1,294 @@ +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + path::{Path, PathBuf}, + sync::{atomic::AtomicUsize, Arc}, +}; + +use anyhow::{anyhow, Result}; +use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; +use git2::{BlameOptions, DiffOptions, Repository}; +use log::warn; +use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; +use thread_local::ThreadLocal; + +#[derive(Default, Debug)] +pub(crate) struct Contributions { + pub(crate) authors: BTreeMap, + pub(crate) total_lines: usize, +} + +impl Contributions { + pub(crate) fn try_from_path( + repo: &Repository, + path: &Path, + max_age: &Option, + ) -> Result { + let blame = repo.blame_file(path, Some(BlameOptions::new().use_mailmap(true)))?; + let mut s = Self::default(); + for hunk in blame.iter() { + let lines = hunk.lines_in_hunk(); + let signature = hunk.final_signature(); + let when = signature.when(); + let commit_time = git_time_to_utc_datetime(when)?; + let age = Utc::now() - commit_time; + if let Some(max_age) = max_age { + if age > *max_age { + continue; + } + } + if let Some(email) = signature.email() { + s.add_lines(email.to_owned(), lines); + } else { + // TODO keep track of unauthored hunks somehow? + warn!("hunk without email found in {}", path.display()); + } + } + Ok(s) + } + + pub(crate) fn calculate_with_overwritten_lines_from_paths( + repo: &Repository, + paths: &HashSet, + max_age: &Option, + progress: impl Fn(usize, usize) + Sync, + ) -> Result> { + let completed = AtomicUsize::new(0); + let mut walker = repo.revwalk()?; + walker.push_head()?; + log::debug!("counting commits"); + let commits: Vec<_> = { + let mut v: Vec<_> = walker + .filter_map(|oid_res| { + match oid_res { + Ok(oid) => { + let c = repo.find_commit(oid).unwrap(); + if let Some(max_age) = max_age { + let Ok(time) = git_time_to_utc_datetime(c.time()) else { + log::warn!("Commit {} has no valid time. Ignoring.", c.id()); + return None; + }; + let age = Utc::now() - time; + if age > *max_age { + return None; + } + } + // we don't want to count merge commits. but perhaps we should somehow? + if c.parents().count() == 1 { + Some(oid) + } else { + None + } + } + Err(e) => { + log::warn!("error while walking commits: {e}"); + None + } + } + }) + .collect(); + // oldest first + v.reverse(); + v + }; + let num_commits = commits.len(); + log::debug!("calculating contributions"); + let root = repo.workdir().unwrap_or_else(|| repo.path()); + let get_repo = || -> Result<_> { Ok(Repository::discover(root)?) }; + let repo_tls: ThreadLocal = ThreadLocal::new(); + let contributions = commits + //.par_iter() + .iter() + .map(|oid| -> Result<_> { + let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); + let mailmap = repo.mailmap()?; + let c = repo.find_commit(*oid).unwrap(); + log::debug!("processing commit {}", c.id()); + debug_assert!(c.parents().count() == 1); + let parent = c.parents().next().unwrap(); + let mut contributions: HashMap = HashMap::new(); + // TODO rename handling is broken. what happens if a file is renamed and then the original file is recreated later? + let mut renames = HashMap::new(); + + let signature = c.author_with_mailmap(&mailmap)?; + if let Some(author) = signature.email() { + let mut diff = repo.diff_tree_to_tree( + Some(&parent.tree()?), + Some(&c.tree()?), + // TODO use some other diff options? patience? + Some(DiffOptions::new().context_lines(0)), + )?; + // TODO we get more sensible numbers if we don't use find_similar, but then we don't get renames + diff.find_similar(None)?; + diff.foreach( + &mut |delta, _diff_progress| { + // TODO keep track of added files as well + log::debug!("processing delta {:?}", delta); + if delta.old_file().exists() + && delta.old_file().path() != delta.new_file().path() + { + let new = delta.new_file().path().map(|p| p.to_path_buf()); + let old = delta.old_file().path().unwrap().to_path_buf(); + renames.insert(old, new); + } + true + }, + None, + Some(&mut |delta, hunk| { + log::debug!("processing hunk {:?}", hunk); + if !delta.new_file().exists() + || !matches!( + delta.status(), + git2::Delta::Added | git2::Delta::Modified + ) + { + return true; + } + let new = delta.new_file().path().unwrap().to_path_buf(); + // TODO is this a sensible way to calculate it? better to count lines added, removed, and changed properly? + let lines_changed = hunk.old_lines().max(hunk.new_lines()); + contributions + .entry(new) + .or_default() + .add_lines(author.to_string(), lines_changed as usize); + true + }), + None, + )?; + } else { + log::warn!("Commit {} has no valid author email", c.id()); + } + + progress( + completed.fetch_add(1, std::sync::atomic::Ordering::SeqCst) + 1, + num_commits, + ); + Ok((contributions, renames)) + }) + .reduce( + //|| Ok((HashMap::new(), HashMap::new())), + |older, newer| { + let (older_contributions, mut older_renames) = older?; + let (newer_contributions, newer_renames) = newer?; + let mut mapped = HashMap::new(); + // TODO is the rename handling done correctly? + // TODO if a file is added in `newer` what should happen to stuff from `older`? I guess it should be discarded? + // update older contributions using the newer renames + for (old_path, contributions) in older_contributions { + match newer_renames.get(&old_path) { + Some(Some(new_path)) => { + mapped.insert(new_path.clone(), contributions); + } + Some(None) => { + // the file was removed. don't add it to the map + } + _ => { + // not renamed, so just add it to the map + mapped.insert(old_path, contributions); + } + } + } + // merge the contributions + for (path, contributions) in newer_contributions { + mapped.entry(path).or_default().merge(contributions); + } + // merge the rename mappings + 'outer: for (new_from_path, new_to_path) in newer_renames { + // TODO do this with a better data structure + for older_to_path in older_renames.values_mut() { + match older_to_path { + Some(path) if path == &new_from_path => { + *older_to_path = new_to_path; + // TODO or can there be multiple renames to the same path? + break 'outer; + } + _ => {} + } + } + older_renames.insert(new_from_path, new_to_path); + } + log::debug!("older_renames: {:?}", older_renames); + Ok((mapped, older_renames)) + }, + ) + .unwrap()? + //? + .0 + .into_iter() + .filter(|(p, _)| paths.contains(p)) + .collect(); + Ok(contributions) + } + + // TODO `ignored_users` will probably not get large enough to warrant a HashSet? + pub(crate) fn filter_ignored(&mut self, ignored_users: &[impl AsRef]) { + self.authors.retain(|k, v| { + if ignored_users.iter().any(|ignored| k == ignored.as_ref()) { + self.total_lines -= *v; + false + } else { + true + } + }); + } + + pub(crate) fn lines_by_user>(&self, author: &[S]) -> usize { + self.authors + .iter() + .filter_map(|(key, value)| { + author + .iter() + .any(|email| email.as_ref() == key) + .then_some(value) + }) + .sum() + } + + pub(crate) fn ratio_changed_by_user>(&self, author: &[S]) -> f64 { + let lines_by_user = self.lines_by_user(author); + lines_by_user as f64 / self.total_lines as f64 + } + + pub(crate) fn authors_str(&self, num_authors: usize) -> String { + let mut authors = self + .authors + .iter() + .map(|(email, lines)| (email.clone(), *lines as f64 / self.total_lines as f64)) + .collect::>(); + authors.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal)); + authors.truncate(num_authors); + let author_str = authors + .into_iter() + .map(|(email, contribution)| format!("{email}: {:.1}%", contribution * 100.0)) + .collect::>() + .join(", "); + format!("({author_str})") + } + + fn add_lines(&mut self, author: String, lines: usize) { + self.total_lines += lines; + *self.authors.entry(author).or_default() += lines; + } + + fn merge(&mut self, other: Self) { + self.total_lines += other.total_lines; + for (author, lines) in other.authors { + *self.authors.entry(author).or_default() += lines; + } + } +} + +fn git_time_to_utc_datetime(time: git2::Time) -> Result> { + Ok(DateTime::::from_local( + NaiveDateTime::from_timestamp_opt(time.seconds(), 0) + .ok_or_else(|| anyhow!("Unable to convert commit time"))?, + FixedOffset::east_opt(time.offset_minutes() * 60).unwrap_or_else(|| { + // TODO handle error better? + warn!( + "Invalid timezone offset: {}. Defaulting to 0.", + time.offset_minutes() + ); + FixedOffset::east_opt(0).unwrap() + }), + ) + .with_timezone(&Utc)) +} diff --git a/src/main.rs b/src/main.rs index eeb44fa..63604b7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,17 @@ +mod contributions; +use contributions::Contributions; + use anyhow::{anyhow, Result}; -use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; use clap::{command, Parser}; -use git2::{BlameOptions, ObjectType, Repository, TreeWalkMode, TreeWalkResult}; +use git2::{ObjectType, Repository, TreeWalkMode, TreeWalkResult}; use indicatif::{ProgressBar, ProgressStyle}; use log::{debug, error, info, trace, warn}; use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use std::{ cmp::Ordering, - collections::BTreeMap, + collections::{BTreeMap, HashSet}, ffi::OsStr, - path::{Path, PathBuf}, + path::PathBuf, str::FromStr, }; use thread_local::ThreadLocal; @@ -64,104 +66,15 @@ struct Opt { /// Don't go deeper than this into trees when printing. #[arg(long, conflicts_with_all = &["flat"])] max_depth: Option, -} - -#[derive(Default)] -struct Contributions { - authors: BTreeMap, - total_lines: usize, -} - -impl Contributions { - fn try_from_path( - repo: &Repository, - path: &Path, - max_age: &Option, - ) -> Result { - let blame = repo.blame_file(path, Some(BlameOptions::new().use_mailmap(true)))?; - let mut s = Self::default(); - for hunk in blame.iter() { - let lines = hunk.lines_in_hunk(); - let signature = hunk.final_signature(); - let when = signature.when(); - let commit_time = DateTime::::from_local( - NaiveDateTime::from_timestamp_opt(when.seconds(), 0) - .ok_or_else(|| anyhow!("Unable to convert commit time"))?, - FixedOffset::east_opt(when.offset_minutes() * 60).unwrap_or_else(|| { - // TODO handle error better? - warn!( - "Invalid timezone offset: {}. Defaulting to 0.", - when.offset_minutes() - ); - FixedOffset::east_opt(0).unwrap() - }), - ) - .with_timezone(&Utc); - let age = Utc::now() - commit_time; - if let Some(max_age) = max_age { - if age > *max_age { - continue; - } - } - if let Some(email) = signature.email() { - s.total_lines += lines; - *s.authors.entry(email.to_owned()).or_default() += lines; - } else { - // TODO keep track of unauthored hunks somehow? - warn!("hunk without email found in {}", path.display()); - } - } - Ok(s) - } - - // TODO `ignored_users` will probably not get large enough to warrant a HashSet? - fn filter_ignored(&mut self, ignored_users: &[impl AsRef]) { - self.authors.retain(|k, v| { - if ignored_users.iter().any(|ignored| k == ignored.as_ref()) { - self.total_lines -= *v; - false - } else { - true - } - }); - } - - fn lines_by_user>(&self, author: &[S]) -> usize { - self.authors - .iter() - .filter_map(|(key, value)| { - author - .iter() - .any(|email| email.as_ref() == key) - .then_some(value) - }) - .sum() - } - fn ratio_changed_by_user>(&self, author: &[S]) -> f64 { - let lines_by_user = self.lines_by_user(author); - lines_by_user as f64 / self.total_lines as f64 - } - - fn authors_str(&self, num_authors: usize) -> String { - let mut authors = self - .authors - .iter() - .map(|(email, lines)| (email.clone(), *lines as f64 / self.total_lines as f64)) - .collect::>(); - authors.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal)); - authors.truncate(num_authors); - let author_str = authors - .into_iter() - .map(|(email, contribution)| format!("{email}: {:.1}%", contribution * 100.0)) - .collect::>() - .join(", "); - format!("({author_str})") - } + /// Include lines that were later overwritten in the count. + #[arg(long)] + overwritten_lines: bool, + // TODO add option to ignore files/directories } -struct File<'a> { - path: &'a Path, +struct File { + path: PathBuf, contributions: Contributions, } @@ -173,7 +86,7 @@ fn print_files_sorted_percentage>( ) { let mut contributions_by_author = files .iter() - .map(|f| (f.path, f.contributions.ratio_changed_by_user(author))) + .map(|f| (&f.path, f.contributions.ratio_changed_by_user(author))) .collect::>(); contributions_by_author.sort_by(|(_, a), (_, b)| { let x = b.partial_cmp(a).unwrap_or(Ordering::Equal); @@ -393,8 +306,10 @@ fn main() -> Result<()> { } else { ProgressBar::new_spinner() }; + progress.set_style(ProgressStyle::default_bar()); let mut paths = vec![]; head.walk(TreeWalkMode::PreOrder, |dir, entry| { + progress.tick(); if let Some(ObjectType::Blob) = entry.kind() { if let Some(name) = entry.name() { let path = PathBuf::from(format!("{dir}{name}")); @@ -428,40 +343,58 @@ fn main() -> Result<()> { } else { info!("blaming all paths"); } - progress.set_style(ProgressStyle::default_bar()); - progress.set_length(paths.len() as u64); - let repo_tls: ThreadLocal = ThreadLocal::new(); - // TODO limit max number of threads? the user can set it using RAYON_NUM_THREADS by default - let mut files: Vec<_> = paths - .par_iter() - .filter_map(|path| { - debug!("blaming {}", path.display()); - let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); - let contributions = Contributions::try_from_path(repo, path, &max_age); - progress.inc(1); - let contributions = match contributions { - Ok(c) => c, - Err(e) => { - warn!("Error blaming file {} ({e})", path.display()); - return None; - } - }; - if contributions.total_lines > 0 { - Some(File { - path, - contributions, - }) - } else { - None - } + let mut files: Vec<_> = if opt.overwritten_lines { + let paths_set = paths.iter().cloned().collect::>(); + Contributions::calculate_with_overwritten_lines_from_paths( + &repo, + &paths_set, + &max_age, + |completed, total| { + progress.set_length(total as u64); + progress.set_position(completed as u64); + }, + )? + .into_iter() + .map(|(path, contributions)| File { + path, + contributions, }) - .collect(); - progress.finish_and_clear(); + .collect() + } else { + progress.set_length(paths.len() as u64); + let repo_tls: ThreadLocal = ThreadLocal::new(); + // TODO limit max number of threads? the user can set it using RAYON_NUM_THREADS by default + paths + .par_iter() + .filter_map(|path| { + debug!("blaming {}", path.display()); + let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); + let contributions = Contributions::try_from_path(repo, path, &max_age); + progress.inc(1); + let contributions = match contributions { + Ok(c) => c, + Err(e) => { + warn!("Error blaming file {} ({e})", path.display()); + return None; + } + }; + if contributions.total_lines > 0 { + Some(File { + path: path.clone(), + contributions, + }) + } else { + None + } + }) + .collect() + }; trace!("done blaming"); files .iter_mut() .for_each(|f| f.contributions.filter_ignored(&opt.ignore_user)); files.retain(|f| f.contributions.total_lines > 0); + progress.finish_and_clear(); if opt.flat { if opt.show_authors { print_file_authors(&files, opt.max_authors as usize);