From 84cb0cfad723eb5f3946884370fcf5af59a308fc Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Mon, 26 Dec 2022 15:03:54 +0100 Subject: [PATCH 01/19] Move Contributions to separate file --- Cargo.lock | 87 +++++++++++++++++++------------------ src/contributions.rs | 100 +++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 100 ++----------------------------------------- 3 files changed, 147 insertions(+), 140 deletions(-) create mode 100644 src/contributions.rs diff --git a/Cargo.lock b/Cargo.lock index 43b35c8..78f8f90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.66" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" +checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" [[package]] name = "atty" @@ -48,9 +48,9 @@ checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" [[package]] name = "cc" -version = "1.0.77" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" +checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" dependencies = [ "jobserver", ] @@ -78,9 +78,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.0.27" +version = "4.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0acbd8d28a0a60d7108d7ae850af6ba34cf2d1257fc646980e5f97ce14275966" +checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39" dependencies = [ "bitflags", "clap_derive", @@ -188,9 +188,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.82" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a41a86530d0fe7f5d9ea779916b7cadd2d4f9add748b99c2c029cbbdfaf453" +checksum = "5add3fc1717409d029b20c5b6903fc0c0b02fa6741d820054f4a2efa5e5816fd" dependencies = [ "cc", "cxxbridge-flags", @@ -200,9 +200,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.82" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06416d667ff3e3ad2df1cd8cd8afae5da26cf9cec4d0825040f88b5ca659a2f0" +checksum = "b4c87959ba14bc6fbc61df77c3fcfe180fc32b93538c4f1031dd802ccb5f2ff0" dependencies = [ "cc", "codespan-reporting", @@ -215,15 +215,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.82" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "820a9a2af1669deeef27cb271f476ffd196a2c4b6731336011e0ba63e2c7cf71" +checksum = "69a3e162fde4e594ed2b07d0f83c6c67b745e7f28ce58c6df5e6b6bef99dfb59" [[package]] name = "cxxbridge-macro" -version = "1.0.82" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08a6e2fcc370a089ad3b4aaf54db3b1b4cee38ddabce5896b33eb693275f470" +checksum = "3e7e2adeb6a0d4a282e581096b06e1791532b7d576dcde5ccd9382acf55db8e6" dependencies = [ "proc-macro2", "quote", @@ -379,9 +379,9 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e394faa0efb47f9f227f1cd89978f854542b318a6f64fa695489c9c993056656" +checksum = "46112a93252b123d31a119a8d1a1ac19deac4fac6e0e8b0df58f0d4e5870e63c" dependencies = [ "libc", "windows-sys", @@ -389,9 +389,9 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aae5bc6e2eb41c9def29a3e0f1306382807764b9b53112030eff57435667352d" +checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" dependencies = [ "hermit-abi 0.2.6", "io-lifetimes", @@ -425,9 +425,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.137" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "libgit2-sys" @@ -455,18 +455,18 @@ dependencies = [ [[package]] name = "link-cplusplus" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" dependencies = [ "cc", ] [[package]] name = "linux-raw-sys" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f9f08d8963a6c613f4b1a78f4f4a4dbfadf8e6545b2d72861731e4858b8b47f" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" [[package]] name = "log" @@ -507,11 +507,11 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ - "hermit-abi 0.1.19", + "hermit-abi 0.2.6", "libc", ] @@ -547,9 +547,9 @@ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "portable-atomic" -version = "0.3.15" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15eb2c6e362923af47e13c23ca5afb859e83d54452c55b0b9ac763b8f7c1ac16" +checksum = "81bdd679d533107e090c2704a35982fc06302e30898e63ffa26a81155c012e92" [[package]] name = "proc-macro-error" @@ -577,29 +577,28 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e060280438193c554f654141c9ea9417886713b7acd75974c85b18a69a88e0b" +checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" dependencies = [ - "crossbeam-deque", "either", "rayon-core", ] @@ -618,9 +617,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.3" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b1fbb4dfc4eb1d390c02df47760bb19a84bb80b301ecc947ab5406394d8223e" +checksum = "a3807b5d10909833d3e9acd1eb5fb988f79376ff10fce42937de71a449c4c588" dependencies = [ "bitflags", "errno", @@ -638,9 +637,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" +checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" [[package]] name = "stderrlog" @@ -663,9 +662,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.103" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", @@ -734,9 +733,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" [[package]] name = "unicode-ident" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "unicode-normalization" diff --git a/src/contributions.rs b/src/contributions.rs new file mode 100644 index 0000000..fc4c7f9 --- /dev/null +++ b/src/contributions.rs @@ -0,0 +1,100 @@ +use std::{cmp::Ordering, collections::BTreeMap, path::Path}; + +use anyhow::{anyhow, Result}; +use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; +use git2::{BlameOptions, Repository}; +use log::warn; + +#[derive(Default)] +pub(crate) struct Contributions { + pub(crate) authors: BTreeMap, + pub(crate) total_lines: usize, +} + +impl Contributions { + pub(crate) fn try_from_path( + repo: &Repository, + path: &Path, + max_age: &Option, + ) -> Result { + let blame = repo.blame_file(path, Some(BlameOptions::new().use_mailmap(true)))?; + let mut s = Self::default(); + for hunk in blame.iter() { + let lines = hunk.lines_in_hunk(); + let signature = hunk.final_signature(); + let when = signature.when(); + let commit_time = DateTime::::from_local( + NaiveDateTime::from_timestamp_opt(when.seconds(), 0) + .ok_or_else(|| anyhow!("Unable to convert commit time"))?, + FixedOffset::east_opt(when.offset_minutes() * 60).unwrap_or_else(|| { + // TODO handle error better? + warn!( + "Invalid timezone offset: {}. Defaulting to 0.", + when.offset_minutes() + ); + FixedOffset::east_opt(0).unwrap() + }), + ) + .with_timezone(&Utc); + let age = Utc::now() - commit_time; + if let Some(max_age) = max_age { + if age > *max_age { + continue; + } + } + if let Some(email) = signature.email() { + s.total_lines += lines; + *s.authors.entry(email.to_owned()).or_default() += lines; + } else { + // TODO keep track of unauthored hunks somehow? + warn!("hunk without email found in {}", path.display()); + } + } + Ok(s) + } + + // TODO `ignored_users` will probably not get large enough to warrant a HashSet? + pub(crate) fn filter_ignored(&mut self, ignored_users: &[impl AsRef]) { + self.authors.retain(|k, v| { + if ignored_users.iter().any(|ignored| k == ignored.as_ref()) { + self.total_lines -= *v; + false + } else { + true + } + }); + } + + pub(crate) fn lines_by_user>(&self, author: &[S]) -> usize { + self.authors + .iter() + .filter_map(|(key, value)| { + author + .iter() + .any(|email| email.as_ref() == key) + .then_some(value) + }) + .sum() + } + + pub(crate) fn ratio_changed_by_user>(&self, author: &[S]) -> f64 { + let lines_by_user = self.lines_by_user(author); + lines_by_user as f64 / self.total_lines as f64 + } + + pub(crate) fn authors_str(&self, num_authors: usize) -> String { + let mut authors = self + .authors + .iter() + .map(|(email, lines)| (email.clone(), *lines as f64 / self.total_lines as f64)) + .collect::>(); + authors.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal)); + authors.truncate(num_authors); + let author_str = authors + .into_iter() + .map(|(email, contribution)| format!("{email}: {:.1}%", contribution * 100.0)) + .collect::>() + .join(", "); + format!("({author_str})") + } +} diff --git a/src/main.rs b/src/main.rs index eeb44fa..264ab5c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,9 @@ +mod contributions; +use contributions::Contributions; + use anyhow::{anyhow, Result}; -use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; use clap::{command, Parser}; -use git2::{BlameOptions, ObjectType, Repository, TreeWalkMode, TreeWalkResult}; +use git2::{ObjectType, Repository, TreeWalkMode, TreeWalkResult}; use indicatif::{ProgressBar, ProgressStyle}; use log::{debug, error, info, trace, warn}; use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; @@ -66,100 +68,6 @@ struct Opt { max_depth: Option, } -#[derive(Default)] -struct Contributions { - authors: BTreeMap, - total_lines: usize, -} - -impl Contributions { - fn try_from_path( - repo: &Repository, - path: &Path, - max_age: &Option, - ) -> Result { - let blame = repo.blame_file(path, Some(BlameOptions::new().use_mailmap(true)))?; - let mut s = Self::default(); - for hunk in blame.iter() { - let lines = hunk.lines_in_hunk(); - let signature = hunk.final_signature(); - let when = signature.when(); - let commit_time = DateTime::::from_local( - NaiveDateTime::from_timestamp_opt(when.seconds(), 0) - .ok_or_else(|| anyhow!("Unable to convert commit time"))?, - FixedOffset::east_opt(when.offset_minutes() * 60).unwrap_or_else(|| { - // TODO handle error better? - warn!( - "Invalid timezone offset: {}. Defaulting to 0.", - when.offset_minutes() - ); - FixedOffset::east_opt(0).unwrap() - }), - ) - .with_timezone(&Utc); - let age = Utc::now() - commit_time; - if let Some(max_age) = max_age { - if age > *max_age { - continue; - } - } - if let Some(email) = signature.email() { - s.total_lines += lines; - *s.authors.entry(email.to_owned()).or_default() += lines; - } else { - // TODO keep track of unauthored hunks somehow? - warn!("hunk without email found in {}", path.display()); - } - } - Ok(s) - } - - // TODO `ignored_users` will probably not get large enough to warrant a HashSet? - fn filter_ignored(&mut self, ignored_users: &[impl AsRef]) { - self.authors.retain(|k, v| { - if ignored_users.iter().any(|ignored| k == ignored.as_ref()) { - self.total_lines -= *v; - false - } else { - true - } - }); - } - - fn lines_by_user>(&self, author: &[S]) -> usize { - self.authors - .iter() - .filter_map(|(key, value)| { - author - .iter() - .any(|email| email.as_ref() == key) - .then_some(value) - }) - .sum() - } - - fn ratio_changed_by_user>(&self, author: &[S]) -> f64 { - let lines_by_user = self.lines_by_user(author); - lines_by_user as f64 / self.total_lines as f64 - } - - fn authors_str(&self, num_authors: usize) -> String { - let mut authors = self - .authors - .iter() - .map(|(email, lines)| (email.clone(), *lines as f64 / self.total_lines as f64)) - .collect::>(); - authors.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal)); - authors.truncate(num_authors); - let author_str = authors - .into_iter() - .map(|(email, contribution)| format!("{email}: {:.1}%", contribution * 100.0)) - .collect::>() - .join(", "); - format!("({author_str})") - } -} - struct File<'a> { path: &'a Path, contributions: Contributions, From a1ec84dc1ecef0e5217f84da545bf79c7dff0f7a Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Mon, 26 Dec 2022 15:05:28 +0100 Subject: [PATCH 02/19] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ea8c4bf..212de44 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +.DS_Store \ No newline at end of file From eefba79c248d102e2753c6304b4abe6a3b72abbb Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Mon, 26 Dec 2022 19:48:25 +0100 Subject: [PATCH 03/19] implementation of algorithm that includes overwritten lines --- src/contributions.rs | 130 ++++++++++++++++++++++++++++++++++++++----- src/main.rs | 2 + 2 files changed, 117 insertions(+), 15 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index fc4c7f9..0212c18 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -1,8 +1,12 @@ -use std::{cmp::Ordering, collections::BTreeMap, path::Path}; +use std::{ + cmp::Ordering, + collections::{BTreeMap, HashMap, HashSet}, + path::{Path, PathBuf}, +}; use anyhow::{anyhow, Result}; use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; -use git2::{BlameOptions, Repository}; +use git2::{BlameOptions, Commit, DiffOptions, Repository}; use log::warn; #[derive(Default)] @@ -23,19 +27,7 @@ impl Contributions { let lines = hunk.lines_in_hunk(); let signature = hunk.final_signature(); let when = signature.when(); - let commit_time = DateTime::::from_local( - NaiveDateTime::from_timestamp_opt(when.seconds(), 0) - .ok_or_else(|| anyhow!("Unable to convert commit time"))?, - FixedOffset::east_opt(when.offset_minutes() * 60).unwrap_or_else(|| { - // TODO handle error better? - warn!( - "Invalid timezone offset: {}. Defaulting to 0.", - when.offset_minutes() - ); - FixedOffset::east_opt(0).unwrap() - }), - ) - .with_timezone(&Utc); + let commit_time = time_to_utc_datetime(when)?; let age = Utc::now() - commit_time; if let Some(max_age) = max_age { if age > *max_age { @@ -53,6 +45,98 @@ impl Contributions { Ok(s) } + pub(crate) fn calculate_overwritten_from_paths( + repo: &Repository, + paths: &HashSet, + max_age: &Option, + ) -> Result> { + fn calculate( + repo: &Repository, + paths: &HashSet, + max_age: &Option, + root: &Commit, + contributions: &mut HashMap, + mut renames: HashMap, + ) -> Result<()> { + let root_time = time_to_utc_datetime(root.time())?; + let age = Utc::now() - root_time; + if let Some(max_age) = max_age { + if age > *max_age { + return Ok(()); + } + } + for parent in root.parents() { + // TODO mailmap + if let Some(author) = root.author().email() { + let mut diff = repo.diff_tree_to_tree( + Some(&parent.tree()?), + Some(&root.tree()?), + // TODO use some other diff options? patience? + Some(DiffOptions::new().context_lines(0)), + )?; + // TODO different options here? + diff.find_similar(None)?; + diff.foreach( + &mut |_, _| true, + None, + Some(&mut |delta, hunk| { + if !delta.new_file().exists() { + return true; + } + let mut mapped_new = delta.new_file().path().unwrap().to_path_buf(); + if let Some(m) = renames.get(&mapped_new) { + mapped_new = m.clone(); + } + if delta.old_file().exists() + && delta.old_file().path() != delta.new_file().path() + { + renames.insert( + delta.old_file().path().unwrap().to_path_buf(), + mapped_new.clone(), + ); + } + // TODO make sure these paths match + if paths.contains(&mapped_new) { + // TODO is this a sensible way to calculate it? + let lines_changed = hunk.old_lines().max(hunk.new_lines()); + *contributions + .entry(mapped_new) + .or_default() + .authors + .entry(author.to_string()) + .or_default() += lines_changed as usize; + } + true + }), + None, + )?; + } else { + log::warn!("Commit {} has no valid author email", root.id()); + } + calculate( + repo, + paths, + max_age, + &parent, + contributions, + renames.clone(), + )?; + } + Ok(()) + } + let head = repo.head()?.peel_to_commit()?; + let mut contributions = HashMap::new(); + calculate( + repo, + paths, + max_age, + &head, + &mut contributions, + HashMap::new(), + )?; + Ok(contributions) + } + // TODO `ignored_users` will probably not get large enough to warrant a HashSet? pub(crate) fn filter_ignored(&mut self, ignored_users: &[impl AsRef]) { self.authors.retain(|k, v| { @@ -98,3 +182,19 @@ impl Contributions { format!("({author_str})") } } + +fn time_to_utc_datetime(time: git2::Time) -> Result> { + Ok(DateTime::::from_local( + NaiveDateTime::from_timestamp_opt(time.seconds(), 0) + .ok_or_else(|| anyhow!("Unable to convert commit time"))?, + FixedOffset::east_opt(time.offset_minutes() * 60).unwrap_or_else(|| { + // TODO handle error better? + warn!( + "Invalid timezone offset: {}. Defaulting to 0.", + time.offset_minutes() + ); + FixedOffset::east_opt(0).unwrap() + }), + ) + .with_timezone(&Utc)) +} diff --git a/src/main.rs b/src/main.rs index 264ab5c..8bdc3ec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,6 +66,8 @@ struct Opt { /// Don't go deeper than this into trees when printing. #[arg(long, conflicts_with_all = &["flat"])] max_depth: Option, + + // TODO add option to ignore files/directories } struct File<'a> { From c22a0d1e0834b318bd9855195c2b5731d9bc4c90 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Mon, 26 Dec 2022 20:16:46 +0100 Subject: [PATCH 04/19] added support for including overwritten lines in options --- src/contributions.rs | 19 ++++++----- src/main.rs | 76 ++++++++++++++++++++++++++------------------ 2 files changed, 56 insertions(+), 39 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index 0212c18..8350bbe 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -9,7 +9,7 @@ use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; use git2::{BlameOptions, Commit, DiffOptions, Repository}; use log::warn; -#[derive(Default)] +#[derive(Default, Debug)] pub(crate) struct Contributions { pub(crate) authors: BTreeMap, pub(crate) total_lines: usize, @@ -35,8 +35,7 @@ impl Contributions { } } if let Some(email) = signature.email() { - s.total_lines += lines; - *s.authors.entry(email.to_owned()).or_default() += lines; + s.add_lines(email.to_owned(), lines); } else { // TODO keep track of unauthored hunks somehow? warn!("hunk without email found in {}", path.display()); @@ -45,7 +44,7 @@ impl Contributions { Ok(s) } - pub(crate) fn calculate_overwritten_from_paths( + pub(crate) fn calculate_with_overwritten_lines_from_paths( repo: &Repository, paths: &HashSet, max_age: &Option, @@ -58,6 +57,7 @@ impl Contributions { contributions: &mut HashMap, mut renames: HashMap, ) -> Result<()> { + log::debug!("calculating contributions for commit {}", root.id()); let root_time = time_to_utc_datetime(root.time())?; let age = Utc::now() - root_time; if let Some(max_age) = max_age { @@ -99,12 +99,10 @@ impl Contributions { if paths.contains(&mapped_new) { // TODO is this a sensible way to calculate it? let lines_changed = hunk.old_lines().max(hunk.new_lines()); - *contributions + contributions .entry(mapped_new) .or_default() - .authors - .entry(author.to_string()) - .or_default() += lines_changed as usize; + .add_lines(author.to_string(), lines_changed as usize); } true }), @@ -181,6 +179,11 @@ impl Contributions { .join(", "); format!("({author_str})") } + + fn add_lines(&mut self, author: String, lines: usize) { + self.total_lines += lines; + *self.authors.entry(author.to_string()).or_default() += lines; + } } fn time_to_utc_datetime(time: git2::Time) -> Result> { diff --git a/src/main.rs b/src/main.rs index 8bdc3ec..12af4b8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,9 +9,9 @@ use log::{debug, error, info, trace, warn}; use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use std::{ cmp::Ordering, - collections::BTreeMap, + collections::{BTreeMap, HashSet}, ffi::OsStr, - path::{Path, PathBuf}, + path::PathBuf, str::FromStr, }; use thread_local::ThreadLocal; @@ -67,11 +67,14 @@ struct Opt { #[arg(long, conflicts_with_all = &["flat"])] max_depth: Option, + /// Include lines that were later overwritten in the count. + #[arg(long)] + overwritten: bool, // TODO add option to ignore files/directories } -struct File<'a> { - path: &'a Path, +struct File { + path: PathBuf, contributions: Contributions, } @@ -83,7 +86,7 @@ fn print_files_sorted_percentage>( ) { let mut contributions_by_author = files .iter() - .map(|f| (f.path, f.contributions.ratio_changed_by_user(author))) + .map(|f| (&f.path, f.contributions.ratio_changed_by_user(author))) .collect::>(); contributions_by_author.sort_by(|(_, a), (_, b)| { let x = b.partial_cmp(a).unwrap_or(Ordering::Equal); @@ -339,33 +342,44 @@ fn main() -> Result<()> { info!("blaming all paths"); } progress.set_style(ProgressStyle::default_bar()); - progress.set_length(paths.len() as u64); - let repo_tls: ThreadLocal = ThreadLocal::new(); - // TODO limit max number of threads? the user can set it using RAYON_NUM_THREADS by default - let mut files: Vec<_> = paths - .par_iter() - .filter_map(|path| { - debug!("blaming {}", path.display()); - let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); - let contributions = Contributions::try_from_path(repo, path, &max_age); - progress.inc(1); - let contributions = match contributions { - Ok(c) => c, - Err(e) => { - warn!("Error blaming file {} ({e})", path.display()); - return None; + let mut files: Vec<_> = if opt.overwritten { + let paths_set = paths.iter().cloned().collect::>(); + Contributions::calculate_with_overwritten_lines_from_paths(&repo, &paths_set, &max_age)? + .into_iter() + .map(|(path, contributions)| File { + path, + contributions, + }) + .collect() + } else { + progress.set_length(paths.len() as u64); + let repo_tls: ThreadLocal = ThreadLocal::new(); + // TODO limit max number of threads? the user can set it using RAYON_NUM_THREADS by default + paths + .par_iter() + .filter_map(|path| { + debug!("blaming {}", path.display()); + let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); + let contributions = Contributions::try_from_path(repo, path, &max_age); + progress.inc(1); + let contributions = match contributions { + Ok(c) => c, + Err(e) => { + warn!("Error blaming file {} ({e})", path.display()); + return None; + } + }; + if contributions.total_lines > 0 { + Some(File { + path: path.clone(), + contributions, + }) + } else { + None } - }; - if contributions.total_lines > 0 { - Some(File { - path, - contributions, - }) - } else { - None - } - }) - .collect(); + }) + .collect() + }; progress.finish_and_clear(); trace!("done blaming"); files From d34e91ae4eb26de9aa309fde0c3d1513b2da2a72 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Mon, 26 Dec 2022 20:44:59 +0100 Subject: [PATCH 05/19] progress --- Cargo.lock | 4 ++-- src/contributions.rs | 18 +++++++++++++++++- src/main.rs | 9 ++++++--- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 78f8f90..b201184 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -547,9 +547,9 @@ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "portable-atomic" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bdd679d533107e090c2704a35982fc06302e30898e63ffa26a81155c012e92" +checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b" [[package]] name = "proc-macro-error" diff --git a/src/contributions.rs b/src/contributions.rs index 8350bbe..2751ce2 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -48,6 +48,7 @@ impl Contributions { repo: &Repository, paths: &HashSet, max_age: &Option, + progress: impl Fn(usize, usize), ) -> Result> { fn calculate( repo: &Repository, @@ -56,6 +57,7 @@ impl Contributions { root: &Commit, contributions: &mut HashMap, mut renames: HashMap, + progress: &mut dyn FnMut(), ) -> Result<()> { log::debug!("calculating contributions for commit {}", root.id()); let root_time = time_to_utc_datetime(root.time())?; @@ -65,6 +67,7 @@ impl Contributions { return Ok(()); } } + progress(); for parent in root.parents() { // TODO mailmap if let Some(author) = root.author().email() { @@ -97,7 +100,7 @@ impl Contributions { } // TODO make sure these paths match if paths.contains(&mapped_new) { - // TODO is this a sensible way to calculate it? + // TODO is this a sensible way to calculate it? better to count lines added, removed, and changed properly? let lines_changed = hunk.old_lines().max(hunk.new_lines()); contributions .entry(mapped_new) @@ -118,12 +121,24 @@ impl Contributions { &parent, contributions, renames.clone(), + progress, )?; } Ok(()) } let head = repo.head()?.peel_to_commit()?; + fn count_commits(repo: &Repository, root: &Commit) -> Result { + let mut commits = 1; + for parent in root.parents() { + commits += count_commits(repo, &parent)?; + } + Ok(commits) + } + log::debug!("counting commits"); + let num_commits = count_commits(repo, &head)?; + let mut commits_completed = 0; let mut contributions = HashMap::new(); + log::debug!("calculating contributions"); calculate( repo, paths, @@ -131,6 +146,7 @@ impl Contributions { &head, &mut contributions, HashMap::new(), + &mut || { commits_completed += 1; progress(commits_completed, num_commits)}, )?; Ok(contributions) } diff --git a/src/main.rs b/src/main.rs index 12af4b8..73216ef 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,7 +69,7 @@ struct Opt { /// Include lines that were later overwritten in the count. #[arg(long)] - overwritten: bool, + overwritten_lines: bool, // TODO add option to ignore files/directories } @@ -342,9 +342,12 @@ fn main() -> Result<()> { info!("blaming all paths"); } progress.set_style(ProgressStyle::default_bar()); - let mut files: Vec<_> = if opt.overwritten { + let mut files: Vec<_> = if opt.overwritten_lines { let paths_set = paths.iter().cloned().collect::>(); - Contributions::calculate_with_overwritten_lines_from_paths(&repo, &paths_set, &max_age)? + Contributions::calculate_with_overwritten_lines_from_paths(&repo, &paths_set, &max_age, |completed, total| { + progress.set_length(total as u64); + progress.set_position(completed as u64); + })? .into_iter() .map(|(path, contributions)| File { path, From 8eb3fa50c2fbaa446021556fc631bab07094d643 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Tue, 27 Dec 2022 15:07:44 +0100 Subject: [PATCH 06/19] include overwritten lines --- src/contributions.rs | 156 +++++++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 65 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index 2751ce2..4b75854 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -6,7 +6,7 @@ use std::{ use anyhow::{anyhow, Result}; use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; -use git2::{BlameOptions, Commit, DiffOptions, Repository}; +use git2::{BlameOptions, DiffOptions, Repository}; use log::warn; #[derive(Default, Debug)] @@ -50,30 +50,55 @@ impl Contributions { max_age: &Option, progress: impl Fn(usize, usize), ) -> Result> { - fn calculate( - repo: &Repository, - paths: &HashSet, - max_age: &Option, - root: &Commit, - contributions: &mut HashMap, - mut renames: HashMap, - progress: &mut dyn FnMut(), - ) -> Result<()> { - log::debug!("calculating contributions for commit {}", root.id()); - let root_time = time_to_utc_datetime(root.time())?; - let age = Utc::now() - root_time; - if let Some(max_age) = max_age { - if age > *max_age { - return Ok(()); + let mut walker = repo.revwalk()?; + walker.push_head()?; + log::debug!("counting commits"); + let commits: Vec<_> = walker + .filter_map(|oid_res| { + match oid_res { + Ok(oid) => { + let c = repo.find_commit(oid).unwrap(); + if let Some(max_age) = max_age { + let Ok(time) = time_to_utc_datetime(c.time()) else { + log::warn!("Commit {} has no valid time. Ignoring.", c.id()); + return None; + }; + let age = Utc::now() - time; + if age > *max_age { + return None; + } + } + // we don't want to count merge commits. but perhaps we should somehow? + if c.parents().count() == 1 { + Some(c) + } else { + None + } + } + Err(e) => { + log::warn!("error while walking commits: {e}"); + None + } } - } - progress(); - for parent in root.parents() { + }) + .collect(); + let num_commits = commits.len(); + log::debug!("calculating contributions"); + // TODO do this in parallel + let (contributions, _renames) = commits + .iter() + .enumerate() + .map(|(i, c)| { + debug_assert!(c.parents().count() == 1); + let parent = c.parents().next().unwrap(); + let mut contributions: HashMap = HashMap::new(); + let mut renames = HashMap::new(); + // TODO mailmap - if let Some(author) = root.author().email() { + if let Some(author) = c.author().email() { let mut diff = repo.diff_tree_to_tree( Some(&parent.tree()?), - Some(&root.tree()?), + Some(&c.tree()?), // TODO use some other diff options? patience? Some(DiffOptions::new().context_lines(0)), )?; @@ -86,24 +111,20 @@ impl Contributions { if !delta.new_file().exists() { return true; } - let mut mapped_new = delta.new_file().path().unwrap().to_path_buf(); - if let Some(m) = renames.get(&mapped_new) { - mapped_new = m.clone(); - } + let new = delta.new_file().path().unwrap().to_path_buf(); if delta.old_file().exists() && delta.old_file().path() != delta.new_file().path() { renames.insert( delta.old_file().path().unwrap().to_path_buf(), - mapped_new.clone(), + new.clone(), ); } - // TODO make sure these paths match - if paths.contains(&mapped_new) { + if paths.contains(&new) { // TODO is this a sensible way to calculate it? better to count lines added, removed, and changed properly? let lines_changed = hunk.old_lines().max(hunk.new_lines()); contributions - .entry(mapped_new) + .entry(new) .or_default() .add_lines(author.to_string(), lines_changed as usize); } @@ -112,42 +133,40 @@ impl Contributions { None, )?; } else { - log::warn!("Commit {} has no valid author email", root.id()); + log::warn!("Commit {} has no valid author email", c.id()); } - calculate( - repo, - paths, - max_age, - &parent, - contributions, - renames.clone(), - progress, - )?; - } - Ok(()) - } - let head = repo.head()?.peel_to_commit()?; - fn count_commits(repo: &Repository, root: &Commit) -> Result { - let mut commits = 1; - for parent in root.parents() { - commits += count_commits(repo, &parent)?; - } - Ok(commits) - } - log::debug!("counting commits"); - let num_commits = count_commits(repo, &head)?; - let mut commits_completed = 0; - let mut contributions = HashMap::new(); - log::debug!("calculating contributions"); - calculate( - repo, - paths, - max_age, - &head, - &mut contributions, - HashMap::new(), - &mut || { commits_completed += 1; progress(commits_completed, num_commits)}, - )?; + progress(i, num_commits); + Ok((contributions, renames)) + }) + .collect::>>()? + .into_iter() + .reduce(|mut acc, other| { + let mut mapped = HashMap::new(); + // TODO is the rename handling done correctly? + for (path, contributions) in acc.0 { + if let Some(new_path) = other.1.get(&path) { + mapped.insert(new_path.clone(), contributions); + } else { + mapped.insert(path, contributions); + } + } + for (path, other_contributions) in other.0 { + mapped.entry(path).or_default().merge(other_contributions); + } + 'outer: for (other_old_path, other_new_path) in other.1 { + // TODO do this with a better data structure + for acc_new in acc.1.values_mut() { + if *acc_new == other_old_path { + *acc_new = other_new_path; + // TODO or can there be multiple renames to the same path? + break 'outer; + } + } + acc.1.insert(other_old_path, other_new_path); + } + (mapped, acc.1) + }) + .unwrap_or_default(); Ok(contributions) } @@ -198,7 +217,14 @@ impl Contributions { fn add_lines(&mut self, author: String, lines: usize) { self.total_lines += lines; - *self.authors.entry(author.to_string()).or_default() += lines; + *self.authors.entry(author).or_default() += lines; + } + + fn merge(&mut self, other: Self) { + self.total_lines += other.total_lines; + for (author, lines) in other.authors { + *self.authors.entry(author).or_default() += lines; + } } } From 0838f29489a303d9829f9408b60f18f7f6e6614e Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Wed, 28 Dec 2022 21:45:12 +0100 Subject: [PATCH 07/19] cargo update --- Cargo.lock | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b201184..fac249c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,16 +125,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c050367d967ced717c04b65d8c619d863ef9292ce0c5760028655a2fb298718c" +checksum = "5556015fe3aad8b968e5d4124980fbe2f6aaee7aeec6b749de1faaa2ca5d0a4c" dependencies = [ "encode_unicode", "lazy_static", "libc", - "terminal_size", "unicode-width", - "winapi", + "windows-sys", ] [[package]] @@ -617,9 +616,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.5" +version = "0.36.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3807b5d10909833d3e9acd1eb5fb988f79376ff10fce42937de71a449c4c588" +checksum = "4feacf7db682c6c329c4ede12649cd36ecab0f3be5b7d74e6a20304725db4549" dependencies = [ "bitflags", "errno", @@ -680,16 +679,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "thread_local" version = "1.1.4" From 3ff27e47bee51e34bb18ccc91cccd900ced8fd17 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Wed, 28 Dec 2022 21:48:28 +0100 Subject: [PATCH 08/19] rustfmt --- src/main.rs | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/main.rs b/src/main.rs index 73216ef..d191dfd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -344,16 +344,21 @@ fn main() -> Result<()> { progress.set_style(ProgressStyle::default_bar()); let mut files: Vec<_> = if opt.overwritten_lines { let paths_set = paths.iter().cloned().collect::>(); - Contributions::calculate_with_overwritten_lines_from_paths(&repo, &paths_set, &max_age, |completed, total| { - progress.set_length(total as u64); - progress.set_position(completed as u64); - })? - .into_iter() - .map(|(path, contributions)| File { - path, - contributions, - }) - .collect() + Contributions::calculate_with_overwritten_lines_from_paths( + &repo, + &paths_set, + &max_age, + |completed, total| { + progress.set_length(total as u64); + progress.set_position(completed as u64); + }, + )? + .into_iter() + .map(|(path, contributions)| File { + path, + contributions, + }) + .collect() } else { progress.set_length(paths.len() as u64); let repo_tls: ThreadLocal = ThreadLocal::new(); From c2e2181d2564fedcf7aa328cb32ddef1a0a9d303 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 1 Jan 2023 15:09:03 +0100 Subject: [PATCH 09/19] cleanup --- Cargo.lock | 4 ++-- src/contributions.rs | 6 +++--- src/main.rs | 5 +++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fac249c..78c9efa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -522,9 +522,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "once_cell" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" [[package]] name = "os_str_bytes" diff --git a/src/contributions.rs b/src/contributions.rs index 4b75854..f6ded6b 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -27,7 +27,7 @@ impl Contributions { let lines = hunk.lines_in_hunk(); let signature = hunk.final_signature(); let when = signature.when(); - let commit_time = time_to_utc_datetime(when)?; + let commit_time = git_time_to_utc_datetime(when)?; let age = Utc::now() - commit_time; if let Some(max_age) = max_age { if age > *max_age { @@ -59,7 +59,7 @@ impl Contributions { Ok(oid) => { let c = repo.find_commit(oid).unwrap(); if let Some(max_age) = max_age { - let Ok(time) = time_to_utc_datetime(c.time()) else { + let Ok(time) = git_time_to_utc_datetime(c.time()) else { log::warn!("Commit {} has no valid time. Ignoring.", c.id()); return None; }; @@ -228,7 +228,7 @@ impl Contributions { } } -fn time_to_utc_datetime(time: git2::Time) -> Result> { +fn git_time_to_utc_datetime(time: git2::Time) -> Result> { Ok(DateTime::::from_local( NaiveDateTime::from_timestamp_opt(time.seconds(), 0) .ok_or_else(|| anyhow!("Unable to convert commit time"))?, diff --git a/src/main.rs b/src/main.rs index d191dfd..63604b7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -306,8 +306,10 @@ fn main() -> Result<()> { } else { ProgressBar::new_spinner() }; + progress.set_style(ProgressStyle::default_bar()); let mut paths = vec![]; head.walk(TreeWalkMode::PreOrder, |dir, entry| { + progress.tick(); if let Some(ObjectType::Blob) = entry.kind() { if let Some(name) = entry.name() { let path = PathBuf::from(format!("{dir}{name}")); @@ -341,7 +343,6 @@ fn main() -> Result<()> { } else { info!("blaming all paths"); } - progress.set_style(ProgressStyle::default_bar()); let mut files: Vec<_> = if opt.overwritten_lines { let paths_set = paths.iter().cloned().collect::>(); Contributions::calculate_with_overwritten_lines_from_paths( @@ -388,12 +389,12 @@ fn main() -> Result<()> { }) .collect() }; - progress.finish_and_clear(); trace!("done blaming"); files .iter_mut() .for_each(|f| f.contributions.filter_ignored(&opt.ignore_user)); files.retain(|f| f.contributions.total_lines > 0); + progress.finish_and_clear(); if opt.flat { if opt.show_authors { print_file_authors(&files, opt.max_authors as usize); From 0e6cde218616ec870eecb480aa2f2538d8c7a3e1 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 1 Jan 2023 15:28:09 +0100 Subject: [PATCH 10/19] cleanup --- src/contributions.rs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index f6ded6b..647161d 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -105,21 +105,29 @@ impl Contributions { // TODO different options here? diff.find_similar(None)?; diff.foreach( - &mut |_, _| true, - None, - Some(&mut |delta, hunk| { - if !delta.new_file().exists() { - return true; - } - let new = delta.new_file().path().unwrap().to_path_buf(); - if delta.old_file().exists() + &mut |delta, _diff_progress| { + if delta.new_file().exists() && delta.old_file().exists() && delta.old_file().path() != delta.new_file().path() { + let new = delta.new_file().path().unwrap().to_path_buf(); renames.insert( delta.old_file().path().unwrap().to_path_buf(), - new.clone(), + new, ); + } + true + }, + None, + Some(&mut |delta, hunk| { + if !delta.new_file().exists() + || !matches!( + delta.status(), + git2::Delta::Added | git2::Delta::Modified + ) + { + return true; } + let new = delta.new_file().path().unwrap().to_path_buf(); if paths.contains(&new) { // TODO is this a sensible way to calculate it? better to count lines added, removed, and changed properly? let lines_changed = hunk.old_lines().max(hunk.new_lines()); From c041fc48c48939f45093ffc666f4fbdb082768f9 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 1 Jan 2023 18:55:03 +0100 Subject: [PATCH 11/19] parallelize --- src/contributions.rs | 101 +++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index 647161d..08d29e4 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -1,13 +1,15 @@ use std::{ - cmp::Ordering, collections::{BTreeMap, HashMap, HashSet}, path::{Path, PathBuf}, + sync::atomic::AtomicUsize, }; use anyhow::{anyhow, Result}; use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; use git2::{BlameOptions, DiffOptions, Repository}; use log::warn; +use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; +use thread_local::ThreadLocal; #[derive(Default, Debug)] pub(crate) struct Contributions { @@ -48,8 +50,9 @@ impl Contributions { repo: &Repository, paths: &HashSet, max_age: &Option, - progress: impl Fn(usize, usize), + progress: impl Fn(usize, usize) + Sync, ) -> Result> { + let completed = AtomicUsize::new(0); let mut walker = repo.revwalk()?; walker.push_head()?; log::debug!("counting commits"); @@ -70,7 +73,7 @@ impl Contributions { } // we don't want to count merge commits. but perhaps we should somehow? if c.parents().count() == 1 { - Some(c) + Some(oid) } else { None } @@ -84,18 +87,26 @@ impl Contributions { .collect(); let num_commits = commits.len(); log::debug!("calculating contributions"); - // TODO do this in parallel + let root = repo.workdir().unwrap_or_else(|| repo.path()); + let get_repo = || -> Result<_> { Ok(Repository::discover(&root)?) }; + let repo_tls: ThreadLocal = ThreadLocal::new(); let (contributions, _renames) = commits - .iter() - .enumerate() - .map(|(i, c)| { + .par_iter() + .map(|oid| -> Result<_> { + let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); + let mailmap = repo.mailmap().ok(); + let c = repo.find_commit(*oid).unwrap(); debug_assert!(c.parents().count() == 1); let parent = c.parents().next().unwrap(); let mut contributions: HashMap = HashMap::new(); let mut renames = HashMap::new(); - // TODO mailmap - if let Some(author) = c.author().email() { + let signature = if let Some(mm) = mailmap { + mm.resolve_signature(&c.author())? + } else { + c.author() + }; + if let Some(author) = signature.email() { let mut diff = repo.diff_tree_to_tree( Some(&parent.tree()?), Some(&c.tree()?), @@ -106,15 +117,13 @@ impl Contributions { diff.find_similar(None)?; diff.foreach( &mut |delta, _diff_progress| { - if delta.new_file().exists() && delta.old_file().exists() + if delta.new_file().exists() + && delta.old_file().exists() && delta.old_file().path() != delta.new_file().path() { let new = delta.new_file().path().unwrap().to_path_buf(); - renames.insert( - delta.old_file().path().unwrap().to_path_buf(), - new, - ); - } + renames.insert(delta.old_file().path().unwrap().to_path_buf(), new); + } true }, None, @@ -143,38 +152,44 @@ impl Contributions { } else { log::warn!("Commit {} has no valid author email", c.id()); } - progress(i, num_commits); + + progress( + completed.fetch_add(1, std::sync::atomic::Ordering::SeqCst) + 1, + num_commits, + ); Ok((contributions, renames)) }) - .collect::>>()? - .into_iter() - .reduce(|mut acc, other| { - let mut mapped = HashMap::new(); - // TODO is the rename handling done correctly? - for (path, contributions) in acc.0 { - if let Some(new_path) = other.1.get(&path) { - mapped.insert(new_path.clone(), contributions); - } else { - mapped.insert(path, contributions); + .reduce( + || Ok((HashMap::new(), HashMap::new())), + |acc, other| { + let (acc_contributions, mut acc_renames) = acc?; + let (other_contributions, other_renames) = other?; + let mut mapped = HashMap::new(); + // TODO is the rename handling done correctly? + for (path, contributions) in acc_contributions { + if let Some(new_path) = other_renames.get(&path) { + mapped.insert(new_path.clone(), contributions); + } else { + mapped.insert(path, contributions); + } } - } - for (path, other_contributions) in other.0 { - mapped.entry(path).or_default().merge(other_contributions); - } - 'outer: for (other_old_path, other_new_path) in other.1 { - // TODO do this with a better data structure - for acc_new in acc.1.values_mut() { - if *acc_new == other_old_path { - *acc_new = other_new_path; - // TODO or can there be multiple renames to the same path? - break 'outer; + for (path, contributions) in other_contributions { + mapped.entry(path).or_default().merge(contributions); + } + 'outer: for (other_old_path, other_new_path) in other_renames { + // TODO do this with a better data structure + for acc_new in acc_renames.values_mut() { + if *acc_new == other_old_path { + *acc_new = other_new_path; + // TODO or can there be multiple renames to the same path? + break 'outer; + } } + acc_renames.insert(other_old_path, other_new_path); } - acc.1.insert(other_old_path, other_new_path); - } - (mapped, acc.1) - }) - .unwrap_or_default(); + Ok((mapped, acc_renames)) + }, + )?; Ok(contributions) } @@ -213,7 +228,7 @@ impl Contributions { .iter() .map(|(email, lines)| (email.clone(), *lines as f64 / self.total_lines as f64)) .collect::>(); - authors.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal)); + authors.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal)); authors.truncate(num_authors); let author_str = authors .into_iter() From 20c14c0cefe9f381da105929f3e99657478adb7f Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 1 Jan 2023 18:56:49 +0100 Subject: [PATCH 12/19] clippy --- src/contributions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/contributions.rs b/src/contributions.rs index 08d29e4..54f7e9c 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -88,7 +88,7 @@ impl Contributions { let num_commits = commits.len(); log::debug!("calculating contributions"); let root = repo.workdir().unwrap_or_else(|| repo.path()); - let get_repo = || -> Result<_> { Ok(Repository::discover(&root)?) }; + let get_repo = || -> Result<_> { Ok(Repository::discover(root)?) }; let repo_tls: ThreadLocal = ThreadLocal::new(); let (contributions, _renames) = commits .par_iter() From 583099fd899e807a48d82c9188529a8eb70378c3 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 1 Jan 2023 19:19:47 +0100 Subject: [PATCH 13/19] cleanup --- src/contributions.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index 54f7e9c..704105c 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -94,18 +94,14 @@ impl Contributions { .par_iter() .map(|oid| -> Result<_> { let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); - let mailmap = repo.mailmap().ok(); + let mailmap = repo.mailmap()?; let c = repo.find_commit(*oid).unwrap(); debug_assert!(c.parents().count() == 1); let parent = c.parents().next().unwrap(); let mut contributions: HashMap = HashMap::new(); let mut renames = HashMap::new(); - let signature = if let Some(mm) = mailmap { - mm.resolve_signature(&c.author())? - } else { - c.author() - }; + let signature = c.author_with_mailmap(&mailmap)?; if let Some(author) = signature.email() { let mut diff = repo.diff_tree_to_tree( Some(&parent.tree()?), From 0e41bcac3216f38e84f4f9b6c1e1cf7fa9bf5d8c Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 1 Jan 2023 19:21:15 +0100 Subject: [PATCH 14/19] prettier progress --- Cargo.lock | 7 +++++++ Cargo.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 78c9efa..6539ec7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -373,6 +373,7 @@ dependencies = [ "console", "number_prefix", "portable-atomic", + "unicode-segmentation", "unicode-width", ] @@ -735,6 +736,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" + [[package]] name = "unicode-width" version = "0.1.10" diff --git a/Cargo.toml b/Cargo.toml index c97ec75..d71ac13 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ anyhow = "1.0" clap = { version = "4.0", features = ["derive"] } log = "0.4" stderrlog = "0.5" -indicatif = "0.17" +indicatif = { version = "0.17", features = ["improved_unicode"] } rayon = "1.5" thread_local = "1.1" git2 = { version = "0.15", default-features = false } From d12578558dbe3edb3dc01114feb5e13a5020c19b Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 1 Jan 2023 21:19:53 +0100 Subject: [PATCH 15/19] todos --- src/contributions.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index 704105c..14e9bba 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -6,7 +6,7 @@ use std::{ use anyhow::{anyhow, Result}; use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; -use git2::{BlameOptions, DiffOptions, Repository}; +use git2::{BlameOptions, DiffFindOptions, DiffOptions, Repository}; use log::warn; use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use thread_local::ThreadLocal; @@ -99,6 +99,7 @@ impl Contributions { debug_assert!(c.parents().count() == 1); let parent = c.parents().next().unwrap(); let mut contributions: HashMap = HashMap::new(); + // TODO rename handling is broken. what happens if a file is renamed and then the original file is recreated later? let mut renames = HashMap::new(); let signature = c.author_with_mailmap(&mailmap)?; @@ -109,8 +110,8 @@ impl Contributions { // TODO use some other diff options? patience? Some(DiffOptions::new().context_lines(0)), )?; - // TODO different options here? - diff.find_similar(None)?; + // TODO we get more sensible numbers if we don't use find_similar, but then we don't get renames + diff.find_similar(None); diff.foreach( &mut |delta, _diff_progress| { if delta.new_file().exists() From 242dc51a48d423bd66f2114a878f7767c1cd6c26 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sat, 21 Jan 2023 17:42:16 +0100 Subject: [PATCH 16/19] update git2 --- Cargo.lock | 92 ++++++++++++++++++++++---------------------- Cargo.toml | 2 +- src/contributions.rs | 4 +- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6539ec7..eb516d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -42,9 +42,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" [[package]] name = "cc" @@ -78,9 +78,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.0.32" +version = "4.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39" +checksum = "4ec7a4128863c188deefe750ac1d1dfe66c236909f845af04beed823638dc1b2" dependencies = [ "bitflags", "clap_derive", @@ -93,9 +93,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.0.21" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" +checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" dependencies = [ "heck", "proc-macro-error", @@ -106,9 +106,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" +checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" dependencies = [ "os_str_bytes", ] @@ -125,9 +125,9 @@ dependencies = [ [[package]] name = "console" -version = "0.15.3" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5556015fe3aad8b968e5d4124980fbe2f6aaee7aeec6b749de1faaa2ca5d0a4c" +checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" dependencies = [ "encode_unicode", "lazy_static", @@ -187,9 +187,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.85" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5add3fc1717409d029b20c5b6903fc0c0b02fa6741d820054f4a2efa5e5816fd" +checksum = "b61a7545f753a88bcbe0a70de1fcc0221e10bfc752f576754fa91e663db1622e" dependencies = [ "cc", "cxxbridge-flags", @@ -199,9 +199,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.85" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c87959ba14bc6fbc61df77c3fcfe180fc32b93538c4f1031dd802ccb5f2ff0" +checksum = "f464457d494b5ed6905c63b0c4704842aba319084a0a3561cdc1359536b53200" dependencies = [ "cc", "codespan-reporting", @@ -214,15 +214,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.85" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69a3e162fde4e594ed2b07d0f83c6c67b745e7f28ce58c6df5e6b6bef99dfb59" +checksum = "43c7119ce3a3701ed81aca8410b9acf6fc399d2629d057b87e2efa4e63a3aaea" [[package]] name = "cxxbridge-macro" -version = "1.0.85" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e7e2adeb6a0d4a282e581096b06e1791532b7d576dcde5ccd9382acf55db8e6" +checksum = "65e07508b90551e610910fa648a1878991d367064997a596135b86df30daf07e" dependencies = [ "proc-macro2", "quote", @@ -289,9 +289,9 @@ dependencies = [ [[package]] name = "git2" -version = "0.15.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2994bee4a3a6a51eb90c218523be382fd7ea09b16380b9312e9dbe955ff7c7d1" +checksum = "ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc" dependencies = [ "bitflags", "libc", @@ -366,9 +366,9 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.2" +version = "0.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4295cbb7573c16d310e99e713cf9e75101eb190ab31fccd35f2d2691b4352b19" +checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" dependencies = [ "console", "number_prefix", @@ -379,9 +379,9 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46112a93252b123d31a119a8d1a1ac19deac4fac6e0e8b0df58f0d4e5870e63c" +checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" dependencies = [ "libc", "windows-sys", @@ -431,9 +431,9 @@ checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "libgit2-sys" -version = "0.14.0+1.5.0" +version = "0.14.2+1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47a00859c70c8a4f7218e6d1cc32875c4b55f6799445b842b0d8ed5e4c3d959b" +checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4" dependencies = [ "cc", "libc", @@ -577,9 +577,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.49" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" dependencies = [ "unicode-ident", ] @@ -617,9 +617,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.6" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4feacf7db682c6c329c4ede12649cd36ecab0f3be5b7d74e6a20304725db4549" +checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" dependencies = [ "bitflags", "errno", @@ -717,9 +717,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "unicode-bidi" -version = "0.3.8" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" [[package]] name = "unicode-ident" @@ -879,42 +879,42 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" [[package]] name = "windows_aarch64_msvc" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" [[package]] name = "windows_i686_gnu" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" [[package]] name = "windows_i686_msvc" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" [[package]] name = "windows_x86_64_gnu" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" [[package]] name = "windows_x86_64_msvc" -version = "0.42.0" +version = "0.42.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" diff --git a/Cargo.toml b/Cargo.toml index d71ac13..6372e3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,6 @@ stderrlog = "0.5" indicatif = { version = "0.17", features = ["improved_unicode"] } rayon = "1.5" thread_local = "1.1" -git2 = { version = "0.15", default-features = false } +git2 = { version = "0.16", default-features = false } humantime = "2.1.0" chrono = "0.4.23" diff --git a/src/contributions.rs b/src/contributions.rs index 14e9bba..0eb4e77 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -6,7 +6,7 @@ use std::{ use anyhow::{anyhow, Result}; use chrono::{DateTime, FixedOffset, NaiveDateTime, Utc}; -use git2::{BlameOptions, DiffFindOptions, DiffOptions, Repository}; +use git2::{BlameOptions, DiffOptions, Repository}; use log::warn; use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use thread_local::ThreadLocal; @@ -111,7 +111,7 @@ impl Contributions { Some(DiffOptions::new().context_lines(0)), )?; // TODO we get more sensible numbers if we don't use find_similar, but then we don't get renames - diff.find_similar(None); + diff.find_similar(None)?; diff.foreach( &mut |delta, _diff_progress| { if delta.new_file().exists() From 12aeb68ce5e4b4a4b97296feb8a0465525597f17 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 22 Jan 2023 10:10:37 +0100 Subject: [PATCH 17/19] handle path filtering better --- src/contributions.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index 0eb4e77..ade6456 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -90,7 +90,7 @@ impl Contributions { let root = repo.workdir().unwrap_or_else(|| repo.path()); let get_repo = || -> Result<_> { Ok(Repository::discover(root)?) }; let repo_tls: ThreadLocal = ThreadLocal::new(); - let (contributions, _renames) = commits + let contributions = commits .par_iter() .map(|oid| -> Result<_> { let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); @@ -119,7 +119,8 @@ impl Contributions { && delta.old_file().path() != delta.new_file().path() { let new = delta.new_file().path().unwrap().to_path_buf(); - renames.insert(delta.old_file().path().unwrap().to_path_buf(), new); + let old = delta.old_file().path().unwrap().to_path_buf(); + renames.insert(old, new); } true }, @@ -134,14 +135,12 @@ impl Contributions { return true; } let new = delta.new_file().path().unwrap().to_path_buf(); - if paths.contains(&new) { - // TODO is this a sensible way to calculate it? better to count lines added, removed, and changed properly? - let lines_changed = hunk.old_lines().max(hunk.new_lines()); - contributions - .entry(new) - .or_default() - .add_lines(author.to_string(), lines_changed as usize); - } + // TODO is this a sensible way to calculate it? better to count lines added, removed, and changed properly? + let lines_changed = hunk.old_lines().max(hunk.new_lines()); + contributions + .entry(new) + .or_default() + .add_lines(author.to_string(), lines_changed as usize); true }), None, @@ -186,7 +185,11 @@ impl Contributions { } Ok((mapped, acc_renames)) }, - )?; + )? + .0 + .into_iter() + .filter(|(p, _)| paths.contains(p)) + .collect(); Ok(contributions) } From 59a1f221a3b6335b1e9fa0e723d107ca224f7478 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 22 Jan 2023 10:46:20 +0100 Subject: [PATCH 18/19] reverted commit order --- src/contributions.rs | 111 +++++++++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 45 deletions(-) diff --git a/src/contributions.rs b/src/contributions.rs index ade6456..73fe502 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -1,7 +1,7 @@ use std::{ collections::{BTreeMap, HashMap, HashSet}, path::{Path, PathBuf}, - sync::atomic::AtomicUsize, + sync::{atomic::AtomicUsize, Arc}, }; use anyhow::{anyhow, Result}; @@ -56,46 +56,53 @@ impl Contributions { let mut walker = repo.revwalk()?; walker.push_head()?; log::debug!("counting commits"); - let commits: Vec<_> = walker - .filter_map(|oid_res| { - match oid_res { - Ok(oid) => { - let c = repo.find_commit(oid).unwrap(); - if let Some(max_age) = max_age { - let Ok(time) = git_time_to_utc_datetime(c.time()) else { + let commits: Vec<_> = { + let mut v: Vec<_> = walker + .filter_map(|oid_res| { + match oid_res { + Ok(oid) => { + let c = repo.find_commit(oid).unwrap(); + if let Some(max_age) = max_age { + let Ok(time) = git_time_to_utc_datetime(c.time()) else { log::warn!("Commit {} has no valid time. Ignoring.", c.id()); return None; }; - let age = Utc::now() - time; - if age > *max_age { - return None; + let age = Utc::now() - time; + if age > *max_age { + return None; + } + } + // we don't want to count merge commits. but perhaps we should somehow? + if c.parents().count() == 1 { + Some(oid) + } else { + None } } - // we don't want to count merge commits. but perhaps we should somehow? - if c.parents().count() == 1 { - Some(oid) - } else { + Err(e) => { + log::warn!("error while walking commits: {e}"); None } } - Err(e) => { - log::warn!("error while walking commits: {e}"); - None - } - } - }) - .collect(); + }) + .collect(); + // oldest first + v.reverse(); + v + }; let num_commits = commits.len(); log::debug!("calculating contributions"); let root = repo.workdir().unwrap_or_else(|| repo.path()); let get_repo = || -> Result<_> { Ok(Repository::discover(root)?) }; let repo_tls: ThreadLocal = ThreadLocal::new(); let contributions = commits - .par_iter() + //.par_iter() + .iter() .map(|oid| -> Result<_> { let repo = repo_tls.get_or_try(get_repo).expect("unable to get repo"); let mailmap = repo.mailmap()?; let c = repo.find_commit(*oid).unwrap(); + log::debug!("processing commit {}", c.id()); debug_assert!(c.parents().count() == 1); let parent = c.parents().next().unwrap(); let mut contributions: HashMap = HashMap::new(); @@ -114,11 +121,11 @@ impl Contributions { diff.find_similar(None)?; diff.foreach( &mut |delta, _diff_progress| { - if delta.new_file().exists() - && delta.old_file().exists() + log::debug!("processing delta {:?}", delta); + if delta.old_file().exists() && delta.old_file().path() != delta.new_file().path() { - let new = delta.new_file().path().unwrap().to_path_buf(); + let new = delta.new_file().path().map(|p| p.to_path_buf()); let old = delta.old_file().path().unwrap().to_path_buf(); renames.insert(old, new); } @@ -156,36 +163,50 @@ impl Contributions { Ok((contributions, renames)) }) .reduce( - || Ok((HashMap::new(), HashMap::new())), - |acc, other| { - let (acc_contributions, mut acc_renames) = acc?; - let (other_contributions, other_renames) = other?; + //|| Ok((HashMap::new(), HashMap::new())), + |older, newer| { + let (older_contributions, mut older_renames) = older?; + let (newer_contributions, newer_renames) = newer?; let mut mapped = HashMap::new(); // TODO is the rename handling done correctly? - for (path, contributions) in acc_contributions { - if let Some(new_path) = other_renames.get(&path) { - mapped.insert(new_path.clone(), contributions); - } else { - mapped.insert(path, contributions); + // update older contributions using the newer renames + for (old_path, contributions) in older_contributions { + match newer_renames.get(&old_path) { + Some(Some(new_path)) => { + mapped.insert(new_path.clone(), contributions); + } + Some(None) => { + // the file was removed. don't add it to the map + } + _ => { + // not renamed, so just add it to the map + mapped.insert(old_path, contributions); + } } } - for (path, contributions) in other_contributions { + // merge the contributions + for (path, contributions) in newer_contributions { mapped.entry(path).or_default().merge(contributions); } - 'outer: for (other_old_path, other_new_path) in other_renames { + // merge the rename mappings + 'outer: for (new_from_path, new_to_path) in newer_renames { // TODO do this with a better data structure - for acc_new in acc_renames.values_mut() { - if *acc_new == other_old_path { - *acc_new = other_new_path; - // TODO or can there be multiple renames to the same path? - break 'outer; + for older_to_path in older_renames.values_mut() { + match older_to_path { + Some(path) if path == &new_from_path => { + *older_to_path = new_to_path; + // TODO or can there be multiple renames to the same path? + break 'outer; + } + _ => {} } } - acc_renames.insert(other_old_path, other_new_path); + older_renames.insert(new_from_path, new_to_path); } - Ok((mapped, acc_renames)) + Ok((mapped, older_renames)) }, - )? + ) + .unwrap()? .0 .into_iter() .filter(|(p, _)| paths.contains(p)) From 4a34ed7647f7d8a89ea2eaeba53be507bad2ed67 Mon Sep 17 00:00:00 2001 From: Joel Nises Date: Sun, 22 Jan 2023 11:00:25 +0100 Subject: [PATCH 19/19] todos --- src/contributions.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/contributions.rs b/src/contributions.rs index 73fe502..f7c5ba8 100644 --- a/src/contributions.rs +++ b/src/contributions.rs @@ -121,6 +121,7 @@ impl Contributions { diff.find_similar(None)?; diff.foreach( &mut |delta, _diff_progress| { + // TODO keep track of added files as well log::debug!("processing delta {:?}", delta); if delta.old_file().exists() && delta.old_file().path() != delta.new_file().path() @@ -133,6 +134,7 @@ impl Contributions { }, None, Some(&mut |delta, hunk| { + log::debug!("processing hunk {:?}", hunk); if !delta.new_file().exists() || !matches!( delta.status(), @@ -169,6 +171,7 @@ impl Contributions { let (newer_contributions, newer_renames) = newer?; let mut mapped = HashMap::new(); // TODO is the rename handling done correctly? + // TODO if a file is added in `newer` what should happen to stuff from `older`? I guess it should be discarded? // update older contributions using the newer renames for (old_path, contributions) in older_contributions { match newer_renames.get(&old_path) { @@ -203,10 +206,12 @@ impl Contributions { } older_renames.insert(new_from_path, new_to_path); } + log::debug!("older_renames: {:?}", older_renames); Ok((mapped, older_renames)) }, ) .unwrap()? + //? .0 .into_iter() .filter(|(p, _)| paths.contains(p))