From 77746f59cefc54bdb4a9b04cb2da3a41fa2102c7 Mon Sep 17 00:00:00 2001 From: jpacold Date: Wed, 3 Jul 2024 21:55:06 -0600 Subject: [PATCH 01/29] create `community` module, stubs for `modularity` and helper functions --- rustworkx-core/src/community/metrics.rs | 17 +++++++++++++++++ rustworkx-core/src/community/mod.rs | 3 +++ rustworkx-core/src/lib.rs | 2 ++ 3 files changed, 22 insertions(+) create mode 100644 rustworkx-core/src/community/metrics.rs create mode 100644 rustworkx-core/src/community/mod.rs diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs new file mode 100644 index 0000000000..72286efffe --- /dev/null +++ b/rustworkx-core/src/community/metrics.rs @@ -0,0 +1,17 @@ +use num_traits::Float; +use std::collections::HashSet; +use std::vec::Vec; + +use petgraph::graph::NodeIndex; +use petgraph::visit::{Data, NodeIndexable}; + +fn _number_internal_edges(graph: &G, community: &HashSet) -> u64 {} + +fn _total_degree(graph: &G, community: &HashSet) -> u64 {} + +pub fn modularity(graph: &G, communities: &Vec>, resolution: f64) -> f64 +where + G: Data + NodeIndexable, + W: Float, +{ +} diff --git a/rustworkx-core/src/community/mod.rs b/rustworkx-core/src/community/mod.rs new file mode 100644 index 0000000000..1b8c340353 --- /dev/null +++ b/rustworkx-core/src/community/mod.rs @@ -0,0 +1,3 @@ +mod metrics; + +pub use metrics::modularity; diff --git a/rustworkx-core/src/lib.rs b/rustworkx-core/src/lib.rs index fc5d6f5df8..882796458a 100644 --- a/rustworkx-core/src/lib.rs +++ b/rustworkx-core/src/lib.rs @@ -47,6 +47,7 @@ //! The crate provides the following graph algorithm modules //! //! * [`centrality`](./centrality/index.html) +//! * [`community`](./community/index.html) //! * [`connectivity`](./connectivity/index.html) //! * [`max_weight_matching`](./max_weight_matching/index.html) //! 
* [`shortest_path`](./shortest_path/index.html) @@ -97,6 +98,7 @@ pub mod bipartite_coloring; pub mod centrality; /// Module for coloring algorithms. pub mod coloring; +pub mod community; pub mod connectivity; /// Module for algorithms that work on DAGs. pub mod dag_algo; From bd1ee6cb44ba63c841a3e08456d72e622cfa3bf9 Mon Sep 17 00:00:00 2001 From: jpacold Date: Thu, 4 Jul 2024 16:15:42 -0600 Subject: [PATCH 02/29] implement `_total_edge_weight` --- rustworkx-core/src/community/metrics.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 72286efffe..ce987b7e73 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -1,17 +1,26 @@ -use num_traits::Float; +use num_traits::Num; use std::collections::HashSet; use std::vec::Vec; use petgraph::graph::NodeIndex; -use petgraph::visit::{Data, NodeIndexable}; +use petgraph::visit::{Data, EdgeRef, IntoEdgeReferences, NodeIndexable}; fn _number_internal_edges(graph: &G, community: &HashSet) -> u64 {} -fn _total_degree(graph: &G, community: &HashSet) -> u64 {} +fn _total_edge_weight(graph: &G) -> W +where + G: Data + IntoEdgeReferences, + W: Num + Copy, +{ + graph + .edge_references() + .map(|edge| *edge.weight()) + .fold(W::zero(), |s, e| s + e) +} pub fn modularity(graph: &G, communities: &Vec>, resolution: f64) -> f64 where - G: Data + NodeIndexable, - W: Float, + G: Data + NodeIndexable + IntoEdgeReferences, + W: Num, { } From 6ada2b9662cb6b17a325bd8caa872706c98df08b Mon Sep 17 00:00:00 2001 From: jpacold Date: Sat, 6 Jul 2024 21:04:15 -0600 Subject: [PATCH 03/29] implement modularity function --- rustworkx-core/src/community/metrics.rs | 81 ++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 7 deletions(-) diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index ce987b7e73..6eabeb883c 100644 --- 
a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -1,11 +1,24 @@ +use core::fmt; use num_traits::Num; -use std::collections::HashSet; +use std::collections::HashMap; +use std::error::Error; +use std::hash::Hash; +use std::ops::AddAssign; use std::vec::Vec; -use petgraph::graph::NodeIndex; -use petgraph::visit::{Data, EdgeRef, IntoEdgeReferences, NodeIndexable}; +use petgraph::visit::{Data, EdgeRef, GraphBase, GraphProp, IntoEdgeReferences}; -fn _number_internal_edges(graph: &G, community: &HashSet) -> u64 {} +#[derive(Debug, PartialEq, Eq)] +pub struct NotAPartitionError; +impl Error for NotAPartitionError {} +impl fmt::Display for NotAPartitionError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "The input communities do not form a partition of the input graph." + ) + } +} fn _total_edge_weight(graph: &G) -> W where @@ -18,9 +31,63 @@ where .fold(W::zero(), |s, e| s + e) } -pub fn modularity(graph: &G, communities: &Vec>, resolution: f64) -> f64 +pub fn modularity( + graph: &G, + communities: &Vec>, + resolution: f64, +) -> Result where - G: Data + NodeIndexable + IntoEdgeReferences, - W: Num, + G: GraphProp + Data + IntoEdgeReferences, + ::NodeId: Hash + Eq + Copy, + W: Num + Copy + Into + AddAssign, { + let mut node_to_community: HashMap = + HashMap::with_capacity(communities.iter().map(|v| v.len()).sum()); + for (ii, &ref v) in communities.iter().enumerate() { + for &node in v { + if let Some(_n) = node_to_community.insert(node, ii) { + // argument `communities` contains a duplicate node + return Err(NotAPartitionError {}); + } + } + } + + let mut internal_edge_weights = vec![W::zero(); communities.len()]; + let mut outgoing_edge_weights = vec![W::zero(); communities.len()]; + let mut incoming_edge_weights = if graph.is_directed() { + Some(vec![W::zero(); communities.len()]) + } else { + None + }; + + for edge in graph.edge_references() { + let (a, b) = (edge.source(), edge.target()); + if 
let (Some(&c_a), Some(&c_b)) = (node_to_community.get(&a), node_to_community.get(&b)) { + let &w = edge.weight(); + if c_a == c_b { + internal_edge_weights[c_a] += w; + } + outgoing_edge_weights[c_a] += w; + if let Some(ref mut incoming) = incoming_edge_weights { + incoming[c_b] += w; + } + } else { + // At least one node was not included in `communities` + return Err(NotAPartitionError {}); + } + } + + let two_m: f64 = 2.0 * _total_edge_weight(graph).into(); + let weight_sum = |v: Vec| -> f64 { v.iter().fold(W::zero(), |s, &w| s + w).into() }; + + let sigma_internal = weight_sum(internal_edge_weights); + let sigma_outgoing = weight_sum(outgoing_edge_weights); + + let sigma_incoming = if let Some(incoming) = incoming_edge_weights { + weight_sum(incoming) + } else { + sigma_outgoing + }; + + Ok(sigma_internal / two_m - resolution * sigma_incoming * sigma_outgoing / (two_m * two_m)) } From 75f2570822b0abc19d7c0a94fcc2bb8f6b7f2b45 Mon Sep 17 00:00:00 2001 From: jpacold Date: Sat, 6 Jul 2024 22:53:09 -0600 Subject: [PATCH 04/29] add Rust test, fix modularity calculation --- rustworkx-core/src/community/metrics.rs | 58 ++++++++++++++++++++----- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 6eabeb883c..3e5f2ed776 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -32,7 +32,7 @@ where } pub fn modularity( - graph: &G, + graph: G, communities: &Vec>, resolution: f64, ) -> Result @@ -54,7 +54,7 @@ where let mut internal_edge_weights = vec![W::zero(); communities.len()]; let mut outgoing_edge_weights = vec![W::zero(); communities.len()]; - let mut incoming_edge_weights = if graph.is_directed() { + let mut incoming_edge_weights_opt = if graph.is_directed() { Some(vec![W::zero(); communities.len()]) } else { None @@ -68,8 +68,10 @@ where internal_edge_weights[c_a] += w; } outgoing_edge_weights[c_a] += w; - if let Some(ref 
mut incoming) = incoming_edge_weights { - incoming[c_b] += w; + if let Some(ref mut incoming_edge_weights) = incoming_edge_weights_opt { + incoming_edge_weights[c_b] += w; + } else { + outgoing_edge_weights[c_b] += w; } } else { // At least one node was not included in `communities` @@ -77,17 +79,49 @@ where } } - let two_m: f64 = 2.0 * _total_edge_weight(graph).into(); - let weight_sum = |v: Vec| -> f64 { v.iter().fold(W::zero(), |s, &w| s + w).into() }; + let m: f64 = _total_edge_weight(&graph).into(); - let sigma_internal = weight_sum(internal_edge_weights); - let sigma_outgoing = weight_sum(outgoing_edge_weights); + let sigma_internal: f64 = internal_edge_weights + .iter() + .fold(W::zero(), |s, &w| s + w) + .into(); - let sigma_incoming = if let Some(incoming) = incoming_edge_weights { - weight_sum(incoming) + let sigma_total_squared: W = if let Some(incoming_edge_weights) = incoming_edge_weights_opt { + incoming_edge_weights + .iter() + .zip(outgoing_edge_weights.iter()) + .fold(W::zero(), |s, (&x, &y)| s + x * y) } else { - sigma_outgoing + outgoing_edge_weights + .iter() + .fold(W::zero(), |s, &x| s + x * x) }; - Ok(sigma_internal / two_m - resolution * sigma_incoming * sigma_outgoing / (two_m * two_m)) + Ok(sigma_internal / m - resolution * sigma_total_squared.into() / (4.0 * m * m)) +} + +#[cfg(test)] +mod tests { + use crate::generators::barbell_graph; + use petgraph::graph::UnGraph; + use petgraph::visit::{GraphBase, IntoNodeIdentifiers}; + use std::vec::Vec; + + use super::modularity; + + #[test] + fn test_modularity_barbell_graph() { + type G = UnGraph<(), f64>; + type N = ::NodeId; + + let g: G = barbell_graph(Some(3), Some(0), None, None, || (), || 1.0f64).unwrap(); + let nodes: Vec = g.node_identifiers().collect(); + let communities: Vec> = vec![ + vec![nodes[0], nodes[1], nodes[2]], + vec![nodes[3], nodes[4], nodes[5]], + ]; + let resolution = 1.0; + let m = modularity(&g, &communities, resolution).unwrap(); + assert!((m - 
0.35714285714285715).abs() < 1.0e-9); + } } From cb7fd50fd1a052ad76e3c245555c29ed73205178 Mon Sep 17 00:00:00 2001 From: jpacold Date: Sun, 7 Jul 2024 18:29:28 -0600 Subject: [PATCH 05/29] add test for directed case --- rustworkx-core/src/community/metrics.rs | 80 ++++++++++++++++++++----- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 3e5f2ed776..424714be45 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -33,7 +33,7 @@ where pub fn modularity( graph: G, - communities: &Vec>, + communities: &[Vec], resolution: f64, ) -> Result where @@ -43,7 +43,7 @@ where { let mut node_to_community: HashMap = HashMap::with_capacity(communities.iter().map(|v| v.len()).sum()); - for (ii, &ref v) in communities.iter().enumerate() { + for (ii, v) in communities.iter().enumerate() { for &node in v { if let Some(_n) = node_to_community.insert(node, ii) { // argument `communities` contains a duplicate node @@ -86,24 +86,27 @@ where .fold(W::zero(), |s, &w| s + w) .into(); - let sigma_total_squared: W = if let Some(incoming_edge_weights) = incoming_edge_weights_opt { + let sigma_total_squared: f64 = if let Some(incoming_edge_weights) = incoming_edge_weights_opt { incoming_edge_weights .iter() .zip(outgoing_edge_weights.iter()) .fold(W::zero(), |s, (&x, &y)| s + x * y) + .into() } else { outgoing_edge_weights .iter() .fold(W::zero(), |s, &x| s + x * x) + .into() + / 4.0 }; - Ok(sigma_internal / m - resolution * sigma_total_squared.into() / (4.0 * m * m)) + Ok(sigma_internal / m - resolution * sigma_total_squared / (m * m)) } #[cfg(test)] mod tests { use crate::generators::barbell_graph; - use petgraph::graph::UnGraph; + use petgraph::graph::{DiGraph, UnGraph}; use petgraph::visit::{GraphBase, IntoNodeIdentifiers}; use std::vec::Vec; @@ -114,14 +117,63 @@ mod tests { type G = UnGraph<(), f64>; type N = ::NodeId; - let g: G = 
barbell_graph(Some(3), Some(0), None, None, || (), || 1.0f64).unwrap(); - let nodes: Vec = g.node_identifiers().collect(); - let communities: Vec> = vec![ - vec![nodes[0], nodes[1], nodes[2]], - vec![nodes[3], nodes[4], nodes[5]], - ]; - let resolution = 1.0; - let m = modularity(&g, &communities, resolution).unwrap(); - assert!((m - 0.35714285714285715).abs() < 1.0e-9); + for n in 3..10 { + let g: G = barbell_graph(Some(n), Some(0), None, None, || (), || 1.0f64).unwrap(); + let nodes: Vec = g.node_identifiers().collect(); + let communities: Vec> = vec![ + (0..n).map(|ii| nodes[ii]).collect(), + (n..(2 * n)).map(|ii| nodes[ii]).collect(), + ]; + let resolution = 1.0; + let m = modularity(&g, &communities, resolution).unwrap(); + // There are two complete subgraphs, each with: + // * e = n*(n-1)/2 internal edges + // * total node degree 2*e + 1 + // The edge weight for the whole graph is 2*e + 1. So the expected + // modularity is 2 * [ e/(2*e + 1) - 1/4 ]. + let e = (n * (n - 1) / 2) as f64; + let m_expected = 2.0 * (e / (2.0 * e + 1.0) - 0.25); + assert!((m - m_expected).abs() < 1.0e-9); + } + } + + #[test] + fn test_modularity_directed() { + type G = DiGraph<(), f64>; + type N = ::NodeId; + + for n in 3..10 { + let mut g = G::with_capacity(2 * n, 2 * n + 2); + for _ii in 0..2 * n { + g.add_node(()); + } + let nodes: Vec = g.node_identifiers().collect(); + // Create two cycles + for ii in 0..n { + let jj = (ii + 1) % n; + g.add_edge(nodes[ii], nodes[jj], 1.0); + g.add_edge(nodes[n + ii], nodes[n + jj], 1.0); + } + // Add two edges connecting the cycles + g.add_edge(nodes[0], nodes[n], 1.0); + g.add_edge(nodes[n + 1], nodes[1], 1.0); + + let communities: Vec> = vec![ + (0..n).map(|ii| nodes[ii]).collect(), + (n..2 * n).map(|ii| nodes[ii]).collect(), + ]; + + let resolution = 1.0; + let m = modularity(&g, &communities, resolution).unwrap(); + + // Each cycle subgraph has: + // * n internal edges + // * total node degree n + 1 (outgoing) and n + 1 (incoming) + // The 
edge weight for the whole graph is 2*n + 2. So the expected + // modularity is 2 * [ n/(2*n + 2) - (n+1)^2 / (2*n + 2)^2 ] + // = n/(n + 1) - 1/2 + let m_expected = n as f64 / (n as f64 + 1.0) - 0.5; + assert!((m - m_expected).abs() < 1.0e-9); + } } } From b2f181ca86b0eaece191bb846ab6f29d6755db59 Mon Sep 17 00:00:00 2001 From: jpacold Date: Wed, 21 Aug 2024 20:43:12 -0600 Subject: [PATCH 06/29] use associate type bounds to simplify `where` clauses, use `HashSet` for partition instead of `Vec` --- rustworkx-core/src/community/metrics.rs | 65 ++++++++++++------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 424714be45..838ea1a0f9 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -1,12 +1,9 @@ use core::fmt; -use num_traits::Num; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::error::Error; use std::hash::Hash; -use std::ops::AddAssign; -use std::vec::Vec; -use petgraph::visit::{Data, EdgeRef, GraphBase, GraphProp, IntoEdgeReferences}; +use petgraph::visit::{Data, EdgeRef, GraphProp, IntoEdgeReferences}; #[derive(Debug, PartialEq, Eq)] pub struct NotAPartitionError; @@ -20,26 +17,32 @@ impl fmt::Display for NotAPartitionError { } } -fn _total_edge_weight(graph: &G) -> W +pub trait ModularityGraph: + Data + Copy, NodeId: Hash + Eq> + GraphProp + IntoEdgeReferences +{ +} +impl + Copy, NodeId: Hash + Eq> + GraphProp + IntoEdgeReferences> + ModularityGraph for G +{ +} + +fn _total_edge_weight(graph: &G) -> f64 where - G: Data + IntoEdgeReferences, - W: Num + Copy, + G: ModularityGraph, { graph .edge_references() .map(|edge| *edge.weight()) - .fold(W::zero(), |s, e| s + e) + .fold(0.0, |s, e| s + e.into()) } -pub fn modularity( +pub fn modularity( graph: G, - communities: &[Vec], + communities: &[HashSet], resolution: f64, ) -> Result where - G: GraphProp + Data + 
IntoEdgeReferences, - ::NodeId: Hash + Eq + Copy, - W: Num + Copy + Into + AddAssign, + G: ModularityGraph, { let mut node_to_community: HashMap = HashMap::with_capacity(communities.iter().map(|v| v.len()).sum()); @@ -52,10 +55,10 @@ where } } - let mut internal_edge_weights = vec![W::zero(); communities.len()]; - let mut outgoing_edge_weights = vec![W::zero(); communities.len()]; + let mut internal_edge_weights = vec![0.0; communities.len()]; + let mut outgoing_edge_weights = vec![0.0; communities.len()]; let mut incoming_edge_weights_opt = if graph.is_directed() { - Some(vec![W::zero(); communities.len()]) + Some(vec![0.0; communities.len()]) } else { None }; @@ -65,13 +68,13 @@ where if let (Some(&c_a), Some(&c_b)) = (node_to_community.get(&a), node_to_community.get(&b)) { let &w = edge.weight(); if c_a == c_b { - internal_edge_weights[c_a] += w; + internal_edge_weights[c_a] += w.into(); } - outgoing_edge_weights[c_a] += w; + outgoing_edge_weights[c_a] += w.into(); if let Some(ref mut incoming_edge_weights) = incoming_edge_weights_opt { - incoming_edge_weights[c_b] += w; + incoming_edge_weights[c_b] += w.into(); } else { - outgoing_edge_weights[c_b] += w; + outgoing_edge_weights[c_b] += w.into(); } } else { // At least one node was not included in `communities` @@ -81,23 +84,15 @@ where let m: f64 = _total_edge_weight(&graph).into(); - let sigma_internal: f64 = internal_edge_weights - .iter() - .fold(W::zero(), |s, &w| s + w) - .into(); + let sigma_internal: f64 = internal_edge_weights.iter().fold(0.0, |s, &w| s + w); let sigma_total_squared: f64 = if let Some(incoming_edge_weights) = incoming_edge_weights_opt { incoming_edge_weights .iter() .zip(outgoing_edge_weights.iter()) - .fold(W::zero(), |s, (&x, &y)| s + x * y) - .into() + .fold(0.0, |s, (&x, &y)| s + x * y) } else { - outgoing_edge_weights - .iter() - .fold(W::zero(), |s, &x| s + x * x) - .into() - / 4.0 + outgoing_edge_weights.iter().fold(0.0, |s, &x| s + x * x) / 4.0 }; Ok(sigma_internal / m - 
resolution * sigma_total_squared / (m * m)) @@ -108,7 +103,7 @@ mod tests { use crate::generators::barbell_graph; use petgraph::graph::{DiGraph, UnGraph}; use petgraph::visit::{GraphBase, IntoNodeIdentifiers}; - use std::vec::Vec; + use std::collections::HashSet; use super::modularity; @@ -120,7 +115,7 @@ mod tests { for n in 3..10 { let g: G = barbell_graph(Some(n), Some(0), None, None, || (), || 1.0f64).unwrap(); let nodes: Vec = g.node_identifiers().collect(); - let communities: Vec> = vec![ + let communities: Vec> = vec![ (0..n).map(|ii| nodes[ii]).collect(), (n..(2 * n)).map(|ii| nodes[ii]).collect(), ]; @@ -158,7 +153,7 @@ mod tests { g.add_edge(nodes[0], nodes[n], 1.0); g.add_edge(nodes[n + 1], nodes[1], 1.0); - let communities: Vec> = vec![ + let communities: Vec> = vec![ (0..n).map(|ii| nodes[ii]).collect(), (n..2 * n).map(|ii| nodes[ii]).collect(), ]; From 3568a76517313a8664d3e1d0418cd4782a692268 Mon Sep 17 00:00:00 2001 From: jpacold Date: Thu, 22 Aug 2024 22:07:08 -0600 Subject: [PATCH 07/29] create `struct Partition` --- rustworkx-core/src/community/metrics.rs | 84 ++++++++++++++++--------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 838ea1a0f9..bffc7fe6a8 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet}; use std::error::Error; use std::hash::Hash; -use petgraph::visit::{Data, EdgeRef, GraphProp, IntoEdgeReferences}; +use petgraph::visit::{Data, EdgeRef, GraphProp, IntoEdgeReferences, NodeCount}; #[derive(Debug, PartialEq, Eq)] pub struct NotAPartitionError; @@ -17,23 +17,63 @@ impl fmt::Display for NotAPartitionError { } } -pub trait ModularityGraph: - Data + Copy, NodeId: Hash + Eq> + GraphProp + IntoEdgeReferences +pub trait ModularityComputable: + Data + Copy, NodeId: Hash + Eq> + GraphProp + IntoEdgeReferences + NodeCount { } -impl + 
Copy, NodeId: Hash + Eq> + GraphProp + IntoEdgeReferences> - ModularityGraph for G +impl< + G: Data + Copy, NodeId: Hash + Eq> + + GraphProp + + IntoEdgeReferences + + NodeCount, + > ModularityComputable for G { } -fn _total_edge_weight(graph: &G) -> f64 +pub struct Partition where - G: ModularityGraph, + G: ModularityComputable, { - graph - .edge_references() - .map(|edge| *edge.weight()) - .fold(0.0, |s, e| s + e.into()) + graph: G, + node_to_subset: HashMap, +} + +impl Partition { + pub fn new( + graph: G, + subsets: &[HashSet], + ) -> Result, NotAPartitionError> { + let mut node_to_subset: HashMap = + HashMap::with_capacity(subsets.iter().map(|v| v.len()).sum()); + for (ii, v) in subsets.iter().enumerate() { + for &node in v { + if let Some(_n) = node_to_subset.insert(node, ii) { + // argument `communities` contains a duplicate node + return Err(NotAPartitionError {}); + } + } + } + + if node_to_subset.len() != graph.node_count() { + return Err(NotAPartitionError {}); + } + + Ok(Partition:: { + graph: graph, + node_to_subset: node_to_subset, + }) + } + + pub fn total_edge_weight(&self) -> f64 { + self.graph + .edge_references() + .map(|edge| *edge.weight()) + .fold(0.0, |s, e| s + e.into()) + } + + pub fn get_subset_id(&self, node: &G::NodeId) -> Option<&usize> { + self.node_to_subset.get(node) + } } pub fn modularity( @@ -42,18 +82,9 @@ pub fn modularity( resolution: f64, ) -> Result where - G: ModularityGraph, + G: ModularityComputable, { - let mut node_to_community: HashMap = - HashMap::with_capacity(communities.iter().map(|v| v.len()).sum()); - for (ii, v) in communities.iter().enumerate() { - for &node in v { - if let Some(_n) = node_to_community.insert(node, ii) { - // argument `communities` contains a duplicate node - return Err(NotAPartitionError {}); - } - } - } + let partition = Partition::new(graph, &communities)?; let mut internal_edge_weights = vec![0.0; communities.len()]; let mut outgoing_edge_weights = vec![0.0; communities.len()]; @@ -65,7 
+96,8 @@ where for edge in graph.edge_references() { let (a, b) = (edge.source(), edge.target()); - if let (Some(&c_a), Some(&c_b)) = (node_to_community.get(&a), node_to_community.get(&b)) { + if let (Some(&c_a), Some(&c_b)) = (partition.get_subset_id(&a), partition.get_subset_id(&b)) + { let &w = edge.weight(); if c_a == c_b { internal_edge_weights[c_a] += w.into(); @@ -76,14 +108,9 @@ where } else { outgoing_edge_weights[c_b] += w.into(); } - } else { - // At least one node was not included in `communities` - return Err(NotAPartitionError {}); } } - let m: f64 = _total_edge_weight(&graph).into(); - let sigma_internal: f64 = internal_edge_weights.iter().fold(0.0, |s, &w| s + w); let sigma_total_squared: f64 = if let Some(incoming_edge_weights) = incoming_edge_weights_opt { @@ -95,6 +122,7 @@ where outgoing_edge_weights.iter().fold(0.0, |s, &x| s + x * x) / 4.0 }; + let m: f64 = partition.total_edge_weight(); Ok(sigma_internal / m - resolution * sigma_total_squared / (m * m)) } From b986a8e2bb4505f2f7163595c61b99f34cd10077 Mon Sep 17 00:00:00 2001 From: jpacold Date: Thu, 22 Aug 2024 22:48:47 -0600 Subject: [PATCH 08/29] Move modularity calculation to `Partition` --- rustworkx-core/src/community/metrics.rs | 102 +++++++++++++----------- 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index bffc7fe6a8..24f4bfaa93 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -12,7 +12,7 @@ impl fmt::Display for NotAPartitionError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, - "The input communities do not form a partition of the input graph." + "The input subsets do not form a partition of the input graph." 
) } } @@ -30,19 +30,20 @@ impl< { } -pub struct Partition +pub struct Partition<'g, G> where G: ModularityComputable, { - graph: G, + graph: &'g G, + n_subsets: usize, node_to_subset: HashMap, } -impl Partition { +impl<'g, G: ModularityComputable> Partition<'g, G> { pub fn new( - graph: G, + graph: &'g G, subsets: &[HashSet], - ) -> Result, NotAPartitionError> { + ) -> Result, NotAPartitionError> { let mut node_to_subset: HashMap = HashMap::with_capacity(subsets.iter().map(|v| v.len()).sum()); for (ii, v) in subsets.iter().enumerate() { @@ -58,8 +59,9 @@ impl Partition { return Err(NotAPartitionError {}); } - Ok(Partition:: { + Ok(Partition::<'g, G> { graph: graph, + n_subsets: subsets.len(), node_to_subset: node_to_subset, }) } @@ -74,6 +76,51 @@ impl Partition { pub fn get_subset_id(&self, node: &G::NodeId) -> Option<&usize> { self.node_to_subset.get(node) } + + pub fn modularity(&self, resolution: f64) -> Result { + let mut internal_edge_weights = vec![0.0; self.n_subsets]; + let mut outgoing_edge_weights = vec![0.0; self.n_subsets]; + + let directed = self.graph.is_directed(); + let mut incoming_edge_weights = if directed { + vec![0.0; self.n_subsets] + } else { + vec![] + }; + + for edge in self.graph.edge_references() { + let (a, b) = (edge.source(), edge.target()); + if let (Some(&c_a), Some(&c_b)) = (self.get_subset_id(&a), self.get_subset_id(&b)) { + let w: f64 = (*edge.weight()).into(); + if c_a == c_b { + internal_edge_weights[c_a] += w; + } + outgoing_edge_weights[c_a] += w; + if directed { + incoming_edge_weights[c_b] += w; + } else { + outgoing_edge_weights[c_b] += w; + } + } else { + return Err(NotAPartitionError {}); + } + } + + let sigma_internal: f64 = internal_edge_weights.iter().sum(); + + let sigma_total_squared: f64 = if directed { + incoming_edge_weights + .iter() + .zip(outgoing_edge_weights.iter()) + .map(|(&x, &y)| x * y) + .sum() + } else { + outgoing_edge_weights.iter().map(|&x| x * x).sum::() / 4.0 + }; + + let m: f64 = 
self.total_edge_weight(); + Ok(sigma_internal / m - resolution * sigma_total_squared / (m * m)) + } } pub fn modularity( @@ -84,46 +131,9 @@ pub fn modularity( where G: ModularityComputable, { - let partition = Partition::new(graph, &communities)?; - - let mut internal_edge_weights = vec![0.0; communities.len()]; - let mut outgoing_edge_weights = vec![0.0; communities.len()]; - let mut incoming_edge_weights_opt = if graph.is_directed() { - Some(vec![0.0; communities.len()]) - } else { - None - }; - - for edge in graph.edge_references() { - let (a, b) = (edge.source(), edge.target()); - if let (Some(&c_a), Some(&c_b)) = (partition.get_subset_id(&a), partition.get_subset_id(&b)) - { - let &w = edge.weight(); - if c_a == c_b { - internal_edge_weights[c_a] += w.into(); - } - outgoing_edge_weights[c_a] += w.into(); - if let Some(ref mut incoming_edge_weights) = incoming_edge_weights_opt { - incoming_edge_weights[c_b] += w.into(); - } else { - outgoing_edge_weights[c_b] += w.into(); - } - } - } - - let sigma_internal: f64 = internal_edge_weights.iter().fold(0.0, |s, &w| s + w); - - let sigma_total_squared: f64 = if let Some(incoming_edge_weights) = incoming_edge_weights_opt { - incoming_edge_weights - .iter() - .zip(outgoing_edge_weights.iter()) - .fold(0.0, |s, (&x, &y)| s + x * y) - } else { - outgoing_edge_weights.iter().fold(0.0, |s, &x| s + x * x) / 4.0 - }; + let partition = Partition::new(&graph, &communities)?; - let m: f64 = partition.total_edge_weight(); - Ok(sigma_internal / m - resolution * sigma_total_squared / (m * m)) + partition.modularity(resolution) } #[cfg(test)] From 0bcd7471b7eba9f70796e8b7fd9d7134eb4c4d8c Mon Sep 17 00:00:00 2001 From: jpacold Date: Sun, 25 Aug 2024 22:03:07 -0600 Subject: [PATCH 09/29] Add struct for Louvain algorithm --- rustworkx-core/src/community/louvain.rs | 33 +++++++++++++++++++++++++ rustworkx-core/src/community/metrics.rs | 27 +++++++++++++++++--- rustworkx-core/src/community/mod.rs | 3 ++- 3 files changed, 58 
insertions(+), 5 deletions(-) create mode 100644 rustworkx-core/src/community/louvain.rs diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs new file mode 100644 index 0000000000..0b18d4150a --- /dev/null +++ b/rustworkx-core/src/community/louvain.rs @@ -0,0 +1,33 @@ +use super::metrics::{ModularityComputable, Partition}; +use std::collections::{HashMap, HashSet}; + +struct LouvainLevel<'g, G> +where + G: ModularityComputable, +{ + partition: Partition<'g, G>, +} + +impl<'g, G: ModularityComputable> LouvainLevel<'g, G> { + pub fn new(input_graph: &'g G) -> LouvainLevel<'g, G> { + LouvainLevel { + partition: Partition::<'g, G>::isolated_nodes_partition(input_graph), + } + } +} + +pub fn louvain_communities( + graph: &G, + resolution: f64, + gain_threshold: f64, + max_level: Option, + seed: Option, +) -> Vec> +where + G: ModularityComputable, +{ + let current_partition = LouvainLevel::new(graph); + + + vec![] +} diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 24f4bfaa93..52ba6a54c1 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -3,7 +3,9 @@ use std::collections::{HashMap, HashSet}; use std::error::Error; use std::hash::Hash; -use petgraph::visit::{Data, EdgeRef, GraphProp, IntoEdgeReferences, NodeCount}; +use petgraph::visit::{ + Data, EdgeRef, GraphProp, IntoEdgeReferences, IntoNodeReferences, NodeCount, NodeRef, +}; #[derive(Debug, PartialEq, Eq)] pub struct NotAPartitionError; @@ -18,14 +20,19 @@ impl fmt::Display for NotAPartitionError { } pub trait ModularityComputable: - Data + Copy, NodeId: Hash + Eq> + GraphProp + IntoEdgeReferences + NodeCount + Data + Copy, NodeId: Hash + Eq + Copy> + + GraphProp + + IntoEdgeReferences + + NodeCount + + IntoNodeReferences { } impl< - G: Data + Copy, NodeId: Hash + Eq> + G: Data + Copy, NodeId: Hash + Eq + Copy> + GraphProp + IntoEdgeReferences - + NodeCount, + + NodeCount + + 
IntoNodeReferences, > ModularityComputable for G { } @@ -66,6 +73,18 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { }) } + pub fn isolated_nodes_partition(graph: &'g G) -> Partition<'g, G> { + Partition::<'g, G> { + graph: graph, + n_subsets: graph.node_count(), + node_to_subset: graph + .node_references() + .enumerate() + .map(|(ii, n)| (n.id(), ii)) + .collect(), + } + } + pub fn total_edge_weight(&self) -> f64 { self.graph .edge_references() diff --git a/rustworkx-core/src/community/mod.rs b/rustworkx-core/src/community/mod.rs index 1b8c340353..25402dcb3b 100644 --- a/rustworkx-core/src/community/mod.rs +++ b/rustworkx-core/src/community/mod.rs @@ -1,3 +1,4 @@ mod metrics; +pub use metrics::{modularity, ModularityComputable}; -pub use metrics::modularity; +mod louvain; From fb3faea8bac17aef13039d6a260c2080705824c6 Mon Sep 17 00:00:00 2001 From: jpacold Date: Sat, 31 Aug 2024 21:41:56 -0600 Subject: [PATCH 10/29] Minimal working version for undirected graphs --- rustworkx-core/src/community/louvain.rs | 179 ++++++++++++++++++++++-- rustworkx-core/src/community/metrics.rs | 54 +++---- rustworkx-core/src/community/mod.rs | 1 + rustworkx-core/src/lib.rs | 1 + 4 files changed, 196 insertions(+), 39 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 0b18d4150a..f07647d4e1 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -1,33 +1,186 @@ -use super::metrics::{ModularityComputable, Partition}; +use super::metrics::{ModularityComputable, NotAPartitionError, Partition}; +use petgraph::{ + graph::UnGraph, + visit::{EdgeRef, NodeRef}, +}; use std::collections::{HashMap, HashSet}; -struct LouvainLevel<'g, G> +fn _one_level_undirected<'g, G>( + graph: &G, + current_partition: &Partition, + m: f64, + resolution: f64, +) -> Option>> where G: ModularityComputable, { - partition: Partition<'g, G>, -} + let mut edges: HashMap<(usize, usize), f64> = 
HashMap::new(); + for e in graph.edge_references() { + if let (Some(&a), Some(&b)) = ( + current_partition.get_subset_id(&e.source()), + current_partition.get_subset_id(&e.target()), + ) { + let w: f64 = (*e.weight()).into(); + edges.entry((a, b)).and_modify(|x| *x += w).or_insert(w); + } + } + + let aggregated_graph: UnGraph<(), f64, usize> = + UnGraph::from_edges(edges.iter().map(|(k, &v)| (k.0, k.1, v))); + let node_count = aggregated_graph.node_count(); + + let mut node_to_community: Vec = (0..node_count).collect(); + + let mut degrees = vec![0.0; node_count]; + for e in aggregated_graph.edge_references() { + let w = e.weight(); + degrees[e.source().index()] += w; + degrees[e.target().index()] += w; + } + let mut s_tot = degrees.clone(); -impl<'g, G: ModularityComputable> LouvainLevel<'g, G> { - pub fn new(input_graph: &'g G) -> LouvainLevel<'g, G> { - LouvainLevel { - partition: Partition::<'g, G>::isolated_nodes_partition(input_graph), + let mut improved = false; + loop { + let mut performed_move = false; + for node in 0..node_count { + let mut neighbor_weights: HashMap = HashMap::new(); + for nbr in aggregated_graph.neighbors_undirected(node.into()) { + for e in aggregated_graph.edges_connecting(node.into(), nbr) { + let w = e.weight(); + let com = node_to_community[nbr.index()]; + neighbor_weights + .entry(com) + .and_modify(|x| *x += w) + .or_insert(*w); + } + } + + let mut best_gain = 0.0; + let init_com = node_to_community[node]; + let deg = degrees[init_com]; + let mut best_com = init_com; + let two_m_sq = 2.0 * m * m; + + degrees[best_com] -= deg; + + let delta = if let Some(&w) = neighbor_weights.get(&best_com) { + w + } else { + 0.0 + }; + let remove_cost = -delta / m + resolution * (s_tot[best_com] * deg) / two_m_sq; + + for (&nbr_com, &wt) in neighbor_weights.iter() { + let gain = remove_cost + wt / m - resolution * s_tot[nbr_com] * deg / two_m_sq; + if gain > best_gain { + best_gain = gain; + best_com = nbr_com; + } + } + + s_tot[best_com] += deg; 
+ + if best_com != init_com { + performed_move = true; + node_to_community[node] = best_com; + } } + if performed_move { + improved = true; + } else { + break; + } + } + + if !improved { + return None; } + + let mut com_to_final_index = HashMap::new(); + let mut final_partition: Vec> = Vec::new(); + + for n in graph.node_identifiers() { + let prev_com = current_partition.get_subset_id(&n).unwrap_or(&0); + let inner_com = node_to_community[*prev_com]; + let new_com = if let Some(&c) = com_to_final_index.get(&inner_com) { + c + } else { + let n_com = final_partition.len(); + com_to_final_index.insert(inner_com, n_com); + final_partition.push(HashSet::new()); + n_com + }; + final_partition[new_com].insert(n); + } + Some(final_partition) } pub fn louvain_communities( - graph: &G, + graph: G, resolution: f64, gain_threshold: f64, max_level: Option, seed: Option, -) -> Vec> +) -> Result>, NotAPartitionError> where G: ModularityComputable, { - let current_partition = LouvainLevel::new(graph); - + let mut result: Vec> = graph + .node_references() + .map(|n| HashSet::from([n.id()])) + .collect(); + let mut current_partition = Partition::new(&graph, &result)?; + let m = current_partition.total_edge_weight(); - vec![] + let mut current_modularity = current_partition.modularity(resolution)?; + + let mut n_levels = 0; + while let Some(improved_partition) = + _one_level_undirected(&graph, ¤t_partition, m, resolution) + { + result = improved_partition; + current_partition = Partition::new(&graph, &result)?; + let improved_modularity = current_partition.modularity(resolution)?; + if improved_modularity - current_modularity < gain_threshold { + break; + } + current_modularity = improved_modularity; + + match max_level { + Some(t) => { + n_levels += 1; + if n_levels >= t { + break; + } + } + None => (), + }; + } + + Ok(result) +} + +#[cfg(test)] +mod tests { + use crate::community::metrics::NotAPartitionError; + use crate::generators::barbell_graph; + use petgraph::graph::UnGraph; + 
+ use super::louvain_communities; + + #[test] + fn test_louvain_barbell_graph() -> Result<(), NotAPartitionError> { + type G = UnGraph<(), f64>; + + for n in 3..10 { + let g: G = barbell_graph(Some(n), Some(0), None, None, || (), || 1.0f64).unwrap(); + let resolution = 1.0; + let gain_threshold = 0.01; + let result = louvain_communities(&g, resolution, gain_threshold, None, None)?; + assert_eq!(result.len(), 2); + assert_eq!(result[0].len(), n); + assert_eq!(result[1].len(), n); + } + Ok(()) + } } diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 52ba6a54c1..90237fb5d4 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -4,7 +4,7 @@ use std::error::Error; use std::hash::Hash; use petgraph::visit::{ - Data, EdgeRef, GraphProp, IntoEdgeReferences, IntoNodeReferences, NodeCount, NodeRef, + Data, EdgeRef, GraphProp, IntoEdgeReferences, IntoNodeReferences, NodeCount, }; #[derive(Debug, PartialEq, Eq)] @@ -28,12 +28,12 @@ pub trait ModularityComputable: { } impl< - G: Data + Copy, NodeId: Hash + Eq + Copy> + Graph: Data + Copy, NodeId: Hash + Eq + Copy> + GraphProp + IntoEdgeReferences + NodeCount + IntoNodeReferences, - > ModularityComputable for G + > ModularityComputable for Graph { } @@ -45,6 +45,11 @@ where n_subsets: usize, node_to_subset: HashMap, } +pub struct PartitionEdgeWeights { + pub internal: Vec, + pub outgoing: Vec, + pub incoming: Option>, +} impl<'g, G: ModularityComputable> Partition<'g, G> { pub fn new( @@ -72,19 +77,6 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { node_to_subset: node_to_subset, }) } - - pub fn isolated_nodes_partition(graph: &'g G) -> Partition<'g, G> { - Partition::<'g, G> { - graph: graph, - n_subsets: graph.node_count(), - node_to_subset: graph - .node_references() - .enumerate() - .map(|(ii, n)| (n.id(), ii)) - .collect(), - } - } - pub fn total_edge_weight(&self) -> f64 { self.graph .edge_references() @@ -96,15 +88,15 
@@ impl<'g, G: ModularityComputable> Partition<'g, G> { self.node_to_subset.get(node) } - pub fn modularity(&self, resolution: f64) -> Result { + pub fn partition_edge_weights(&self) -> Result { let mut internal_edge_weights = vec![0.0; self.n_subsets]; let mut outgoing_edge_weights = vec![0.0; self.n_subsets]; let directed = self.graph.is_directed(); let mut incoming_edge_weights = if directed { - vec![0.0; self.n_subsets] + Some(vec![0.0; self.n_subsets]) } else { - vec![] + None }; for edge in self.graph.edge_references() { @@ -115,8 +107,8 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { internal_edge_weights[c_a] += w; } outgoing_edge_weights[c_a] += w; - if directed { - incoming_edge_weights[c_b] += w; + if let Some(ref mut incoming) = incoming_edge_weights { + incoming[c_b] += w; } else { outgoing_edge_weights[c_b] += w; } @@ -125,16 +117,26 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { } } - let sigma_internal: f64 = internal_edge_weights.iter().sum(); + Ok(PartitionEdgeWeights { + internal: internal_edge_weights, + outgoing: outgoing_edge_weights, + incoming: incoming_edge_weights, + }) + } + + pub fn modularity(&self, resolution: f64) -> Result { + let weights = self.partition_edge_weights()?; + + let sigma_internal: f64 = weights.internal.iter().sum(); - let sigma_total_squared: f64 = if directed { - incoming_edge_weights + let sigma_total_squared: f64 = if let Some(incoming) = weights.incoming { + incoming .iter() - .zip(outgoing_edge_weights.iter()) + .zip(weights.outgoing.iter()) .map(|(&x, &y)| x * y) .sum() } else { - outgoing_edge_weights.iter().map(|&x| x * x).sum::() / 4.0 + weights.outgoing.iter().map(|&x| x * x).sum::() / 4.0 }; let m: f64 = self.total_edge_weight(); diff --git a/rustworkx-core/src/community/mod.rs b/rustworkx-core/src/community/mod.rs index 25402dcb3b..b547d1b598 100644 --- a/rustworkx-core/src/community/mod.rs +++ b/rustworkx-core/src/community/mod.rs @@ -2,3 +2,4 @@ mod metrics; pub use metrics::{modularity, 
ModularityComputable}; mod louvain; +pub use louvain::louvain_communities; diff --git a/rustworkx-core/src/lib.rs b/rustworkx-core/src/lib.rs index 882796458a..41a04e7f71 100644 --- a/rustworkx-core/src/lib.rs +++ b/rustworkx-core/src/lib.rs @@ -98,6 +98,7 @@ pub mod bipartite_coloring; pub mod centrality; /// Module for coloring algorithms. pub mod coloring; +/// Module for community detection algorithms. pub mod community; pub mod connectivity; /// Module for algorithms that work on DAGs. From ab976fbd8642a8e98d0314ce262d079d603235c8 Mon Sep 17 00:00:00 2001 From: jpacold Date: Sun, 1 Sep 2024 17:24:18 -0600 Subject: [PATCH 11/29] Use random seed argument --- rustworkx-core/src/community/louvain.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index f07647d4e1..b53bb6e1e5 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -3,6 +3,8 @@ use petgraph::{ graph::UnGraph, visit::{EdgeRef, NodeRef}, }; +use rand::SeedableRng; +use rand_pcg::Pcg64; use std::collections::{HashMap, HashSet}; fn _one_level_undirected<'g, G>( @@ -10,6 +12,7 @@ fn _one_level_undirected<'g, G>( current_partition: &Partition, m: f64, resolution: f64, + seed: Option, ) -> Option>> where G: ModularityComputable, @@ -42,7 +45,13 @@ where let mut improved = false; loop { let mut performed_move = false; - for node in 0..node_count { + + let mut node_shuffle: Pcg64 = match seed { + Some(rng_seed) => Pcg64::seed_from_u64(rng_seed), + None => Pcg64::from_entropy(), + }; + + for node in rand::seq::index::sample(&mut node_shuffle, node_count, node_count) { let mut neighbor_weights: HashMap = HashMap::new(); for nbr in aggregated_graph.neighbors_undirected(node.into()) { for e in aggregated_graph.edges_connecting(node.into(), nbr) { @@ -136,7 +145,7 @@ where let mut n_levels = 0; while let Some(improved_partition) = - 
_one_level_undirected(&graph, ¤t_partition, m, resolution) + _one_level_undirected(&graph, ¤t_partition, m, resolution, seed) { result = improved_partition; current_partition = Partition::new(&graph, &result)?; @@ -177,6 +186,8 @@ mod tests { let resolution = 1.0; let gain_threshold = 0.01; let result = louvain_communities(&g, resolution, gain_threshold, None, None)?; + // For a barbell graph, we expect the Louvain algorithm to identify + // the two complete subgraphs as the final communities assert_eq!(result.len(), 2); assert_eq!(result[0].len(), n); assert_eq!(result[1].len(), n); From e0fed65b940926918be33e1c0a549350a3655ab1 Mon Sep 17 00:00:00 2001 From: jpacold Date: Mon, 2 Sep 2024 21:07:20 -0600 Subject: [PATCH 12/29] Start factoring out helper functions, use NodeIndexable for some code cleanup --- rustworkx-core/src/community/louvain.rs | 54 ++++++------ rustworkx-core/src/community/metrics.rs | 105 ++++++++++++------------ rustworkx-core/src/community/mod.rs | 17 ++++ rustworkx-core/src/community/utils.rs | 12 +++ 4 files changed, 113 insertions(+), 75 deletions(-) create mode 100644 rustworkx-core/src/community/utils.rs diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index b53bb6e1e5..bea065fd5f 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -1,4 +1,7 @@ -use super::metrics::{ModularityComputable, NotAPartitionError, Partition}; +use super::metrics::{ModularityComputable, Partition}; +use super::utils::total_edge_weight; + +use super::NotAPartitionError; use petgraph::{ graph::UnGraph, visit::{EdgeRef, NodeRef}, @@ -12,6 +15,7 @@ fn _one_level_undirected<'g, G>( current_partition: &Partition, m: f64, resolution: f64, + gain_threshold: f64, seed: Option, ) -> Option>> where @@ -19,13 +23,12 @@ where { let mut edges: HashMap<(usize, usize), f64> = HashMap::new(); for e in graph.edge_references() { - if let (Some(&a), Some(&b)) = ( - 
current_partition.get_subset_id(&e.source()), - current_partition.get_subset_id(&e.target()), - ) { - let w: f64 = (*e.weight()).into(); - edges.entry((a, b)).and_modify(|x| *x += w).or_insert(w); - } + let (a, b) = ( + current_partition.get_subset_id(e.source()), + current_partition.get_subset_id(e.target()), + ); + let w: f64 = (*e.weight()).into(); + edges.entry((a, b)).and_modify(|x| *x += w).or_insert(w); } let aggregated_graph: UnGraph<(), f64, usize> = @@ -42,7 +45,7 @@ where } let mut s_tot = degrees.clone(); - let mut improved = false; + let mut total_gain = 0.0; loop { let mut performed_move = false; @@ -91,17 +94,16 @@ where if best_com != init_com { performed_move = true; + total_gain += best_gain; node_to_community[node] = best_com; } } - if performed_move { - improved = true; - } else { + if !performed_move { break; } } - if !improved { + if total_gain < gain_threshold { return None; } @@ -109,8 +111,8 @@ where let mut final_partition: Vec> = Vec::new(); for n in graph.node_identifiers() { - let prev_com = current_partition.get_subset_id(&n).unwrap_or(&0); - let inner_com = node_to_community[*prev_com]; + let prev_com = current_partition.get_subset_id(n); + let inner_com = node_to_community[prev_com]; let new_com = if let Some(&c) = com_to_final_index.get(&inner_com) { c } else { @@ -139,21 +141,27 @@ where .map(|n| HashSet::from([n.id()])) .collect(); let mut current_partition = Partition::new(&graph, &result)?; - let m = current_partition.total_edge_weight(); - let mut current_modularity = current_partition.modularity(resolution)?; + let m = total_edge_weight(&graph); let mut n_levels = 0; - while let Some(improved_partition) = - _one_level_undirected(&graph, ¤t_partition, m, resolution, seed) - { + while let Some(improved_partition) = _one_level_undirected( + &graph, + ¤t_partition, + m, + resolution, + gain_threshold, + seed, + ) { + let current_modularity = current_partition.modularity(resolution); + result = improved_partition; current_partition 
= Partition::new(&graph, &result)?; - let improved_modularity = current_partition.modularity(resolution)?; + + let improved_modularity = current_partition.modularity(resolution); if improved_modularity - current_modularity < gain_threshold { break; } - current_modularity = improved_modularity; match max_level { Some(t) => { @@ -171,7 +179,7 @@ where #[cfg(test)] mod tests { - use crate::community::metrics::NotAPartitionError; + use crate::community::NotAPartitionError; use crate::generators::barbell_graph; use petgraph::graph::UnGraph; diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 90237fb5d4..dbba3b5452 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -1,23 +1,11 @@ -use core::fmt; -use std::collections::{HashMap, HashSet}; -use std::error::Error; -use std::hash::Hash; +use super::utils::total_edge_weight; +use super::NotAPartitionError; use petgraph::visit::{ - Data, EdgeRef, GraphProp, IntoEdgeReferences, IntoNodeReferences, NodeCount, + Data, EdgeRef, GraphProp, IntoEdgeReferences, IntoNodeReferences, NodeCount, NodeIndexable, }; - -#[derive(Debug, PartialEq, Eq)] -pub struct NotAPartitionError; -impl Error for NotAPartitionError {} -impl fmt::Display for NotAPartitionError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "The input subsets do not form a partition of the input graph." 
- ) - } -} +use std::collections::HashSet; +use std::hash::Hash; pub trait ModularityComputable: Data + Copy, NodeId: Hash + Eq + Copy> @@ -25,6 +13,7 @@ pub trait ModularityComputable: + IntoEdgeReferences + NodeCount + IntoNodeReferences + + NodeIndexable { } impl< @@ -32,20 +21,34 @@ impl< + GraphProp + IntoEdgeReferences + NodeCount - + IntoNodeReferences, + + IntoNodeReferences + + NodeIndexable, > ModularityComputable for Graph { } +// pub fn index_map_from_subsets(subsets: &[HashSet]) -> HashMap +// where +// N: Hash + Copy + Eq, +// { +// let mut h = HashMap::with_capacity(subsets.iter().map(|s| s.len()).sum()); +// for (ii, s) in subsets.iter().enumerate() { +// for &n in s { +// h.insert(n, ii); +// } +// } +// h +// } + pub struct Partition<'g, G> where G: ModularityComputable, { graph: &'g G, n_subsets: usize, - node_to_subset: HashMap, + pub node_to_subset: Vec, } -pub struct PartitionEdgeWeights { +struct PartitionEdgeWeights { pub internal: Vec, pub outgoing: Vec, pub incoming: Option>, @@ -56,18 +59,24 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { graph: &'g G, subsets: &[HashSet], ) -> Result, NotAPartitionError> { - let mut node_to_subset: HashMap = - HashMap::with_capacity(subsets.iter().map(|v| v.len()).sum()); + // Move this into a separate helper function + let mut seen = vec![false; graph.node_count()]; + + let mut node_to_subset = vec![0; graph.node_count()]; + for (ii, v) in subsets.iter().enumerate() { for &node in v { - if let Some(_n) = node_to_subset.insert(node, ii) { + let idx = graph.to_index(node); + if seen[idx] { // argument `communities` contains a duplicate node return Err(NotAPartitionError {}); } + node_to_subset[idx] = ii; + seen[idx] = true; } } - if node_to_subset.len() != graph.node_count() { + if !seen.iter().all(|&t| t) { return Err(NotAPartitionError {}); } @@ -77,18 +86,13 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { node_to_subset: node_to_subset, }) } - pub fn total_edge_weight(&self) -> f64 { - 
self.graph - .edge_references() - .map(|edge| *edge.weight()) - .fold(0.0, |s, e| s + e.into()) - } - pub fn get_subset_id(&self, node: &G::NodeId) -> Option<&usize> { - self.node_to_subset.get(node) + pub fn get_subset_id(&self, node: G::NodeId) -> usize { + let idx = self.graph.to_index(node); + self.node_to_subset[idx] } - pub fn partition_edge_weights(&self) -> Result { + fn partition_edge_weights(&self) -> PartitionEdgeWeights { let mut internal_edge_weights = vec![0.0; self.n_subsets]; let mut outgoing_edge_weights = vec![0.0; self.n_subsets]; @@ -101,31 +105,28 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { for edge in self.graph.edge_references() { let (a, b) = (edge.source(), edge.target()); - if let (Some(&c_a), Some(&c_b)) = (self.get_subset_id(&a), self.get_subset_id(&b)) { - let w: f64 = (*edge.weight()).into(); - if c_a == c_b { - internal_edge_weights[c_a] += w; - } - outgoing_edge_weights[c_a] += w; - if let Some(ref mut incoming) = incoming_edge_weights { - incoming[c_b] += w; - } else { - outgoing_edge_weights[c_b] += w; - } + let (c_a, c_b) = (self.get_subset_id(a), self.get_subset_id(b)); + let w: f64 = (*edge.weight()).into(); + if c_a == c_b { + internal_edge_weights[c_a] += w; + } + outgoing_edge_weights[c_a] += w; + if let Some(ref mut incoming) = incoming_edge_weights { + incoming[c_b] += w; } else { - return Err(NotAPartitionError {}); + outgoing_edge_weights[c_b] += w; } } - Ok(PartitionEdgeWeights { + PartitionEdgeWeights { internal: internal_edge_weights, outgoing: outgoing_edge_weights, incoming: incoming_edge_weights, - }) + } } - pub fn modularity(&self, resolution: f64) -> Result { - let weights = self.partition_edge_weights()?; + pub fn modularity(&self, resolution: f64) -> f64 { + let weights = self.partition_edge_weights(); let sigma_internal: f64 = weights.internal.iter().sum(); @@ -139,8 +140,8 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { weights.outgoing.iter().map(|&x| x * x).sum::() / 4.0 }; - let m: f64 
= self.total_edge_weight(); - Ok(sigma_internal / m - resolution * sigma_total_squared / (m * m)) + let m: f64 = total_edge_weight(self.graph); + sigma_internal / m - resolution * sigma_total_squared / (m * m) } } @@ -154,7 +155,7 @@ where { let partition = Partition::new(&graph, &communities)?; - partition.modularity(resolution) + Ok(partition.modularity(resolution)) } #[cfg(test)] diff --git a/rustworkx-core/src/community/mod.rs b/rustworkx-core/src/community/mod.rs index b547d1b598..4fdbff1334 100644 --- a/rustworkx-core/src/community/mod.rs +++ b/rustworkx-core/src/community/mod.rs @@ -3,3 +3,20 @@ pub use metrics::{modularity, ModularityComputable}; mod louvain; pub use louvain::louvain_communities; + +mod utils; + +use core::fmt; +use std::error::Error; + +#[derive(Debug, PartialEq, Eq)] +pub struct NotAPartitionError; +impl Error for NotAPartitionError {} +impl fmt::Display for NotAPartitionError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "The input subsets do not form a partition of the input graph." 
+ ) + } +} diff --git a/rustworkx-core/src/community/utils.rs b/rustworkx-core/src/community/utils.rs new file mode 100644 index 0000000000..76452d2cf1 --- /dev/null +++ b/rustworkx-core/src/community/utils.rs @@ -0,0 +1,12 @@ +use super::ModularityComputable; +use petgraph::visit::EdgeRef; + +pub fn total_edge_weight(graph: &G) -> f64 +where + G: ModularityComputable, +{ + graph + .edge_references() + .map(|edge| *edge.weight()) + .fold(0.0, |s, e| s + e.into()) +} From 26092919ac0bc9c430c1baeaf0b2c15b8b99a567 Mon Sep 17 00:00:00 2001 From: jpacold Date: Tue, 3 Sep 2024 21:22:33 -0600 Subject: [PATCH 13/29] Use Vec instead of HashMap where possible --- rustworkx-core/src/community/louvain.rs | 85 +++++++++---------------- rustworkx-core/src/community/metrics.rs | 28 +++++++- 2 files changed, 55 insertions(+), 58 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index bea065fd5f..e9bb0ee014 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -1,31 +1,27 @@ use super::metrics::{ModularityComputable, Partition}; use super::utils::total_edge_weight; -use super::NotAPartitionError; -use petgraph::{ - graph::UnGraph, - visit::{EdgeRef, NodeRef}, -}; +use petgraph::{graph::UnGraph, visit::EdgeRef}; use rand::SeedableRng; use rand_pcg::Pcg64; use std::collections::{HashMap, HashSet}; -fn _one_level_undirected<'g, G>( +fn one_level_undirected<'g, G>( graph: &G, - current_partition: &Partition, + partition: &mut Partition, m: f64, resolution: f64, gain_threshold: f64, seed: Option, -) -> Option>> +) -> bool where G: ModularityComputable, { let mut edges: HashMap<(usize, usize), f64> = HashMap::new(); for e in graph.edge_references() { let (a, b) = ( - current_partition.get_subset_id(e.source()), - current_partition.get_subset_id(e.target()), + partition.subset_idx(e.source()), + partition.subset_idx(e.target()), ); let w: f64 = (*e.weight()).into(); edges.entry((a, 
b)).and_modify(|x| *x += w).or_insert(w); @@ -104,26 +100,29 @@ where } if total_gain < gain_threshold { - return None; + return false; } - let mut com_to_final_index = HashMap::new(); - let mut final_partition: Vec> = Vec::new(); + let mut final_index = HashMap::new(); + let mut next_com = 0; + let mut updated_partition: Vec = vec![0; node_count]; for n in graph.node_identifiers() { - let prev_com = current_partition.get_subset_id(n); + let prev_com = partition.subset_idx(n); let inner_com = node_to_community[prev_com]; - let new_com = if let Some(&c) = com_to_final_index.get(&inner_com) { + let new_com = if let Some(&c) = final_index.get(&inner_com) { c } else { - let n_com = final_partition.len(); - com_to_final_index.insert(inner_com, n_com); - final_partition.push(HashSet::new()); - n_com + let c = next_com; + final_index.insert(inner_com, c); + next_com += 1; + c }; - final_partition[new_com].insert(n); + updated_partition[graph.to_index(n)] = new_com; } - Some(final_partition) + partition.update(updated_partition); + + true } pub fn louvain_communities( @@ -132,49 +131,25 @@ pub fn louvain_communities( gain_threshold: f64, max_level: Option, seed: Option, -) -> Result>, NotAPartitionError> +) -> Vec> where G: ModularityComputable, { - let mut result: Vec> = graph - .node_references() - .map(|n| HashSet::from([n.id()])) - .collect(); - let mut current_partition = Partition::new(&graph, &result)?; + let mut partition = Partition::new_isolated_nodes(&graph); let m = total_edge_weight(&graph); let mut n_levels = 0; - while let Some(improved_partition) = _one_level_undirected( - &graph, - ¤t_partition, - m, - resolution, - gain_threshold, - seed, - ) { - let current_modularity = current_partition.modularity(resolution); - - result = improved_partition; - current_partition = Partition::new(&graph, &result)?; - - let improved_modularity = current_partition.modularity(resolution); - if improved_modularity - current_modularity < gain_threshold { - break; - } - - 
match max_level { - Some(t) => { - n_levels += 1; - if n_levels >= t { - break; - } + while one_level_undirected(&graph, &mut partition, m, resolution, gain_threshold, seed) { + if let Some(limit) = max_level { + n_levels += 1; + if n_levels >= limit { + break; } - None => (), - }; + } } - Ok(result) + partition.to_vec_of_hashsets() } #[cfg(test)] @@ -193,7 +168,7 @@ mod tests { let g: G = barbell_graph(Some(n), Some(0), None, None, || (), || 1.0f64).unwrap(); let resolution = 1.0; let gain_threshold = 0.01; - let result = louvain_communities(&g, resolution, gain_threshold, None, None)?; + let result = louvain_communities(&g, resolution, gain_threshold, None, None); // For a barbell graph, we expect the Louvain algorithm to identify // the two complete subgraphs as the final communities assert_eq!(result.len(), 2); diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index dbba3b5452..6cb6193b18 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -46,7 +46,7 @@ where { graph: &'g G, n_subsets: usize, - pub node_to_subset: Vec, + node_to_subset: Vec, } struct PartitionEdgeWeights { pub internal: Vec, @@ -87,11 +87,33 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { }) } - pub fn get_subset_id(&self, node: G::NodeId) -> usize { + pub fn new_isolated_nodes(graph: &'g G) -> Partition<'g, G> { + Partition { + graph: graph, + n_subsets: graph.node_count(), + node_to_subset: (0..graph.node_count()).collect(), + } + } + + pub fn update(&mut self, new_partition: Vec) { + self.node_to_subset = new_partition; + self.n_subsets = *self.node_to_subset.iter().max().unwrap_or(&0) + 1; + } + + pub fn subset_idx(&self, node: G::NodeId) -> usize { let idx = self.graph.to_index(node); self.node_to_subset[idx] } + pub fn to_vec_of_hashsets(&self) -> Vec> { + let mut v = vec![HashSet::new(); self.n_subsets]; + for (idx, &s) in self.node_to_subset.iter().enumerate() { + let node = 
self.graph.from_index(idx); + v[s].insert(node); + } + v + } + fn partition_edge_weights(&self) -> PartitionEdgeWeights { let mut internal_edge_weights = vec![0.0; self.n_subsets]; let mut outgoing_edge_weights = vec![0.0; self.n_subsets]; @@ -105,7 +127,7 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { for edge in self.graph.edge_references() { let (a, b) = (edge.source(), edge.target()); - let (c_a, c_b) = (self.get_subset_id(a), self.get_subset_id(b)); + let (c_a, c_b) = (self.subset_idx(a), self.subset_idx(b)); let w: f64 = (*edge.weight()).into(); if c_a == c_b { internal_edge_weights[c_a] += w; From 12f5364533cb11fc34a7c2b9dfb02887b4d865a8 Mon Sep 17 00:00:00 2001 From: jpacold Date: Tue, 3 Sep 2024 22:04:41 -0600 Subject: [PATCH 14/29] Clean up `Partition` --- rustworkx-core/src/community/metrics.rs | 66 ++++++++----------------- 1 file changed, 20 insertions(+), 46 deletions(-) diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 6cb6193b18..73f9f84f44 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -27,19 +27,6 @@ impl< { } -// pub fn index_map_from_subsets(subsets: &[HashSet]) -> HashMap -// where -// N: Hash + Copy + Eq, -// { -// let mut h = HashMap::with_capacity(subsets.iter().map(|s| s.len()).sum()); -// for (ii, s) in subsets.iter().enumerate() { -// for &n in s { -// h.insert(n, ii); -// } -// } -// h -// } - pub struct Partition<'g, G> where G: ModularityComputable, @@ -48,18 +35,12 @@ where n_subsets: usize, node_to_subset: Vec, } -struct PartitionEdgeWeights { - pub internal: Vec, - pub outgoing: Vec, - pub incoming: Option>, -} impl<'g, G: ModularityComputable> Partition<'g, G> { pub fn new( graph: &'g G, subsets: &[HashSet], ) -> Result, NotAPartitionError> { - // Move this into a separate helper function let mut seen = vec![false; graph.node_count()]; let mut node_to_subset = vec![0; graph.node_count()]; @@ -81,15 +62,15 @@ impl<'g, 
G: ModularityComputable> Partition<'g, G> { } Ok(Partition::<'g, G> { - graph: graph, + graph, n_subsets: subsets.len(), - node_to_subset: node_to_subset, + node_to_subset, }) } pub fn new_isolated_nodes(graph: &'g G) -> Partition<'g, G> { Partition { - graph: graph, + graph, n_subsets: graph.node_count(), node_to_subset: (0..graph.node_count()).collect(), } @@ -105,6 +86,10 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { self.node_to_subset[idx] } + pub fn n_subsets(&self) -> usize { + self.n_subsets + } + pub fn to_vec_of_hashsets(&self) -> Vec> { let mut v = vec![HashSet::new(); self.n_subsets]; for (idx, &s) in self.node_to_subset.iter().enumerate() { @@ -114,12 +99,12 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { v } - fn partition_edge_weights(&self) -> PartitionEdgeWeights { - let mut internal_edge_weights = vec![0.0; self.n_subsets]; - let mut outgoing_edge_weights = vec![0.0; self.n_subsets]; + pub fn modularity(&self, resolution: f64) -> f64 { + let mut internal_weights = vec![0.0; self.n_subsets]; + let mut outgoing_weights = vec![0.0; self.n_subsets]; let directed = self.graph.is_directed(); - let mut incoming_edge_weights = if directed { + let mut incoming_weights = if directed { Some(vec![0.0; self.n_subsets]) } else { None @@ -130,36 +115,26 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { let (c_a, c_b) = (self.subset_idx(a), self.subset_idx(b)); let w: f64 = (*edge.weight()).into(); if c_a == c_b { - internal_edge_weights[c_a] += w; + internal_weights[c_a] += w; } - outgoing_edge_weights[c_a] += w; - if let Some(ref mut incoming) = incoming_edge_weights { + outgoing_weights[c_a] += w; + if let Some(ref mut incoming) = incoming_weights { incoming[c_b] += w; } else { - outgoing_edge_weights[c_b] += w; + outgoing_weights[c_b] += w; } } - PartitionEdgeWeights { - internal: internal_edge_weights, - outgoing: outgoing_edge_weights, - incoming: incoming_edge_weights, - } - } - - pub fn modularity(&self, resolution: f64) -> f64 { - 
let weights = self.partition_edge_weights(); + let sigma_internal: f64 = internal_weights.iter().sum(); - let sigma_internal: f64 = weights.internal.iter().sum(); - - let sigma_total_squared: f64 = if let Some(incoming) = weights.incoming { + let sigma_total_squared: f64 = if let Some(incoming) = incoming_weights { incoming .iter() - .zip(weights.outgoing.iter()) + .zip(outgoing_weights.iter()) .map(|(&x, &y)| x * y) .sum() } else { - weights.outgoing.iter().map(|&x| x * x).sum::() / 4.0 + outgoing_weights.iter().map(|&x| x * x).sum::() / 4.0 }; let m: f64 = total_edge_weight(self.graph); @@ -175,8 +150,7 @@ pub fn modularity( where G: ModularityComputable, { - let partition = Partition::new(&graph, &communities)?; - + let partition = Partition::new(&graph, communities)?; Ok(partition.modularity(resolution)) } From fda816cc6d9f72a2c48c6aa72f17f933ef8dd534 Mon Sep 17 00:00:00 2001 From: jpacold Date: Tue, 3 Sep 2024 22:38:57 -0600 Subject: [PATCH 15/29] Avoid copying the whole input graph on the first pass --- rustworkx-core/src/community/louvain.rs | 138 ++++++++++++++++++------ rustworkx-core/src/community/metrics.rs | 7 +- 2 files changed, 111 insertions(+), 34 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index e9bb0ee014..9791dc50ac 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -1,12 +1,112 @@ use super::metrics::{ModularityComputable, Partition}; use super::utils::total_edge_weight; +use petgraph::EdgeDirection; use petgraph::{graph::UnGraph, visit::EdgeRef}; use rand::SeedableRng; use rand_pcg::Pcg64; use std::collections::{HashMap, HashSet}; -fn one_level_undirected<'g, G>( +struct InnerGraph<'g, G> +where + G: ModularityComputable, +{ + input_graph: &'g G, + inner_graph: Option>, +} + +impl<'g, G: ModularityComputable> InnerGraph<'g, G> { + pub fn new(graph: &'g G, partition: &Partition) -> InnerGraph<'g, G> { + if partition.n_subsets() == 
graph.node_count() { + // At the start of the Louvain algorithm we put each node from the + // input graph into an isolated partition. At this stage we want to + // avoid copying the entire input graph. + return InnerGraph { + input_graph: graph, + inner_graph: None, + }; + } + + let mut edges: HashMap<(usize, usize), f64> = HashMap::new(); + for e in graph.edge_references() { + let (a, b) = ( + partition.subset_idx(e.source()), + partition.subset_idx(e.target()), + ); + let w: f64 = (*e.weight()).into(); + edges.entry((a, b)).and_modify(|x| *x += w).or_insert(w); + } + + InnerGraph { + input_graph: graph, + inner_graph: Some(UnGraph::from_edges( + edges.iter().map(|(k, &v)| (k.0, k.1, v)), + )), + } + } + + pub fn node_count(&self) -> usize { + if let Some(g) = &self.inner_graph { + g.node_count() + } else { + self.input_graph.node_count() + } + } + + pub fn degrees(&self) -> Vec { + let mut degrees = vec![0.0; self.node_count()]; + if let Some(g) = &self.inner_graph { + for e in g.edge_references() { + let w = e.weight(); + degrees[e.source().index()] += w; + degrees[e.target().index()] += w; + } + } else { + for e in self.input_graph.edge_references() { + let w = (*e.weight()).into(); + let (a, b) = ( + self.input_graph.to_index(e.source()), + self.input_graph.to_index(e.target()), + ); + degrees[a] += w; + degrees[b] += w; + } + } + degrees + } + + pub fn neighbor_community_weights( + &self, + idx: usize, + node_to_community: &Vec, + ) -> HashMap { + let mut weights = HashMap::new(); + + let mut add_weight = |n: usize, w: f64| { + let com = node_to_community[n]; + weights.entry(com).and_modify(|x| *x += w).or_insert(w); + }; + + if let Some(g) = &self.inner_graph { + for edge in g.edges_directed(idx.into(), EdgeDirection::Outgoing) { + let n = edge.target().index(); + add_weight(n, *edge.weight()); + } + } else { + let node = self.input_graph.from_index(idx); + for edge in self + .input_graph + .edges_directed(node, EdgeDirection::Outgoing) + { + let n = 
self.input_graph.to_index(edge.target()); + add_weight(n, (*edge.weight()).into()); + } + } + weights + } +} + +fn one_level_undirected( graph: &G, partition: &mut Partition, m: f64, @@ -17,30 +117,14 @@ fn one_level_undirected<'g, G>( where G: ModularityComputable, { - let mut edges: HashMap<(usize, usize), f64> = HashMap::new(); - for e in graph.edge_references() { - let (a, b) = ( - partition.subset_idx(e.source()), - partition.subset_idx(e.target()), - ); - let w: f64 = (*e.weight()).into(); - edges.entry((a, b)).and_modify(|x| *x += w).or_insert(w); - } - - let aggregated_graph: UnGraph<(), f64, usize> = - UnGraph::from_edges(edges.iter().map(|(k, &v)| (k.0, k.1, v))); - let node_count = aggregated_graph.node_count(); + let inner_graph = InnerGraph::new(graph, partition); - let mut node_to_community: Vec = (0..node_count).collect(); + let node_count = inner_graph.node_count(); - let mut degrees = vec![0.0; node_count]; - for e in aggregated_graph.edge_references() { - let w = e.weight(); - degrees[e.source().index()] += w; - degrees[e.target().index()] += w; - } + let mut degrees = inner_graph.degrees(); let mut s_tot = degrees.clone(); + let mut node_to_community: Vec = (0..node_count).collect(); let mut total_gain = 0.0; loop { let mut performed_move = false; @@ -51,17 +135,7 @@ where }; for node in rand::seq::index::sample(&mut node_shuffle, node_count, node_count) { - let mut neighbor_weights: HashMap = HashMap::new(); - for nbr in aggregated_graph.neighbors_undirected(node.into()) { - for e in aggregated_graph.edges_connecting(node.into(), nbr) { - let w = e.weight(); - let com = node_to_community[nbr.index()]; - neighbor_weights - .entry(com) - .and_modify(|x| *x += w) - .or_insert(*w); - } - } + let neighbor_weights = inner_graph.neighbor_community_weights(node, &node_to_community); let mut best_gain = 0.0; let init_com = node_to_community[node]; diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 
73f9f84f44..7dbee1c372 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -2,7 +2,8 @@ use super::utils::total_edge_weight; use super::NotAPartitionError; use petgraph::visit::{ - Data, EdgeRef, GraphProp, IntoEdgeReferences, IntoNodeReferences, NodeCount, NodeIndexable, + Data, EdgeRef, GraphProp, IntoEdgeReferences, IntoEdgesDirected, IntoNodeReferences, NodeCount, + NodeIndexable, }; use std::collections::HashSet; use std::hash::Hash; @@ -14,6 +15,7 @@ pub trait ModularityComputable: + NodeCount + IntoNodeReferences + NodeIndexable + + IntoEdgesDirected { } impl< @@ -22,7 +24,8 @@ impl< + IntoEdgeReferences + NodeCount + IntoNodeReferences - + NodeIndexable, + + NodeIndexable + + IntoEdgesDirected, > ModularityComputable for Graph { } From 611d08a98047a46944898c0e83ed6e4090169414 Mon Sep 17 00:00:00 2001 From: jpacold Date: Tue, 3 Sep 2024 23:51:14 -0600 Subject: [PATCH 16/29] Add comments --- rustworkx-core/src/community/louvain.rs | 56 +++++++++++++++++++++++-- rustworkx-core/src/community/metrics.rs | 22 ++++++++++ 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 9791dc50ac..b6f673eaa1 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -7,6 +7,9 @@ use rand::SeedableRng; use rand_pcg::Pcg64; use std::collections::{HashMap, HashSet}; +/// Struct that holds an "inner graph" for one level of the Louvain algorithm, +/// i.e. a graph in which each community from the previous level is treated +/// as a single node. struct InnerGraph<'g, G> where G: ModularityComputable, @@ -16,25 +19,37 @@ where } impl<'g, G: ModularityComputable> InnerGraph<'g, G> { + /// Compute the inner graph for a given partition. 
+ /// ToDo: fix redundant arguments pub fn new(graph: &'g G, partition: &Partition) -> InnerGraph<'g, G> { if partition.n_subsets() == graph.node_count() { // At the start of the Louvain algorithm we put each node from the - // input graph into an isolated partition. At this stage we want to - // avoid copying the entire input graph. + // input graph into its own commnuity, so the inner graph is the + // same as the input graph. We should avoid copying the input. return InnerGraph { input_graph: graph, inner_graph: None, }; } + // Construct a new graph where: + // - Node `n_i` corresponds to the `i`th community in the partition + // - Nodes `n_i` and `n_j` have an edge with weight `w`, where `w` is + // the sum of all edge weights connecting nodes in `n_i` and `n_j`. + // (including self-loops) let mut edges: HashMap<(usize, usize), f64> = HashMap::new(); for e in graph.edge_references() { let (a, b) = ( partition.subset_idx(e.source()), partition.subset_idx(e.target()), ); + let inner_edge = if graph.is_directed() { + (std::cmp::min(a, b), std::cmp::max(a, b)) + } else { + (a, b) + }; let w: f64 = (*e.weight()).into(); - edges.entry((a, b)).and_modify(|x| *x += w).or_insert(w); + edges.entry(inner_edge).and_modify(|x| *x += w).or_insert(w); } InnerGraph { @@ -45,6 +60,7 @@ impl<'g, G: ModularityComputable> InnerGraph<'g, G> { } } + /// Returns the number of nodes in the inner graph pub fn node_count(&self) -> usize { if let Some(g) = &self.inner_graph { g.node_count() @@ -53,6 +69,8 @@ impl<'g, G: ModularityComputable> InnerGraph<'g, G> { } } + /// Returns a vector `w` where `w[i]` is the total weight of the + /// edges incident on the `i`th node. pub fn degrees(&self) -> Vec { let mut degrees = vec![0.0; self.node_count()]; if let Some(g) = &self.inner_graph { @@ -75,10 +93,13 @@ impl<'g, G: ModularityComputable> InnerGraph<'g, G> { degrees } + /// Given a node index `idx`, returns a map `w`. 
For each neighbor + /// `nbr` of `idx`, `w[nbr]` is the total weight of all the edges + /// connecting `idx` and `nbr`. pub fn neighbor_community_weights( &self, idx: usize, - node_to_community: &Vec, + node_to_community: &[usize], ) -> HashMap { let mut weights = HashMap::new(); @@ -106,6 +127,18 @@ impl<'g, G: ModularityComputable> InnerGraph<'g, G> { } } +/// Performs one level of the Louvain algorithm. +/// +/// Arguments: +/// +/// * `graph`: The input graph +/// * `partition`: The current partition of the input graph +/// * `m`: Total weight of the edges of `graph` +/// * `resolution` : controls whether the algorithm favors larger communities (`resolution < 1`) or smaller communities (`resolution < 1`) +/// * `gain_threshold` : minimum acceptable increase in modularity +/// * `seed` : optional seed to determine the order in which we consider moving each node into a neighboring community +/// +/// Returns true if it was possible to meet the specified `gain_threshold` by combining nodes into communities. fn one_level_undirected( graph: &G, partition: &mut Partition, @@ -124,6 +157,7 @@ where let mut degrees = inner_graph.degrees(); let mut s_tot = degrees.clone(); + // Start by placing each node into its own community let mut node_to_community: Vec = (0..node_count).collect(); let mut total_gain = 0.0; loop { @@ -134,6 +168,9 @@ where None => Pcg64::from_entropy(), }; + // Try moving each node into a neighboring community, in the order + // determined by `seed`. For each node, select the neighboring community + // that gives the largest increase in modularity (if any). 
for node in rand::seq::index::sample(&mut node_shuffle, node_count, node_count) { let neighbor_weights = inner_graph.neighbor_community_weights(node, &node_to_community); @@ -177,6 +214,7 @@ where return false; } + // Compute the resulting new partition of the input graph let mut final_index = HashMap::new(); let mut next_com = 0; let mut updated_partition: Vec = vec![0; node_count]; @@ -199,6 +237,16 @@ where true } +/// Runs the Louvain community detection algorithm. +/// +/// Arguments: +/// +/// * `graph`: The input graph +/// * `resolution` : controls whether the algorithm favors larger communities (`resolution < 1`) or smaller communities (`resolution < 1`) +/// * `gain_threshold` : minimum acceptable increase in modularity at each level. The algorithm will +/// terminate if it is not possible to meet this threshold by performing another level of aggregation. +/// * `max_level` : Maximum number of levels (aggregation steps) to perform +/// * `seed` : seed for RNG that determines the order in which we consider moving each node into a neighboring community pub fn louvain_communities( graph: G, resolution: f64, diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 7dbee1c372..b754a4ee72 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -8,6 +8,8 @@ use petgraph::visit::{ use std::collections::HashSet; use std::hash::Hash; +/// Trait for graphs for which it is possible to compute modularity +/// and apply the Louvain community detection method. pub trait ModularityComputable: Data + Copy, NodeId: Hash + Eq + Copy> + GraphProp @@ -30,6 +32,9 @@ impl< { } +/// Struct representing a partition of a graph as a vector +/// `[s_0, ... s_n]`, where `n` is the number of nodes in +/// the graph and node `i` belongs to subset `s_i`. 
pub struct Partition<'g, G> where G: ModularityComputable, @@ -40,6 +45,8 @@ where } impl<'g, G: ModularityComputable> Partition<'g, G> { + /// Creates a `Partition` from sets of graph nodes. Checks whether the + /// sets actually form a partition of the input graph. pub fn new( graph: &'g G, subsets: &[HashSet], @@ -71,6 +78,8 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { }) } + /// Creates a partition where each node of the input graph is placed + /// into its own subset (e.g. for the first step of the Louvain algorithm). pub fn new_isolated_nodes(graph: &'g G) -> Partition<'g, G> { Partition { graph, @@ -79,20 +88,25 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { } } + /// Replaces the current partition. The argument `new_partition` should be + /// a vector of size `n` (where `n` is the number of nodes in `self.graph`). pub fn update(&mut self, new_partition: Vec) { self.node_to_subset = new_partition; self.n_subsets = *self.node_to_subset.iter().max().unwrap_or(&0) + 1; } + /// Returns the index of the subset that contains `node`. pub fn subset_idx(&self, node: G::NodeId) -> usize { let idx = self.graph.to_index(node); self.node_to_subset[idx] } + /// Returns the number of subsets in the current partition. pub fn n_subsets(&self) -> usize { self.n_subsets } + /// Returns the current graph partition as a vector of sets of `NodeId`. pub fn to_vec_of_hashsets(&self) -> Vec> { let mut v = vec![HashSet::new(); self.n_subsets]; for (idx, &s) in self.node_to_subset.iter().enumerate() { @@ -102,6 +116,7 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { v } + /// Returns the modularity of the graph with the current partition. pub fn modularity(&self, resolution: f64) -> f64 { let mut internal_weights = vec![0.0; self.n_subsets]; let mut outgoing_weights = vec![0.0; self.n_subsets]; @@ -145,6 +160,13 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { } } +/// Computes the modularity of a graph, given a partition of its nodes. 
+/// +/// Arguments: +/// +/// * `graph` - The input graph +/// * `communities` - Sets of nodes that form a partition of `graph` +/// * `resolution` - Controls the relative weight of intra-community and inter-community edges pub fn modularity( graph: G, communities: &[HashSet], From 885045c2344b9c6cf160fce01748994b49fa25a9 Mon Sep 17 00:00:00 2001 From: jpacold Date: Wed, 4 Sep 2024 20:12:17 -0600 Subject: [PATCH 17/29] Make `InnerGraph` an enum --- rustworkx-core/src/community/louvain.rs | 100 ++++++++++++------------ rustworkx-core/src/community/metrics.rs | 36 ++++----- rustworkx-core/src/community/mod.rs | 2 +- rustworkx-core/src/community/utils.rs | 4 +- 4 files changed, 71 insertions(+), 71 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index b6f673eaa1..ddcef34d83 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -1,4 +1,4 @@ -use super::metrics::{ModularityComputable, Partition}; +use super::metrics::{Louvain, Partition}; use super::utils::total_edge_weight; use petgraph::EdgeDirection; @@ -7,18 +7,24 @@ use rand::SeedableRng; use rand_pcg::Pcg64; use std::collections::{HashMap, HashSet}; -/// Struct that holds an "inner graph" for one level of the Louvain algorithm, +/// Enum that holds an "inner graph" for one level of the Louvain algorithm, /// i.e. a graph in which each community from the previous level is treated /// as a single node. -struct InnerGraph<'g, G> +/// +/// For the first stage of the algorithm, each node from the input graph +/// start out in its own community, so the inner graph is the same as the +/// input graph. In this case we avoid copying the input. 
+enum InnerGraph<'g, G> where - G: ModularityComputable, + G: Louvain, { - input_graph: &'g G, - inner_graph: Option>, + Init(&'g G), + Undirected(UnGraph<(), f64, usize>), + // Directed case is not implemented yet + // Directed(DiGraph<(), f64, usize>) } -impl<'g, G: ModularityComputable> InnerGraph<'g, G> { +impl<'g, G: Louvain> InnerGraph<'g, G> { /// Compute the inner graph for a given partition. /// ToDo: fix redundant arguments pub fn new(graph: &'g G, partition: &Partition) -> InnerGraph<'g, G> { @@ -26,10 +32,7 @@ impl<'g, G: ModularityComputable> InnerGraph<'g, G> { // At the start of the Louvain algorithm we put each node from the // input graph into its own commnuity, so the inner graph is the // same as the input graph. We should avoid copying the input. - return InnerGraph { - input_graph: graph, - inner_graph: None, - }; + return InnerGraph::Init(graph); } // Construct a new graph where: @@ -52,20 +55,16 @@ impl<'g, G: ModularityComputable> InnerGraph<'g, G> { edges.entry(inner_edge).and_modify(|x| *x += w).or_insert(w); } - InnerGraph { - input_graph: graph, - inner_graph: Some(UnGraph::from_edges( - edges.iter().map(|(k, &v)| (k.0, k.1, v)), - )), - } + InnerGraph::Undirected(UnGraph::from_edges( + edges.iter().map(|(k, &v)| (k.0, k.1, v)), + )) } /// Returns the number of nodes in the inner graph pub fn node_count(&self) -> usize { - if let Some(g) = &self.inner_graph { - g.node_count() - } else { - self.input_graph.node_count() + match self { + InnerGraph::Init(&g) => g.node_count(), + InnerGraph::Undirected(g) => g.node_count(), } } @@ -73,21 +72,21 @@ impl<'g, G: ModularityComputable> InnerGraph<'g, G> { /// edges incident on the `i`th node. 
pub fn degrees(&self) -> Vec { let mut degrees = vec![0.0; self.node_count()]; - if let Some(g) = &self.inner_graph { - for e in g.edge_references() { - let w = e.weight(); - degrees[e.source().index()] += w; - degrees[e.target().index()] += w; + match self { + InnerGraph::Init(&g) => { + for e in g.edge_references() { + let w = (*e.weight()).into(); + let (a, b) = (g.to_index(e.source()), g.to_index(e.target())); + degrees[a] += w; + degrees[b] += w; + } } - } else { - for e in self.input_graph.edge_references() { - let w = (*e.weight()).into(); - let (a, b) = ( - self.input_graph.to_index(e.source()), - self.input_graph.to_index(e.target()), - ); - degrees[a] += w; - degrees[b] += w; + InnerGraph::Undirected(g) => { + for e in g.edge_references() { + let w = e.weight(); + degrees[e.source().index()] += w; + degrees[e.target().index()] += w; + } } } degrees @@ -108,21 +107,22 @@ impl<'g, G: ModularityComputable> InnerGraph<'g, G> { weights.entry(com).and_modify(|x| *x += w).or_insert(w); }; - if let Some(g) = &self.inner_graph { - for edge in g.edges_directed(idx.into(), EdgeDirection::Outgoing) { - let n = edge.target().index(); - add_weight(n, *edge.weight()); + match self { + InnerGraph::Init(&g) => { + let node = g.from_index(idx); + for edge in g.edges_directed(node, EdgeDirection::Outgoing) { + let n = g.to_index(edge.target()); + add_weight(n, (*edge.weight()).into()); + } } - } else { - let node = self.input_graph.from_index(idx); - for edge in self - .input_graph - .edges_directed(node, EdgeDirection::Outgoing) - { - let n = self.input_graph.to_index(edge.target()); - add_weight(n, (*edge.weight()).into()); + InnerGraph::Undirected(g) => { + for edge in g.edges_directed(idx.into(), EdgeDirection::Outgoing) { + let n = edge.target().index(); + add_weight(n, *edge.weight()); + } } } + weights } } @@ -148,7 +148,7 @@ fn one_level_undirected( seed: Option, ) -> bool where - G: ModularityComputable, + G: Louvain, { let inner_graph = InnerGraph::new(graph, 
partition); @@ -255,9 +255,9 @@ pub fn louvain_communities( seed: Option, ) -> Vec> where - G: ModularityComputable, + G: Louvain, { - let mut partition = Partition::new_isolated_nodes(&graph); + let mut partition = Partition::new(&graph); let m = total_edge_weight(&graph); diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index b754a4ee72..3862c17304 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -10,7 +10,7 @@ use std::hash::Hash; /// Trait for graphs for which it is possible to compute modularity /// and apply the Louvain community detection method. -pub trait ModularityComputable: +pub trait Louvain: Data + Copy, NodeId: Hash + Eq + Copy> + GraphProp + IntoEdgeReferences @@ -21,14 +21,14 @@ pub trait ModularityComputable: { } impl< - Graph: Data + Copy, NodeId: Hash + Eq + Copy> + G: Data + Copy, NodeId: Hash + Eq + Copy> + GraphProp + IntoEdgeReferences + NodeCount + IntoNodeReferences + NodeIndexable + IntoEdgesDirected, - > ModularityComputable for Graph + > Louvain for G { } @@ -37,17 +37,27 @@ impl< /// the graph and node `i` belongs to subset `s_i`. pub struct Partition<'g, G> where - G: ModularityComputable, + G: Louvain, { graph: &'g G, n_subsets: usize, node_to_subset: Vec, } -impl<'g, G: ModularityComputable> Partition<'g, G> { +impl<'g, G: Louvain> Partition<'g, G> { + /// Creates a partition where each node of the input graph is placed + /// into its own subset, e.g. for the first step of the Louvain algorithm. + pub fn new(graph: &'g G) -> Partition<'g, G> { + Partition { + graph, + n_subsets: graph.node_count(), + node_to_subset: (0..graph.node_count()).collect(), + } + } + /// Creates a `Partition` from sets of graph nodes. Checks whether the /// sets actually form a partition of the input graph. 
- pub fn new( + pub fn from_subsets( graph: &'g G, subsets: &[HashSet], ) -> Result, NotAPartitionError> { @@ -78,16 +88,6 @@ impl<'g, G: ModularityComputable> Partition<'g, G> { }) } - /// Creates a partition where each node of the input graph is placed - /// into its own subset (e.g. for the first step of the Louvain algorithm). - pub fn new_isolated_nodes(graph: &'g G) -> Partition<'g, G> { - Partition { - graph, - n_subsets: graph.node_count(), - node_to_subset: (0..graph.node_count()).collect(), - } - } - /// Replaces the current partition. The argument `new_partition` should be /// a vector of size `n` (where `n` is the number of nodes in `self.graph`). pub fn update(&mut self, new_partition: Vec) { @@ -173,9 +173,9 @@ pub fn modularity( resolution: f64, ) -> Result where - G: ModularityComputable, + G: Louvain, { - let partition = Partition::new(&graph, communities)?; + let partition = Partition::from_subsets(&graph, communities)?; Ok(partition.modularity(resolution)) } diff --git a/rustworkx-core/src/community/mod.rs b/rustworkx-core/src/community/mod.rs index 4fdbff1334..ea79efaa9d 100644 --- a/rustworkx-core/src/community/mod.rs +++ b/rustworkx-core/src/community/mod.rs @@ -1,5 +1,5 @@ mod metrics; -pub use metrics::{modularity, ModularityComputable}; +pub use metrics::{modularity, Louvain}; mod louvain; pub use louvain::louvain_communities; diff --git a/rustworkx-core/src/community/utils.rs b/rustworkx-core/src/community/utils.rs index 76452d2cf1..3f23bb7542 100644 --- a/rustworkx-core/src/community/utils.rs +++ b/rustworkx-core/src/community/utils.rs @@ -1,9 +1,9 @@ -use super::ModularityComputable; +use super::Louvain; use petgraph::visit::EdgeRef; pub fn total_edge_weight(graph: &G) -> f64 where - G: ModularityComputable, + G: Louvain, { graph .edge_references() From 7a2301cf21fe1f88d28243877610e80afed4563e Mon Sep 17 00:00:00 2001 From: jpacold Date: Wed, 4 Sep 2024 20:28:46 -0600 Subject: [PATCH 18/29] Separate Louvain algorithm code from modularity 
code --- rustworkx-core/src/community/louvain.rs | 110 +++++++++++++++--------- rustworkx-core/src/community/metrics.rs | 38 ++------ rustworkx-core/src/community/mod.rs | 2 +- rustworkx-core/src/community/utils.rs | 4 +- 4 files changed, 80 insertions(+), 74 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index ddcef34d83..5f1ec2ab9a 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -1,4 +1,4 @@ -use super::metrics::{Louvain, Partition}; +use super::metrics::{Modularity, Partition}; use super::utils::total_edge_weight; use petgraph::EdgeDirection; @@ -16,7 +16,7 @@ use std::collections::{HashMap, HashSet}; /// input graph. In this case we avoid copying the input. enum InnerGraph<'g, G> where - G: Louvain, + G: Modularity, { Init(&'g G), Undirected(UnGraph<(), f64, usize>), @@ -24,42 +24,7 @@ where // Directed(DiGraph<(), f64, usize>) } -impl<'g, G: Louvain> InnerGraph<'g, G> { - /// Compute the inner graph for a given partition. - /// ToDo: fix redundant arguments - pub fn new(graph: &'g G, partition: &Partition) -> InnerGraph<'g, G> { - if partition.n_subsets() == graph.node_count() { - // At the start of the Louvain algorithm we put each node from the - // input graph into its own commnuity, so the inner graph is the - // same as the input graph. We should avoid copying the input. - return InnerGraph::Init(graph); - } - - // Construct a new graph where: - // - Node `n_i` corresponds to the `i`th community in the partition - // - Nodes `n_i` and `n_j` have an edge with weight `w`, where `w` is - // the sum of all edge weights connecting nodes in `n_i` and `n_j`. 
- // (including self-loops) - let mut edges: HashMap<(usize, usize), f64> = HashMap::new(); - for e in graph.edge_references() { - let (a, b) = ( - partition.subset_idx(e.source()), - partition.subset_idx(e.target()), - ); - let inner_edge = if graph.is_directed() { - (std::cmp::min(a, b), std::cmp::max(a, b)) - } else { - (a, b) - }; - let w: f64 = (*e.weight()).into(); - edges.entry(inner_edge).and_modify(|x| *x += w).or_insert(w); - } - - InnerGraph::Undirected(UnGraph::from_edges( - edges.iter().map(|(k, &v)| (k.0, k.1, v)), - )) - } - +impl<'g, G: Modularity> InnerGraph<'g, G> { /// Returns the number of nodes in the inner graph pub fn node_count(&self) -> usize { match self { @@ -127,6 +92,69 @@ impl<'g, G: Louvain> InnerGraph<'g, G> { } } +/// Trait for additional functions used int the Louvain algorithm. Since the idea +/// is to compute increasingly coarse partitions of the input graph, we implement +/// these for `Partition`. +trait LouvainAlgo<'g, G> +where + G: Modularity, +{ + /// Compute the inner graph for a given partition. + fn to_inner_graph(&self) -> InnerGraph<'g, G>; + + /// Replaces the current partition. The argument `new_partition` should be + /// a vector of size `n` (where `n` is the number of nodes in `self.graph`). + fn update(&mut self, new_partition: Vec); + + /// Returns the current graph partition as a vector of sets of `NodeId`, for + /// example to return to the Python layer. + fn to_vec_of_hashsets(&self) -> Vec>; +} + +impl<'g, G: Modularity> LouvainAlgo<'g, G> for Partition<'g, G> { + fn to_inner_graph(&self) -> InnerGraph<'g, G> { + if self.n_subsets == self.graph.node_count() { + return InnerGraph::Init(self.graph); + } + + // Construct a new graph where: + // - Node `n_i` corresponds to the `i`th community in the partition + // - Nodes `n_i` and `n_j` have an edge with weight `w`, where `w` is + // the sum of all edge weights connecting nodes in `n_i` and `n_j`. 
+ // (including self-loops) + let mut edges: HashMap<(usize, usize), f64> = HashMap::new(); + for e in self.graph.edge_references() { + let (a, b) = (self.subset_idx(e.source()), self.subset_idx(e.target())); + let inner_edge = if self.graph.is_directed() { + (std::cmp::min(a, b), std::cmp::max(a, b)) + } else { + (a, b) + }; + let w: f64 = (*e.weight()).into(); + edges.entry(inner_edge).and_modify(|x| *x += w).or_insert(w); + } + + InnerGraph::Undirected(UnGraph::from_edges( + edges.iter().map(|(k, &v)| (k.0, k.1, v)), + )) + } + + fn update(&mut self, new_partition: Vec) { + self.node_to_subset = new_partition; + self.n_subsets = *self.node_to_subset.iter().max().unwrap_or(&0) + 1; + } + + /// Returns the current graph partition as a vector of sets of `NodeId`. + fn to_vec_of_hashsets(&self) -> Vec> { + let mut v = vec![HashSet::new(); self.n_subsets]; + for (idx, &s) in self.node_to_subset.iter().enumerate() { + let node = self.graph.from_index(idx); + v[s].insert(node); + } + v + } +} + /// Performs one level of the Louvain algorithm. /// /// Arguments: @@ -148,9 +176,9 @@ fn one_level_undirected( seed: Option, ) -> bool where - G: Louvain, + G: Modularity, { - let inner_graph = InnerGraph::new(graph, partition); + let inner_graph = partition.to_inner_graph(); let node_count = inner_graph.node_count(); @@ -255,7 +283,7 @@ pub fn louvain_communities( seed: Option, ) -> Vec> where - G: Louvain, + G: Modularity, { let mut partition = Partition::new(&graph); diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 3862c17304..922ab3c0cb 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -10,7 +10,7 @@ use std::hash::Hash; /// Trait for graphs for which it is possible to compute modularity /// and apply the Louvain community detection method. 
-pub trait Louvain: +pub trait Modularity: Data + Copy, NodeId: Hash + Eq + Copy> + GraphProp + IntoEdgeReferences @@ -28,7 +28,7 @@ impl< + IntoNodeReferences + NodeIndexable + IntoEdgesDirected, - > Louvain for G + > Modularity for G { } @@ -37,14 +37,14 @@ impl< /// the graph and node `i` belongs to subset `s_i`. pub struct Partition<'g, G> where - G: Louvain, + G: Modularity, { - graph: &'g G, - n_subsets: usize, - node_to_subset: Vec, + pub graph: &'g G, + pub n_subsets: usize, + pub node_to_subset: Vec, } -impl<'g, G: Louvain> Partition<'g, G> { +impl<'g, G: Modularity> Partition<'g, G> { /// Creates a partition where each node of the input graph is placed /// into its own subset, e.g. for the first step of the Louvain algorithm. pub fn new(graph: &'g G) -> Partition<'g, G> { @@ -88,34 +88,12 @@ impl<'g, G: Louvain> Partition<'g, G> { }) } - /// Replaces the current partition. The argument `new_partition` should be - /// a vector of size `n` (where `n` is the number of nodes in `self.graph`). - pub fn update(&mut self, new_partition: Vec) { - self.node_to_subset = new_partition; - self.n_subsets = *self.node_to_subset.iter().max().unwrap_or(&0) + 1; - } - /// Returns the index of the subset that contains `node`. pub fn subset_idx(&self, node: G::NodeId) -> usize { let idx = self.graph.to_index(node); self.node_to_subset[idx] } - /// Returns the number of subsets in the current partition. - pub fn n_subsets(&self) -> usize { - self.n_subsets - } - - /// Returns the current graph partition as a vector of sets of `NodeId`. - pub fn to_vec_of_hashsets(&self) -> Vec> { - let mut v = vec![HashSet::new(); self.n_subsets]; - for (idx, &s) in self.node_to_subset.iter().enumerate() { - let node = self.graph.from_index(idx); - v[s].insert(node); - } - v - } - /// Returns the modularity of the graph with the current partition. 
pub fn modularity(&self, resolution: f64) -> f64 { let mut internal_weights = vec![0.0; self.n_subsets]; @@ -173,7 +151,7 @@ pub fn modularity( resolution: f64, ) -> Result where - G: Louvain, + G: Modularity, { let partition = Partition::from_subsets(&graph, communities)?; Ok(partition.modularity(resolution)) diff --git a/rustworkx-core/src/community/mod.rs b/rustworkx-core/src/community/mod.rs index ea79efaa9d..f4ab78e47a 100644 --- a/rustworkx-core/src/community/mod.rs +++ b/rustworkx-core/src/community/mod.rs @@ -1,5 +1,5 @@ mod metrics; -pub use metrics::{modularity, Louvain}; +pub use metrics::{modularity, Modularity}; mod louvain; pub use louvain::louvain_communities; diff --git a/rustworkx-core/src/community/utils.rs b/rustworkx-core/src/community/utils.rs index 3f23bb7542..64a5513336 100644 --- a/rustworkx-core/src/community/utils.rs +++ b/rustworkx-core/src/community/utils.rs @@ -1,9 +1,9 @@ -use super::Louvain; +use super::Modularity; use petgraph::visit::EdgeRef; pub fn total_edge_weight(graph: &G) -> f64 where - G: Louvain, + G: Modularity, { graph .edge_references() From c0e2de3ffe6612b32fe74508c796f830e8c3c2ff Mon Sep 17 00:00:00 2001 From: jpacold Date: Wed, 4 Sep 2024 21:19:54 -0600 Subject: [PATCH 19/29] Add karate club test --- rustworkx-core/src/community/louvain.rs | 120 ++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 8 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 5f1ec2ab9a..82fe9f8acf 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -4,7 +4,7 @@ use super::utils::total_edge_weight; use petgraph::EdgeDirection; use petgraph::{graph::UnGraph, visit::EdgeRef}; use rand::SeedableRng; -use rand_pcg::Pcg64; +use rand_pcg::Pcg32; use std::collections::{HashMap, HashSet}; /// Enum that holds an "inner graph" for one level of the Louvain algorithm, @@ -191,9 +191,9 @@ where loop { let mut performed_move = false; - let 
mut node_shuffle: Pcg64 = match seed { - Some(rng_seed) => Pcg64::seed_from_u64(rng_seed), - None => Pcg64::from_entropy(), + let mut node_shuffle: Pcg32 = match seed { + Some(rng_seed) => Pcg32::seed_from_u64(rng_seed), + None => Pcg32::from_entropy(), }; // Try moving each node into a neighboring community, in the order @@ -245,7 +245,7 @@ where // Compute the resulting new partition of the input graph let mut final_index = HashMap::new(); let mut next_com = 0; - let mut updated_partition: Vec = vec![0; node_count]; + let mut updated_partition: Vec = vec![0; graph.node_count()]; for n in graph.node_identifiers() { let prev_com = partition.subset_idx(n); @@ -304,14 +304,13 @@ where #[cfg(test)] mod tests { - use crate::community::NotAPartitionError; use crate::generators::barbell_graph; use petgraph::graph::UnGraph; use super::louvain_communities; #[test] - fn test_louvain_barbell_graph() -> Result<(), NotAPartitionError> { + fn test_louvain_barbell_graph() { type G = UnGraph<(), f64>; for n in 3..10 { @@ -325,6 +324,111 @@ mod tests { assert_eq!(result[0].len(), n); assert_eq!(result[1].len(), n); } - Ok(()) + } + + #[test] + fn test_louvain_karate_club_graph() { + let edges = [ + (0, 1, 4.0), + (0, 2, 5.0), + (0, 3, 3.0), + (0, 4, 3.0), + (0, 5, 3.0), + (0, 6, 3.0), + (0, 7, 2.0), + (0, 8, 2.0), + (0, 10, 2.0), + (0, 11, 3.0), + (0, 12, 1.0), + (0, 13, 3.0), + (0, 17, 2.0), + (0, 19, 2.0), + (0, 21, 2.0), + (0, 31, 2.0), + (1, 2, 6.0), + (1, 3, 3.0), + (1, 7, 4.0), + (1, 13, 5.0), + (1, 17, 1.0), + (1, 19, 2.0), + (1, 21, 2.0), + (1, 30, 2.0), + (2, 3, 3.0), + (2, 7, 4.0), + (2, 8, 5.0), + (2, 9, 1.0), + (2, 13, 3.0), + (2, 27, 2.0), + (2, 28, 2.0), + (2, 32, 2.0), + (3, 7, 3.0), + (3, 12, 3.0), + (3, 13, 3.0), + (4, 6, 2.0), + (4, 10, 3.0), + (5, 6, 5.0), + (5, 10, 3.0), + (5, 16, 3.0), + (6, 16, 3.0), + (8, 30, 3.0), + (8, 32, 3.0), + (8, 33, 4.0), + (9, 33, 2.0), + (13, 33, 3.0), + (14, 32, 3.0), + (14, 33, 2.0), + (15, 32, 3.0), + (15, 33, 4.0), + (18, 32, 
1.0), + (18, 33, 2.0), + (19, 33, 1.0), + (20, 32, 3.0), + (20, 33, 1.0), + (22, 32, 2.0), + (22, 33, 3.0), + (23, 25, 5.0), + (23, 27, 4.0), + (23, 29, 3.0), + (23, 32, 5.0), + (23, 33, 4.0), + (24, 25, 2.0), + (24, 27, 3.0), + (24, 31, 2.0), + (25, 31, 7.0), + (26, 29, 4.0), + (26, 33, 2.0), + (27, 33, 4.0), + (28, 31, 2.0), + (28, 33, 2.0), + (29, 32, 4.0), + (29, 33, 2.0), + (30, 32, 3.0), + (30, 33, 3.0), + (31, 32, 4.0), + (31, 33, 4.0), + (32, 33, 5.0), + ]; + let graph: UnGraph<(), f64> = UnGraph::from_edges(edges.iter()); + let communities = louvain_communities(&graph, 1.0, 0.01, None, Some(7)); + + // The result is very sensitive to the random seed. For this seed we + // happen to get the same result as: + // import networkx as nx + // g = nx.karate_club_graph() + // communities = nx.community.louvain_communities(g, weight='weight', seed=12) + let mut vecs: Vec> = communities + .iter() + .map(|h| h.iter().map(|n| n.index()).collect::>()) + .collect::>>(); + for v in vecs.iter_mut() { + v.sort(); + } + assert_eq!(vecs[0], vec![0, 1, 2, 3, 7, 11, 12, 13, 17, 19, 21]); + assert_eq!(vecs[1], vec![4, 5, 6, 10, 16]); + assert_eq!( + vecs[2], + vec![8, 9, 14, 15, 18, 20, 22, 23, 26, 27, 29, 30, 32, 33] + ); + assert_eq!(vecs[3], vec![24, 25, 28, 31]); } } From b3fad588ae419b1c0b3a7db1cb64fa7b9ac77cfc Mon Sep 17 00:00:00 2001 From: jpacold Date: Wed, 4 Sep 2024 21:59:53 -0600 Subject: [PATCH 20/29] Remove redundant argument from `one_level_undirected` --- rustworkx-core/src/community/louvain.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 82fe9f8acf..6933739406 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -144,7 +144,6 @@ impl<'g, G: Modularity> LouvainAlgo<'g, G> for Partition<'g, G> { self.n_subsets = *self.node_to_subset.iter().max().unwrap_or(&0) + 1; } - /// Returns the current 
graph partition as a vector of sets of `NodeId`. fn to_vec_of_hashsets(&self) -> Vec> { let mut v = vec![HashSet::new(); self.n_subsets]; for (idx, &s) in self.node_to_subset.iter().enumerate() { @@ -159,7 +158,6 @@ impl<'g, G: Modularity> LouvainAlgo<'g, G> for Partition<'g, G> { /// /// Arguments: /// -/// * `graph`: The input graph /// * `partition`: The current partition of the input graph /// * `m`: Total weight of the edges of `graph` /// * `resolution` : controls whether the algorithm favors larger communities (`resolution < 1`) or smaller communities (`resolution > 1`) @@ -168,7 +166,6 @@ impl<'g, G: Modularity> LouvainAlgo<'g, G> for Partition<'g, G> { /// /// Returns true if it was possible to meet the specified `gain_threshold` by combining nodes into communities. fn one_level_undirected( - graph: &G, partition: &mut Partition, m: f64, resolution: f64, @@ -243,11 +240,12 @@ where } // Compute the resulting new partition of the input graph + let input_graph = &partition.graph; let mut final_index = HashMap::new(); let mut next_com = 0; - let mut updated_partition: Vec = vec![0; graph.node_count()]; + let mut updated_partition: Vec = vec![0; input_graph.node_count()]; - for n in graph.node_identifiers() { + for n in input_graph.node_identifiers() { let prev_com = partition.subset_idx(n); let inner_com = node_to_community[prev_com]; let new_com = if let Some(&c) = final_index.get(&inner_com) { @@ -258,7 +256,7 @@ where next_com += 1; c }; - updated_partition[graph.to_index(n)] = new_com; + updated_partition[input_graph.to_index(n)] = new_com; } partition.update(updated_partition); @@ -290,7 +288,7 @@ where let m = total_edge_weight(&graph); let mut n_levels = 0; - while one_level_undirected(&graph, &mut partition, m, resolution, gain_threshold, seed) { + while one_level_undirected(&mut partition, m, resolution, gain_threshold, seed) { if let Some(limit) = max_level { n_levels += 1; if n_levels >= limit { From e399a8b117c3962ba5095c4159cf07e90949c827 Mon Sep
17 00:00:00 2001 From: jpacold Date: Sun, 8 Sep 2024 20:21:50 -0600 Subject: [PATCH 21/29] Fix mistake in gain calculation: update current community degrees, not node degrees --- rustworkx-core/src/community/louvain.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 6933739406..d142e70918 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -179,7 +179,7 @@ where let node_count = inner_graph.node_count(); - let mut degrees = inner_graph.degrees(); + let degrees = inner_graph.degrees(); let mut s_tot = degrees.clone(); // Start by placing each node into its own community @@ -205,7 +205,7 @@ where let mut best_com = init_com; let two_m_sq = 2.0 * m * m; - degrees[best_com] -= deg; + s_tot[best_com] -= deg; let delta = if let Some(&w) = neighbor_weights.get(&best_com) { w From ae3d19c12eb459857439ca609a27424a53e2ad3b Mon Sep 17 00:00:00 2001 From: jpacold Date: Sun, 8 Sep 2024 20:35:29 -0600 Subject: [PATCH 22/29] Use degree of node instead of degree of community --- rustworkx-core/src/community/louvain.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index d142e70918..4cad0477db 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -201,7 +201,7 @@ where let mut best_gain = 0.0; let init_com = node_to_community[node]; - let deg = degrees[init_com]; + let deg = degrees[node]; let mut best_com = init_com; let two_m_sq = 2.0 * m * m; @@ -413,7 +413,7 @@ mod tests { // happen to get the same result as: // import networkx as nx // g = nx.karate_club_graph() - // communities = nx.community.louvain_communities(g, weight='weight', seed=12) + // nx.community.louvain_communities(g, weight='weight', resolution=1.0, threshold=1.0e-7, seed=12) let mut vecs: Vec> = 
communities .iter() .map(|h| h.iter().map(|n| n.index()).collect::>()) From bc976b486e31f9181badd142ca217aefba314e8a Mon Sep 17 00:00:00 2001 From: jpacold Date: Sun, 8 Sep 2024 20:40:17 -0600 Subject: [PATCH 23/29] Update karate club test --- rustworkx-core/src/community/louvain.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 4cad0477db..df2ca8896d 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -407,13 +407,14 @@ mod tests { (32, 33, 5.0), ]; let graph: UnGraph<(), f64> = UnGraph::from_edges(edges.iter()); - let communities = louvain_communities(&graph, 1.0, 0.01, None, Some(7)); + let communities = louvain_communities(&graph, 1.0, 1.0e-7, None, Some(1)); // The result is very sensitive to the random seed. For this seed we // happen to get the same result as: // import networkx as nx // g = nx.karate_club_graph() // nx.community.louvain_communities(g, weight='weight', resolution=1.0, threshold=1.0e-7, seed=12) + // ToDo: revisit this test (too sensitive to implementation details) let mut vecs: Vec> = communities .iter() .map(|h| h.iter().map(|n| n.index()).collect::>()) From 12ffc33373695c12c6a25ea29f0d163e0a7fea80 Mon Sep 17 00:00:00 2001 From: jpacold Date: Sun, 8 Sep 2024 20:55:28 -0600 Subject: [PATCH 24/29] Use closure for repeated part of gain calculation --- rustworkx-core/src/community/louvain.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index df2ca8896d..50c794a083 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -207,15 +207,19 @@ where s_tot[best_com] -= deg; - let delta = if let Some(&w) = neighbor_weights.get(&best_com) { - w - } else { - 0.0 + let community_delta = |c: usize| -> f64 { + let delta = if let 
Some(&w) = neighbor_weights.get(&c) { + w + } else { + 0.0 + }; + -delta / m + resolution * s_tot[c] * deg / two_m_sq }; - let remove_cost = -delta / m + resolution * (s_tot[best_com] * deg) / two_m_sq; - for (&nbr_com, &wt) in neighbor_weights.iter() { - let gain = remove_cost + wt / m - resolution * s_tot[nbr_com] * deg / two_m_sq; + let remove_cost = community_delta(best_com); + + for &nbr_com in neighbor_weights.keys() { + let gain = remove_cost - community_delta(nbr_com); if gain > best_gain { best_gain = gain; best_com = nbr_com; From 234659eb4188f461fd3056ff50e2ab42de76c303 Mon Sep 17 00:00:00 2001 From: jpacold Date: Mon, 9 Sep 2024 08:03:05 -0600 Subject: [PATCH 25/29] Replace `loop` with `while performed_move` --- rustworkx-core/src/community/louvain.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 50c794a083..8d816d1ab7 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -185,8 +185,9 @@ where // Start by placing each node into its own community let mut node_to_community: Vec = (0..node_count).collect(); let mut total_gain = 0.0; - loop { - let mut performed_move = false; + let mut performed_move = true; + while performed_move { + performed_move = false; let mut node_shuffle: Pcg32 = match seed { Some(rng_seed) => Pcg32::seed_from_u64(rng_seed), @@ -234,9 +235,6 @@ where node_to_community[node] = best_com; } } - if !performed_move { - break; - } } if total_gain < gain_threshold { From 11a74fd757fc149206d855534f0549d8984ed359 Mon Sep 17 00:00:00 2001 From: jpacold Date: Mon, 9 Sep 2024 08:07:42 -0600 Subject: [PATCH 26/29] Replace `if let` with `match` --- rustworkx-core/src/community/louvain.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 8d816d1ab7..2b47a521d5 100644 
--- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -250,13 +250,14 @@ where for n in input_graph.node_identifiers() { let prev_com = partition.subset_idx(n); let inner_com = node_to_community[prev_com]; - let new_com = if let Some(&c) = final_index.get(&inner_com) { - c - } else { - let c = next_com; - final_index.insert(inner_com, c); - next_com += 1; - c + let new_com = match final_index.get(&inner_com) { + Some(&c) => c, + None => { + let c = next_com; + final_index.insert(inner_com, c); + next_com += 1; + c + } }; updated_partition[input_graph.to_index(n)] = new_com; } From 58857fdc92065dd8632c8928805eb95537e5b86c Mon Sep 17 00:00:00 2001 From: jpacold Date: Mon, 9 Sep 2024 21:37:43 -0600 Subject: [PATCH 27/29] Reorganize modularity and gain arithmetic --- rustworkx-core/src/community/louvain.rs | 15 +++++++++++---- rustworkx-core/src/community/metrics.rs | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 2b47a521d5..59d2081f2a 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -184,7 +184,13 @@ where // Start by placing each node into its own community let mut node_to_community: Vec = (0..node_count).collect(); + + // Keep track of the total modularity gain during this level of the + // algorithm. Note that we actually keep track of m * delta, where + // delta is the change in modularity. Later we will compare this + // against m * gain_threshold. 
let mut total_gain = 0.0; + let mut performed_move = true; while performed_move { performed_move = false; @@ -204,17 +210,18 @@ where let init_com = node_to_community[node]; let deg = degrees[node]; let mut best_com = init_com; - let two_m_sq = 2.0 * m * m; s_tot[best_com] -= deg; let community_delta = |c: usize| -> f64 { - let delta = if let Some(&w) = neighbor_weights.get(&c) { + let wt = if let Some(&w) = neighbor_weights.get(&c) { w } else { 0.0 }; - -delta / m + resolution * s_tot[c] * deg / two_m_sq + // As mentioned above this is m times the change in modularity + // caused by moving a node out of the community. + -wt + 0.5 * resolution * s_tot[c] * deg / m }; let remove_cost = community_delta(best_com); @@ -237,7 +244,7 @@ where } } - if total_gain < gain_threshold { + if total_gain < m * gain_threshold { return false; } diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 922ab3c0cb..9c0ced20fa 100644 --- a/rustworkx-core/src/community/metrics.rs +++ b/rustworkx-core/src/community/metrics.rs @@ -134,7 +134,7 @@ impl<'g, G: Modularity> Partition<'g, G> { }; let m: f64 = total_edge_weight(self.graph); - sigma_internal / m - resolution * sigma_total_squared / (m * m) + (sigma_internal - resolution * sigma_total_squared / m) / m } } From e77f71c40e3de129bea7158ec00ee50a2a680593 Mon Sep 17 00:00:00 2001 From: jpacold Date: Sun, 15 Sep 2024 23:11:40 -0600 Subject: [PATCH 28/29] Revise traits to compile with Rust 1.70 --- rustworkx-core/src/community/louvain.rs | 22 +++++++++++++++++----- rustworkx-core/src/community/metrics.rs | 25 +++++++++++++++++++------ rustworkx-core/src/community/utils.rs | 5 +++-- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/rustworkx-core/src/community/louvain.rs b/rustworkx-core/src/community/louvain.rs index 59d2081f2a..671a9570bc 100644 --- a/rustworkx-core/src/community/louvain.rs +++ b/rustworkx-core/src/community/louvain.rs @@ -1,8 +1,9 @@ -use 
super::metrics::{Modularity, Partition}; +use super::metrics::{Modularity, ModularityEdgeWeight, ModularityNodeId, Partition}; use super::utils::total_edge_weight; +use petgraph::graph::UnGraph; +use petgraph::visit::{Data, EdgeRef, GraphBase}; use petgraph::EdgeDirection; -use petgraph::{graph::UnGraph, visit::EdgeRef}; use rand::SeedableRng; use rand_pcg::Pcg32; use std::collections::{HashMap, HashSet}; @@ -24,7 +25,10 @@ where // Directed(DiGraph<(), f64, usize>) } -impl<'g, G: Modularity> InnerGraph<'g, G> { +impl<'g, G: Modularity> InnerGraph<'g, G> +where + ::EdgeWeight: ModularityEdgeWeight, +{ /// Returns the number of nodes in the inner graph pub fn node_count(&self) -> usize { match self { @@ -40,7 +44,7 @@ impl<'g, G: Modularity> InnerGraph<'g, G> { match self { InnerGraph::Init(&g) => { for e in g.edge_references() { - let w = (*e.weight()).into(); + let w: f64 = (*e.weight()).into(); let (a, b) = (g.to_index(e.source()), g.to_index(e.target())); degrees[a] += w; degrees[b] += w; @@ -111,7 +115,11 @@ where fn to_vec_of_hashsets(&self) -> Vec>; } -impl<'g, G: Modularity> LouvainAlgo<'g, G> for Partition<'g, G> { +impl<'g, G: Modularity> LouvainAlgo<'g, G> for Partition<'g, G> +where + ::EdgeWeight: ModularityEdgeWeight, + ::NodeId: ModularityNodeId, +{ fn to_inner_graph(&self) -> InnerGraph<'g, G> { if self.n_subsets == self.graph.node_count() { return InnerGraph::Init(self.graph); @@ -174,6 +182,8 @@ fn one_level_undirected( ) -> bool where G: Modularity, + ::EdgeWeight: ModularityEdgeWeight, + ::NodeId: ModularityNodeId, { let inner_graph = partition.to_inner_graph(); @@ -292,6 +302,8 @@ pub fn louvain_communities( ) -> Vec> where G: Modularity, + ::EdgeWeight: ModularityEdgeWeight, + ::NodeId: ModularityNodeId, { let mut partition = Partition::new(&graph); diff --git a/rustworkx-core/src/community/metrics.rs b/rustworkx-core/src/community/metrics.rs index 9c0ced20fa..8c20f335e0 100644 --- a/rustworkx-core/src/community/metrics.rs +++ 
b/rustworkx-core/src/community/metrics.rs @@ -2,16 +2,21 @@ use super::utils::total_edge_weight; use super::NotAPartitionError; use petgraph::visit::{ - Data, EdgeRef, GraphProp, IntoEdgeReferences, IntoEdgesDirected, IntoNodeReferences, NodeCount, - NodeIndexable, + Data, EdgeRef, GraphBase, GraphProp, IntoEdgeReferences, IntoEdgesDirected, IntoNodeReferences, + NodeCount, NodeIndexable, }; use std::collections::HashSet; use std::hash::Hash; -/// Trait for graphs for which it is possible to compute modularity +/// Traits for graphs for which it is possible to compute modularity /// and apply the Louvain community detection method. +pub trait ModularityEdgeWeight: Into + Copy {} +impl + Copy> ModularityEdgeWeight for E {} +pub trait ModularityNodeId: Hash + Eq + Copy {} +impl ModularityNodeId for N {} + pub trait Modularity: - Data + Copy, NodeId: Hash + Eq + Copy> + Data + GraphProp + IntoEdgeReferences + NodeCount @@ -21,7 +26,7 @@ pub trait Modularity: { } impl< - G: Data + Copy, NodeId: Hash + Eq + Copy> + G: Data + GraphProp + IntoEdgeReferences + NodeCount @@ -38,13 +43,19 @@ impl< pub struct Partition<'g, G> where G: Modularity, + ::EdgeWeight: ModularityEdgeWeight, + ::NodeId: ModularityNodeId, { pub graph: &'g G, pub n_subsets: usize, pub node_to_subset: Vec, } -impl<'g, G: Modularity> Partition<'g, G> { +impl<'g, G: Modularity> Partition<'g, G> +where + ::EdgeWeight: ModularityEdgeWeight, + ::NodeId: ModularityNodeId, +{ /// Creates a partition where each node of the input graph is placed /// into its own subset, e.g. for the first step of the Louvain algorithm. 
pub fn new(graph: &'g G) -> Partition<'g, G> { @@ -152,6 +163,8 @@ pub fn modularity( ) -> Result where G: Modularity, + ::EdgeWeight: ModularityEdgeWeight, + ::NodeId: ModularityNodeId, { let partition = Partition::from_subsets(&graph, communities)?; Ok(partition.modularity(resolution)) diff --git a/rustworkx-core/src/community/utils.rs b/rustworkx-core/src/community/utils.rs index 64a5513336..1472d8ee6f 100644 --- a/rustworkx-core/src/community/utils.rs +++ b/rustworkx-core/src/community/utils.rs @@ -1,9 +1,10 @@ -use super::Modularity; -use petgraph::visit::EdgeRef; +use super::metrics::{ModularityEdgeWeight, Modularity}; +use petgraph::visit::{Data, EdgeRef}; pub fn total_edge_weight(graph: &G) -> f64 where G: Modularity, + ::EdgeWeight: ModularityEdgeWeight, { graph .edge_references() From 6f49cae7f229a8f04d332563a2256dfbdbed0214 Mon Sep 17 00:00:00 2001 From: jpacold Date: Sun, 15 Sep 2024 23:14:16 -0600 Subject: [PATCH 29/29] `cargo fmt --all` --- rustworkx-core/src/community/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rustworkx-core/src/community/utils.rs b/rustworkx-core/src/community/utils.rs index 1472d8ee6f..154a4a9f3d 100644 --- a/rustworkx-core/src/community/utils.rs +++ b/rustworkx-core/src/community/utils.rs @@ -1,4 +1,4 @@ -use super::metrics::{ModularityEdgeWeight, Modularity}; +use super::metrics::{Modularity, ModularityEdgeWeight}; use petgraph::visit::{Data, EdgeRef}; pub fn total_edge_weight(graph: &G) -> f64