@nxp._configure_if_nx_active()
def diameter(G, e=None, usebounds=False, weight=None, get_chunks="chunks"):
    """This alternative to the more general `diameter` function is faster and
    allows for an approximation tolerance, though the default is to find the
    exact zero-tolerance result. The function uses the Iterative Fringe Upper
    Bound (IFUB) algorithm [1]_ with parallel computation of BFSes for fringe
    vertices.

    networkx.diameter : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.distance_measures.diameter.html#networkx.algorithms.distance_measures.diameter

    Parameters
    ----------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes as input and returns an
        iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n_jobs` number of chunks.

    Notes
    -----
    The IFUB algorithm first selects an approximate "central" node using
    the 4-sweep heuristic. The 4-sweep method starts from the node of highest
    degree, finds its farthest node ``a``, finds the node ``b`` farthest from
    ``a``, and moves to the midpoint of a shortest ``a``-``b`` path; this is
    done twice (four BFS sweeps in total) to approximate a central node.
    A BFS tree is then rooted at this node, and eccentricities are computed
    layer-wise in parallel, starting from the deepest layer. After layer ``i``
    has been processed the diameter is at most ``2 * (i - 1)``; as soon as the
    largest eccentricity seen so far reaches that bound the algorithm
    terminates and returns it, otherwise it proceeds to the next layer. IFUB is
    observed to compute diameters efficiently for real-world graphs [1]_.

    The error tolerance is currently fixed at zero, so the returned value is
    always the exact diameter.

    If `e` is given it is treated as a precomputed mapping of node to
    eccentricity and its largest value is returned without inspecting the
    graph, as `networkx.diameter` does. `usebounds` is accepted only for
    signature compatibility and has no effect. All distances are BFS hop
    counts, so `weight` must be None; any other value raises
    `networkx.NetworkXNotImplemented`. A disconnected graph raises
    `networkx.NetworkXError`.

    References
    ----------
    .. [1] Crescenzi, P., Grossi, R., Lanzi, L., & Marino, A.
       "On computing the diameter of real-world undirected graphs"
       Theoretical Computer Science 426 (2012): 34-52.
       https://doi.org/10.1016/j.tcs.2012.09.018
    """
    # NOTE(review): confirm the author list, volume and pages of reference [1]
    # against its DOI before release.
    G = G.graph_object if isinstance(G, nxp.ParallelGraph) else G

    # Honour precomputed eccentricities instead of silently recomputing.
    if e is not None:
        return max(e.values())

    # Refuse weighted queries explicitly: every distance below is a hop count.
    if weight is not None:
        raise nx.NetworkXNotImplemented(
            "Parallel diameter implementation only supports unweighted graphs"
        )

    if not nx.is_connected(G):
        raise nx.NetworkXError("Cannot compute metric because graph is not connected.")

    root, lower_bound = _four_sweep_root(G)

    layers = list(nx.bfs_layers(G, root))
    max_level = len(layers) - 1  # eccentricity of the root
    lower_bound = max(lower_bound, max_level)
    upper_bound = 2 * max_level
    error_tolerance = 0

    n_jobs = nxp.get_n_jobs()

    # Whenever the loop body runs, upper_bound > lower_bound >= max_level >= 1,
    # and upper_bound is either 2 * max_level or 2 * cur_level, so
    # cur_level >= 1 and ``layers[cur_level]`` is always a valid, non-empty
    # BFS layer.
    cur_level = max_level
    while upper_bound - lower_bound > error_tolerance:
        fringe_vertices = layers[cur_level]

        # Parallelize the eccentricity calculation for fringe vertices
        if get_chunks == "chunks":
            vertex_chunks = nxp.create_iterables(G, "node", n_jobs, fringe_vertices)
        else:
            vertex_chunks = get_chunks(fringe_vertices)

        chunk_eccentricities = Parallel()(
            delayed(_calculate_eccentricities_for_nodes)(G, chunk)
            for chunk in vertex_chunks
        )

        # ``default=0`` covers a chunking function that yields nothing.
        cur_max_ecc = max(
            (ecc for eccs in chunk_eccentricities for ecc in eccs.values()),
            default=0,
        )

        # Nodes in shallower layers are within cur_level - 1 hops of the root,
        # so any pair not yet examined is at most 2 * (cur_level - 1) apart.
        lower_bound = max(lower_bound, cur_max_ecc)
        upper_bound = 2 * (cur_level - 1)
        cur_level -= 1

    return lower_bound


def _four_sweep_root(G):
    """Return ``(root, lower_bound)`` chosen by the 4-sweep heuristic.

    ``root`` is the node used to root the IFUB BFS tree and ``lower_bound``
    is the largest eccentricity observed during the sweeps, which is a valid
    lower bound on the diameter. `G` must be connected and non-empty.
    """
    start = max(G.nodes(), key=G.degree)
    lower_bound = 0
    for _ in range(2):
        # Sweep 1: a node farthest from the current start.
        far_a = list(nx.bfs_layers(G, start))[-1][0]
        # Sweep 2: a node farthest from ``far_a``; the depth is ecc(far_a).
        layers = list(nx.bfs_layers(G, far_a))
        ecc_a = len(layers) - 1
        far_b = layers[-1][0]
        lower_bound = max(lower_bound, ecc_a)
        # Midpoint of an actual shortest far_a-far_b path (ecc_a + 1 nodes),
        # rather than an arbitrary node of the middle BFS layer.
        path = nx.shortest_path(G, far_a, far_b)
        start = path[ecc_a // 2]
    return start, lower_bound


def _calculate_eccentricities_for_nodes(G, nodes):
    """Return ``{node: eccentricity}`` for every node in `nodes`.

    Each eccentricity is the number of BFS layers reachable from the node
    minus one. An empty `nodes` yields an empty dict.
    """
    return {node: len(list(nx.bfs_layers(G, node))) - 1 for node in nodes}
@staticmethod
def can_run(name, args, kwargs):
    """Determine if the algorithm can be run with the given arguments.

    Parameters
    ----------
    name : str
        Name of the dispatched algorithm.
    args : tuple
        Positional arguments of the call.
    kwargs : dict
        Keyword arguments of the call.

    Returns
    -------
    bool or str
        True when the backend can run the call; False when no graph
        argument could be found for ``"diameter"``; otherwise a string
        explaining why the parallel ``"diameter"`` cannot handle the call.
    """
    if name != "diameter":
        return True  # All other algorithms can run by default

    # The graph may be passed positionally or as the keyword ``G``.
    graph = args[0] if args else kwargs.get("G")
    if graph is None:
        return False

    if isinstance(graph, ParallelGraph):
        graph = graph.graph_object

    if graph.is_directed():
        return "Parallel diameter implementation only supports undirected graphs"

    # ``weight`` is the fourth parameter of diameter(G, e, usebounds, weight)
    # and may therefore also arrive positionally.
    weight = args[3] if len(args) > 3 else kwargs.get("weight")
    if weight is not None:
        return "Parallel diameter implementation only supports unweighted graphs"

    # With ``weight=None`` every edge counts as one hop regardless of any
    # "weight" attribute stored on the edges, so edge data is not inspected.
    return True