From 74534b5513f8ee5b265fe03f2d6aa056d5fadcfa Mon Sep 17 00:00:00 2001
From: Ainsleigh
Date: Mon, 12 Aug 2024 14:58:54 -0700
Subject: [PATCH 1/3] factor out compute_performance_metrics

---
 parc/_parc.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/parc/_parc.py b/parc/_parc.py
index 930d700..e419fc8 100644
--- a/parc/_parc.py
+++ b/parc/_parc.py
@@ -732,15 +732,21 @@ def run_PARC(self):
         logger.message(
             f"Input data has shape {self.x_data.shape[0]} (samples) x {self.x_data.shape[1]} (features)"
         )
-        list_roc = []
-
         time_start_total = time.time()
 
         # Query dataset, k - number of closest elements (returns 2 numpy arrays)
         self.run_subPARC()
         run_time = time.time() - time_start_total
         logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
+        self.compute_performance_metrics(run_time)
 
+    def compute_performance_metrics(self, run_time: float):
+        """Compute performance metrics for the PARC algorithm.
+
+        Args:
+            run_time: (float) the time taken to run the PARC algorithm.
+        """
+        list_roc = []
         targets = list(set(self.y_data_true))
         n_samples = len(list(self.y_data_true))
         self.f1_accumulated = 0
@@ -788,7 +794,6 @@ def run_PARC(self):
         self.f1_mean = f1_mean
         self.stats_df = df_accuracy
         self.majority_truth_labels = majority_truth_labels
-        return
 
     def run_umap_hnsw(
         self,
From 12cbf728c7cda3f778743a887a6da3ead00a7706 Mon Sep 17 00:00:00 2001
From: Ainsleigh
Date: Mon, 12 Aug 2024 14:59:28 -0700
Subject: [PATCH 2/3] rename `time_start_total` -> `time_start`

---
 parc/_parc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/parc/_parc.py b/parc/_parc.py
index e419fc8..2fe1179 100644
--- a/parc/_parc.py
+++ b/parc/_parc.py
@@ -732,11 +732,11 @@ def run_PARC(self):
         logger.message(
             f"Input data has shape {self.x_data.shape[0]} (samples) x {self.x_data.shape[1]} (features)"
         )
-        time_start_total = time.time()
+        time_start = time.time()
 
         # Query dataset, k - number of closest elements (returns 2 numpy arrays)
         self.run_subPARC()
-        run_time = time.time() - time_start_total
+        run_time = time.time() - time_start
         logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
         self.compute_performance_metrics(run_time)
 
From e2edfa91d1eb2fe83b0a7296590ebe4c55166d2c Mon Sep 17 00:00:00 2001
From: Ainsleigh
Date: Mon, 12 Aug 2024 15:04:03 -0700
Subject: [PATCH 3/3] remove run_subPARC

---
Review notes (this section is ignored by `git am`):

* The body of run_subPARC becomes run_PARC; the old run_PARC wrapper is
  deleted and its timing/logging/metrics calls move to the start and end
  of the merged method.
* The small-community loop guard uses `and` with the timeout comparison
  kept as a single operand. Writing it as
  `small_community_exists & (elapsed) < timeout` would parse as
  `(small_community_exists & elapsed) < timeout`, because `&` binds
  tighter than `<`, and `bool & float` raises TypeError.
* The added `while` line was edited by hand after the patch was generated,
  so the post-image blob id on the `index` line below may be stale.

 parc/_parc.py | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/parc/_parc.py b/parc/_parc.py
index 2fe1179..aa047c2 100644
--- a/parc/_parc.py
+++ b/parc/_parc.py
@@ -453,15 +453,20 @@ def run_toobig_subPARC(
 
         return node_communities
 
-    def run_subPARC(self):
-
+    def run_PARC(self):
+        time_start = time.time()
         x_data = self.x_data
+        n_samples = x_data.shape[0]
+        n_features = x_data.shape[1]
+        logger.message(
+            f"Input data has shape {n_samples} (samples) x {n_features} (features)"
+        )
+
         large_community_factor = self.large_community_factor
         small_community_size = self.small_community_size
         jac_std_factor = self.jac_std_factor
         jac_weighted_edges = self.jac_weighted_edges
         knn = self.knn
-        n_samples = x_data.shape[0]
         if self.neighbor_graph is not None:
             csr_array = self.neighbor_graph
 
@@ -626,8 +631,8 @@ def run_subPARC(self):
                                                      value in list(available_neighbours)]
                         best_group = max(available_neighbours_list, key=available_neighbours_list.count)
                         node_communities[single_cell] = best_group
-        time_start = time.time()
-        while small_community_exists & ((time.time() - time_start) < self.small_community_timeout):
+        time_start_sc = time.time()
+        while small_community_exists and (time.time() - time_start_sc) < self.small_community_timeout:
             small_pop_list = []
             small_community_exists = False
             for cluster in set(list(node_communities.flatten())):
@@ -652,7 +657,9 @@ def run_subPARC(self):
         logger.message(f"Cluster labels and populations {len(pop_list)} {pop_list}")
 
         self.y_data_pred = node_communities
-        return
+        run_time = time.time() - time_start
+        logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
+        self.compute_performance_metrics(run_time)
 
     def accuracy(self, target=1):
 
@@ -728,18 +735,6 @@ def accuracy(self, target=1):
 
         return accuracy_val, predict_class_array, majority_truth_labels, number_clusters_for_target
 
-    def run_PARC(self):
-        logger.message(
-            f"Input data has shape {self.x_data.shape[0]} (samples) x {self.x_data.shape[1]} (features)"
-        )
-        time_start = time.time()
-
-        # Query dataset, k - number of closest elements (returns 2 numpy arrays)
-        self.run_subPARC()
-        run_time = time.time() - time_start
-        logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
-        self.compute_performance_metrics(run_time)
-
     def compute_performance_metrics(self, run_time: float):
         """Compute performance metrics for the PARC algorithm.
 