From 74534b5513f8ee5b265fe03f2d6aa056d5fadcfa Mon Sep 17 00:00:00 2001
From: Ainsleigh <ainsleighhill@gmail.com>
Date: Mon, 12 Aug 2024 14:58:54 -0700
Subject: [PATCH 1/3] factor out compute_performance_metrics

---
 parc/_parc.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/parc/_parc.py b/parc/_parc.py
index 930d700..e419fc8 100644
--- a/parc/_parc.py
+++ b/parc/_parc.py
@@ -732,15 +732,21 @@ def run_PARC(self):
         logger.message(
             f"Input data has shape {self.x_data.shape[0]} (samples) x {self.x_data.shape[1]} (features)"
         )
-        list_roc = []
-
         time_start_total = time.time()
 
         # Query dataset, k - number of closest elements (returns 2 numpy arrays)
         self.run_subPARC()
         run_time = time.time() - time_start_total
         logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
+        self.compute_performance_metrics(run_time)
 
+    def compute_performance_metrics(self, run_time: float):
+        """Compute performance metrics for the PARC algorithm.
+
+        Args:
+            run_time: (float) the time taken to run the PARC algorithm.
+        """
+        list_roc = []
         targets = list(set(self.y_data_true))
         n_samples = len(list(self.y_data_true))
         self.f1_accumulated = 0
@@ -788,7 +794,6 @@ def run_PARC(self):
             self.f1_mean = f1_mean
             self.stats_df = df_accuracy
             self.majority_truth_labels = majority_truth_labels
-        return
 
     def run_umap_hnsw(
             self,

From 12cbf728c7cda3f778743a887a6da3ead00a7706 Mon Sep 17 00:00:00 2001
From: Ainsleigh <ainsleighhill@gmail.com>
Date: Mon, 12 Aug 2024 14:59:28 -0700
Subject: [PATCH 2/3] rename `time_start_total` -> `time_start`

---
 parc/_parc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/parc/_parc.py b/parc/_parc.py
index e419fc8..2fe1179 100644
--- a/parc/_parc.py
+++ b/parc/_parc.py
@@ -732,11 +732,11 @@ def run_PARC(self):
         logger.message(
             f"Input data has shape {self.x_data.shape[0]} (samples) x {self.x_data.shape[1]} (features)"
         )
-        time_start_total = time.time()
+        time_start = time.time()
 
         # Query dataset, k - number of closest elements (returns 2 numpy arrays)
         self.run_subPARC()
-        run_time = time.time() - time_start_total
+        run_time = time.time() - time_start
         logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
         self.compute_performance_metrics(run_time)
 

From e2edfa91d1eb2fe83b0a7296590ebe4c55166d2c Mon Sep 17 00:00:00 2001
From: Ainsleigh <ainsleighhill@gmail.com>
Date: Mon, 12 Aug 2024 15:04:03 -0700
Subject: [PATCH 3/3] merge run_subPARC into run_PARC

---
 parc/_parc.py | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/parc/_parc.py b/parc/_parc.py
index 2fe1179..aa047c2 100644
--- a/parc/_parc.py
+++ b/parc/_parc.py
@@ -453,15 +453,20 @@ def run_toobig_subPARC(
 
         return node_communities
 
-    def run_subPARC(self):
-
+    def run_PARC(self):
+        time_start = time.time()
         x_data = self.x_data
+        n_samples = x_data.shape[0]
+        n_features = x_data.shape[1]
+        logger.message(
+            f"Input data has shape {n_samples} (samples) x {n_features} (features)"
+        )
+
         large_community_factor = self.large_community_factor
         small_community_size = self.small_community_size
         jac_std_factor = self.jac_std_factor
         jac_weighted_edges = self.jac_weighted_edges
         knn = self.knn
-        n_samples = x_data.shape[0]
 
         if self.neighbor_graph is not None:
             csr_array = self.neighbor_graph
@@ -626,8 +631,8 @@ def run_subPARC(self):
                                                  value in list(available_neighbours)]
                     best_group = max(available_neighbours_list, key=available_neighbours_list.count)
                     node_communities[single_cell] = best_group
-        time_start = time.time()
-        while small_community_exists & ((time.time() - time_start) < self.small_community_timeout):
+        time_start_sc = time.time()  # start of the small-community timeout window
+        while small_community_exists & ((time.time() - time_start_sc) < self.small_community_timeout):
             small_pop_list = []
             small_community_exists = False
             for cluster in set(list(node_communities.flatten())):
@@ -652,7 +657,9 @@ def run_subPARC(self):
         logger.message(f"Cluster labels and populations {len(pop_list)} {pop_list}")
 
         self.y_data_pred = node_communities
-        return
+        run_time = time.time() - time_start
+        logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
+        self.compute_performance_metrics(run_time)
 
     def accuracy(self, target=1):
 
@@ -728,18 +735,6 @@ def accuracy(self, target=1):
 
         return accuracy_val, predict_class_array, majority_truth_labels, number_clusters_for_target
 
-    def run_PARC(self):
-        logger.message(
-            f"Input data has shape {self.x_data.shape[0]} (samples) x {self.x_data.shape[1]} (features)"
-        )
-        time_start = time.time()
-
-        # Query dataset, k - number of closest elements (returns 2 numpy arrays)
-        self.run_subPARC()
-        run_time = time.time() - time_start
-        logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
-        self.compute_performance_metrics(run_time)
-
     def compute_performance_metrics(self, run_time: float):
         """Compute performance metrics for the PARC algorithm.