Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions parc/_parc.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,15 +453,20 @@ def run_toobig_subPARC(

return node_communities

def run_subPARC(self):

def run_PARC(self):
time_start = time.time()
x_data = self.x_data
n_samples = x_data.shape[0]
n_features = x_data.shape[1]
logger.message(
f"Input data has shape {n_samples} (samples) x {n_features} (features)"
)

large_community_factor = self.large_community_factor
small_community_size = self.small_community_size
jac_std_factor = self.jac_std_factor
jac_weighted_edges = self.jac_weighted_edges
knn = self.knn
n_samples = x_data.shape[0]

if self.neighbor_graph is not None:
csr_array = self.neighbor_graph
Expand Down Expand Up @@ -626,8 +631,8 @@ def run_subPARC(self):
value in list(available_neighbours)]
best_group = max(available_neighbours_list, key=available_neighbours_list.count)
node_communities[single_cell] = best_group
time_start = time.time()
while small_community_exists & ((time.time() - time_start) < self.small_community_timeout):
time_start_sc = time.time()
while small_community_exists & (time.time() - time_start_sc) < self.small_community_timeout:
small_pop_list = []
small_community_exists = False
for cluster in set(list(node_communities.flatten())):
Expand All @@ -652,7 +657,9 @@ def run_subPARC(self):
logger.message(f"Cluster labels and populations {len(pop_list)} {pop_list}")

self.y_data_pred = node_communities
return
run_time = time.time() - time_start
logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
self.compute_performance_metrics(run_time)

def accuracy(self, target=1):

Expand Down Expand Up @@ -728,19 +735,13 @@ def accuracy(self, target=1):

return accuracy_val, predict_class_array, majority_truth_labels, number_clusters_for_target

def run_PARC(self):
logger.message(
f"Input data has shape {self.x_data.shape[0]} (samples) x {self.x_data.shape[1]} (features)"
)
list_roc = []

time_start_total = time.time()

# Query dataset, k - number of closest elements (returns 2 numpy arrays)
self.run_subPARC()
run_time = time.time() - time_start_total
logger.message(f"Time elapsed to run PARC: {run_time:.1f} seconds")
def compute_performance_metrics(self, run_time: float):
"""Compute performance metrics for the PARC algorithm.

Args:
run_time: (float) the time taken to run the PARC algorithm.
"""
list_roc = []
targets = list(set(self.y_data_true))
n_samples = len(list(self.y_data_true))
self.f1_accumulated = 0
Expand Down Expand Up @@ -788,7 +789,6 @@ def run_PARC(self):
self.f1_mean = f1_mean
self.stats_df = df_accuracy
self.majority_truth_labels = majority_truth_labels
return

def run_umap_hnsw(
self,
Expand Down