Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions parc/_parc.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ class PARC:
do_prune_local:
Whether or not to do local pruning. If ``None`` (default), set to ``False`` if the
number of samples is > 300 000, and set to ``True`` otherwise.
jac_threshold_type:
One of ``"median"`` or ``"mean"``. Determines how the Jaccard similarity threshold is
calculated during global pruning.
jac_std_factor:
The multiplier used in calculating the Jaccard similarity threshold for the similarity
between two nodes during global pruning for ``jac_threshold_type = "mean"``:
Expand Down Expand Up @@ -111,7 +114,8 @@ def __init__(
x_data: np.ndarray,
y_data_true: np.ndarray | None = None,
l2_std_factor: float = 3,
jac_std_factor: float | str = "median",
jac_threshold_type: str = "median",
jac_std_factor: float = 0.15,
do_prune_local: bool | None = None,
large_community_factor: float = 0.4,
small_community_size: int = 10,
Expand Down Expand Up @@ -140,6 +144,7 @@ def __init__(
self.neighbor_graph = neighbor_graph
self.knn_struct = knn_struct
self.l2_std_factor = l2_std_factor
self.jac_threshold_type = jac_threshold_type
self.jac_std_factor = jac_std_factor
self.jac_weighted_edges = jac_weighted_edges
self.do_prune_local = do_prune_local
Expand Down Expand Up @@ -401,7 +406,8 @@ def get_leiden_partition(
def run_toobig_subPARC(
self,
x_data,
jac_std_factor=0.3,
jac_threshold_type: str = "mean",
jac_std_factor: float = 0.3,
jac_weighted_edges=True
):

Expand All @@ -424,7 +430,7 @@ def run_toobig_subPARC(
similarities = graph.similarity_jaccard(pairs=edges_copy) # list of jaccard weights
new_edges = []
similarities_array = np.asarray(similarities)
if jac_std_factor == "median":
if jac_threshold_type == "median":
threshold = np.median(similarities)
else:
threshold = np.mean(similarities) - jac_std_factor * np.std(similarities)
Expand Down Expand Up @@ -513,6 +519,7 @@ def run_parc(self):

large_community_factor = self.large_community_factor
small_community_size = self.small_community_size
jac_threshold_type = self.jac_threshold_type
jac_std_factor = self.jac_std_factor
jac_weighted_edges = self.jac_weighted_edges
knn = self.knn
Expand Down Expand Up @@ -542,7 +549,7 @@ def run_parc(self):

similarities_array = np.asarray(similarities)

if jac_std_factor == "median":
if jac_threshold_type == "median":
threshold = np.median(similarities)
else:
threshold = np.mean(similarities) - jac_std_factor * np.std(similarities)
Expand Down