From 77df136ddada9c5e28de253d021a567919c728ac Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Thu, 11 Dec 2025 12:08:57 +0100 Subject: [PATCH 1/7] WIP --- .../sorters/internal/simplesorter.py | 7 +++---- .../clustering/cleaning_tools.py | 20 +++++++++---------- .../clustering/graph_clustering.py | 2 +- .../clustering/itersplit_tools.py | 2 +- .../sortingcomponents/clustering/positions.py | 14 +++---------- .../clustering/random_projections.py | 13 +++--------- 6 files changed, 21 insertions(+), 37 deletions(-) diff --git a/src/spikeinterface/sorters/internal/simplesorter.py b/src/spikeinterface/sorters/internal/simplesorter.py index f24e965c2e..7f9d624803 100644 --- a/src/spikeinterface/sorters/internal/simplesorter.py +++ b/src/spikeinterface/sorters/internal/simplesorter.py @@ -183,10 +183,9 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): clust_method = clust_params.pop("method", "hdbscan") if clust_method == "hdbscan": - import hdbscan - - out = hdbscan.hdbscan(features_flat, **clust_params) - peak_labels = out[0] + from sklearn.cluster import HDBSCAN + model = HDBSCAN(**clust_params).fit(features_flat) + peak_labels = model.labels_.copy() elif clust_method == "hdbscan-gpu": from cuml.cluster import HDBSCAN as hdbscan diff --git a/src/spikeinterface/sortingcomponents/clustering/cleaning_tools.py b/src/spikeinterface/sortingcomponents/clustering/cleaning_tools.py index 8028761ccb..14d0444343 100644 --- a/src/spikeinterface/sortingcomponents/clustering/cleaning_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/cleaning_tools.py @@ -15,7 +15,7 @@ def _split_waveforms( wfs_and_noise, noise_size, n_components_by_channel, n_components, hdbscan_params, probability_thr, debug ): import sklearn.decomposition - import hdbscan + from sklearn.cluster import HDBSCAN valid_size = wfs_and_noise.shape[0] - noise_size @@ -30,9 +30,9 @@ def _split_waveforms( local_feature = pca.fit_transform(local_feature) # hdbscan on pca - clustering = hdbscan.hdbscan(local_feature, **hdbscan_params) - local_labels_with_noise = clustering[0] - cluster_probability = clustering[2] + clustering = HDBSCAN(**hdbscan_params).fit(local_feature) + local_labels_with_noise = clustering.labels_ + cluster_probability = clustering.probabilities_ (persistent_clusters,) = np.nonzero(cluster_probability > probability_thr) local_labels_with_noise[~np.isin(local_labels_with_noise, persistent_clusters)] = -1 @@ -95,7 +95,7 @@ def _split_waveforms_nested( wfs_and_noise, noise_size, nbefore, n_components_by_channel, n_components, hdbscan_params, probability_thr, debug ): import sklearn.decomposition - import hdbscan + from sklearn.cluster import HDBSCAN valid_size = wfs_and_noise.shape[0] - noise_size @@ -123,10 +123,10 @@ def _split_waveforms_nested( # ~ local_feature = pca.fit_transform(local_feature) # hdbscan on pca - clustering = hdbscan.hdbscan(local_feature, **hdbscan_params) - active_labels_with_noise = clustering[0] - cluster_probability = clustering[2] - (persistent_clusters,) = np.nonzero(clustering[2] > probability_thr) + clustering = HDBSCAN(**hdbscan_params).fit(local_feature) + active_labels_with_noise = clustering.labels_ + cluster_probability = clustering.probabilities_ + (persistent_clusters,) = np.nonzero(cluster_probability > probability_thr) active_labels_with_noise[~np.isin(active_labels_with_noise, persistent_clusters)] = -1 active_labels = active_labels_with_noise[active_ind < valid_size] @@ -233,7 +233,7 @@ def auto_split_clustering( """ import sklearn.decomposition - import 
hdbscan + from sklearn.cluster import HDBSCAN split_peak_labels = -1 * np.ones(peak_labels.size, dtype=np.int64) nb_clusters = 0 diff --git a/src/spikeinterface/sortingcomponents/clustering/graph_clustering.py b/src/spikeinterface/sortingcomponents/clustering/graph_clustering.py index f62a81bade..28b7cde85c 100644 --- a/src/spikeinterface/sortingcomponents/clustering/graph_clustering.py +++ b/src/spikeinterface/sortingcomponents/clustering/graph_clustering.py @@ -165,7 +165,7 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()): _remove_small_cluster(peak_labels, min_size=1) elif clustering_method == "hdbscan": - from hdbscan import HDBSCAN + from sklearn.cluster import HDBSCAN import scipy.sparse n_graph, connected_labels = scipy.sparse.csgraph.connected_components(distances, directed=False) diff --git a/src/spikeinterface/sortingcomponents/clustering/itersplit_tools.py b/src/spikeinterface/sortingcomponents/clustering/itersplit_tools.py index 0411166de6..93719d0cec 100644 --- a/src/spikeinterface/sortingcomponents/clustering/itersplit_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/itersplit_tools.py @@ -291,7 +291,7 @@ def split( tsvd = None if clusterer_method == "hdbscan": - from hdbscan import HDBSCAN + from sklearn.cluster import HDBSCAN clustering_kwargs.update(core_dist_n_jobs=1) clust = HDBSCAN(**clustering_kwargs) diff --git a/src/spikeinterface/sortingcomponents/clustering/positions.py b/src/spikeinterface/sortingcomponents/clustering/positions.py index 6fe2400c56..340bea9814 100644 --- a/src/spikeinterface/sortingcomponents/clustering/positions.py +++ b/src/spikeinterface/sortingcomponents/clustering/positions.py @@ -4,14 +4,7 @@ from pathlib import Path import numpy as np - -try: - import hdbscan - - HAVE_HDBSCAN = True -except: - HAVE_HDBSCAN = False - +from sklearn.cluster import HDBSCAN class PositionsClustering: """ @@ -36,7 +29,6 @@ class PositionsClustering: @classmethod def main_function(cls, recording, peaks, params, job_kwargs=dict()): - assert HAVE_HDBSCAN, "position clustering need hdbscan to be installed" if params["peak_locations"] is None: from spikeinterface.sortingcomponents.peak_localization import localize_peaks @@ -50,8 +42,8 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()): location_keys = ["x", "y"] locations = np.stack([peak_locations[k] for k in location_keys], axis=1) - clustering = hdbscan.hdbscan(locations, **params["hdbscan_kwargs"]) - peak_labels = clustering[0] + clustering = HDBSCAN(**params["hdbscan_kwargs"]).fit(locations) + peak_labels = clustering.labels_ labels = np.unique(peak_labels) labels = labels[labels >= 0] diff --git a/src/spikeinterface/sortingcomponents/clustering/random_projections.py b/src/spikeinterface/sortingcomponents/clustering/random_projections.py index ac059e22ea..781d5434a1 100644 --- a/src/spikeinterface/sortingcomponents/clustering/random_projections.py +++ b/src/spikeinterface/sortingcomponents/clustering/random_projections.py @@ -6,13 +6,8 @@ import importlib import numpy as np -hdbscan_spec = importlib.util.find_spec("hdbscan") -if hdbscan_spec is not None: - HAVE_HDBSCAN = True - import hdbscan -else: - HAVE_HDBSCAN = False +from sklearn.cluster import HDBSCAN from spikeinterface.core.basesorting import minimum_spike_dtype from spikeinterface.core.waveform_tools import estimate_templates from spikeinterface.sortingcomponents.clustering.merging_tools import merge_peak_labels_from_templates @@ -56,8 +51,6 @@ class RandomProjectionClustering: @classmethod def 
main_function(cls, recording, peaks, params, job_kwargs=dict()): - assert HAVE_HDBSCAN, "random projections clustering need hdbscan to be installed" - fs = recording.get_sampling_frequency() radius_um = params.get("radius_um", 30) ms_before = params["waveforms"].get("ms_before", 0.5) @@ -105,8 +98,8 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()): recording, pipeline_nodes, job_kwargs=job_kwargs, job_name="extracting features", verbose=verbose ) - clustering = hdbscan.hdbscan(hdbscan_data, **params["clusterer"]) - peak_labels = clustering[0] + clustering = HDBSCAN(**params["clusterer"]).fit(hdbscan_data) + peak_labels = clustering.labels_ labels = np.unique(peak_labels) labels = labels[labels >= 0] From d47c81f5e00852c9c0d82f62aeb0efa5f8403706 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Thu, 11 Dec 2025 12:11:43 +0100 Subject: [PATCH 2/7] n_jobs --- .../sortingcomponents/clustering/graph_clustering.py | 2 +- .../sortingcomponents/clustering/itersplit_tools.py | 2 +- src/spikeinterface/sortingcomponents/clustering/positions.py | 2 +- .../sortingcomponents/clustering/random_projections.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/graph_clustering.py b/src/spikeinterface/sortingcomponents/clustering/graph_clustering.py index 28b7cde85c..ef2a42259c 100644 --- a/src/spikeinterface/sortingcomponents/clustering/graph_clustering.py +++ b/src/spikeinterface/sortingcomponents/clustering/graph_clustering.py @@ -40,7 +40,7 @@ class GraphClustering: "clusterer": dict( method="sknetwork-louvain", # min_samples=1, - # core_dist_n_jobs=-1, + # n_jobs=-1, # min_cluster_size=20, # cluster_selection_method='leaf', # allow_single_cluster=True, diff --git a/src/spikeinterface/sortingcomponents/clustering/itersplit_tools.py b/src/spikeinterface/sortingcomponents/clustering/itersplit_tools.py index 93719d0cec..6a8783fb76 100644 --- a/src/spikeinterface/sortingcomponents/clustering/itersplit_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/itersplit_tools.py @@ -293,7 +293,7 @@ def split( if clusterer_method == "hdbscan": from sklearn.cluster import HDBSCAN - clustering_kwargs.update(core_dist_n_jobs=1) + clustering_kwargs.update(n_jobs=1) clust = HDBSCAN(**clustering_kwargs) with warnings.catch_warnings(): warnings.filterwarnings("ignore") diff --git a/src/spikeinterface/sortingcomponents/clustering/positions.py b/src/spikeinterface/sortingcomponents/clustering/positions.py index 340bea9814..0b0f8ba5e1 100644 --- a/src/spikeinterface/sortingcomponents/clustering/positions.py +++ b/src/spikeinterface/sortingcomponents/clustering/positions.py @@ -14,7 +14,7 @@ class PositionsClustering: _default_params = { "peak_locations": None, "peak_localization_kwargs": {"method": "center_of_mass"}, - "hdbscan_kwargs": {"min_cluster_size": 20, "allow_single_cluster": True, "core_dist_n_jobs": -1}, + "hdbscan_kwargs": {"min_cluster_size": 20, "allow_single_cluster": True, "n_jobs": -1}, } name = "hdbscan_positions" diff --git a/src/spikeinterface/sortingcomponents/clustering/random_projections.py b/src/spikeinterface/sortingcomponents/clustering/random_projections.py index 781d5434a1..2dfd0ce20c 100644 --- a/src/spikeinterface/sortingcomponents/clustering/random_projections.py +++ b/src/spikeinterface/sortingcomponents/clustering/random_projections.py @@ -30,7 +30,7 @@ class RandomProjectionClustering: "clusterer": { "min_cluster_size": 10, "allow_single_cluster": True, - "core_dist_n_jobs": -1, + "n_jobs": -1, 
"cluster_selection_method": "eom", }, "waveforms": {"ms_before": 0.5, "ms_after": 1.5}, From 91ca1d0939f5202a4e3dd31386e1a8f859d71777 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Dec 2025 11:16:27 +0000 Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/sorters/internal/simplesorter.py | 1 + src/spikeinterface/sortingcomponents/clustering/positions.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/spikeinterface/sorters/internal/simplesorter.py b/src/spikeinterface/sorters/internal/simplesorter.py index 7f9d624803..fef80f246c 100644 --- a/src/spikeinterface/sorters/internal/simplesorter.py +++ b/src/spikeinterface/sorters/internal/simplesorter.py @@ -184,6 +184,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): if clust_method == "hdbscan": from sklearn.cluster import HDBSCAN + model = HDBSCAN(**clust_params).fit(features_flat) peak_labels = model.labels_.copy() elif clust_method == "hdbscan-gpu": diff --git a/src/spikeinterface/sortingcomponents/clustering/positions.py b/src/spikeinterface/sortingcomponents/clustering/positions.py index 0b0f8ba5e1..6ee7c221df 100644 --- a/src/spikeinterface/sortingcomponents/clustering/positions.py +++ b/src/spikeinterface/sortingcomponents/clustering/positions.py @@ -6,6 +6,7 @@ import numpy as np from sklearn.cluster import HDBSCAN + class PositionsClustering: """ hdbscan clustering on peak_locations previously done by localize_peaks() From 215abbf21a6184f380be704982913cfeca67bf2a Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Thu, 11 Dec 2025 12:25:11 +0100 Subject: [PATCH 4/7] Import --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8c3a3cf3b1..1fb6699ec3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,6 +154,7 @@ test = [ "pytest-dependency", "pytest-cov", "psutil", + "scikit-learn", # preprocessing "ibllib>=3.4.1;python_version>='3.10'", @@ -167,7 +168,6 @@ test = [ # tridesclous2 "numba<0.61.0;python_version<'3.13'", "numba>=0.61.0;python_version>='3.13'", - "hdbscan>=0.8.33", # Previous version had a broken wheel # isosplit is needed for trideclous2 noramaly but isosplit is only build until python3.11 # so lets wait a new build of isosplit6 @@ -205,7 +205,6 @@ docs = [ # for notebooks in the gallery "MEArec", # Use as an example "pandas", # in the modules gallery comparison tutorial - "hdbscan>=0.8.33", # For sorters spykingcircus2 + tridesclous "numba", # For many postprocessing functions "networkx", "skops", # For automated curation From 7304586eec56aa47704584144ca0a51447cad15d Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Thu, 11 Dec 2025 12:33:51 +0100 Subject: [PATCH 5/7] Imports --- pyproject.toml | 2 +- .../sortingcomponents/clustering/positions.py | 12 ++++++++++-- .../clustering/random_projections.py | 11 ++++++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1fb6699ec3..e48141d247 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -206,7 +206,7 @@ docs = [ "MEArec", # Use as an example "pandas", # in the modules gallery comparison tutorial "numba", # For many postprocessing functions - "networkx", + "networkx", "skops", # For automated curation "scikit-learn", # For automated curation "huggingface_hub", # For automated curation diff --git 
a/src/spikeinterface/sortingcomponents/clustering/positions.py b/src/spikeinterface/sortingcomponents/clustering/positions.py
index 6ee7c221df..568d2223a3 100644
--- a/src/spikeinterface/sortingcomponents/clustering/positions.py
+++ b/src/spikeinterface/sortingcomponents/clustering/positions.py
@@ -4,7 +4,15 @@
 from pathlib import Path
 
 import numpy as np
-from sklearn.cluster import HDBSCAN
+import importlib.util
+
+sklearn_spec = importlib.util.find_spec("sklearn")
+if sklearn_spec is not None:
+    HAVE_SKLEARN = True
+    from sklearn.cluster import HDBSCAN
+else:
+    HAVE_SKLEARN = False
+
 
 
 class PositionsClustering:
@@ -30,7 +38,7 @@ class PositionsClustering:
 
     @classmethod
     def main_function(cls, recording, peaks, params, job_kwargs=dict()):
-
+        assert HAVE_SKLEARN, "position clustering needs scikit-learn to be installed"
         if params["peak_locations"] is None:
             from spikeinterface.sortingcomponents.peak_localization import localize_peaks
 
diff --git a/src/spikeinterface/sortingcomponents/clustering/random_projections.py b/src/spikeinterface/sortingcomponents/clustering/random_projections.py
index 2dfd0ce20c..7f3b37bc67 100644
--- a/src/spikeinterface/sortingcomponents/clustering/random_projections.py
+++ b/src/spikeinterface/sortingcomponents/clustering/random_projections.py
@@ -7,7 +7,15 @@
 
 import numpy as np
 
-from sklearn.cluster import HDBSCAN
+import importlib.util
+
+sklearn_spec = importlib.util.find_spec("sklearn")
+if sklearn_spec is not None:
+    HAVE_SKLEARN = True
+    from sklearn.cluster import HDBSCAN
+else:
+    HAVE_SKLEARN = False
+
 from spikeinterface.core.basesorting import minimum_spike_dtype
 from spikeinterface.core.waveform_tools import estimate_templates
 from spikeinterface.sortingcomponents.clustering.merging_tools import merge_peak_labels_from_templates
@@ -51,6 +59,7 @@ class RandomProjectionClustering:
 
     @classmethod
     def main_function(cls, recording, peaks, params, job_kwargs=dict()):
+        assert HAVE_SKLEARN, "random projections clustering needs scikit-learn to be installed"
         fs = recording.get_sampling_frequency()
         radius_um = params.get("radius_um", 30)
         ms_before = params["waveforms"].get("ms_before", 0.5)

From b68f03abd68ed628d6a9ddb95da2b4ecdee71400 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 11 Dec 2025 11:34:36 +0000
Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pyproject.toml                                                | 2 +-
 src/spikeinterface/sortingcomponents/clustering/positions.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e48141d247..1fb6699ec3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -206,7 +206,7 @@ docs = [
     "MEArec", # Use as an example
     "pandas", # in the modules gallery comparison tutorial
     "numba", # For many postprocessing functions
-    "networkx", 
+    "networkx",
     "skops", # For automated curation
     "scikit-learn", # For automated curation
     "huggingface_hub", # For automated curation
diff --git a/src/spikeinterface/sortingcomponents/clustering/positions.py b/src/spikeinterface/sortingcomponents/clustering/positions.py
index 568d2223a3..232b11edc2 100644
--- a/src/spikeinterface/sortingcomponents/clustering/positions.py
+++ b/src/spikeinterface/sortingcomponents/clustering/positions.py
@@ -14,7 +14,6 @@
     HAVE_SKLEARN = False
 
 
-
 class PositionsClustering:
     """
    hdbscan clustering on peak_locations previously done by localize_peaks()

From 780af9b7b0f8560533fbf9ab92a08eca3bebc915 Mon Sep 17 00:00:00 2001
From: Pierre Yger
Date: Thu, 11 Dec 2025 13:39:38 +0100 Subject: [PATCH 7/7] Oups --- src/spikeinterface/sorters/internal/simplesorter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/sorters/internal/simplesorter.py b/src/spikeinterface/sorters/internal/simplesorter.py index fef80f246c..ac5e4fa5c2 100644 --- a/src/spikeinterface/sorters/internal/simplesorter.py +++ b/src/spikeinterface/sorters/internal/simplesorter.py @@ -39,7 +39,7 @@ class SimpleSorter(ComponentsBasedSorter): "method": "hdbscan", "min_cluster_size": 25, "allow_single_cluster": True, - "core_dist_n_jobs": -1, + "n_jobs": -1, "cluster_selection_method": "leaf", }, # "cache_preprocessing": {"mode": None, "memory_limit": 0.5, "delete_cache": True}, @@ -58,7 +58,7 @@ class SimpleSorter(ComponentsBasedSorter): "clustering": ( "A dictionary for specifying the clustering parameters: 'method' (to cluster) default: 'hdbscan', " "'min_cluster_size' (min number of spikes per cluster) default: 25, 'allow_single_cluster' default: True, " - " 'core_dist_n_jobs' (parallelization) default: -1, cluster_selection_method (for hdbscan) default: leaf" + " 'n_jobs' (parallelization) default: -1, cluster_selection_method (for hdbscan) default: leaf" ), "job_kwargs": "Spikeinterface job_kwargs (see job_kwargs documentation) default 'n_jobs': -1, 'chunk_duration': '1s'", }
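
Note for reviewers: a minimal sketch of the API migration this series performs, assuming
scikit-learn >= 1.3 (the release that introduced sklearn.cluster.HDBSCAN). The feature
array and parameter values below are illustrative stand-ins, not values taken from the diff:

    import numpy as np
    from sklearn.cluster import HDBSCAN

    # Illustrative stand-in for the peak locations / projection features
    # clustered in positions.py and random_projections.py.
    X = np.random.default_rng(0).normal(size=(500, 2))

    # Old functional API from the standalone hdbscan package (removed by this series):
    #     out = hdbscan.hdbscan(X, min_cluster_size=20, core_dist_n_jobs=-1)
    #     labels, probabilities = out[0], out[1]

    # New estimator API; note the keyword rename core_dist_n_jobs -> n_jobs.
    clusterer = HDBSCAN(min_cluster_size=20, allow_single_cluster=True, n_jobs=-1)
    labels = clusterer.fit(X).labels_.copy()    # -1 marks noise points
    probabilities = clusterer.probabilities_    # per-point membership strength in [0, 1]

One behavioural difference worth flagging for cleaning_tools.py: the old tuple API also
exposed per-cluster persistence (the value previously read via clustering[2]), which has
no direct equivalent attribute on the scikit-learn estimator; probabilities_ is per-point.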