3 changes: 1 addition & 2 deletions pyproject.toml
@@ -154,6 +154,7 @@ test = [
"pytest-dependency",
"pytest-cov",
"psutil",
"scikit-learn",

# preprocessing
"ibllib>=3.4.1;python_version>='3.10'",
@@ -167,7 +168,6 @@ test = [
# tridesclous2
"numba<0.61.0;python_version<'3.13'",
"numba>=0.61.0;python_version>='3.13'",
"hdbscan>=0.8.33", # Previous version had a broken wheel

# isosplit is normally needed for tridesclous2, but isosplit6 is only built up to python 3.11
# so let's wait for a new build of isosplit6
@@ -205,7 +205,6 @@ docs = [
# for notebooks in the gallery
"MEArec", # Use as an example
"pandas", # in the modules gallery comparison tutorial
"hdbscan>=0.8.33", # For sorters spykingcircus2 + tridesclous
"numba", # For many postprocessing functions
"networkx",
"skops", # For automated curation
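
Note: the standalone hdbscan package is dropped from both the test and docs extras in favor of scikit-learn, which ships its own HDBSCAN estimator since version 1.3. The pyproject change above does not pin a scikit-learn version; the check below is only a sketch of that implicit requirement, and the packaging dependency it uses is an assumption.

# Sketch only: guard against scikit-learn builds that predate sklearn.cluster.HDBSCAN
# (the estimator was added in scikit-learn 1.3).
import sklearn
from packaging.version import Version

if Version(sklearn.__version__) < Version("1.3"):
    raise ImportError(
        f"scikit-learn {sklearn.__version__} is installed, but sklearn.cluster.HDBSCAN requires >= 1.3"
    )

from sklearn.cluster import HDBSCAN  # noqa: E402
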
10 changes: 5 additions & 5 deletions src/spikeinterface/sorters/internal/simplesorter.py
@@ -39,7 +39,7 @@ class SimpleSorter(ComponentsBasedSorter):
"method": "hdbscan",
"min_cluster_size": 25,
"allow_single_cluster": True,
"core_dist_n_jobs": -1,
"n_jobs": -1,
"cluster_selection_method": "leaf",
},
# "cache_preprocessing": {"mode": None, "memory_limit": 0.5, "delete_cache": True},
@@ -58,7 +58,7 @@ class SimpleSorter(ComponentsBasedSorter):
"clustering": (
"A dictionary for specifying the clustering parameters: 'method' (to cluster) default: 'hdbscan', "
"'min_cluster_size' (min number of spikes per cluster) default: 25, 'allow_single_cluster' default: True, "
" 'core_dist_n_jobs' (parallelization) default: -1, cluster_selection_method (for hdbscan) default: leaf"
" 'n_jobs' (parallelization) default: -1, cluster_selection_method (for hdbscan) default: leaf"
),
"job_kwargs": "Spikeinterface job_kwargs (see job_kwargs documentation) default 'n_jobs': -1, 'chunk_duration': '1s'",
}
@@ -183,10 +183,10 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):
clust_method = clust_params.pop("method", "hdbscan")

if clust_method == "hdbscan":
import hdbscan
from sklearn.cluster import HDBSCAN

out = hdbscan.hdbscan(features_flat, **clust_params)
peak_labels = out[0]
model = HDBSCAN(**clust_params).fit(features_flat)
peak_labels = model.labels_.copy()
elif clust_method == "hdbscan-gpu":
from cuml.cluster import HDBSCAN as hdbscan

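
Note: the change above is the pattern repeated throughout this PR: the functional hdbscan.hdbscan() call, which returns a tuple with the label array first, becomes a fitted sklearn estimator queried through attributes. A minimal side-by-side sketch; the feature array is a placeholder and the parameters are copied from the defaults above.

import numpy as np
from sklearn.cluster import HDBSCAN

features_flat = np.random.default_rng(0).normal(size=(500, 3))  # placeholder features
clust_params = {
    "min_cluster_size": 25,
    "allow_single_cluster": True,
    "n_jobs": -1,
    "cluster_selection_method": "leaf",
}

# Old API (hdbscan package), which took "core_dist_n_jobs" instead of "n_jobs":
#   out = hdbscan.hdbscan(features_flat, ...)
#   peak_labels = out[0]

# New API (scikit-learn >= 1.3): fit the estimator, then read its fitted attributes.
model = HDBSCAN(**clust_params).fit(features_flat)
peak_labels = model.labels_.copy()    # per-sample cluster labels, -1 means noise
probabilities = model.probabilities_  # per-sample cluster membership strength
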
20 changes: 10 additions & 10 deletions src/spikeinterface/sortingcomponents/clustering/cleaning_tools.py
@@ -15,7 +15,7 @@ def _split_waveforms(
wfs_and_noise, noise_size, n_components_by_channel, n_components, hdbscan_params, probability_thr, debug
):
import sklearn.decomposition
import hdbscan
from sklearn.cluster import HDBSCAN

valid_size = wfs_and_noise.shape[0] - noise_size

@@ -30,9 +30,9 @@
local_feature = pca.fit_transform(local_feature)

# hdbscan on pca
clustering = hdbscan.hdbscan(local_feature, **hdbscan_params)
local_labels_with_noise = clustering[0]
cluster_probability = clustering[2]
clustering = HDBSCAN(**hdbscan_params).fit(local_feature)
local_labels_with_noise = clustering.labels_
# sklearn's HDBSCAN has no per-cluster persistence (the old clustering[2]); its
# probabilities_ are per-sample, so low-confidence samples are flagged as noise directly
sample_probability = clustering.probabilities_
local_labels_with_noise[sample_probability <= probability_thr] = -1

@@ -95,7 +95,7 @@ def _split_waveforms_nested(
wfs_and_noise, noise_size, nbefore, n_components_by_channel, n_components, hdbscan_params, probability_thr, debug
):
import sklearn.decomposition
import hdbscan
from sklearn.cluster import HDBSCAN

valid_size = wfs_and_noise.shape[0] - noise_size

@@ -123,10 +123,10 @@
# ~ local_feature = pca.fit_transform(local_feature)

# hdbscan on pca
clustering = hdbscan.hdbscan(local_feature, **hdbscan_params)
active_labels_with_noise = clustering[0]
cluster_probability = clustering[2]
(persistent_clusters,) = np.nonzero(clustering[2] > probability_thr)
clustering = HDBSCAN(**hdbscan_params).fit(local_feature)
active_labels_with_noise = clustering.labels_
# sklearn's HDBSCAN has no per-cluster persistence (the old clustering[2]); its
# probabilities_ are per-sample, so low-confidence samples are flagged as noise directly
sample_probability = clustering.probabilities_
active_labels_with_noise[sample_probability <= probability_thr] = -1

active_labels = active_labels_with_noise[active_ind < valid_size]
@@ -233,7 +233,7 @@ def auto_split_clustering(
"""

import sklearn.decomposition
import hdbscan
from sklearn.cluster import HDBSCAN

split_peak_labels = -1 * np.ones(peak_labels.size, dtype=np.int64)
nb_clusters = 0
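
Note: one caveat for this file is that the old hdbscan.hdbscan() tuple exposed a per-cluster persistence array at index 2, while sklearn's estimator only exposes per-sample probabilities_, so persistence-based filtering has no direct equivalent. A small illustrative sketch of the attributes that are available; the make_blobs data and the per-cluster proxy are assumptions, not part of the PR.

import numpy as np
from sklearn.cluster import HDBSCAN
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=3, random_state=0)
clustering = HDBSCAN(min_cluster_size=20).fit(X)

assert clustering.labels_.shape == (X.shape[0],)         # one label per sample, -1 = noise
assert clustering.probabilities_.shape == (X.shape[0],)  # one membership strength per sample

# No per-cluster persistence attribute exists in sklearn; if a per-cluster score is
# needed, a rough proxy can be derived from the per-sample probabilities, e.g.:
labels = clustering.labels_
persistence_proxy = {
    k: clustering.probabilities_[labels == k].mean() for k in np.unique(labels) if k >= 0
}
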
@@ -40,7 +40,7 @@ class GraphClustering:
"clusterer": dict(
method="sknetwork-louvain",
# min_samples=1,
# core_dist_n_jobs=-1,
# n_jobs=-1,
# min_cluster_size=20,
# cluster_selection_method='leaf',
# allow_single_cluster=True,
@@ -165,7 +165,7 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()):
_remove_small_cluster(peak_labels, min_size=1)

elif clustering_method == "hdbscan":
from hdbscan import HDBSCAN
from sklearn.cluster import HDBSCAN
import scipy.sparse

n_graph, connected_labels = scipy.sparse.csgraph.connected_components(distances, directed=False)
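
Note: besides the import path, the only keyword that changes name between the two libraries in this PR is the parallelization argument, core_dist_n_jobs (hdbscan package) becoming n_jobs (sklearn), as the commented-out defaults above show. A hedged sketch of a translation helper for legacy configs follows; the helper itself is hypothetical and not part of the PR.

def translate_hdbscan_kwargs(kwargs: dict) -> dict:
    """Map legacy hdbscan-package keyword names to sklearn.cluster.HDBSCAN ones.

    Hypothetical helper: only the rename observed in this diff
    (core_dist_n_jobs -> n_jobs) is handled; other keys pass through unchanged.
    """
    translated = dict(kwargs)
    if "core_dist_n_jobs" in translated:
        translated["n_jobs"] = translated.pop("core_dist_n_jobs")
    return translated


# Example: an old-style clusterer config keeps working against the sklearn estimator.
old_style = {"min_cluster_size": 20, "core_dist_n_jobs": -1, "cluster_selection_method": "leaf"}
new_style = translate_hdbscan_kwargs(old_style)
# new_style == {"min_cluster_size": 20, "cluster_selection_method": "leaf", "n_jobs": -1}
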
@@ -291,9 +291,9 @@ def split(
tsvd = None

if clusterer_method == "hdbscan":
from hdbscan import HDBSCAN
from sklearn.cluster import HDBSCAN

clustering_kwargs.update(core_dist_n_jobs=1)
clustering_kwargs.update(n_jobs=1)
clust = HDBSCAN(**clustering_kwargs)
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
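
Note: unlike the other call sites, the split path above pins n_jobs=1 before fitting; a reasonable reading is that splitting already runs inside parallel workers, so the estimator itself stays single-threaded. A minimal sketch of that pattern; the feature array and kwargs are placeholders.

import warnings

import numpy as np
from sklearn.cluster import HDBSCAN

final_features = np.random.default_rng(3).normal(size=(200, 4))  # placeholder features
clustering_kwargs = {"min_cluster_size": 10, "allow_single_cluster": True}

# Keep the estimator single-threaded; the surrounding split loop is the parallel layer.
clustering_kwargs.update(n_jobs=1)
clust = HDBSCAN(**clustering_kwargs)
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    clust.fit(final_features)
possible_labels = clust.labels_
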
22 changes: 11 additions & 11 deletions src/spikeinterface/sortingcomponents/clustering/positions.py
@@ -4,13 +4,14 @@
from pathlib import Path

import numpy as np
import importlib.util

try:
import hdbscan

HAVE_HDBSCAN = True
except:
HAVE_HDBSCAN = False
sklearn_spec = importlib.util.find_spec("sklearn")
if sklearn_spec is not None:
HAVE_SKLEARN = True
from sklearn.cluster import HDBSCAN
else:
HAVE_SKLEARN = False


class PositionsClustering:
@@ -21,7 +22,7 @@ class PositionsClustering:
_default_params = {
"peak_locations": None,
"peak_localization_kwargs": {"method": "center_of_mass"},
"hdbscan_kwargs": {"min_cluster_size": 20, "allow_single_cluster": True, "core_dist_n_jobs": -1},
"hdbscan_kwargs": {"min_cluster_size": 20, "allow_single_cluster": True, "n_jobs": -1},
}

name = "hdbscan_positions"
Expand All @@ -36,8 +37,7 @@ class PositionsClustering:

@classmethod
def main_function(cls, recording, peaks, params, job_kwargs=dict()):
assert HAVE_HDBSCAN, "position clustering need hdbscan to be installed"

assert HAVE_SKLEARN, "position clustering needs scikit-learn to be installed"
if params["peak_locations"] is None:
from spikeinterface.sortingcomponents.peak_localization import localize_peaks

Expand All @@ -50,8 +50,8 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()):
location_keys = ["x", "y"]
locations = np.stack([peak_locations[k] for k in location_keys], axis=1)

clustering = hdbscan.hdbscan(locations, **params["hdbscan_kwargs"])
peak_labels = clustering[0]
clustering = HDBSCAN(**params["hdbscan_kwargs"]).fit(locations)
peak_labels = clustering.labels_

labels = np.unique(peak_labels)
labels = labels[labels >= 0]
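
Note: the availability check introduced in positions.py uses importlib.util.find_spec so that scikit-learn is only imported when present. A self-contained sketch of the same pattern, with a toy wrapper (not in the PR) showing how the flag guards the estimator:

import importlib.util

# Detect scikit-learn without importing it unconditionally; only pull in the
# HDBSCAN estimator when the package is actually installed.
sklearn_spec = importlib.util.find_spec("sklearn")
if sklearn_spec is not None:
    HAVE_SKLEARN = True
    from sklearn.cluster import HDBSCAN
else:
    HAVE_SKLEARN = False


def cluster_positions(locations, **hdbscan_kwargs):
    """Toy wrapper (not part of the PR) mirroring how PositionsClustering guards the import."""
    assert HAVE_SKLEARN, "position clustering needs scikit-learn to be installed"
    return HDBSCAN(**hdbscan_kwargs).fit(locations).labels_
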
@@ -6,12 +6,15 @@
import importlib
import numpy as np

hdbscan_spec = importlib.util.find_spec("hdbscan")
if hdbscan_spec is not None:
HAVE_HDBSCAN = True
import hdbscan

import importlib.util

sklearn_spec = importlib.util.find_spec("sklearn")
if sklearn_spec is not None:
HAVE_SKLEARN = True
from sklearn.cluster import HDBSCAN
else:
HAVE_HDBSCAN = False
HAVE_SKLEARN = False

from spikeinterface.core.basesorting import minimum_spike_dtype
from spikeinterface.core.waveform_tools import estimate_templates
@@ -35,7 +38,7 @@ class RandomProjectionClustering:
"clusterer": {
"min_cluster_size": 10,
"allow_single_cluster": True,
"core_dist_n_jobs": -1,
"n_jobs": -1,
"cluster_selection_method": "eom",
},
"waveforms": {"ms_before": 0.5, "ms_after": 1.5},
Expand All @@ -56,8 +59,7 @@ class RandomProjectionClustering:

@classmethod
def main_function(cls, recording, peaks, params, job_kwargs=dict()):
assert HAVE_HDBSCAN, "random projections clustering need hdbscan to be installed"

assert HAVE_SKLEARN, "random projection clustering needs scikit-learn to be installed"
fs = recording.get_sampling_frequency()
radius_um = params.get("radius_um", 30)
ms_before = params["waveforms"].get("ms_before", 0.5)
@@ -105,8 +107,8 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()):
recording, pipeline_nodes, job_kwargs=job_kwargs, job_name="extracting features", verbose=verbose
)

clustering = hdbscan.hdbscan(hdbscan_data, **params["clusterer"])
peak_labels = clustering[0]
clustering = HDBSCAN(**params["clusterer"]).fit(hdbscan_data)
peak_labels = clustering.labels_

labels = np.unique(peak_labels)
labels = labels[labels >= 0]
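
Note: as a closing illustration, a toy end-to-end run with the default clusterer parameters from RandomProjectionClustering above; the random data and the GaussianRandomProjection step are placeholders for the real feature-extraction pipeline.

import numpy as np
from sklearn.cluster import HDBSCAN
from sklearn.random_projection import GaussianRandomProjection

rng = np.random.default_rng(42)
# Placeholder "waveform features": two well-separated groups in 60 dimensions.
features = np.vstack([rng.normal(0.0, size=(200, 60)), rng.normal(5.0, size=(200, 60))])

# Reduce dimensionality with a random projection, then cluster with the defaults above.
hdbscan_data = GaussianRandomProjection(n_components=5, random_state=0).fit_transform(features)
clusterer_params = {
    "min_cluster_size": 10,
    "allow_single_cluster": True,
    "n_jobs": -1,
    "cluster_selection_method": "eom",
}
clustering = HDBSCAN(**clusterer_params).fit(hdbscan_data)
peak_labels = clustering.labels_

labels = np.unique(peak_labels)
labels = labels[labels >= 0]  # drop the noise label (-1), as the sorter does
print(f"found {labels.size} clusters")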