3 changes: 1 addition & 2 deletions pyproject.toml
@@ -154,6 +154,7 @@ test = [
"pytest-dependency",
"pytest-cov",
"psutil",
"scikit-learn",

# preprocessing
"ibllib>=3.4.1;python_version>='3.10'",
@@ -167,7 +168,6 @@ test = [
# tridesclous2
"numba<0.61.0;python_version<'3.13'",
"numba>=0.61.0;python_version>='3.13'",
"hdbscan>=0.8.33", # Previous version had a broken wheel

# isosplit is normally needed for tridesclous2, but isosplit6 is only built up to python 3.11
# so let's wait for a new build of isosplit6
@@ -205,7 +205,6 @@ docs = [
# for notebooks in the gallery
"MEArec", # Use as an example
"pandas", # in the modules gallery comparison tutorial
"hdbscan>=0.8.33", # For sorters spykingcircus2 + tridesclous
"numba", # For many postprocessing functions
"networkx",
"skops", # For automated curation
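
Note: the standalone hdbscan package is dropped from both the test and docs extras in favor of scikit-learn, which ships its own HDBSCAN estimator since version 1.3. The pyproject change above does not pin a scikit-learn version; the check below is only a sketch of that implicit requirement, and the packaging dependency it uses is an assumption.

# Sketch only: guard against scikit-learn builds that predate sklearn.cluster.HDBSCAN
# (the estimator was added in scikit-learn 1.3).
import sklearn
from packaging.version import Version

if Version(sklearn.__version__) < Version("1.3"):
    raise ImportError(
        f"scikit-learn {sklearn.__version__} is installed, but sklearn.cluster.HDBSCAN requires >= 1.3"
    )

from sklearn.cluster import HDBSCAN  # noqa: E402
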
10 changes: 5 additions & 5 deletions src/spikeinterface/sorters/internal/simplesorter.py
@@ -39,7 +39,7 @@ class SimpleSorter(ComponentsBasedSorter):
"method": "hdbscan",
"min_cluster_size": 25,
"allow_single_cluster": True,
"core_dist_n_jobs": -1,
"n_jobs": -1,
"cluster_selection_method": "leaf",
},
# "cache_preprocessing": {"mode": None, "memory_limit": 0.5, "delete_cache": True},
@@ -58,7 +58,7 @@ class SimpleSorter(ComponentsBasedSorter):
"clustering": (
"A dictionary for specifying the clustering parameters: 'method' (to cluster) default: 'hdbscan', "
"'min_cluster_size' (min number of spikes per cluster) default: 25, 'allow_single_cluster' default: True, "
" 'core_dist_n_jobs' (parallelization) default: -1, cluster_selection_method (for hdbscan) default: leaf"
" 'n_jobs' (parallelization) default: -1, cluster_selection_method (for hdbscan) default: leaf"
),
"job_kwargs": "Spikeinterface job_kwargs (see job_kwargs documentation) default 'n_jobs': -1, 'chunk_duration': '1s'",
}
@@ -183,10 +183,10 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose):
clust_method = clust_params.pop("method", "hdbscan")

if clust_method == "hdbscan":
import hdbscan
from sklearn.cluster import HDBSCAN

out = hdbscan.hdbscan(features_flat, **clust_params)
peak_labels = out[0]
model = HDBSCAN(**clust_params).fit(features_flat)
peak_labels = model.labels_.copy()
elif clust_method == "hdbscan-gpu":
from cuml.cluster import HDBSCAN as hdbscan

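
Note: the change above is the pattern repeated throughout this PR: the functional hdbscan.hdbscan() call, which returns a tuple with the label array first, becomes a fitted sklearn estimator queried through attributes. A minimal side-by-side sketch; the feature array is a placeholder and the parameters are copied from the defaults above.

import numpy as np
from sklearn.cluster import HDBSCAN

features_flat = np.random.default_rng(0).normal(size=(500, 3))  # placeholder features
clust_params = {
    "min_cluster_size": 25,
    "allow_single_cluster": True,
    "n_jobs": -1,
    "cluster_selection_method": "leaf",
}

# Old API (hdbscan package), which took "core_dist_n_jobs" instead of "n_jobs":
#   out = hdbscan.hdbscan(features_flat, ...)
#   peak_labels = out[0]

# New API (scikit-learn >= 1.3): fit the estimator, then read its fitted attributes.
model = HDBSCAN(**clust_params).fit(features_flat)
peak_labels = model.labels_.copy()    # per-sample cluster labels, -1 means noise
probabilities = model.probabilities_  # per-sample cluster membership strength
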
20 changes: 10 additions & 10 deletions src/spikeinterface/sortingcomponents/clustering/cleaning_tools.py
@@ -15,7 +15,7 @@ def _split_waveforms(
wfs_and_noise, noise_size, n_components_by_channel, n_components, hdbscan_params, probability_thr, debug
):
import sklearn.decomposition
import hdbscan
from sklearn.cluster import HDBSCAN

valid_size = wfs_and_noise.shape[0] - noise_size

@@ -30,9 +30,9 @@
local_feature = pca.fit_transform(local_feature)

# hdbscan on pca
clustering = hdbscan.hdbscan(local_feature, **hdbscan_params)
local_labels_with_noise = clustering[0]
cluster_probability = clustering[2]
clustering = HDBSCAN(**hdbscan_params).fit(local_feature)
local_labels_with_noise = clustering.labels_
# sklearn's HDBSCAN has no per-cluster persistence (the old clustering[2]); its
# probabilities_ are per-sample, so low-confidence samples are flagged as noise directly
sample_probability = clustering.probabilities_
local_labels_with_noise[sample_probability <= probability_thr] = -1

@@ -95,7 +95,7 @@ def _split_waveforms_nested(
wfs_and_noise, noise_size, nbefore, n_components_by_channel, n_components, hdbscan_params, probability_thr, debug
):
import sklearn.decomposition
import hdbscan
from sklearn.cluster import HDBSCAN

valid_size = wfs_and_noise.shape[0] - noise_size

@@ -123,10 +123,10 @@
# ~ local_feature = pca.fit_transform(local_feature)

# hdbscan on pca
clustering = hdbscan.hdbscan(local_feature, **hdbscan_params)
active_labels_with_noise = clustering[0]
cluster_probability = clustering[2]
(persistent_clusters,) = np.nonzero(clustering[2] > probability_thr)
clustering = HDBSCAN(**hdbscan_params).fit(local_feature)
active_labels_with_noise = clustering.labels_
# sklearn's HDBSCAN has no per-cluster persistence (the old clustering[2]); its
# probabilities_ are per-sample, so low-confidence samples are flagged as noise directly
sample_probability = clustering.probabilities_
active_labels_with_noise[sample_probability <= probability_thr] = -1

active_labels = active_labels_with_noise[active_ind < valid_size]
@@ -233,7 +233,7 @@ def auto_split_clustering(
"""

import sklearn.decomposition
import hdbscan
from sklearn.cluster import HDBSCAN

split_peak_labels = -1 * np.ones(peak_labels.size, dtype=np.int64)
nb_clusters = 0
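
Note: one caveat for this file is that the old hdbscan.hdbscan() tuple exposed a per-cluster persistence array at index 2, while sklearn's estimator only exposes per-sample probabilities_, so persistence-based filtering has no direct equivalent. A small illustrative sketch of the attributes that are available; the make_blobs data and the per-cluster proxy are assumptions, not part of the PR.

import numpy as np
from sklearn.cluster import HDBSCAN
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=3, random_state=0)
clustering = HDBSCAN(min_cluster_size=20).fit(X)

assert clustering.labels_.shape == (X.shape[0],)         # one label per sample, -1 = noise
assert clustering.probabilities_.shape == (X.shape[0],)  # one membership strength per sample

# No per-cluster persistence attribute exists in sklearn; if a per-cluster score is
# needed, a rough proxy can be derived from the per-sample probabilities, e.g.:
labels = clustering.labels_
persistence_proxy = {
    k: clustering.probabilities_[labels == k].mean() for k in np.unique(labels) if k >= 0
}
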
@@ -40,7 +40,7 @@ class GraphClustering:
"clusterer": dict(
method="sknetwork-louvain",
# min_samples=1,
# core_dist_n_jobs=-1,
# n_jobs=-1,
# min_cluster_size=20,
# cluster_selection_method='leaf',
# allow_single_cluster=True,
@@ -165,7 +165,7 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()):
_remove_small_cluster(peak_labels, min_size=1)

elif clustering_method == "hdbscan":
from hdbscan import HDBSCAN
from sklearn.cluster import HDBSCAN
import scipy.sparse

n_graph, connected_labels = scipy.sparse.csgraph.connected_components(distances, directed=False)
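
Note: besides the import path, the only keyword that changes name between the two libraries in this PR is the parallelization argument, core_dist_n_jobs (hdbscan package) becoming n_jobs (sklearn), as the commented-out defaults above show. A hedged sketch of a translation helper for legacy configs follows; the helper itself is hypothetical and not part of the PR.

def translate_hdbscan_kwargs(kwargs: dict) -> dict:
    """Map legacy hdbscan-package keyword names to sklearn.cluster.HDBSCAN ones.

    Hypothetical helper: only the rename observed in this diff
    (core_dist_n_jobs -> n_jobs) is handled; other keys pass through unchanged.
    """
    translated = dict(kwargs)
    if "core_dist_n_jobs" in translated:
        translated["n_jobs"] = translated.pop("core_dist_n_jobs")
    return translated


# Example: an old-style clusterer config keeps working against the sklearn estimator.
old_style = {"min_cluster_size": 20, "core_dist_n_jobs": -1, "cluster_selection_method": "leaf"}
new_style = translate_hdbscan_kwargs(old_style)
# new_style == {"min_cluster_size": 20, "cluster_selection_method": "leaf", "n_jobs": -1}
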
@@ -291,9 +291,9 @@ def split(
tsvd = None

if clusterer_method == "hdbscan":
from hdbscan import HDBSCAN
from sklearn.cluster import HDBSCAN

clustering_kwargs.update(core_dist_n_jobs=1)
clustering_kwargs.update(n_jobs=1)
clust = HDBSCAN(**clustering_kwargs)
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
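
Note: unlike the other call sites, the split path above pins n_jobs=1 before fitting; a reasonable reading is that splitting already runs inside parallel workers, so the estimator itself stays single-threaded. A minimal sketch of that pattern; the feature array and kwargs are placeholders.

import warnings

import numpy as np
from sklearn.cluster import HDBSCAN

final_features = np.random.default_rng(3).normal(size=(200, 4))  # placeholder features
clustering_kwargs = {"min_cluster_size": 10, "allow_single_cluster": True}

# Keep the estimator single-threaded; the surrounding split loop is the parallel layer.
clustering_kwargs.update(n_jobs=1)
clust = HDBSCAN(**clustering_kwargs)
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    clust.fit(final_features)
possible_labels = clust.labels_
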
22 changes: 11 additions & 11 deletions src/spikeinterface/sortingcomponents/clustering/positions.py
@@ -4,13 +4,14 @@
from pathlib import Path

import numpy as np
import importlib.util

try:
import hdbscan

HAVE_HDBSCAN = True
except:
HAVE_HDBSCAN = False
sklearn_spec = importlib.util.find_spec("sklearn")
if sklearn_spec is not None:
HAVE_SKLEARN = True
from sklearn.cluster import HDBSCAN
else:
HAVE_SKLEARN = False


class PositionsClustering:
@@ -21,7 +22,7 @@ class PositionsClustering:
_default_params = {
"peak_locations": None,
"peak_localization_kwargs": {"method": "center_of_mass"},
"hdbscan_kwargs": {"min_cluster_size": 20, "allow_single_cluster": True, "core_dist_n_jobs": -1},
"hdbscan_kwargs": {"min_cluster_size": 20, "allow_single_cluster": True, "n_jobs": -1},
}

name = "hdbscan_positions"
Expand All @@ -36,8 +37,7 @@ class PositionsClustering:

@classmethod
def main_function(cls, recording, peaks, params, job_kwargs=dict()):
assert HAVE_HDBSCAN, "position clustering need hdbscan to be installed"

assert HAVE_SKLEARN, "position clustering needs scikit-learn to be installed"
if params["peak_locations"] is None:
from spikeinterface.sortingcomponents.peak_localization import localize_peaks

Expand All @@ -50,8 +50,8 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()):
location_keys = ["x", "y"]
locations = np.stack([peak_locations[k] for k in location_keys], axis=1)

clustering = hdbscan.hdbscan(locations, **params["hdbscan_kwargs"])
peak_labels = clustering[0]
clustering = HDBSCAN(**params["hdbscan_kwargs"]).fit(locations)
peak_labels = clustering.labels_

labels = np.unique(peak_labels)
labels = labels[labels >= 0]
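
Note: the availability check introduced in positions.py uses importlib.util.find_spec so that scikit-learn is only imported when present. A self-contained sketch of the same pattern, with a toy wrapper (not in the PR) showing how the flag guards the estimator:

import importlib.util

# Detect scikit-learn without importing it unconditionally; only pull in the
# HDBSCAN estimator when the package is actually installed.
sklearn_spec = importlib.util.find_spec("sklearn")
if sklearn_spec is not None:
    HAVE_SKLEARN = True
    from sklearn.cluster import HDBSCAN
else:
    HAVE_SKLEARN = False


def cluster_positions(locations, **hdbscan_kwargs):
    """Toy wrapper (not part of the PR) mirroring how PositionsClustering guards the import."""
    assert HAVE_SKLEARN, "position clustering needs scikit-learn to be installed"
    return HDBSCAN(**hdbscan_kwargs).fit(locations).labels_
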
@@ -6,12 +6,15 @@
import importlib
import numpy as np

hdbscan_spec = importlib.util.find_spec("hdbscan")
if hdbscan_spec is not None:
HAVE_HDBSCAN = True
import hdbscan

import importlib.util

sklearn_spec = importlib.util.find_spec("sklearn")
if sklearn_spec is not None:
HAVE_SKLEARN = True
from sklearn.cluster import HDBSCAN
else:
HAVE_HDBSCAN = False
HAVE_SKLEARN = False

from spikeinterface.core.basesorting import minimum_spike_dtype
from spikeinterface.core.waveform_tools import estimate_templates
@@ -35,7 +38,7 @@ class RandomProjectionClustering:
"clusterer": {
"min_cluster_size": 10,
"allow_single_cluster": True,
"core_dist_n_jobs": -1,
"n_jobs": -1,
"cluster_selection_method": "eom",
},
"waveforms": {"ms_before": 0.5, "ms_after": 1.5},
Expand All @@ -56,8 +59,7 @@ class RandomProjectionClustering:

@classmethod
def main_function(cls, recording, peaks, params, job_kwargs=dict()):
assert HAVE_HDBSCAN, "random projections clustering need hdbscan to be installed"

assert HAVE_SKLEARN, "random projection clustering needs scikit-learn to be installed"
fs = recording.get_sampling_frequency()
radius_um = params.get("radius_um", 30)
ms_before = params["waveforms"].get("ms_before", 0.5)
@@ -105,8 +107,8 @@ def main_function(cls, recording, peaks, params, job_kwargs=dict()):
recording, pipeline_nodes, job_kwargs=job_kwargs, job_name="extracting features", verbose=verbose
)

clustering = hdbscan.hdbscan(hdbscan_data, **params["clusterer"])
peak_labels = clustering[0]
clustering = HDBSCAN(**params["clusterer"]).fit(hdbscan_data)
peak_labels = clustering.labels_

labels = np.unique(peak_labels)
labels = labels[labels >= 0]
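
Note: as a closing illustration, a toy end-to-end run with the default clusterer parameters from RandomProjectionClustering above; the random data and the GaussianRandomProjection step are placeholders for the real feature-extraction pipeline.

import numpy as np
from sklearn.cluster import HDBSCAN
from sklearn.random_projection import GaussianRandomProjection

rng = np.random.default_rng(42)
# Placeholder "waveform features": two well-separated groups in 60 dimensions.
features = np.vstack([rng.normal(0.0, size=(200, 60)), rng.normal(5.0, size=(200, 60))])

# Reduce dimensionality with a random projection, then cluster with the defaults above.
hdbscan_data = GaussianRandomProjection(n_components=5, random_state=0).fit_transform(features)
clusterer_params = {
    "min_cluster_size": 10,
    "allow_single_cluster": True,
    "n_jobs": -1,
    "cluster_selection_method": "eom",
}
clustering = HDBSCAN(**clusterer_params).fit(hdbscan_data)
peak_labels = clustering.labels_

labels = np.unique(peak_labels)
labels = labels[labels >= 0]  # drop the noise label (-1), as the sorter does
print(f"found {labels.size} clusters")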