diff --git a/docs/source/_static/recipes/cleaning_cluster_population.py b/docs/source/_static/recipes/cleaning_cluster_population.py
new file mode 100644
index 00000000..ff3223c1
--- /dev/null
+++ b/docs/source/_static/recipes/cleaning_cluster_population.py
@@ -0,0 +1,79 @@
+"""Code from the Cleaning Cluster Population tutorial."""
+
+from pathlib import Path
+
+import numpy as np
+
+import dynsight
+from dynsight.data_processing import cleaning_cluster_population
+from dynsight.trajectory import Trj
+
+
+def main() -> None:
+    """Code from the Cleaning Cluster Population tutorial."""
+ # Loading an example trajectory
+ files_path = Path.cwd()
+ trj = Trj.init_from_xtc(
+ traj_file=files_path / "ice_water_ox.xtc",
+ topo_file=files_path / "ice_water_ox.gro",
+ )
+
+ # Computing TimeSOAP descriptor
+ _, tsoap = trj.get_timesoap(
+ r_cut=10,
+ n_max=8,
+ l_max=8,
+ n_jobs=4, # Adjust n_jobs according to your computer capabilities
+ )
+
+ # Applying Spatial Denoising
+ sliced_trj = trj.with_slice(slice(0, -1, 1))
+ sp_denoised_tsoap = tsoap.spatial_average(
+ trj=sliced_trj,
+ r_cut=10,
+ n_jobs=4, # Adjust n_jobs according to your computer capabilities
+ )
+
+ # Performing Onion Clustering on the descriptor computed
+ delta_t_list, n_clust, unclass_frac, labels = (
+ sp_denoised_tsoap.get_onion_analysis(
+ delta_t_min=2,
+ delta_t_num=20,
+ fig1_path=files_path / "denoised_onion_analysis.png",
+ fig2_path=files_path / "cluster_population.png",
+ )
+ )
+
+ # Saving Onion output in an array
+ onion_output = np.array([delta_t_list, n_clust, unclass_frac]).T
+
+ # Assigning clusters with population <5% to the unclassified environment
+ # (label=-1)
+ cleaned_labels = cleaning_cluster_population(
+ labels,
+ threshold=0.05,
+ assigned_env=-1,
+ )
+
+ # Updating the data and plotting the cleaned number of clusters and
+ # unclassified fraction.
+ # Since unchanged, windows can be copied from above.
+ delta_t_list = onion_output[:, 0]
+
+ n_clust = np.zeros(delta_t_list.shape[0], dtype=np.int64)
+ unclass_frac = np.zeros(delta_t_list.shape[0])
+ for i in range(delta_t_list.shape[0]):
+ n_clust[i] = np.unique(cleaned_labels[:, :, i]).size - 1
+ unclass_frac[i] = np.sum(cleaned_labels[:, :, i] == -1) / np.size(
+ cleaned_labels[:, :, i]
+ )
+
+ cleaned_onion_output = np.array([delta_t_list, n_clust, unclass_frac]).T
+
+ dynsight.onion.plot_smooth.plot_time_res_analysis(
+ files_path / "cleaned_onion_analysis.png", cleaned_onion_output
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/docs/source/_static/style.css b/docs/source/_static/style.css
index 16c570e0..b8e0d3a9 100644
--- a/docs/source/_static/style.css
+++ b/docs/source/_static/style.css
@@ -44,4 +44,3 @@
font-weight: 600;
text-align: center;
}
-
diff --git a/docs/source/_static/tutorials/cleaning_cluster_population/cleaned_onion_analysis.png b/docs/source/_static/tutorials/cleaning_cluster_population/cleaned_onion_analysis.png
new file mode 100644
index 00000000..74cb8668
Binary files /dev/null and b/docs/source/_static/tutorials/cleaning_cluster_population/cleaned_onion_analysis.png differ
diff --git a/docs/source/_static/tutorials/cleaning_cluster_population/cluster_population.png b/docs/source/_static/tutorials/cleaning_cluster_population/cluster_population.png
new file mode 100644
index 00000000..f9599ac7
Binary files /dev/null and b/docs/source/_static/tutorials/cleaning_cluster_population/cluster_population.png differ
diff --git a/docs/source/data_processing.rst b/docs/source/data_processing.rst
index 7963b687..0a7c3b3a 100644
--- a/docs/source/data_processing.rst
+++ b/docs/source/data_processing.rst
@@ -33,3 +33,11 @@ Classification
savereferences <_autosummary/dynsight.data_processing.savereferences>
getreferencesfromdataset <_autosummary/dynsight.data_processing.getreferencesfromdataset>
applyclassification <_autosummary/dynsight.data_processing.applyclassification>
+
+Clustering
+----------
+
+.. toctree::
+ :maxdepth: 1
+
+ cleaning_cluster_population <_autosummary/dynsight.data_processing.cleaning_cluster_population>
diff --git a/docs/source/logs.rst b/docs/source/logs.rst
index 57f030a6..0917b564 100644
--- a/docs/source/logs.rst
+++ b/docs/source/logs.rst
@@ -3,19 +3,20 @@ Logs
dynsight logging system.
-.. warning::
+.. note::
- A default instance of :class:`Logger` is **automatically created** when importing the ``dynsight`` package.
- This instance is available as ``dynsight.logs.logger``.
-
- You can configure it, for example to disable the automatic recording of datasets, using:
+    A default :class:`Logger` is used; it writes to file every step processed by ``dynsight`` (e.g. computing a descriptor, performing clustering, etc.).
+
+ An option of :class:`Logger` that automatically saves and records the dataset
+ can be activated after importing the ``dynsight`` package by using:
.. code-block:: python
import dynsight
- dynsight.logs.logger.configure(auto_recording=False)
+ dynsight.logs.logger.configure(auto_recording=True)
- You can also access all its attributes and methods described in the Logs page below.
+The automatically stored datasets can be extracted using the
+:meth:`Logger.extract_datasets` method. A complete list of available attributes and methods is provided below.
-----
Usage
diff --git a/docs/source/tutorials/cleaning_cluster_population.rst b/docs/source/tutorials/cleaning_cluster_population.rst
new file mode 100644
index 00000000..c0185ff2
--- /dev/null
+++ b/docs/source/tutorials/cleaning_cluster_population.rst
@@ -0,0 +1,215 @@
+Cleaning Cluster Population
+===========================
+
+Sometimes, clusters obtained with Onion Clustering analysis can be very small.
+To better interpret the results, it can be useful to remove them by assigning them to
+the cluster of the unclassified particles.
+This is achieved through the function :func:`.data_processing.cleaning_cluster_population`, which
+assigns the clusters below a certain population threshold to a specific cluster selected by the user.
+
+At the end of every section, you will find links to download the full ``python`` scripts
+and its relevant input files.
+
+As an example, we consider the output of the analysis computed in the `spatial denoising tutorial <./spatial_denoising.html>`_.
+Briefly, we consider the denoised ``TimeSOAP`` descriptor that can be obtained from:
+
+.. code-block:: python
+
+ import numpy as np
+ from pathlib import Path
+ import dynsight
+ from dynsight.trajectory import Trj
+ from dynsight.data_processing import cleaning_cluster_population
+
+ files_path = Path.cwd()
+ trj = Trj.init_from_xtc(
+ traj_file=files_path / "ice_water_ox.xtc",
+ topo_file=files_path / "ice_water_ox.gro",
+ )
+
+ _, tsoap = trj.get_timesoap(
+ r_cut=10,
+ n_max=8,
+ l_max=8,
+ n_jobs=4, # Adjust n_jobs according to your computer capabilities
+ )
+
+ sliced_trj = trj.with_slice(slice(0, -1, 1))
+ sp_denoised_tsoap = tsoap.spatial_average(
+ trj=sliced_trj,
+ r_cut=10,
+ n_jobs=4, # Adjust n_jobs according to your computer capabilities
+ )
+
+ delta_t_list, n_clust, unclass_frac, labels = sp_denoised_tsoap.get_onion_analysis(
+ delta_t_min=2,
+ delta_t_num=20,
+ fig1_path=files_path / "denoised_onion_analysis.png",
+ fig2_path=files_path / "cluster_population.png",
+ )
+
+.. testcode:: cleaning_cluster_population_test
+ :hide:
+
+ from pathlib import Path
+ from dynsight.trajectory import Trj
+
+ files_path = Path("source/_static/simulations")
+ trj = Trj.init_from_xtc(
+ traj_file=files_path / "ice_water_ox.xtc",
+ topo_file=files_path / "ice_water_ox.gro",
+ )
+
+ assert trj.n_atoms == 2048
+ assert trj.n_frames == 1001
+
+.. testcode:: cleaning_cluster_population_test
+ :hide:
+
+ import numpy as np
+
+ trj_test = trj.with_slice(slice(0, 2, 1))
+
+ expected_tests = Path("source/_static/tutorials/spatial_denoising/doctests")
+
+ soap_test = trj_test.get_soap(
+ r_cut=10,
+ n_max=8,
+ l_max=8,
+ n_jobs=1, # Adjust n_jobs according to your computer capabilities
+ )
+
+ _, tsoap_test = trj.get_timesoap(
+ soap_insight=soap_test,
+ )
+
+ assert tsoap_test.meta["r_cut"]==10
+ assert tsoap_test.meta["n_max"]==8
+ assert tsoap_test.meta["l_max"]==8
+
+ reference_tsoap = np.load(expected_tests / "test_tsoap.npy")
+ assert np.allclose(tsoap_test.dataset, reference_tsoap, atol=1e-6)
+
+ sliced_trj_test = trj.with_slice(slice(0, 1, 1))
+ sp_denoised_tsoap_test = tsoap_test.spatial_average(
+ trj=sliced_trj_test,
+ r_cut=10,
+ n_jobs=1,
+ )
+
+ reference_denoised_tsoap = np.load(expected_tests / "test_denoised_tsoap.npy")
+ assert np.allclose(sp_denoised_tsoap_test.dataset, reference_denoised_tsoap, atol=1e-6)
+
+For further details users should refer to `spatial denoising tutorial <./spatial_denoising.html>`_.
+
+Figure ``cluster_population.png`` shows the population of every cluster, each color is a different cluster and
+blue refers to the unclassified fraction:
+
+.. image:: ../_static/tutorials/cleaning_cluster_population/cluster_population.png
+ :scale: 15%
+ :align: center
+
+Before cleaning the cluster we have to save the output from the Onion analysis in an array:
+
+.. code-block:: python
+
+ onion_output = np.array([delta_t_list, n_clust, unclass_frac]).T
+
+The small clusters can be removed and assigned to the unclassified fraction using the
+function :func:`.data_processing.cleaning_cluster_population`:
+
+.. code-block:: python
+
+ cleaned_labels = cleaning_cluster_population(labels, threshold=0.05, assigned_env=-1)
+
+where ``cleaned_labels`` has the same dimensions as ``labels``. Now we can reproduce the plot with the number
+of clusters and the unclassified fraction after re-organizing the data. In particular,
+:func:`.onion.plot_smooth.plot_time_res_analysis`, which gives the plot that we want to obtain,
+requires an array with the list of the time windows, the number of clusters at every ∆t, and the unclassified
+fraction. Therefore, before plotting the graph, we need to create it by copying the list of time windows from
+the one given by the Onion analysis, and calculate the number of clusters and the unclassified fraction from the
+cleaned labels:
+
+.. code-block:: python
+
+ delta_t_list = onion_output[:, 0] # Since unchanged, windows can be copied from above.
+
+ n_clust = np.zeros(delta_t_list.shape[0],dtype=np.int64)
+ unclass_frac = np.zeros(delta_t_list.shape[0])
+ for i in range(delta_t_list.shape[0]):
+ n_clust[i] = np.unique(cleaned_labels[:, :, i]).size - 1
+ unclass_frac[i] = np.sum(cleaned_labels[:, :, i] == -1) / np.size(cleaned_labels[:, :, i])
+
+ cleaned_onion_output = np.array([delta_t_list, n_clust, unclass_frac]).T
+
+ dynsight.onion.plot_smooth.plot_time_res_analysis("cleaned_onion_analysis.png", cleaned_onion_output)
+
+.. testcode:: cleaning_cluster_population_test
+ :hide:
+
+ from dynsight.data_processing import cleaning_cluster_population
+
+ expected_tests = Path("../tests/data_processing/cluster/test_cluster")
+
+ labels = np.zeros((4, 10, 3), dtype=int)
+
+ labels[:, :, 0] = np.array(
+ [
+ [0, 0, 0, 1, 1, 1, 2, 2, 2, 2],
+ [0, 0, 0, 1, 1, 1, 2, 2, 2, 2],
+ [0, 0, 0, 1, 1, 1, 2, 2, 2, 3],
+ [0, 0, 0, 1, 1, 1, 2, 2, 2, 3],
+ ]
+ )
+
+ labels[:, :, 1] = np.array(
+ [
+ [0, 0, 0, 0, 0, 1, 1, 1, 1, 4],
+ [0, 0, 0, 0, 0, 1, 1, 1, 1, 4],
+ [0, 0, 0, 0, 0, 1, 1, 1, 4, 4],
+ [0, 0, 0, 0, 0, 1, 1, 1, 4, 4],
+ ]
+ )
+
+ labels[:, :, 2] = np.array(
+ [
+ [0, 0, 9, 9, 0, 1, 1, 1, 1, 9],
+ [0, 0, 0, 9, 0, 1, 1, 1, 1, 9],
+ [0, 0, 0, 9, 0, 1, 1, 1, 1, 9],
+ [0, 9, 0, 0, 0, 1, 1, 1, 1, 9],
+ ]
+ )
+
+ test_clean_pop = cleaning_cluster_population(
+ labels,
+ threshold=0.05,
+ assigned_env=99,
+ )
+
+ exp_clean_pop = np.load(expected_tests / "c0_clean_pop_th5_ass99_exNone.npy")
+ assert np.array_equal(exp_clean_pop, test_clean_pop)
+
+On the left are reported the results from Onion clustering on the denoised time-series (`denoised_onion_analysis.png`
+from `spatial denoising tutorial <./spatial_denoising.html>`_), while on the right is reported the figure
+``cleaned_onion_analysis.png``.
+
+.. image:: ../_static/tutorials/spatial_denoising/denoised_onion_analysis.png
+ :scale: 8%
+ :align: left
+
+.. image:: ../_static/tutorials/cleaning_cluster_population/cleaned_onion_analysis.png
+ :scale: 8%
+ :align: right
+
+.. raw:: html
+
+
+
+Full scripts and input files
+----------------------------
+
+.. raw:: html
+
+ ⬇️ Download the .gro file
+ ⬇️ Download the .xtc file
+ ⬇️ Download Python Script
diff --git a/docs/source/tutorials/spatial_denoising.rst b/docs/source/tutorials/spatial_denoising.rst
index 435d3636..6f9934d5 100644
--- a/docs/source/tutorials/spatial_denoising.rst
+++ b/docs/source/tutorials/spatial_denoising.rst
@@ -221,7 +221,7 @@ Full scripts and input files
assert soap_test.meta["l_max"]==8
assert np.allclose(soap_test.dataset, reference_soap, atol=1e-6)
- _, tsoap_test = trj.get_timesoap(
+ _, tsoap_test = trj_test.get_timesoap(
soap_insight=soap_test,
)
diff --git a/docs/source/tutorials_menu.rst b/docs/source/tutorials_menu.rst
index 2a55ce01..9fb76bdc 100644
--- a/docs/source/tutorials_menu.rst
+++ b/docs/source/tutorials_menu.rst
@@ -7,6 +7,8 @@ to help you get started with dynsight and explore its various features.
We are continuously working to expand this section with more tutorials,
so stay tuned for future updates!
+Fundamentals
+------------
.. grid:: 3
:gutter: 3
@@ -44,6 +46,33 @@ so stay tuned for future updates!
.. rubric:: More Soon...
:class: tutorial-card-title
+Advanced topics
+---------------
+.. grid:: 3
+ :gutter: 3
+
+ .. grid-item-card::
+ :link: tutorials/cleaning_cluster_population
+ :link-type: doc
+ :class-card: tutorial-card
+
+ .. image:: _static/tutorials/cleaning_cluster_population/cleaned_onion_analysis.png
+ :alt: Cleaning Cluster Population
+ :class: tutorial-card-img
+
+ .. rubric:: Cleaning Cluster Population
+ :class: tutorial-card-title
+
+ .. grid-item-card::
+ :class-card: tutorial-card
+
+ .. image:: _static/logo_dynsight.png
+ :alt: More Soon...
+ :class: tutorial-card-img
+
+ .. rubric:: More Soon...
+ :class: tutorial-card-title
+
.. toctree::
:hidden:
:maxdepth: 2
@@ -51,6 +80,7 @@ so stay tuned for future updates!
Getting Started
Spatial Denoising
+ Cleaning Cluster Population
Other example files
-------------------
diff --git a/examples/onion_analysis.py b/examples/onion_analysis.py
index a230a596..64a76e4e 100644
--- a/examples/onion_analysis.py
+++ b/examples/onion_analysis.py
@@ -98,7 +98,7 @@ def main() -> None:
coord_1d = Insight(coord_2d.dataset[:, :, 0])
# Test onion clustering on a wide range of time resolutions
- delta_t_list, n_clust, unclass_frac = coord_1d.get_onion_analysis(
+ delta_t_list, n_clust, unclass_frac, envs = coord_1d.get_onion_analysis(
fig1_path=data_path / "time-res_1d.png",
fig2_path=data_path / "pop_fracs_1d.png",
)
@@ -110,7 +110,7 @@ def main() -> None:
onion_results.plot_state_populations(data_path / "state_pops_1d.png")
# Test onion clustering on a wide range of time resolutions
- delta_t_list, n_clust, unclass_frac = coord_2d.get_onion_analysis(
+ delta_t_list, n_clust, unclass_frac, envs = coord_2d.get_onion_analysis(
fig1_path=data_path / "time-res_2d.png",
fig2_path=data_path / "pop_fracs_2d.png",
)
diff --git a/src/dynsight/_internal/data_processing/clusters.py b/src/dynsight/_internal/data_processing/clusters.py
new file mode 100644
index 00000000..8563c3f0
--- /dev/null
+++ b/src/dynsight/_internal/data_processing/clusters.py
@@ -0,0 +1,131 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+if TYPE_CHECKING:
+ from numpy.typing import NDArray
+
+from dynsight.logs import logger
+
+
+def cleaning_cluster_population(
+ labels: NDArray[np.int64],
+ threshold: float,
+ assigned_env: int,
+ excluded_env: int | list[int] | None = None,
+) -> NDArray[np.int64]:
+ """Replace labels of low-population clusters with a reference label.
+
+ This function identifies clusters whose relative population is below a
+ given threshold and reassigns their labels to a specified environment.
+ The population of each cluster is computed as the fraction of elements
+ belonging to that label, either for 2D inputs (`(n_atoms, n_frames)`)
+ or for 3D inputs (`(n_atoms, n_frames, n_dims)`, where n_dims can
+ correspond to the different ∆t from Onion clustering).
+ Clusters with a population smaller than or equal to the `threshold` are
+ considered negligible and are replaced by the `assigned_env` label,
+ while all other labels are preserved.
+    `excluded_env` gives the possibility to exclude some clusters from
+ the re-labeling.
+
+ Parameters:
+ labels:
+ NumPy array containing the label values.
+ The array should have dimensions corresponding
+ to either (n_atoms, n_frames) for 2D inputs,
+ or (n_atoms, n_frames, n_dims) for 3D inputs.
+ threshold:
+ A float value from 0 to 1 that defines the threshold at which
+ small clusters are neglected.
+ assigned_env:
+            The label to which smaller clusters are assigned; if the label
+            already exists, the extracted population will be merged with the
+            existing one.
+ excluded_env:
+ Clusters that need to be preserved even if their population is
+ under the threshold.
+
+ Returns:
+ A NumPy array of the same shape as the input descriptor array,
+ containing the updated labels. If the input
+ array is 2D (n_atoms, n_frames), the output will be a 2D array of
+ the same shape. Otherwise, if the input is 3D
+ (n_atoms, n_frames, n_dims), the output will also be a 3D array
+ of the same shape.
+        The labels of bigger clusters are unaffected by the re-labeling.
+
+ Raises:
+ ValueError:
+ If the input descriptor array does not have 2 or 3 dimensions,
+ an error is raised.
+
+ Example:
+
+ .. code-block:: python
+
+ from dynsight.data_processing import cleaning_cluster_population
+ import numpy as np
+
+ original_labels = np.load('labels_array.npy')
+
+ cleaned_labels = cleaning_cluster_population(
+ labels=original_labels,
+ threshold=0.1,
+ assigned_env=99,
+ )
+
+ In this example, the labels of the smaller clusters (lower than 10%)
+ from `original_labels` are replaced with label 99. The result is
+ stored in `cleaned_labels`, a NumPy array.
+ """
+ dimension = 2
+ if labels.ndim not in (dimension, dimension + 1):
+        msg = "labels must be 2D or 3D."
+ raise ValueError(msg)
+
+ if excluded_env is None:
+ excluded_arr: NDArray[np.int64] = np.array([], dtype=np.int64)
+ elif isinstance(excluded_env, int):
+ excluded_arr = np.array([excluded_env], dtype=np.int64)
+ else:
+ excluded_arr = np.array(excluded_env, dtype=np.int64)
+
+ missing = np.setdiff1d(excluded_arr, np.unique(labels))
+
+ if missing.size > 0:
+ logger.warning(f"Excluded value(s) not found in labels: {missing}")
+
+ if labels.ndim == dimension:
+ flat = labels.ravel()
+ unique, counts = np.unique(flat, return_counts=True)
+
+ populations = counts / flat.size
+ small_clusters = unique[populations <= threshold]
+
+ small_clusters = small_clusters[~np.isin(small_clusters, excluded_arr)]
+
+ new_labels = labels.copy()
+ if small_clusters.size > 0:
+ new_labels[np.isin(labels, small_clusters)] = assigned_env
+
+ elif labels.ndim == dimension + 1:
+ new_labels = labels.copy()
+ for i in range(labels.shape[2]):
+ lab = labels[:, :, i]
+ flat = lab.ravel()
+ unique, counts = np.unique(flat, return_counts=True)
+
+ populations = counts / flat.size
+ small_clusters = unique[populations <= threshold]
+
+ small_clusters = small_clusters[
+ ~np.isin(small_clusters, excluded_arr)
+ ]
+
+ if small_clusters.size > 0:
+ mask = np.isin(lab, small_clusters)
+ new_labels[:, :, i][mask] = assigned_env
+
+ return new_labels
diff --git a/src/dynsight/_internal/logs.py b/src/dynsight/_internal/logs.py
index b568bddb..736e037e 100644
--- a/src/dynsight/_internal/logs.py
+++ b/src/dynsight/_internal/logs.py
@@ -68,7 +68,7 @@ class Logger:
def __init__(
self,
*,
- auto_recording: bool = True,
+ auto_recording: bool = False,
) -> None:
self._log: list[str] = []
self._recorded_data: list[RecordedDataset] = []
@@ -79,7 +79,7 @@ def __init__(
def configure(
self,
*,
- auto_recording: bool = True,
+ auto_recording: bool = False,
) -> None:
"""Adjusts the runtime configuration of the logger.
@@ -107,6 +107,18 @@ def log(self, msg: str) -> None:
console.info(msg)
self._log.append(history_entry)
+ def warning(self, msg: str) -> None:
+ """Records an informational warning message to the log.
+
+ Parameters:
+ msg:
+ The message to record.
+ """
+ timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+ history_entry = f"[{timestamp}] {msg}"
+ console.warning(msg)
+ self._log.append(history_entry)
+
def save_history(self, filename: Path) -> None:
"""Saves the current log history to a text file.
diff --git a/src/dynsight/_internal/trajectory/insight.py b/src/dynsight/_internal/trajectory/insight.py
index bcc793af..30828e79 100644
--- a/src/dynsight/_internal/trajectory/insight.py
+++ b/src/dynsight/_internal/trajectory/insight.py
@@ -279,7 +279,12 @@ def get_onion_analysis(
bins: str | int = "auto",
number_of_sigmas: float = 3.0,
max_area_overlap: float = 0.8,
- ) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
+ ) -> tuple[
+ NDArray[np.float64],
+ NDArray[np.int64],
+ NDArray[np.float64],
+ NDArray[np.int64],
+ ]:
"""Perform the full onion time resolution analysis.
Note: this method uses the "onion smooth" functions (see documentation
@@ -310,6 +315,7 @@ def get_onion_analysis(
* delta_t_list: The list of ∆t used.
* n_clust: The number of clusters at each ∆t.
* unclass_frac: The fraction of unclassified data at each ∆t.
+ * list_of_labels: Labels at each ∆t.
"""
if delta_t_max is None:
delta_t_max = self.dataset.shape[1]
@@ -318,6 +324,10 @@ def get_onion_analysis(
)
n_clust = np.zeros(delta_t_list.size, dtype=int)
unclass_frac = np.zeros(delta_t_list.size)
+ list_of_labels = np.zeros(
+ (self.dataset.shape[0], self.dataset.shape[1], delta_t_list.size),
+ dtype=np.int64,
+ )
list_of_pop = []
for i, delta_t in enumerate(delta_t_list):
@@ -327,6 +337,7 @@ def get_onion_analysis(
number_of_sigmas,
max_area_overlap,
)
+ list_of_labels[:, :, i] = on_cl.labels
n_clust[i] = len(on_cl.state_list)
unclass_frac[i] = np.sum(on_cl.labels == -1) / self.dataset.size
list_of_pop.append(
@@ -358,4 +369,4 @@ def get_onion_analysis(
logger.log(
f"Performed full onion clustering analysis with args {attr_dict}."
)
- return delta_t_list, n_clust, unclass_frac
+ return delta_t_list, n_clust, unclass_frac, list_of_labels
diff --git a/src/dynsight/data_processing.py b/src/dynsight/data_processing.py
index db4eb959..329c855a 100644
--- a/src/dynsight/data_processing.py
+++ b/src/dynsight/data_processing.py
@@ -10,6 +10,9 @@
mergereferences,
savereferences,
)
+from dynsight._internal.data_processing.clusters import (
+ cleaning_cluster_population,
+)
from dynsight._internal.data_processing.distances import (
kernelsoap,
simplekernelsoap,
@@ -20,6 +23,7 @@
__all__ = [
"applyclassification",
+ "cleaning_cluster_population",
"createreferencesfromtrajectory",
"getdistancebetween",
"getdistancesfromref",
diff --git a/tests/data_processing/cluster/__init__.py b/tests/data_processing/cluster/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/data_processing/cluster/case_data.py b/tests/data_processing/cluster/case_data.py
new file mode 100644
index 00000000..62691758
--- /dev/null
+++ b/tests/data_processing/cluster/case_data.py
@@ -0,0 +1,12 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, slots=True)
+class CleanPopCaseData:
+ name: str
+ expected_clean_pop: str
+ threshold: float
+ assigned_env: int
+ excluded_env: int | list[int] | None = None
diff --git a/tests/data_processing/cluster/conftest.py b/tests/data_processing/cluster/conftest.py
new file mode 100644
index 00000000..dde87166
--- /dev/null
+++ b/tests/data_processing/cluster/conftest.py
@@ -0,0 +1,70 @@
+import pytest
+
+from tests.data_processing.cluster.case_data import CleanPopCaseData
+
+
+@pytest.fixture(
+ scope="session",
+ params=(
+ # Case 0: Cleaning 5%
+ lambda name: CleanPopCaseData(
+ expected_clean_pop="c0_clean_pop_th5_ass99_exNone.npy",
+ threshold=0.05,
+ assigned_env=99,
+ excluded_env=None,
+ name=name,
+ ),
+ # Case 1: Cleaning 15%
+ lambda name: CleanPopCaseData(
+ expected_clean_pop="c1_clean_pop_th15_ass99_exNone.npy",
+ threshold=0.15,
+ assigned_env=99,
+ excluded_env=None,
+ name=name,
+ ),
+ # Case 2: Cleaning 25%
+ lambda name: CleanPopCaseData(
+ expected_clean_pop="c2_clean_pop_th25_ass99_exNone.npy",
+ threshold=0.25,
+ assigned_env=99,
+ excluded_env=None,
+ name=name,
+ ),
+ # Case 3: Cleaning 25%, excluding 4
+ lambda name: CleanPopCaseData(
+ expected_clean_pop="c3_clean_pop_th25_ass99_ex4.npy",
+ threshold=0.25,
+ assigned_env=99,
+ excluded_env=4,
+ name=name,
+ ),
+ # Case 4: Cleaning 25%, excluding 3,4
+ lambda name: CleanPopCaseData(
+ expected_clean_pop="c4_clean_pop_th25_ass99_ex3-4.npy",
+ threshold=0.25,
+ assigned_env=99,
+ excluded_env=[3, 4],
+ name=name,
+ ),
+ # Case 5: Cleaning 25%, excluding 3,7
+ lambda name: CleanPopCaseData(
+ expected_clean_pop="c5_clean_pop_th25_ass99_ex3-7.npy",
+ threshold=0.25,
+ assigned_env=99,
+ excluded_env=[3, 7],
+ name=name,
+ ),
+ # Case 6: Cleaning 25%, excluding 3,4
+ lambda name: CleanPopCaseData(
+ expected_clean_pop="c6_clean_pop_th25_ass1_exNone.npy",
+ threshold=0.25,
+ assigned_env=1,
+ excluded_env=None,
+ name=name,
+ ),
+ ),
+)
+def case_data(request: pytest.FixtureRequest) -> CleanPopCaseData:
+ return request.param(
+ f"{request.fixturename}{request.param_index}", # type: ignore [attr-defined]
+ )
diff --git a/tests/data_processing/cluster/test_cluster.py b/tests/data_processing/cluster/test_cluster.py
new file mode 100644
index 00000000..14b420b7
--- /dev/null
+++ b/tests/data_processing/cluster/test_cluster.py
@@ -0,0 +1,61 @@
+"""Pytest for dynsight.data_processing.cleaning_cluster_population."""
+
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from dynsight.data_processing import cleaning_cluster_population
+
+from .case_data import CleanPopCaseData
+
+
+def test_clean_pop_noexcl(case_data: CleanPopCaseData) -> None:
+ original_dir = Path(__file__).resolve().parent
+ expected_clean_pop = (
+ original_dir / "test_cluster" / case_data.expected_clean_pop
+ )
+
+ labels = np.zeros((4, 10, 3), dtype=int)
+
+ labels[:, :, 0] = np.array(
+ [
+ [0, 0, 0, 1, 1, 1, 2, 2, 2, 2],
+ [0, 0, 0, 1, 1, 1, 2, 2, 2, 2],
+ [0, 0, 0, 1, 1, 1, 2, 2, 2, 3],
+ [0, 0, 0, 1, 1, 1, 2, 2, 2, 3],
+ ]
+ )
+
+ labels[:, :, 1] = np.array(
+ [
+ [0, 0, 0, 0, 0, 1, 1, 1, 1, 4],
+ [0, 0, 0, 0, 0, 1, 1, 1, 1, 4],
+ [0, 0, 0, 0, 0, 1, 1, 1, 4, 4],
+ [0, 0, 0, 0, 0, 1, 1, 1, 4, 4],
+ ]
+ )
+
+ labels[:, :, 2] = np.array(
+ [
+ [0, 0, 9, 9, 0, 1, 1, 1, 1, 9],
+ [0, 0, 0, 9, 0, 1, 1, 1, 1, 9],
+ [0, 0, 0, 9, 0, 1, 1, 1, 1, 9],
+ [0, 9, 0, 0, 0, 1, 1, 1, 1, 9],
+ ]
+ )
+
+ test_clean_pop = cleaning_cluster_population(
+ labels,
+ threshold=case_data.threshold,
+ assigned_env=case_data.assigned_env,
+ excluded_env=case_data.excluded_env,
+ )
+
+ if not expected_clean_pop.exists():
+ np.save(expected_clean_pop, test_clean_pop)
+ pytest.fail(
+ "Clean_pop test files were not present. They have been created."
+ )
+ exp_clean_pop = np.load(expected_clean_pop)
+ assert np.array_equal(exp_clean_pop, test_clean_pop)
diff --git a/tests/data_processing/cluster/test_cluster/c0_clean_pop_th5_ass99_exNone.npy b/tests/data_processing/cluster/test_cluster/c0_clean_pop_th5_ass99_exNone.npy
new file mode 100644
index 00000000..f5f8a9a5
Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c0_clean_pop_th5_ass99_exNone.npy differ
diff --git a/tests/data_processing/cluster/test_cluster/c1_clean_pop_th15_ass99_exNone.npy b/tests/data_processing/cluster/test_cluster/c1_clean_pop_th15_ass99_exNone.npy
new file mode 100644
index 00000000..983b0079
Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c1_clean_pop_th15_ass99_exNone.npy differ
diff --git a/tests/data_processing/cluster/test_cluster/c2_clean_pop_th25_ass99_exNone.npy b/tests/data_processing/cluster/test_cluster/c2_clean_pop_th25_ass99_exNone.npy
new file mode 100644
index 00000000..e9d0699e
Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c2_clean_pop_th25_ass99_exNone.npy differ
diff --git a/tests/data_processing/cluster/test_cluster/c3_clean_pop_th25_ass99_ex4.npy b/tests/data_processing/cluster/test_cluster/c3_clean_pop_th25_ass99_ex4.npy
new file mode 100644
index 00000000..e5f0e61d
Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c3_clean_pop_th25_ass99_ex4.npy differ
diff --git a/tests/data_processing/cluster/test_cluster/c4_clean_pop_th25_ass99_ex3-4.npy b/tests/data_processing/cluster/test_cluster/c4_clean_pop_th25_ass99_ex3-4.npy
new file mode 100644
index 00000000..231db420
Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c4_clean_pop_th25_ass99_ex3-4.npy differ
diff --git a/tests/data_processing/cluster/test_cluster/c5_clean_pop_th25_ass99_ex3-7.npy b/tests/data_processing/cluster/test_cluster/c5_clean_pop_th25_ass99_ex3-7.npy
new file mode 100644
index 00000000..48a3285e
Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c5_clean_pop_th25_ass99_ex3-7.npy differ
diff --git a/tests/data_processing/cluster/test_cluster/c6_clean_pop_th25_ass1_exNone.npy b/tests/data_processing/cluster/test_cluster/c6_clean_pop_th25_ass1_exNone.npy
new file mode 100644
index 00000000..d73149ea
Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c6_clean_pop_th25_ass1_exNone.npy differ
diff --git a/tests/logger/test_logger.py b/tests/logger/test_logger.py
index 88f3549e..37645964 100644
--- a/tests/logger/test_logger.py
+++ b/tests/logger/test_logger.py
@@ -9,6 +9,7 @@
def test_zip_arch() -> None:
"""Ensure the zip archive is created and contains the expected files."""
+ logger.configure(auto_recording=True)
logger.clear_history()
original_dir = Path(__file__).absolute().parent