diff --git a/docs/source/_static/recipes/cleaning_cluster_population.py b/docs/source/_static/recipes/cleaning_cluster_population.py new file mode 100644 index 00000000..e3c932dd --- /dev/null +++ b/docs/source/_static/recipes/cleaning_cluster_population.py @@ -0,0 +1,78 @@ +"""Code from the Spatial Denoising tutorial.""" + +from pathlib import Path + +import numpy as np + +import dynsight +from dynsight.data_processing import cleaning_cluster_population +from dynsight.trajectory import Trj + + +def main() -> None: + """Code from the Spatial Denoising tutorial.""" + # Loading an example trajectory + files_path = Path("INPUT") + trj = Trj.init_from_xtc( + traj_file=files_path / "ice_water_ox.xtc", + topo_file=files_path / "ice_water_ox.gro", + ) + + # Computing TimeSOAP descriptor + _, tsoap = trj.get_timesoap( + r_cut=10, + n_max=8, + l_max=8, + n_jobs=4, # Adjust n_jobs according to your computer capabilities + ) + + # Applying Spatial Denoising + sliced_trj = trj.with_slice(slice(0, -1, 1)) + sp_denoised_tsoap = tsoap.spatial_average( + trj=sliced_trj, + r_cut=10, + n_jobs=4, # Adjust n_jobs according to your computer capabilities + ) + + # Performing Onion Clustering on the descriptor computed + delta_t_list, n_clust, unclass_frac, labels = ( + sp_denoised_tsoap.get_onion_analysis( + delta_t_min=2, + delta_t_num=20, + fig1_path=files_path / "denoised_onion_analysis.png", + fig2_path=files_path / "cluster_population.png", + ) + ) + + # Saving Onion output in an array + onion_output = np.array([delta_t_list, n_clust, unclass_frac]).T + + # Assigning clusters with population <5% to the unclassified environment + # (label=-1) + cleaned_labels = cleaning_cluster_population( + labels, threshold=0.05, assigned_env=-1 + ) + + # Updating the data and plotting the cleaned number of clusters and + # unclassified fraction + delta_t_list = onion_output[ + :, 0 + ] # since unchanged, windows can be copied from above + + n_clust = np.zeros(delta_t_list.shape[0], dtype=np.int64) + unclass_frac = np.zeros(delta_t_list.shape[0]) + for i in range(delta_t_list.shape[0]): + n_clust[i] = np.unique(cleaned_labels[:, :, i]).size - 1 + unclass_frac[i] = np.sum(cleaned_labels[:, :, i] == -1) / np.size( + cleaned_labels[:, :, i] + ) + + cleaned_onion_output = np.array([delta_t_list, n_clust, unclass_frac]).T + + dynsight.onion.plot_smooth.plot_time_res_analysis( + files_path / "cleaned_onion_analysis.png", cleaned_onion_output + ) + + +if __name__ == "__main__": + main() diff --git a/docs/source/_static/tutorials/tips_and_tricks/cleaned_onion_analysis.png b/docs/source/_static/tutorials/tips_and_tricks/cleaned_onion_analysis.png new file mode 100644 index 00000000..74cb8668 Binary files /dev/null and b/docs/source/_static/tutorials/tips_and_tricks/cleaned_onion_analysis.png differ diff --git a/docs/source/_static/tutorials/tips_and_tricks/cluster_population.png b/docs/source/_static/tutorials/tips_and_tricks/cluster_population.png new file mode 100644 index 00000000..f9599ac7 Binary files /dev/null and b/docs/source/_static/tutorials/tips_and_tricks/cluster_population.png differ diff --git a/docs/source/data_processing.rst b/docs/source/data_processing.rst index 7963b687..0a7c3b3a 100644 --- a/docs/source/data_processing.rst +++ b/docs/source/data_processing.rst @@ -33,3 +33,11 @@ Classification savereferences <_autosummary/dynsight.data_processing.savereferences> getreferencesfromdataset <_autosummary/dynsight.data_processing.getreferencesfromdataset> applyclassification 
<_autosummary/dynsight.data_processing.applyclassification> + +Clustering +---------- + +.. toctree:: + :maxdepth: 1 + + cleaning_cluster_population <_autosummary/dynsight.data_processing.cleaning_cluster_population> diff --git a/docs/source/logs.rst b/docs/source/logs.rst index 57f030a6..d9092c1f 100644 --- a/docs/source/logs.rst +++ b/docs/source/logs.rst @@ -3,17 +3,17 @@ Logs dynsight logging system. -.. warning:: +.. note:: - A default instance of :class:`Logger` is **automatically created** when importing the ``dynsight`` package. - This instance is available as ``dynsight.logs.logger``. - - You can configure it, for example to disable the automatic recording of datasets, using: + A default :class:`Logger` is instantiated to keep the user updated on the ongoing computational steps. + + An option of :class:`Logger` that automatically saves and records the datasets + can be activated after importing the ``dynsight`` package by using: .. code-block:: python import dynsight - dynsight.logs.logger.configure(auto_recording=False) + dynsight.logs.logger.configure(auto_recording=True) You can also access all its attributes and methods described in the Logs page below. diff --git a/docs/source/tutorials/spatial_denoising.rst b/docs/source/tutorials/spatial_denoising.rst index 435d3636..6f9934d5 100644 --- a/docs/source/tutorials/spatial_denoising.rst +++ b/docs/source/tutorials/spatial_denoising.rst @@ -221,7 +221,7 @@ Full scripts and input files assert soap_test.meta["l_max"]==8 assert np.allclose(soap_test.dataset, reference_soap, atol=1e-6) - _, tsoap_test = trj.get_timesoap( + _, tsoap_test = trj_test.get_timesoap( soap_insight=soap_test, ) diff --git a/docs/source/tutorials/tips_and_tricks.rst b/docs/source/tutorials/tips_and_tricks.rst new file mode 100644 index 00000000..1c58f62a --- /dev/null +++ b/docs/source/tutorials/tips_and_tricks.rst @@ -0,0 +1,219 @@ +Tips and Tricks +=============== + +Welcome to the Tips and Tricks section of the dynsight platform. +Here we will show you additional hints that can help you when analyzing your data. + + +At the end of every section, you will find links to download the full ``python`` scripts +and the relevant input files. + +Cleaning cluster population +--------------------------- + +Sometimes, clusters obtained with Onion Clustering analysis can be very small. +To better interpret the results, it can be useful to remove them by assigning them to +the cluster of the unclassified particles. +This is achieved through the function :func:`.data_processing.cleaning_cluster_population`, which +assigns clusters below a certain population threshold to a specific cluster selected by the user. + +As an example, we consider the output of the analysis computed in the `spatial denoising tutorial <./spatial_denoising.html>`_. +Briefly, we consider the denoised ``TimeSOAP`` descriptor that can be obtained from: + +.. 
code-block:: python + + from pathlib import Path + from dynsight.trajectory import Trj + + files_path = Path("source/_static/simulations") + trj = Trj.init_from_xtc( + traj_file=files_path / "ice_water_ox.xtc", + topo_file=files_path / "ice_water_ox.gro", + ) + + _, tsoap = trj.get_timesoap( + r_cut=10, + n_max=8, + l_max=8, + n_jobs=4, # Adjust n_jobs according to your computer capabilities + ) + + sliced_trj = trj.with_slice(slice(0, -1, 1)) + sp_denoised_tsoap = tsoap.spatial_average( + trj=sliced_trj, + r_cut=10, + n_jobs=4, # Adjust n_jobs according to your computer capabilities + ) + + delta_t_list, n_clust, unclass_frac, labels = sp_denoised_tsoap.get_onion_analysis( + delta_t_min=2, + delta_t_num=20, + fig1_path=files_path / "denoised_onion_analysis.png", + fig2_path=files_path / "cluster_population.png", + ) + +.. testcode:: tips_and_tricks_test + :hide: + + from pathlib import Path + from dynsight.trajectory import Trj + + files_path = Path("source/_static/simulations") + trj = Trj.init_from_xtc( + traj_file=files_path / "ice_water_ox.xtc", + topo_file=files_path / "ice_water_ox.gro", + ) + + assert trj.n_atoms == 2048 + assert trj.n_frames == 1001 + +.. testcode:: tips_and_tricks_test + :hide: + + import numpy as np + + trj_test = trj.with_slice(slice(0, 2, 1)) + + expected_tests = Path("source/_static/tutorials/spatial_denoising/doctests") + + soap_test = trj_test.get_soap( + r_cut=10, + n_max=8, + l_max=8, + n_jobs=1, # Adjust n_jobs according to your computer capabilities + ) + + _, tsoap_test = trj_test.get_timesoap( + soap_insight=soap_test, + ) + + assert tsoap_test.meta["r_cut"]==10 + assert tsoap_test.meta["n_max"]==8 + assert tsoap_test.meta["l_max"]==8 + + reference_tsoap = np.load(expected_tests / "test_tsoap.npy") + assert np.allclose(tsoap_test.dataset, reference_tsoap, atol=1e-6) + + sliced_trj_test = trj.with_slice(slice(0, 1, 1)) + sp_denoised_tsoap_test = tsoap_test.spatial_average( + trj=sliced_trj_test, + r_cut=10, + n_jobs=1, + ) + + reference_denoised_tsoap = np.load(expected_tests / "test_denoised_tsoap.npy") + assert np.allclose(sp_denoised_tsoap_test.dataset, reference_denoised_tsoap, atol=1e-6) + +For further details, users should refer to the `spatial denoising tutorial <./spatial_denoising.html>`_. + +Figure `cluster_population.png` shows the population of every cluster, where blue refers to the unclassified fraction: + +.. image:: ../_static/tutorials/tips_and_tricks/cluster_population.png + :scale: 15% + :align: center + +Before cleaning the clusters, we have to save the output from the Onion analysis in an array: + +.. code-block:: python + + import numpy as np + + onion_output = np.array([delta_t_list, n_clust, unclass_frac]).T + +The small clusters can be removed and assigned to the unclassified fraction using the +function :func:`.data_processing.cleaning_cluster_population`: + +.. code-block:: python + + from dynsight.data_processing import cleaning_cluster_population + + cleaned_labels = cleaning_cluster_population(labels, threshold=0.05, assigned_env=-1) + +where `cleaned_labels` has the same dimensions as `labels`. Now we can reproduce the plot with the number +of clusters and the unclassified fraction after re-organizing the data: + +.. 
code-block:: python + + import dynsight + + delta_t_list = onion_output[:, 0] # since unchanged, windows can be copied from above + + n_clust = np.zeros(delta_t_list.shape[0], dtype=np.int64) + unclass_frac = np.zeros(delta_t_list.shape[0]) + for i in range(delta_t_list.shape[0]): + n_clust[i] = np.unique(cleaned_labels[:, :, i]).size - 1 + unclass_frac[i] = np.sum(cleaned_labels[:, :, i] == -1) / np.size(cleaned_labels[:, :, i]) + + cleaned_onion_output = np.array([delta_t_list, n_clust, unclass_frac]).T + + dynsight.onion.plot_smooth.plot_time_res_analysis("cleaned_onion_analysis.png", cleaned_onion_output) + +.. testcode:: tips_and_tricks_test + :hide: + + from dynsight.data_processing import cleaning_cluster_population + + expected_tests = Path("../tests/data_processing/cluster/test_cluster") + + labels = np.zeros((4, 10, 3), dtype=int) + + labels[:, :, 0] = np.array( + [ + [0, 0, 0, 1, 1, 1, 2, 2, 2, 2], + [0, 0, 0, 1, 1, 1, 2, 2, 2, 2], + [0, 0, 0, 1, 1, 1, 2, 2, 2, 3], + [0, 0, 0, 1, 1, 1, 2, 2, 2, 3], + ] + ) + + labels[:, :, 1] = np.array( + [ + [0, 0, 0, 0, 0, 1, 1, 1, 1, 4], + [0, 0, 0, 0, 0, 1, 1, 1, 1, 4], + [0, 0, 0, 0, 0, 1, 1, 1, 4, 4], + [0, 0, 0, 0, 0, 1, 1, 1, 4, 4], + ] + ) + + labels[:, :, 2] = np.array( + [ + [0, 0, 9, 9, 0, 1, 1, 1, 1, 9], + [0, 0, 0, 9, 0, 1, 1, 1, 1, 9], + [0, 0, 0, 9, 0, 1, 1, 1, 1, 9], + [0, 9, 0, 0, 0, 1, 1, 1, 1, 9], + ] + ) + + test_clean_pop = cleaning_cluster_population( + labels, + threshold=0.05, + assigned_env=99, + ) + + exp_clean_pop = np.load(expected_tests / "c0_clean_pop_th5_ass99_exNone.npy") + assert np.array_equal(exp_clean_pop, test_clean_pop) + +On the left are reported the results from Onion clustering on the denoised time-series (`denoised_onion_analysis.png` +from the `spatial denoising tutorial <./spatial_denoising.html>`_), while on the right is reported the figure +`cleaned_onion_analysis.png`: + +.. image:: ../_static/tutorials/spatial_denoising/denoised_onion_analysis.png + :scale: 8% + :align: left + +.. image:: ../_static/tutorials/tips_and_tricks/cleaned_onion_analysis.png + :scale: 8% + :align: right + +.. raw:: html + +
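+If some clusters should be preserved even when their population falls below the threshold,
+the optional ``excluded_env`` argument of :func:`.data_processing.cleaning_cluster_population`
+keeps them out of the re-labeling. The snippet below is a minimal sketch on the same ``labels``
+array as above; the excluded label (``2``) is purely illustrative:
+
+.. code-block:: python
+
+    cleaned_labels = cleaning_cluster_population(
+        labels,
+        threshold=0.05,
+        assigned_env=-1,
+        excluded_env=2,  # illustrative: this cluster is kept regardless of its population
+    )
+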
+ +Full scripts and input files +---------------------------- + +.. raw:: html + + ⬇️ Download the .gro file
+ ⬇️ Download the .xtc file
+ ⬇️ Download Python Script diff --git a/docs/source/tutorials_menu.rst b/docs/source/tutorials_menu.rst index 2a55ce01..72631381 100644 --- a/docs/source/tutorials_menu.rst +++ b/docs/source/tutorials_menu.rst @@ -34,6 +34,18 @@ so stay tuned for future updates! .. rubric:: Spatial Denoising :class: tutorial-card-title + .. grid-item-card:: + :link: tutorials/tips_and_tricks + :link-type: doc + :class-card: tutorial-card + + .. image:: _static/tutorials/spatial_denoising/denoised_onion_analysis.png + :alt: Tips and Tricks + :class: tutorial-card-img + + .. rubric:: Tips and Tricks + :class: tutorial-card-title + .. grid-item-card:: :class-card: tutorial-card @@ -51,6 +63,7 @@ so stay tuned for future updates! Getting Started Spatial Denoising + Tips and Tricks Other example files ------------------- diff --git a/examples/onion_analysis.py b/examples/onion_analysis.py index a230a596..64a76e4e 100644 --- a/examples/onion_analysis.py +++ b/examples/onion_analysis.py @@ -98,7 +98,7 @@ def main() -> None: coord_1d = Insight(coord_2d.dataset[:, :, 0]) # Test onion clustering on a wide range of time resolutions - delta_t_list, n_clust, unclass_frac = coord_1d.get_onion_analysis( + delta_t_list, n_clust, unclass_frac, envs = coord_1d.get_onion_analysis( fig1_path=data_path / "time-res_1d.png", fig2_path=data_path / "pop_fracs_1d.png", ) @@ -110,7 +110,7 @@ def main() -> None: onion_results.plot_state_populations(data_path / "state_pops_1d.png") # Test onion clustering on a wide range of time resolutions - delta_t_list, n_clust, unclass_frac = coord_2d.get_onion_analysis( + delta_t_list, n_clust, unclass_frac, envs = coord_2d.get_onion_analysis( fig1_path=data_path / "time-res_2d.png", fig2_path=data_path / "pop_fracs_2d.png", ) diff --git a/src/dynsight/_internal/data_processing/clusters.py b/src/dynsight/_internal/data_processing/clusters.py new file mode 100644 index 00000000..b61b213d --- /dev/null +++ b/src/dynsight/_internal/data_processing/clusters.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from numpy.typing import NDArray + +from dynsight.logs import logger + + +def cleaning_cluster_population( + labels: NDArray[np.int64], + threshold: float, + assigned_env: int, + excluded_env: int | list[int] | None = None, +) -> NDArray[np.int64]: + """Replace labels of low-population clusters with a reference label. + + This function identifies clusters whose relative population is at or below a + given threshold and reassigns their labels to a specified environment. + The population of each cluster is computed as the fraction of elements + belonging to that label, either for 2D inputs (`(n_atoms, n_frames)`) + or for 3D inputs (`(n_atoms, n_frames, n_dims)`, where n_dims can + correspond to the different ∆t from Onion clustering). + Clusters with a population smaller than or equal to the `threshold` are + considered negligible and are replaced by the `assigned_env` label, + while all other labels are preserved. + `excluded_env` gives the possibility to exclude specific clusters from + the re-labeling. + + Parameters: + labels: + NumPy array containing the label values. + The array should have dimensions corresponding + to either (n_atoms, n_frames) for 2D inputs, + or (n_atoms, n_frames, n_dims) for 3D inputs. + threshold: + A float value from 0 to 1 that defines the threshold at or below which + small clusters are neglected. 
+ assigned_env: + The label to which smaller clusters are assigned. If the label + already exists, the reassigned population is merged with the + existing one. + excluded_env: + Clusters that need to be preserved even if their population is + under the threshold. + + Returns: + A NumPy array of the same shape as the input labels array, + containing the updated labels. If the input + array is 2D (n_atoms, n_frames), the output will be a 2D array of + the same shape. Otherwise, if the input is 3D + (n_atoms, n_frames, n_dims), the output will also be a 3D array + of the same shape. + The labels of larger clusters are unaffected by the re-labeling. + + Raises: + ValueError: + If the input labels array does not have 2 or 3 dimensions, + an error is raised. + + Example: + + .. code-block:: python + + from dynsight.data_processing import cleaning_cluster_population + import numpy as np + + original_labels = np.load('labels_array.npy') + + cleaned_labels = cleaning_cluster_population( + labels=original_labels, + threshold=0.1, + assigned_env=99, + ) + + In this example, the labels of the smaller clusters (population of 10% or lower) + from `original_labels` are replaced with label 99. The result is + stored in `cleaned_labels`, a NumPy array. + """ + dimension = 2 + if labels.ndim < dimension or labels.ndim > dimension + 1: + msg = "labels must be 2D or 3D." + raise ValueError(msg) + + if excluded_env is None: + excluded_arr: NDArray[np.int64] = np.array([], dtype=np.int64) + elif isinstance(excluded_env, int): + excluded_arr = np.array([excluded_env], dtype=np.int64) + else: + excluded_arr = np.array(excluded_env, dtype=np.int64) + + missing = np.setdiff1d(excluded_arr, np.unique(labels)) + + if missing.size > 0: + logger.warning(f"Excluded value(s) not found in labels: {missing}") + + if labels.ndim == dimension: + flat = labels.ravel() + unique, counts = np.unique(flat, return_counts=True) + + populations = counts / flat.size + small_clusters = unique[populations <= threshold] + + small_clusters = small_clusters[~np.isin(small_clusters, excluded_arr)] + + new_labels = labels.copy() + if small_clusters.size > 0: + new_labels[np.isin(labels, small_clusters)] = assigned_env + + if labels.ndim == dimension + 1: + new_labels = labels.copy() + for i in range(labels.shape[2]): + lab = labels[:, :, i] + flat = lab.ravel() + unique, counts = np.unique(flat, return_counts=True) + + populations = counts / flat.size + small_clusters = unique[populations <= threshold] + + small_clusters = small_clusters[ + ~np.isin(small_clusters, excluded_arr) + ] + + if small_clusters.size > 0: + mask = np.isin(lab, small_clusters) + new_labels[:, :, i][mask] = assigned_env + + return new_labels diff --git a/src/dynsight/_internal/logs.py b/src/dynsight/_internal/logs.py index b568bddb..736e037e 100644 --- a/src/dynsight/_internal/logs.py +++ b/src/dynsight/_internal/logs.py @@ -68,7 +68,7 @@ class Logger: def __init__( self, *, - auto_recording: bool = True, + auto_recording: bool = False, ) -> None: self._log: list[str] = [] self._recorded_data: list[RecordedDataset] = [] @@ -79,7 +79,7 @@ def __init__( def configure( self, *, - auto_recording: bool = True, + auto_recording: bool = False, ) -> None: """Adjusts the runtime configuration of the logger. @@ -107,6 +107,18 @@ def log(self, msg: str) -> None: console.info(msg) self._log.append(history_entry) + def warning(self, msg: str) -> None: + """Records a warning message to the log. + + Parameters: + msg: + The message to record. 
+ """ + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + history_entry = f"[{timestamp}] {msg}" + console.warning(msg) + self._log.append(history_entry) + def save_history(self, filename: Path) -> None: """Saves the current log history to a text file. diff --git a/src/dynsight/_internal/trajectory/insight.py b/src/dynsight/_internal/trajectory/insight.py index bcc793af..30828e79 100644 --- a/src/dynsight/_internal/trajectory/insight.py +++ b/src/dynsight/_internal/trajectory/insight.py @@ -279,7 +279,12 @@ def get_onion_analysis( bins: str | int = "auto", number_of_sigmas: float = 3.0, max_area_overlap: float = 0.8, - ) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]: + ) -> tuple[ + NDArray[np.float64], + NDArray[np.int64], + NDArray[np.float64], + NDArray[np.int64], + ]: """Perform the full onion time resolution analysis. Note: this method uses the "onion smooth" functions (see documentation @@ -310,6 +315,7 @@ def get_onion_analysis( * delta_t_list: The list of ∆t used. * n_clust: The number of clusters at each ∆t. * unclass_frac: The fraction of unclassified data at each ∆t. + * list_of_labels: Labels at each ∆t. """ if delta_t_max is None: delta_t_max = self.dataset.shape[1] @@ -318,6 +324,10 @@ def get_onion_analysis( ) n_clust = np.zeros(delta_t_list.size, dtype=int) unclass_frac = np.zeros(delta_t_list.size) + list_of_labels = np.zeros( + (self.dataset.shape[0], self.dataset.shape[1], delta_t_list.size), + dtype=np.int64, + ) list_of_pop = [] for i, delta_t in enumerate(delta_t_list): @@ -327,6 +337,7 @@ def get_onion_analysis( number_of_sigmas, max_area_overlap, ) + list_of_labels[:, :, i] = on_cl.labels n_clust[i] = len(on_cl.state_list) unclass_frac[i] = np.sum(on_cl.labels == -1) / self.dataset.size list_of_pop.append( @@ -358,4 +369,4 @@ def get_onion_analysis( logger.log( f"Performed full onion clustering analysis with args {attr_dict}." 
) - return delta_t_list, n_clust, unclass_frac + return delta_t_list, n_clust, unclass_frac, list_of_labels diff --git a/src/dynsight/data_processing.py b/src/dynsight/data_processing.py index db4eb959..329c855a 100644 --- a/src/dynsight/data_processing.py +++ b/src/dynsight/data_processing.py @@ -10,6 +10,9 @@ mergereferences, savereferences, ) +from dynsight._internal.data_processing.clusters import ( + cleaning_cluster_population, +) from dynsight._internal.data_processing.distances import ( kernelsoap, simplekernelsoap, @@ -20,6 +23,7 @@ __all__ = [ "applyclassification", + "cleaning_cluster_population", "createreferencesfromtrajectory", "getdistancebetween", "getdistancesfromref", diff --git a/tests/data_processing/cluster/__init__.py b/tests/data_processing/cluster/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/data_processing/cluster/case_data.py b/tests/data_processing/cluster/case_data.py new file mode 100644 index 00000000..62691758 --- /dev/null +++ b/tests/data_processing/cluster/case_data.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class CleanPopCaseData: + name: str + expected_clean_pop: str + threshold: float + assigned_env: int + excluded_env: int | list[int] | None = None diff --git a/tests/data_processing/cluster/conftest.py b/tests/data_processing/cluster/conftest.py new file mode 100644 index 00000000..dde87166 --- /dev/null +++ b/tests/data_processing/cluster/conftest.py @@ -0,0 +1,70 @@ +import pytest + +from tests.data_processing.cluster.case_data import CleanPopCaseData + + +@pytest.fixture( + scope="session", + params=( + # Case 0: Cleaning 5% + lambda name: CleanPopCaseData( + expected_clean_pop="c0_clean_pop_th5_ass99_exNone.npy", + threshold=0.05, + assigned_env=99, + excluded_env=None, + name=name, + ), + # Case 1: Cleaning 15% + lambda name: CleanPopCaseData( + expected_clean_pop="c1_clean_pop_th15_ass99_exNone.npy", + threshold=0.15, + assigned_env=99, + excluded_env=None, + name=name, + ), + # Case 2: Cleaning 25% + lambda name: CleanPopCaseData( + expected_clean_pop="c2_clean_pop_th25_ass99_exNone.npy", + threshold=0.25, + assigned_env=99, + excluded_env=None, + name=name, + ), + # Case 3: Cleaning 25%, excluding 4 + lambda name: CleanPopCaseData( + expected_clean_pop="c3_clean_pop_th25_ass99_ex4.npy", + threshold=0.25, + assigned_env=99, + excluded_env=4, + name=name, + ), + # Case 4: Cleaning 25%, excluding 3,4 + lambda name: CleanPopCaseData( + expected_clean_pop="c4_clean_pop_th25_ass99_ex3-4.npy", + threshold=0.25, + assigned_env=99, + excluded_env=[3, 4], + name=name, + ), + # Case 5: Cleaning 25%, excluding 3,7 + lambda name: CleanPopCaseData( + expected_clean_pop="c5_clean_pop_th25_ass99_ex3-7.npy", + threshold=0.25, + assigned_env=99, + excluded_env=[3, 7], + name=name, + ), + # Case 6: Cleaning 25%, assigning to label 1 + lambda name: CleanPopCaseData( + expected_clean_pop="c6_clean_pop_th25_ass1_exNone.npy", + threshold=0.25, + assigned_env=1, + excluded_env=None, + name=name, + ), + ), +) +def case_data(request: pytest.FixtureRequest) -> CleanPopCaseData: + return request.param( + f"{request.fixturename}{request.param_index}", # type: ignore [attr-defined] + ) diff --git a/tests/data_processing/cluster/test_cluster.py b/tests/data_processing/cluster/test_cluster.py new file mode 100644 index 00000000..14b420b7 --- /dev/null +++ b/tests/data_processing/cluster/test_cluster.py @@ -0,0 +1,61 @@ +"""Pytest for 
dynsight.data_processing.cleaning_cluster_population.""" + +from pathlib import Path + +import numpy as np +import pytest + +from dynsight.data_processing import cleaning_cluster_population + +from .case_data import CleanPopCaseData + + +def test_clean_pop_noexcl(case_data: CleanPopCaseData) -> None: + original_dir = Path(__file__).resolve().parent + expected_clean_pop = ( + original_dir / "test_cluster" / case_data.expected_clean_pop + ) + + labels = np.zeros((4, 10, 3), dtype=int) + + labels[:, :, 0] = np.array( + [ + [0, 0, 0, 1, 1, 1, 2, 2, 2, 2], + [0, 0, 0, 1, 1, 1, 2, 2, 2, 2], + [0, 0, 0, 1, 1, 1, 2, 2, 2, 3], + [0, 0, 0, 1, 1, 1, 2, 2, 2, 3], + ] + ) + + labels[:, :, 1] = np.array( + [ + [0, 0, 0, 0, 0, 1, 1, 1, 1, 4], + [0, 0, 0, 0, 0, 1, 1, 1, 1, 4], + [0, 0, 0, 0, 0, 1, 1, 1, 4, 4], + [0, 0, 0, 0, 0, 1, 1, 1, 4, 4], + ] + ) + + labels[:, :, 2] = np.array( + [ + [0, 0, 9, 9, 0, 1, 1, 1, 1, 9], + [0, 0, 0, 9, 0, 1, 1, 1, 1, 9], + [0, 0, 0, 9, 0, 1, 1, 1, 1, 9], + [0, 9, 0, 0, 0, 1, 1, 1, 1, 9], + ] + ) + + test_clean_pop = cleaning_cluster_population( + labels, + threshold=case_data.threshold, + assigned_env=case_data.assigned_env, + excluded_env=case_data.excluded_env, + ) + + if not expected_clean_pop.exists(): + np.save(expected_clean_pop, test_clean_pop) + pytest.fail( + "Clean_pop test files were not present. They have been created." + ) + exp_clean_pop = np.load(expected_clean_pop) + assert np.array_equal(exp_clean_pop, test_clean_pop) diff --git a/tests/data_processing/cluster/test_cluster/c0_clean_pop_th5_ass99_exNone.npy b/tests/data_processing/cluster/test_cluster/c0_clean_pop_th5_ass99_exNone.npy new file mode 100644 index 00000000..f5f8a9a5 Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c0_clean_pop_th5_ass99_exNone.npy differ diff --git a/tests/data_processing/cluster/test_cluster/c1_clean_pop_th15_ass99_exNone.npy b/tests/data_processing/cluster/test_cluster/c1_clean_pop_th15_ass99_exNone.npy new file mode 100644 index 00000000..983b0079 Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c1_clean_pop_th15_ass99_exNone.npy differ diff --git a/tests/data_processing/cluster/test_cluster/c2_clean_pop_th25_ass99_exNone.npy b/tests/data_processing/cluster/test_cluster/c2_clean_pop_th25_ass99_exNone.npy new file mode 100644 index 00000000..e9d0699e Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c2_clean_pop_th25_ass99_exNone.npy differ diff --git a/tests/data_processing/cluster/test_cluster/c3_clean_pop_th25_ass99_ex4.npy b/tests/data_processing/cluster/test_cluster/c3_clean_pop_th25_ass99_ex4.npy new file mode 100644 index 00000000..e5f0e61d Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c3_clean_pop_th25_ass99_ex4.npy differ diff --git a/tests/data_processing/cluster/test_cluster/c4_clean_pop_th25_ass99_ex3-4.npy b/tests/data_processing/cluster/test_cluster/c4_clean_pop_th25_ass99_ex3-4.npy new file mode 100644 index 00000000..231db420 Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c4_clean_pop_th25_ass99_ex3-4.npy differ diff --git a/tests/data_processing/cluster/test_cluster/c5_clean_pop_th25_ass99_ex3-7.npy b/tests/data_processing/cluster/test_cluster/c5_clean_pop_th25_ass99_ex3-7.npy new file mode 100644 index 00000000..48a3285e Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c5_clean_pop_th25_ass99_ex3-7.npy differ diff --git a/tests/data_processing/cluster/test_cluster/c6_clean_pop_th25_ass1_exNone.npy 
b/tests/data_processing/cluster/test_cluster/c6_clean_pop_th25_ass1_exNone.npy new file mode 100644 index 00000000..d73149ea Binary files /dev/null and b/tests/data_processing/cluster/test_cluster/c6_clean_pop_th25_ass1_exNone.npy differ diff --git a/tests/logger/test_logger.py b/tests/logger/test_logger.py index 88f3549e..37645964 100644 --- a/tests/logger/test_logger.py +++ b/tests/logger/test_logger.py @@ -9,6 +9,7 @@ def test_zip_arch() -> None: """Ensure the zip archive is created and contains the expected files.""" + logger.configure(auto_recording=True) logger.clear_history() original_dir = Path(__file__).absolute().parent