diff --git a/.circleci/config.yml b/.circleci/config.yml
index e9f2313..a84e983 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -9,7 +9,7 @@ orbs:
   # Orb commands and jobs help you with common scripting around a language/tool
   # so you dont have to copy and paste it everywhere.
   # See the orb documentation here: https://circleci.com/developer/orbs/orb/circleci/python
-  python: circleci/python@1.5.0
+  python: circleci/python@2.1.1
 
 # Define a job to be invoked later in a workflow.
 # See: https://circleci.com/docs/2.0/configuration-reference/#jobs
@@ -33,6 +33,7 @@ jobs:
           name: build clustpy
           command: |
             python -m pip install --upgrade pip
+            pip install build Cython numpy
             pip install pytest
             pip install -e .
       - run:
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
deleted file mode 100644
index 43c4307..0000000
--- a/.github/workflows/lint.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-# This workflow will install Python dependencies, run tests and lint with a single version of Python
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
-
-name: Check Lint
-
-on:
-  push:
-    branches: [ "main" ]
-  pull_request:
-    branches: [ "main" ]
-
-permissions:
-  contents: read
-
-jobs:
-  lint:
-
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
-        with:
-          python-version: '3.12'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install flake8
-
-      - name: Lint with flake8
-        run: |
-          # stop the build if there are Python syntax errors or undefined names
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 064a51a..698a487 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -21,27 +21,28 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
+
     - name: Set up Python
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v5
       with:
         python-version: '3.12'
 
-    - name: Display Python version
-      run: python -c "import sys; print(sys.version)"
-
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install build
+
     - name: Build package
       run: python -m build --sdist
+
     - name: Publish package to Test PyPI
       uses: pypa/gh-action-pypi-publish@release/v1
       with:
         user: __token__
         password: ${{ secrets.TEST_PYPI_API_TOKEN }}
         repository_url: https://test.pypi.org/legacy/
+
     - name: Publish package to PyPI
       if: startsWith(github.ref, 'refs/tags')
       uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/test-main.yml b/.github/workflows/test-main.yml
index 38adfc8..f9b8ce0 100644
--- a/.github/workflows/test-main.yml
+++ b/.github/workflows/test-main.yml
@@ -13,25 +13,47 @@ permissions:
   contents: read
 
 jobs:
-  build:
+  lint:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip' # Speeds up flake8 installation
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install flake8
 
+      - name: Lint with flake8
+        run: |
+          # stop the build if there are Python syntax errors or undefined names
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+  build:
+    needs: lint  # This job only starts if 'lint' passes
     runs-on: ubuntu-latest
     strategy:
-      # You can use PyPy versions in python-version.
-      # For example, pypy-2.7 and pypy-3.8
+      fail-fast: false  # let the other Python versions finish even if one matrix job fails
       matrix:
         python-version: ["3.12", "3.10"]
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
+
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-
-      # You can test your matrix by printing the current Python version
-      - name: Display Python version
-        run: python -c "import sys; print(sys.version)"
+          cache: 'pip' # Automatically caches your dependencies
 
       - name: Install dependencies
         run: |
@@ -39,11 +61,19 @@ jobs:
           pip install pytest pytest-cov
           pip install -e .[full]
 
-      - name: Test with pytest
+      - name: Test with pytest (with codecov)
+        if: ${{ matrix.python-version == '3.10' }}
+        run: |
+          pytest -m "not largedata" --cov --cov-report=xml
+
+      - name: Test with pytest (without codecov)
+        if: ${{ matrix.python-version != '3.10' }}
         run: |
-          pytest -m "not largedata" --cov
+          pytest -m "not largedata"
 
       - name: Upload coverage reports to Codecov
-        uses: codecov/codecov-action@v4.0.1
+        if: ${{ matrix.python-version == '3.10' }}
+        uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
+          fail_ci_if_error: true # Helpful to know if upload failed
diff --git a/clustpy/data/_utils.py b/clustpy/data/_utils.py
index 7a08782..70c4466 100644
--- a/clustpy/data/_utils.py
+++ b/clustpy/data/_utils.py
@@ -7,14 +7,17 @@
 except:
     print(
         "[WARNING] Could not import nltk in clustpy.data.real_world_data to use the SnowballStemmer. Please install nltk by 'pip install nltk' if necessary")
+try:
+    from PIL import Image
+except ImportError:  # Pillow is optional; warn here instead of failing at import time
+    print(
+        "[WARNING] Could not import PIL in clustpy.data._utils. Please install PIL by 'pip install Pillow' if necessary")
 import numpy as np
-import urllib.request
 import os
 from pathlib import Path
-import ssl
-from PIL import Image
 from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
 from sklearn.feature_selection import VarianceThreshold
+from sklearn.datasets import fetch_file
 
 
 DEFAULT_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_datafiles")
@@ -63,11 +66,11 @@ def _download_file(file_url: str, filename_local: str) -> None:
     filename_local : str
         local name of the file after it has been downloaded
     """
+    local_path = Path(filename_local)
+    local_dir = local_path.parent
+    local_filename = local_path.name
     print("Downloading data set from {0} to {1}".format(file_url, filename_local))
-    default_ssl = ssl._create_default_https_context
-    ssl._create_default_https_context = ssl._create_unverified_context
-    urllib.request.urlretrieve(file_url, filename_local)
-    ssl._create_default_https_context = default_ssl
+    fetch_file(file_url, folder=local_dir, local_filename=local_filename)
 
 
 def _download_file_from_google_drive(file_id: str, filename_local: str, chunk_size: int = 32768) -> None:
@@ -187,7 +190,7 @@ def _load_image_data(image: str, image_size: tuple, color_image: bool) -> np.nda
     image_data : np.ndarray
         The numpy array containing the image data
     """
-    if type(image) is str:
+    if isinstance(image, str):
         pil_image = Image.open(image)
     else:
         pil_image = Image.fromarray(np.uint8(image))
@@ -196,7 +199,8 @@ def _load_image_data(image: str, image_size: tuple, color_image: bool) -> np.nda
     # Convert to coherent size
     if image_size is not None:
         pil_image = pil_image.resize(image_size)
-    image_data = np.asarray(pil_image)
+    image_data = np.array(pil_image)  # np.array already copies, so the data stays valid after close()
+    pil_image.close()
     assert image_size is None or image_data.shape == (
         image_size[0], image_size[1], 3), "Size of image is not correct. Should be {0} but is {1}".format(image_size,
                                                                                                           image_data.shape)
diff --git a/clustpy/data/real_torchvision_data.py b/clustpy/data/real_torchvision_data.py
index 03c1eec..a3c0fb0 100644
--- a/clustpy/data/real_torchvision_data.py
+++ b/clustpy/data/real_torchvision_data.py
@@ -1,7 +1,6 @@
 import torchvision
 import torch
 import numpy as np
-import ssl
 from clustpy.data._utils import _get_download_dir, _load_image_data, flatten_images
 from sklearn.datasets._base import Bunch
 
@@ -45,7 +44,7 @@ def _get_data_and_labels(dataset: torchvision.datasets.VisionDataset, image_size
             labels.append(label)
             image_data = _load_image_data(path, image_size, True)
             data_list.append(image_data)
-        # Convert data form list to numpy array
+        # Convert data from list to numpy array
         data = np.array(data_list)
         labels = np.array(labels)
     if type(data) is np.ndarray:
@@ -89,8 +88,6 @@ def _load_torch_image_data(data_source: torchvision.datasets.VisionDataset, subs
     assert subset in ["all", "train",
                       "test"], "subset must match 'all', 'train' or 'test'. Your input {0}".format(subset)
     # Get data from source
-    default_ssl = ssl._create_default_https_context
-    ssl._create_default_https_context = ssl._create_unverified_context
     if subset == "all" or subset == "train":
         # Load training data
         if uses_train_param:
@@ -117,7 +114,6 @@ def _load_torch_image_data(data_source: torchvision.datasets.VisionDataset, subs
     # Convert data to float and labels to int
     data = data.float()
     labels = labels.int()
-    ssl._create_default_https_context = default_ssl
     # Check data dimensions
     if data.dim() < 3 or data.dim() > 5:
         raise Exception(
@@ -137,7 +133,7 @@ def _load_torch_image_data(data_source: torchvision.datasets.VisionDataset, subs
         # Some dataset (e.g., SVHN) do not have the class information included
         if hasattr(dataset, "classes"):
             return Bunch(dataset_name=dataset.__class__.__name__, data=data_flatten, target=labels_numpy,
-                         images=data_image, image_format=image_format, classes=dataset.classes)
+                         images=data_image, image_format=image_format, classes=dataset.classes.copy())
         else:
             return Bunch(dataset_name=dataset.__class__.__name__, data=data_flatten, target=labels_numpy,
                          images=data_image, image_format=image_format)
diff --git a/clustpy/data/real_uci_data.py b/clustpy/data/real_uci_data.py
index f34c755..219e7e5 100644
--- a/clustpy/data/real_uci_data.py
+++ b/clustpy/data/real_uci_data.py
@@ -1,9 +1,4 @@
-try:
-    from PIL import Image
-except:
-    print(
-        "[WARNING] Could not import PIL in clustpy.data.real_world_data. Please install PIL by 'pip install Pillow' if necessary")
-from clustpy.data._utils import _download_file, _get_download_dir, _decompress_z_file, _load_data_file, flatten_images, _transform_text_data
+from clustpy.data._utils import _download_file, _get_download_dir, _decompress_z_file, _load_data_file, flatten_images, _transform_text_data, _load_image_data
 import os
 import numpy as np
 import zipfile
@@ -1167,8 +1162,7 @@ def load_cmu_faces(return_X_y: bool = False, downloads_path: str = None) -> Bunc
             if not image.endswith("_4.pgm"):
                 continue
             # get image data
-            image_data = Image.open(path_images + "/" + image)
-            image_array = np.array(image_data)
+            image_array = _load_image_data(path_images + "/" + image, None, False)
             # Get labels
             name_parts = image.split("_")
             user_id = np.argwhere(names == name_parts[0])[0][0]
@@ -1188,7 +1182,7 @@ def load_cmu_faces(return_X_y: bool = False, downloads_path: str = None) -> Bunc
         return data_flatten, labels
     else:
         return Bunch(dataset_name="CMUFace", data=data_flatten, target=labels, images=data_image, image_format="HW",
-                     classes=[names, positions, expressions, eyes])
+                     classes=(names, positions, expressions, eyes))
 
 
 def load_gene_expression_cancer_rna_seq(return_X_y: bool = False, downloads_path: str = None):
diff --git a/clustpy/data/real_video_data.py b/clustpy/data/real_video_data.py
index d58730b..8a3211b 100644
--- a/clustpy/data/real_video_data.py
+++ b/clustpy/data/real_video_data.py
@@ -2,7 +2,7 @@
     import cv2
 except:
     print("[WARNING] Could not import cv2 in clustpy.data.real_video_data. Please install cv2 by 'pip install opencv-python' if necessary")
-from clustpy.data._utils import _download_file, _get_download_dir, _load_image_data, flatten_images
+from clustpy.data._utils import _download_file, _get_download_dir, flatten_images
 import numpy as np
 import os
 import zipfile
@@ -33,19 +33,26 @@ def _load_video(path: str, image_size: tuple) -> np.ndarray:
     """
     # Load video
     vid = cv2.VideoCapture(path)
+    if not vid.isOpened():
+        vid.release()
+        raise IOError(f"OpenCV could not open {path}. This usually indicates missing codecs (ffmpeg/libav).")
     video_array = []
     # Iterate over frames
-    successful = True
-    while successful:
-        successful, frame_array = vid.read()
-        if successful:
+    try:
+        while True:
+            successful, frame_array = vid.read()
+            if not successful:
+                break
             is_color_image = frame_array.ndim == 3 and frame_array.shape[2] == 3
             if is_color_image:
                 frame_array = cv2.cvtColor(frame_array, cv2.COLOR_BGR2RGB)
             if image_size is not None:
-                frame_array = _load_image_data(frame_array, image_size, is_color_image)
-            video_array.append(frame_array)
-    vid.release()
+                frame_array = cv2.resize(frame_array, image_size, interpolation=cv2.INTER_AREA)
+            video_array.append(frame_array.copy())
+    finally:
+        vid.release()
+    if len(video_array) == 0:
+        raise ValueError(f"Video at {path} yielded 0 frames. File might be corrupted.")
     # Transform list to numpy array
     video_array = np.array(video_array, dtype="uint8")
     return video_array
@@ -90,7 +97,8 @@ def _downsample_frames(data: np.ndarray, labels: np.ndarray, frame_sampling_rati
 """
 
 
-def load_video_weizmann(image_size: tuple = None, frame_sampling_ratio: float = 1, return_X_y: bool = False,
+def load_video_weizmann(use_actions : tuple = None, use_persons : tuple = None, 
+                        image_size: tuple = None, frame_sampling_ratio: float = 1, return_X_y: bool = False,
                         downloads_path: str = None) -> Bunch:
     """
     Load the Weizmann video data set.
@@ -102,6 +110,10 @@ def load_video_weizmann(image_size: tuple = None, frame_sampling_ratio: float =
 
     Parameters
     ----------
+    use_actions : tuple
+        Specify the actions. Can be None if all actions should be used (default: None)
+    use_persons : tuple
+        Specify the persons. Can be None if all persons should be used (default: None)
     image_size : tuple
         The single frames can be downsized. This is necessary for large datasets.
         The tuple equals (width, height) of the images.
@@ -129,13 +141,17 @@ def load_video_weizmann(image_size: tuple = None, frame_sampling_ratio: float =
     """
     directory = _get_download_dir(downloads_path) + "/Video_Weizmann/"
     all_actions = ["walk", "run", "jump", "side", "bend", "wave1", "wave2", "pjump", "jack", "skip"]
+    if use_actions is None:
+        use_actions = all_actions.copy()
+    assert all(action in all_actions for action in use_actions), "Wrong action in use_actions. Must be a subset of {0}".format(all_actions)
     all_persons = ["daria", "denis", "eli", "ido", "ira", "lena", "lyova", "moshe", "shahar"]
-    all_data = np.zeros(
-        (0, 144 if image_size is None else image_size[0], 180 if image_size is None else image_size[1], 3),
-        dtype="uint8")
-    labels = np.zeros((0, 2), dtype="int32")
+    if use_persons is None:
+        use_persons = all_persons.copy()
+    assert all(person in all_persons for person in use_persons), "Wrong person in use_persons. Must be a subset of {0}".format(all_persons)
+    all_data_list = []
+    labels_list = []
     # Download data
-    for action in all_actions:
+    for action in use_actions:
         my_zip_file = action + ".zip"
         filename = directory + my_zip_file
         if not os.path.isfile(filename):
@@ -151,7 +167,6 @@ def load_video_weizmann(image_size: tuple = None, frame_sampling_ratio: float =
     for v_file in os.listdir(directory):
         # Ignore zip files
         if v_file.endswith(".avi"):
-            data_local = _load_video(directory + "/" + v_file, image_size)
             # Get name of person and type of activity
             relevant_parts = v_file.split(".")[0]
             person = relevant_parts.split("_")[0]
@@ -161,15 +176,23 @@ def load_video_weizmann(image_size: tuple = None, frame_sampling_ratio: float =
                 action = action[:-1]
             assert person in all_persons, "Wrong person. {0} is unknown".format(person)
             assert action in all_actions, "Wrong action. {0} is unknown".format(action)
+            if person not in use_persons or action not in use_actions:
+                continue
+            # Load video
+            data_local = _load_video(directory + "/" + v_file, image_size)
             # Transform string to label
-            label_person = all_persons.index(person)
-            label_action = all_actions.index(action)
+            label_person = use_persons.index(person)
+            label_action = use_actions.index(action)
             labels_local = np.array([[label_action, label_person]] * data_local.shape[0], dtype="int32")
             # Downsample frames
             data_local, labels_local = _downsample_frames(data_local, labels_local, frame_sampling_ratio)
             # Update data and labels
-            all_data = np.append(all_data, data_local, axis=0)
-            labels = np.append(labels, labels_local, axis=0)
+            all_data_list.append(data_local)
+            labels_list.append(labels_local)
+    all_data = np.concatenate(all_data_list, axis=0)
+    labels = np.concatenate(labels_list, axis=0)
+    del all_data_list
+    del labels_list
     # Flatten data
     data_flatten = flatten_images(all_data, "HWC")
     # Return values
@@ -180,7 +203,7 @@ def load_video_weizmann(image_size: tuple = None, frame_sampling_ratio: float =
         data_image = np.transpose(all_data, [0, 3, 1, 2])
         image_format = "CHW"
         return Bunch(dataset_name="VideoWeizmann", data=data_flatten, target=labels, images=data_image,
-                     image_format=image_format)
+                     image_format=image_format, classes=(use_actions, use_persons))
 
 
 def load_video_keck_gesture(subset: str = "all", image_size: tuple = (200, 200), frame_sampling_ratio: float = 1,
@@ -283,10 +306,8 @@ def parse_frames_file(frames_file: str) -> (dict, dict):
         # Get Relevant frames
         _download_file("http://www.zhuolin.umiacs.io/PrototypeTree/sequences.txt", frames_file)
     # Load data and labels
-    all_data = np.zeros(
-        (0, 480 if image_size is None else image_size[0], 640 if image_size is None else image_size[1], 3),
-        dtype="uint8")
-    labels = np.zeros((0, 2), dtype="int32")
+    all_data_list = []
+    labels_list = []
     # Get frame limits from sequences file
     frames_train_dict, frames_test_dict = parse_frames_file(frames_file)
     # Get necessary directories
@@ -315,8 +336,12 @@ def parse_frames_file(frames_file: str) -> (dict, dict):
             # Downsample frames
             data_local, labels_local = _downsample_frames(data_local, labels_local, frame_sampling_ratio)
             # Update data and labels
-            all_data = np.append(all_data, data_local, axis=0)
-            labels = np.append(labels, labels_local, axis=0)
+            all_data_list.append(data_local)
+            labels_list.append(labels_local)
+    all_data = np.concatenate(all_data_list, axis=0)
+    labels = np.concatenate(labels_list, axis=0)
+    del all_data_list
+    del labels_list
     # Flatten data
     data_flatten = flatten_images(all_data, "HWC")
     # Return values
diff --git a/clustpy/data/real_world_data.py b/clustpy/data/real_world_data.py
index 347b0ec..f9637fd 100644
--- a/clustpy/data/real_world_data.py
+++ b/clustpy/data/real_world_data.py
@@ -704,4 +704,4 @@ def load_webkb(use_universities: tuple = ("cornell", "texas", "washington", "wis
     if return_X_y:
         return data, labels
     else:
-        return Bunch(dataset_name="WebKB", data=data, target=labels, classes=[use_categories, use_universities])
+        return Bunch(dataset_name="WebKB", data=data, target=labels, classes=(use_categories, use_universities))
diff --git a/clustpy/data/tests/test_real_clustpy_data.py b/clustpy/data/tests/test_real_clustpy_data.py
index a5f5114..192856f 100644
--- a/clustpy/data/tests/test_real_clustpy_data.py
+++ b/clustpy/data/tests/test_real_clustpy_data.py
@@ -1,23 +1,7 @@
 from clustpy.data.tests._helpers_for_tests import _helper_test_data_loader
 from clustpy.data import load_aloi_small, load_fruit, load_nrletters, load_stickfigures
-from pathlib import Path
-import os
-import shutil
 import pytest
 
-TEST_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_testfiles_clustpy")
-
-
-@pytest.fixture(autouse=True, scope='function')
-def run_around_tests():
-    # Code that will run before the tests
-    if not os.path.isdir(TEST_DOWNLOAD_PATH):
-        os.makedirs(TEST_DOWNLOAD_PATH)
-    # Test functions will be run at this point
-    yield
-    # Code that will run after the tests
-    shutil.rmtree(TEST_DOWNLOAD_PATH)
-
 
 @pytest.mark.data
 def test_load_aloi_small():
diff --git a/clustpy/data/tests/test_real_medical_mnist_data.py b/clustpy/data/tests/test_real_medical_mnist_data.py
index f77f0fb..c08139d 100644
--- a/clustpy/data/tests/test_real_medical_mnist_data.py
+++ b/clustpy/data/tests/test_real_medical_mnist_data.py
@@ -3,452 +3,447 @@
     load_retina_mnist, load_breast_mnist, load_blood_mnist, load_tissue_mnist, load_organ_a_mnist, load_organ_c_mnist, \
     load_organ_s_mnist, load_organ_mnist_3d, load_nodule_mnist_3d, load_adrenal_mnist_3d, load_fracture_mnist_3d, \
     load_vessel_mnist_3d, load_synapse_mnist_3d
-from pathlib import Path
-import os
-import shutil
 import pytest
-
-TEST_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_testfiles_medical_mnist")
+import shutil
 
 
 @pytest.fixture(autouse=True, scope='function')
-def run_around_tests():
+def my_tmp_dir(tmp_path):
     # Code that will run before the tests
-    if not os.path.isdir(TEST_DOWNLOAD_PATH):
-        os.makedirs(TEST_DOWNLOAD_PATH)
+    tmp_dir = str(tmp_path)
     # Test functions will be run at this point
-    yield
+    yield tmp_dir
     # Code that will run after the tests
-    shutil.rmtree(TEST_DOWNLOAD_PATH)
+    shutil.rmtree(tmp_dir)
 
 
 @pytest.mark.data
-def test_load_path_mnist():
+def test_load_path_mnist(my_tmp_dir):
     # Full data set
-    dataset = _helper_test_data_loader(load_path_mnist, 107180, 2352, 9, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_path_mnist, 107180, 2352, 9, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (107180, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Train data set
-    dataset = _helper_test_data_loader(load_path_mnist, 89996, 2352, 9, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_path_mnist, 89996, 2352, 9, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (89996, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_path_mnist, 10004, 2352, 9, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_path_mnist, 10004, 2352, 9, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (10004, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Test data set
-    dataset = _helper_test_data_loader(load_path_mnist, 7180, 2352, 9, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_path_mnist, 7180, 2352, 9, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (7180, 3, 28, 28)
     assert dataset.image_format == "CHW"
 
 
 @pytest.mark.data
-def test_load_chest_mnist():
+def test_load_chest_mnist(my_tmp_dir):
     # Full data set
-    dataset = _helper_test_data_loader(load_chest_mnist, 112120, 784, [2] * 14, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_chest_mnist, 112120, 784, [2] * 14, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (112120, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
-    dataset = _helper_test_data_loader(load_chest_mnist, 78468, 784, [2] * 14, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_chest_mnist, 78468, 784, [2] * 14, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (78468, 28, 28)
     assert dataset.image_format == "HW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_chest_mnist, 11219, 784, [2] * 14, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_chest_mnist, 11219, 784, [2] * 14, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (11219, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
-    dataset = _helper_test_data_loader(load_chest_mnist, 22433, 784, [2] * 14, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_chest_mnist, 22433, 784, [2] * 14, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (22433, 28, 28)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_derma_mnist():
+def test_load_derma_mnist(my_tmp_dir):
     # Full data set
-    dataset = _helper_test_data_loader(load_derma_mnist, 10015, 2352, 7, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_derma_mnist, 10015, 2352, 7, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (10015, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Train data set
-    dataset = _helper_test_data_loader(load_derma_mnist, 7007, 2352, 7, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_derma_mnist, 7007, 2352, 7, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (7007, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_derma_mnist, 1003, 2352, 7, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_derma_mnist, 1003, 2352, 7, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1003, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Test data set
-    dataset = _helper_test_data_loader(load_derma_mnist, 2005, 2352, 7, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_derma_mnist, 2005, 2352, 7, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (2005, 3, 28, 28)
     assert dataset.image_format == "CHW"
 
 
 @pytest.mark.data
-def test_load_oct_mnist():
+def test_load_oct_mnist(my_tmp_dir):
     # Full data set
-    dataset = _helper_test_data_loader(load_oct_mnist, 109309, 784, 4, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_oct_mnist, 109309, 784, 4, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (109309, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
-    dataset = _helper_test_data_loader(load_oct_mnist, 97477, 784, 4, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_oct_mnist, 97477, 784, 4, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (97477, 28, 28)
     assert dataset.image_format == "HW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_oct_mnist, 10832, 784, 4, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_oct_mnist, 10832, 784, 4, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (10832, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
-    dataset = _helper_test_data_loader(load_oct_mnist, 1000, 784, 4, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_oct_mnist, 1000, 784, 4, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1000, 28, 28)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_pneumonia_mnist():
+def test_load_pneumonia_mnist(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_pneumonia_mnist, 5856, 784, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_pneumonia_mnist, 5856, 784, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (5856, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
-    dataset = _helper_test_data_loader(load_pneumonia_mnist, 4708, 784, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_pneumonia_mnist, 4708, 784, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (4708, 28, 28)
     assert dataset.image_format == "HW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_pneumonia_mnist, 524, 784, 2, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_pneumonia_mnist, 524, 784, 2, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (524, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
-    dataset = _helper_test_data_loader(load_pneumonia_mnist, 624, 784, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_pneumonia_mnist, 624, 784, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (624, 28, 28)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_retina_mnist():
+def test_load_retina_mnist(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_retina_mnist, 1600, 2352, 5, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_retina_mnist, 1600, 2352, 5, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1600, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Train data set
-    dataset = _helper_test_data_loader(load_retina_mnist, 1080, 2352, 5, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_retina_mnist, 1080, 2352, 5, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1080, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_retina_mnist, 120, 2352, 5, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_retina_mnist, 120, 2352, 5, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (120, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Test data set
-    dataset = _helper_test_data_loader(load_retina_mnist, 400, 2352, 5, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_retina_mnist, 400, 2352, 5, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (400, 3, 28, 28)
     assert dataset.image_format == "CHW"
 
 
 @pytest.mark.data
-def test_load_breast_mnist():
+def test_load_breast_mnist(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_breast_mnist, 780, 784, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_breast_mnist, 780, 784, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (780, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
-    dataset = _helper_test_data_loader(load_breast_mnist, 546, 784, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_breast_mnist, 546, 784, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (546, 28, 28)
     assert dataset.image_format == "HW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_breast_mnist, 78, 784, 2, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_breast_mnist, 78, 784, 2, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (78, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
-    dataset = _helper_test_data_loader(load_breast_mnist, 156, 784, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_breast_mnist, 156, 784, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (156, 28, 28)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_blood_mnist():
+def test_load_blood_mnist(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_blood_mnist, 17092, 2352, 8, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_blood_mnist, 17092, 2352, 8, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (17092, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Train data set
-    dataset = _helper_test_data_loader(load_blood_mnist, 11959, 2352, 8, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_blood_mnist, 11959, 2352, 8, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (11959, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_blood_mnist, 1712, 2352, 8, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_blood_mnist, 1712, 2352, 8, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1712, 3, 28, 28)
     assert dataset.image_format == "CHW"
     # Test data set
-    dataset = _helper_test_data_loader(load_blood_mnist, 3421, 2352, 8, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_blood_mnist, 3421, 2352, 8, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (3421, 3, 28, 28)
     assert dataset.image_format == "CHW"
 
 
 @pytest.mark.data
-def test_load_tissue_mnist():
+def test_load_tissue_mnist(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_tissue_mnist, 236386, 784, 8, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_tissue_mnist, 236386, 784, 8, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (236386, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
-    dataset = _helper_test_data_loader(load_tissue_mnist, 165466, 784, 8, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_tissue_mnist, 165466, 784, 8, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (165466, 28, 28)
     assert dataset.image_format == "HW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_tissue_mnist, 23640, 784, 8, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_tissue_mnist, 23640, 784, 8, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (23640, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
-    dataset = _helper_test_data_loader(load_tissue_mnist, 47280, 784, 8, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_tissue_mnist, 47280, 784, 8, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (47280, 28, 28)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_organ_a_mnist():
+def test_load_organ_a_mnist(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_organ_a_mnist, 58850, 784, 11, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_a_mnist, 58850, 784, 11, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (58850, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
-    dataset = _helper_test_data_loader(load_organ_a_mnist, 34581, 784, 11, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_a_mnist, 34581, 784, 11, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (34581, 28, 28)
     assert dataset.image_format == "HW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_organ_a_mnist, 6491, 784, 11, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_a_mnist, 6491, 784, 11, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (6491, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
-    dataset = _helper_test_data_loader(load_organ_a_mnist, 17778, 784, 11, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_a_mnist, 17778, 784, 11, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (17778, 28, 28)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_organ_c_mnist():
+def test_load_organ_c_mnist(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_organ_c_mnist, 23660, 784, 11, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_c_mnist, 23660, 784, 11, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (23660, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
-    dataset = _helper_test_data_loader(load_organ_c_mnist, 13000, 784, 11, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_c_mnist, 13000, 784, 11, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (13000, 28, 28)
     assert dataset.image_format == "HW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_organ_c_mnist, 2392, 784, 11, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_c_mnist, 2392, 784, 11, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (2392, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
-    dataset = _helper_test_data_loader(load_organ_c_mnist, 8268, 784, 11, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_c_mnist, 8268, 784, 11, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (8268, 28, 28)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_organ_s_mnist():
+def test_load_organ_s_mnist(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_organ_s_mnist, 25221, 784, 11, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_s_mnist, 25221, 784, 11, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (25221, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
-    dataset = _helper_test_data_loader(load_organ_s_mnist, 13940, 784, 11, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_s_mnist, 13940, 784, 11, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (13940, 28, 28)
     assert dataset.image_format == "HW"
     # Validation data set
-    dataset = _helper_test_data_loader(load_organ_s_mnist, 2452, 784, 11, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_s_mnist, 2452, 784, 11, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (2452, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
-    dataset = _helper_test_data_loader(load_organ_s_mnist, 8829, 784, 11, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_s_mnist, 8829, 784, 11, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (8829, 28, 28)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_organ_mnist_3d():
+def test_load_organ_mnist_3d(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_organ_mnist_3d, 1743, 21952, 11, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_mnist_3d, 1743, 21952, 11, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1743, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Train data set
-    dataset = _helper_test_data_loader(load_organ_mnist_3d, 972, 21952, 11, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_mnist_3d, 972, 21952, 11, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (972, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Validation data set
-    dataset = _helper_test_data_loader(load_organ_mnist_3d, 161, 21952, 11, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_mnist_3d, 161, 21952, 11, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (161, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Test data set
-    dataset = _helper_test_data_loader(load_organ_mnist_3d, 610, 21952, 11, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_organ_mnist_3d, 610, 21952, 11, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (610, 28, 28, 28)
     assert dataset.image_format == "HWD"
 
 
 @pytest.mark.data
-def test_load_nodule_mnist_3d():
+def test_load_nodule_mnist_3d(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_nodule_mnist_3d, 1633, 21952, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_nodule_mnist_3d, 1633, 21952, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1633, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Train data set
-    dataset = _helper_test_data_loader(load_nodule_mnist_3d, 1158, 21952, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_nodule_mnist_3d, 1158, 21952, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1158, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Validation data set
-    dataset = _helper_test_data_loader(load_nodule_mnist_3d, 165, 21952, 2, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_nodule_mnist_3d, 165, 21952, 2, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (165, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Test data set
-    dataset = _helper_test_data_loader(load_nodule_mnist_3d, 310, 21952, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_nodule_mnist_3d, 310, 21952, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (310, 28, 28, 28)
     assert dataset.image_format == "HWD"
 
 
 @pytest.mark.data
-def test_load_adrenal_mnist_3d():
+def test_load_adrenal_mnist_3d(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_adrenal_mnist_3d, 1584, 21952, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_adrenal_mnist_3d, 1584, 21952, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1584, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Train data set
-    dataset = _helper_test_data_loader(load_adrenal_mnist_3d, 1188, 21952, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_adrenal_mnist_3d, 1188, 21952, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1188, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Validation data set
-    dataset = _helper_test_data_loader(load_adrenal_mnist_3d, 98, 21952, 2, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_adrenal_mnist_3d, 98, 21952, 2, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (98, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Test data set
-    dataset = _helper_test_data_loader(load_adrenal_mnist_3d, 298, 21952, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_adrenal_mnist_3d, 298, 21952, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (298, 28, 28, 28)
     assert dataset.image_format == "HWD"
 
 
 @pytest.mark.data
-def test_load_fracture_mnist_3d():
+def test_load_fracture_mnist_3d(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_fracture_mnist_3d, 1370, 21952, 3, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_fracture_mnist_3d, 1370, 21952, 3, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1370, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Train data set
-    dataset = _helper_test_data_loader(load_fracture_mnist_3d, 1027, 21952, 3, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_fracture_mnist_3d, 1027, 21952, 3, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1027, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Validation data set
-    dataset = _helper_test_data_loader(load_fracture_mnist_3d, 103, 21952, 3, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_fracture_mnist_3d, 103, 21952, 3, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (103, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Test data set
-    dataset = _helper_test_data_loader(load_fracture_mnist_3d, 240, 21952, 3, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_fracture_mnist_3d, 240, 21952, 3, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (240, 28, 28, 28)
     assert dataset.image_format == "HWD"
 
 
 @pytest.mark.data
-def test_load_vessel_mnist_3d():
+def test_load_vessel_mnist_3d(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_vessel_mnist_3d, 1909, 21952, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_vessel_mnist_3d, 1909, 21952, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1909, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Train data set
-    dataset = _helper_test_data_loader(load_vessel_mnist_3d, 1335, 21952, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_vessel_mnist_3d, 1335, 21952, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1335, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Validation data set
-    dataset = _helper_test_data_loader(load_vessel_mnist_3d, 192, 21952, 2, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_vessel_mnist_3d, 192, 21952, 2, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (192, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Test data set
-    dataset = _helper_test_data_loader(load_vessel_mnist_3d, 382, 21952, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_vessel_mnist_3d, 382, 21952, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (382, 28, 28, 28)
     assert dataset.image_format == "HWD"
 
 
 @pytest.mark.data
-def test_load_synapse_mnist_3d():
+def test_load_synapse_mnist_3d(my_tmp_dir):  # all/train/val/test subsets, each with downloads_path=my_tmp_dir; also checks un-flattened images shape and image_format
     # Full data set
-    dataset = _helper_test_data_loader(load_synapse_mnist_3d, 1759, 21952, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_synapse_mnist_3d, 1759, 21952, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1759, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Train data set
-    dataset = _helper_test_data_loader(load_synapse_mnist_3d, 1230, 21952, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_synapse_mnist_3d, 1230, 21952, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (1230, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Validation data set
-    dataset = _helper_test_data_loader(load_synapse_mnist_3d, 177, 21952, 2, dataloader_params={"subset": "val", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_synapse_mnist_3d, 177, 21952, 2, dataloader_params={"subset": "val", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (177, 28, 28, 28)
     assert dataset.image_format == "HWD"
     # Test data set
-    dataset = _helper_test_data_loader(load_synapse_mnist_3d, 352, 21952, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    dataset = _helper_test_data_loader(load_synapse_mnist_3d, 352, 21952, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape ==  (352, 28, 28, 28)
     assert dataset.image_format == "HWD"
diff --git a/clustpy/data/tests/test_real_timeseries_data.py b/clustpy/data/tests/test_real_timeseries_data.py
index ae77b42..549c362 100644
--- a/clustpy/data/tests/test_real_timeseries_data.py
+++ b/clustpy/data/tests/test_real_timeseries_data.py
@@ -1,119 +1,114 @@
 from clustpy.data.tests._helpers_for_tests import _helper_test_data_loader
 from clustpy.data import load_motestrain, load_proximal_phalanx_outline, load_diatom_size_reduction, load_symbols, \
     load_olive_oil, load_plane, load_sony_aibo_robot_surface, load_two_patterns, load_lsst
-from pathlib import Path
-import os
-import shutil
 import pytest
-
-TEST_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_testfiles_timeseries")
+import shutil
 
 
 @pytest.fixture(autouse=True, scope='function')
-def run_around_tests():
+def my_tmp_dir(tmp_path):  # function-scoped fixture: yields pytest's tmp_path as a str and removes that directory after the test
     # Code that will run before the tests
-    if not os.path.isdir(TEST_DOWNLOAD_PATH):
-        os.makedirs(TEST_DOWNLOAD_PATH)
+    tmp_dir = str(tmp_path)  # tests forward this value to the loaders as downloads_path
     # Test functions will be run at this point
-    yield
+    yield tmp_dir
     # Code that will run after the tests
-    shutil.rmtree(TEST_DOWNLOAD_PATH)
+    shutil.rmtree(tmp_dir)  # deletes the whole directory tree, including anything written into it during the test
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_motestrain():
+def test_load_motestrain(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_motestrain, 1272, 84, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_motestrain, 1272, 84, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_motestrain, 20, 84, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_motestrain, 20, 84, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_motestrain, 1252, 84, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_motestrain, 1252, 84, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_proximal_phalanx_outline():
+def test_load_proximal_phalanx_outline(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_proximal_phalanx_outline, 876, 80, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_proximal_phalanx_outline, 876, 80, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_proximal_phalanx_outline, 600, 80, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_proximal_phalanx_outline, 600, 80, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_proximal_phalanx_outline, 276, 80, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_proximal_phalanx_outline, 276, 80, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_diatom_size_reduction():
+def test_load_diatom_size_reduction(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_diatom_size_reduction, 322, 345, 4, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_diatom_size_reduction, 322, 345, 4, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_diatom_size_reduction, 16, 345, 4, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_diatom_size_reduction, 16, 345, 4, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_diatom_size_reduction, 306, 345, 4, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_diatom_size_reduction, 306, 345, 4, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_symbols():
+def test_load_symbols(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_symbols, 1020, 398, 6, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_symbols, 1020, 398, 6, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_symbols, 25, 398, 6, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_symbols, 25, 398, 6, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_symbols, 995, 398, 6, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_symbols, 995, 398, 6, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_olive_oil():
+def test_load_olive_oil(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_olive_oil, 60, 570, 4, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_olive_oil, 60, 570, 4, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_olive_oil, 30, 570, 4, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_olive_oil, 30, 570, 4, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_olive_oil, 30, 570, 4, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_olive_oil, 30, 570, 4, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_plane():
+def test_load_plane(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_plane, 210, 144, 7, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_plane, 210, 144, 7, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_plane, 105, 144, 7, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_plane, 105, 144, 7, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_plane, 105, 144, 7, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_plane, 105, 144, 7, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_sony_aibo_robot_surface():
+def test_load_sony_aibo_robot_surface(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_sony_aibo_robot_surface, 621, 70, 2, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_sony_aibo_robot_surface, 621, 70, 2, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_sony_aibo_robot_surface, 20, 70, 2, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_sony_aibo_robot_surface, 20, 70, 2, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_sony_aibo_robot_surface, 601, 70, 2, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_sony_aibo_robot_surface, 601, 70, 2, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_two_patterns():
+def test_load_two_patterns(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_two_patterns, 5000, 128, 4, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_two_patterns, 5000, 128, 4, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_two_patterns, 1000, 128, 4, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_two_patterns, 1000, 128, 4, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_two_patterns, 4000, 128, 4, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_two_patterns, 4000, 128, 4, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
 
 
 @pytest.mark.data
 @pytest.mark.timeseriesdata
-def test_load_lsst():
+def test_load_lsst(my_tmp_dir):  # runs the shared loader check on the all/train/test subsets with downloads_path=my_tmp_dir
     # Full data set
-    _helper_test_data_loader(load_lsst, 4925, 216, 14, dataloader_params={"subset": "all", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_lsst, 4925, 216, 14, dataloader_params={"subset": "all", "downloads_path":my_tmp_dir})
     # Train data set
-    _helper_test_data_loader(load_lsst, 2459, 216, 14, dataloader_params={"subset": "train", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_lsst, 2459, 216, 14, dataloader_params={"subset": "train", "downloads_path":my_tmp_dir})
     # Test data set
-    _helper_test_data_loader(load_lsst, 2466, 216, 14, dataloader_params={"subset": "test", "downloads_path":TEST_DOWNLOAD_PATH})
+    _helper_test_data_loader(load_lsst, 2466, 216, 14, dataloader_params={"subset": "test", "downloads_path":my_tmp_dir})
diff --git a/clustpy/data/tests/test_real_torchvision_data.py b/clustpy/data/tests/test_real_torchvision_data.py
index b987fa7..b854d20 100644
--- a/clustpy/data/tests/test_real_torchvision_data.py
+++ b/clustpy/data/tests/test_real_torchvision_data.py
@@ -2,23 +2,18 @@
 from clustpy.data import load_usps, load_mnist, load_fmnist, load_kmnist, load_cifar10, load_svhn, load_stl10, \
     load_gtsrb, load_cifar100
 import torchvision.datasets
-from pathlib import Path
-import os
-import shutil
 import pytest
-
-TEST_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_testfiles_torchvision")
+import shutil
 
 
 @pytest.fixture(autouse=True, scope='function')
-def run_around_tests():
+def my_tmp_dir(tmp_path):
     # Code that will run before the tests
-    if not os.path.isdir(TEST_DOWNLOAD_PATH):
-        os.makedirs(TEST_DOWNLOAD_PATH)
+    tmp_dir = str(tmp_path)
     # Test functions will be run at this point
-    yield
+    yield tmp_dir
     # Code that will run after the tests
-    shutil.rmtree(TEST_DOWNLOAD_PATH)
+    shutil.rmtree(tmp_dir)
 
 
 # Check if loading methods still exist (could be renamed/moved)
@@ -37,22 +32,22 @@ def test_torchvision_data_methods():
 
 # Do not skip USPS as it is the smallest dataset and can check the torchvision data loading mechanism
 @pytest.mark.data
-def test_load_usps():
+def test_load_usps(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_usps, 9298, 256, 10,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (9298, 16, 16)
     assert dataset.image_format == "HW"
     # Train data set
     dataset = _helper_test_data_loader(load_usps, 7291, 256, 10,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (7291, 16, 16)
     assert dataset.image_format == "HW"
     # Test data set
     dataset = _helper_test_data_loader(load_usps, 2007, 256, 10,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (2007, 16, 16)
     assert dataset.image_format == "HW"
@@ -60,22 +55,22 @@ def test_load_usps():
 
 @pytest.mark.largedata
 @pytest.mark.data
-def test_load_mnist():
+def test_load_mnist(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_mnist, 70000, 784, 10,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (70000, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
     dataset = _helper_test_data_loader(load_mnist, 60000, 784, 10,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (60000, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
     dataset = _helper_test_data_loader(load_mnist, 10000, 784, 10,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (10000, 28, 28)
     assert dataset.image_format == "HW"
@@ -83,22 +78,22 @@ def test_load_mnist():
 
 @pytest.mark.largedata
 @pytest.mark.data
-def test_load_kmnist():
+def test_load_kmnist(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_kmnist, 70000, 784, 10,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (70000, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
     dataset = _helper_test_data_loader(load_kmnist, 60000, 784, 10,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (60000, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
     dataset = _helper_test_data_loader(load_kmnist, 10000, 784, 10,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (10000, 28, 28)
     assert dataset.image_format == "HW"
@@ -106,22 +101,22 @@ def test_load_kmnist():
 
 @pytest.mark.largedata
 @pytest.mark.data
-def test_load_fmnist():
+def test_load_fmnist(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_fmnist, 70000, 784, 10,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (70000, 28, 28)
     assert dataset.image_format == "HW"
     # Train data set
     dataset = _helper_test_data_loader(load_fmnist, 60000, 784, 10,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (60000, 28, 28)
     assert dataset.image_format == "HW"
     # Test data set
     dataset = _helper_test_data_loader(load_fmnist, 10000, 784, 10,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (10000, 28, 28)
     assert dataset.image_format == "HW"
@@ -129,22 +124,22 @@ def test_load_fmnist():
 
 # Do not skip cifar10 as it is the smallest 3-channel dataset and can check channel normalization
 @pytest.mark.data
-def test_load_cifar10():
+def test_load_cifar10(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_cifar10, 60000, 3072, 10,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (60000, 3, 32, 32)
     assert dataset.image_format == "CHW"
     # Train data set
     dataset = _helper_test_data_loader(load_cifar10, 50000, 3072, 10,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (50000, 3, 32, 32)
     assert dataset.image_format == "CHW"
     # Test data set
     dataset = _helper_test_data_loader(load_cifar10, 10000, 3072, 10,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (10000, 3, 32, 32)
     assert dataset.image_format == "CHW"
@@ -152,22 +147,22 @@ def test_load_cifar10():
 
 @pytest.mark.largedata
 @pytest.mark.data
-def test_load_cifar100():
+def test_load_cifar100(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_cifar100, 60000, 3072, 100,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (60000, 3, 32, 32)
     assert dataset.image_format == "CHW"
     # Train data set
     dataset = _helper_test_data_loader(load_cifar100, 50000, 3072, 100,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (50000, 3, 32, 32)
     assert dataset.image_format == "CHW"
     # Test data set
     dataset = _helper_test_data_loader(load_cifar100, 10000, 3072, 20,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH,
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir,
                                                           "use_superclasses": True})
     # Non-flatten
     assert dataset.images.shape == (10000, 3, 32, 32)
@@ -176,22 +171,22 @@ def test_load_cifar100():
 
 @pytest.mark.largedata
 @pytest.mark.data
-def test_load_svhn():
+def test_load_svhn(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_svhn, 99289, 3072, 10,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (99289, 3, 32, 32)
     assert dataset.image_format == "CHW"
     # Train data set
     dataset = _helper_test_data_loader(load_svhn, 73257, 3072, 10,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (73257, 3, 32, 32)
     assert dataset.image_format == "CHW"
     # Test data set
     dataset = _helper_test_data_loader(load_svhn, 26032, 3072, 10,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (26032, 3, 32, 32)
     assert dataset.image_format == "CHW"
@@ -199,22 +194,22 @@ def test_load_svhn():
 
 @pytest.mark.largedata
 @pytest.mark.data
-def test_load_stl10():
+def test_load_stl10(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_stl10, 13000, 27648, 10,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (13000, 3, 96, 96)
     assert dataset.image_format == "CHW"
     # Train data set
     dataset = _helper_test_data_loader(load_stl10, 5000, 27648, 10,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (5000, 3, 96, 96)
     assert dataset.image_format == "CHW"
     # Test data set
     dataset = _helper_test_data_loader(load_stl10, 8000, 27648, 10,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (8000, 3, 96, 96)
     assert dataset.image_format == "CHW"
@@ -222,22 +217,22 @@ def test_load_stl10():
 
 @pytest.mark.data
 # Do not skip GTSRB as the loading mechanism is different to the other torchvision dataloaders
-def test_load_gtsrb():
+def test_load_gtsrb(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_gtsrb, 39270, 3072, 43,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (39270, 3, 32, 32)
     assert dataset.image_format == "CHW"
     # Train data set
     dataset = _helper_test_data_loader(load_gtsrb, 26640, 3072, 43,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (26640, 3, 32, 32)
     assert dataset.image_format == "CHW"
     # Test data set (with image size 30x30)
     dataset = _helper_test_data_loader(load_gtsrb, 12630, 2700, 43,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH,
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir,
                                                           "image_size": (30, 30)})
     # Non-flatten
     assert dataset.images.shape == (12630, 3, 30, 30)
diff --git a/clustpy/data/tests/test_real_uci_data.py b/clustpy/data/tests/test_real_uci_data.py
index 103ee1a..f6318da 100644
--- a/clustpy/data/tests/test_real_uci_data.py
+++ b/clustpy/data/tests/test_real_uci_data.py
@@ -4,258 +4,253 @@
     load_user_knowledge, load_breast_tissue, load_forest_types, load_dermatology, load_multiple_features, \
     load_statlog_australian_credit_approval, load_breast_cancer_wisconsin_original, load_optdigits, load_semeion, \
     load_cmu_faces, load_gene_expression_cancer_rna_seq, load_sport_articles, load_wholesale_customers, load_reuters21578
-from pathlib import Path
-import os
-import shutil
 import pytest
-
-TEST_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_testfiles_uci")
+import shutil
 
 
 @pytest.fixture(autouse=True, scope='function')
-def run_around_tests():
+def my_tmp_dir(tmp_path):
     # Code that will run before the tests
-    if not os.path.isdir(TEST_DOWNLOAD_PATH):
-        os.makedirs(TEST_DOWNLOAD_PATH)
+    tmp_dir = str(tmp_path)
     # Test functions will be run at this point
-    yield
+    yield tmp_dir
     # Code that will run after the tests
-    shutil.rmtree(TEST_DOWNLOAD_PATH)
+    shutil.rmtree(tmp_dir)
 
 
 @pytest.mark.data
-def test_load_banknotes():
-    _helper_test_data_loader(load_banknotes, 1372, 4, 2, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_banknotes(my_tmp_dir):
+    _helper_test_data_loader(load_banknotes, 1372, 4, 2, dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_spambase():
-    _helper_test_data_loader(load_spambase, 4601, 57, 2, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_spambase(my_tmp_dir):
+    _helper_test_data_loader(load_spambase, 4601, 57, 2, dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_seeds():
-    _helper_test_data_loader(load_seeds, 210, 7, 3, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_seeds(my_tmp_dir):
+    _helper_test_data_loader(load_seeds, 210, 7, 3, dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_skin():
-    _helper_test_data_loader(load_skin, 245057, 3, 2, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_skin(my_tmp_dir):
+    _helper_test_data_loader(load_skin, 245057, 3, 2, dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_soybean_small():
-    _helper_test_data_loader(load_soybean_small, 47, 35, 4, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_soybean_small(my_tmp_dir):
+    _helper_test_data_loader(load_soybean_small, 47, 35, 4, dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_soybean_large():
+def test_load_soybean_large(my_tmp_dir):
     # Full data set
     _helper_test_data_loader(load_soybean_large, 562, 35, 15,
-                             dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Train data set
     _helper_test_data_loader(load_soybean_large, 266, 35, 15,
-                             dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Test data set
     _helper_test_data_loader(load_soybean_large, 296, 35, 15,
-                             dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_pendigits():
+def test_load_pendigits(my_tmp_dir):
     # Full data set
     _helper_test_data_loader(load_pendigits, 10992, 16, 10,
-                             dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Train data set
     _helper_test_data_loader(load_pendigits, 7494, 16, 10,
-                             dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Test data set
     _helper_test_data_loader(load_pendigits, 3498, 16, 10,
-                             dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_ecoli():
-    _helper_test_data_loader(load_ecoli, 336, 7, 8, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_ecoli(my_tmp_dir):
+    _helper_test_data_loader(load_ecoli, 336, 7, 8, dataloader_params={"downloads_path": my_tmp_dir})
     # Check if ignoring small clusters works
     _helper_test_data_loader(load_ecoli, 327, 7, 5,
-                             dataloader_params={"ignore_small_clusters": True, "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"ignore_small_clusters": True, "downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_hrtu2():
-    _helper_test_data_loader(load_htru2, 17898, 8, 2, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_hrtu2(my_tmp_dir):
+    _helper_test_data_loader(load_htru2, 17898, 8, 2, dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_letterrecognition():
+def test_load_letterrecognition(my_tmp_dir):
     _helper_test_data_loader(load_letterrecognition, 20000, 16, 26,
-                             dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_har():
+def test_load_har(my_tmp_dir):
     # Full data set
     _helper_test_data_loader(load_har, 10299, 561, 6,
-                             dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Train data set
     _helper_test_data_loader(load_har, 7352, 561, 6,
-                             dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Test data set
     _helper_test_data_loader(load_har, 2947, 561, 6,
-                             dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_statlog_shuttle():
+def test_load_statlog_shuttle(my_tmp_dir):
     # 7z probably not installed! -> data and labels can be None
-    dataset = load_statlog_shuttle(downloads_path=TEST_DOWNLOAD_PATH)
+    dataset = load_statlog_shuttle(downloads_path=my_tmp_dir)
     if dataset is not None:
         # Full data set
         _helper_test_data_loader(load_statlog_shuttle, 58000, 9, 7,
-                                 dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                 dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
         # Train data set
         _helper_test_data_loader(load_statlog_shuttle, 43500, 9, 7,
-                                 dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                 dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
         # Test data set
         _helper_test_data_loader(load_statlog_shuttle, 14500, 9, 7,
-                                 dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                 dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_mice_protein():
-    _helper_test_data_loader(load_mice_protein, 1077, 68, 8, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_mice_protein(my_tmp_dir):
+    _helper_test_data_loader(load_mice_protein, 1077, 68, 8, dataloader_params={"downloads_path": my_tmp_dir})
     # Check if additional labels work
     _helper_test_data_loader(load_mice_protein, 1077, 68, [8, 72, 2, 2, 2],
-                             dataloader_params={"return_additional_labels": True, "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"return_additional_labels": True, "downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_user_knowledge():
+def test_load_user_knowledge(my_tmp_dir):
     # Full data set
     _helper_test_data_loader(load_user_knowledge, 403, 5, 4,
-                             dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Train data set
     _helper_test_data_loader(load_user_knowledge, 258, 5, 4,
-                             dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Test data set
     _helper_test_data_loader(load_user_knowledge, 145, 5, 4,
-                             dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_breast_tissue():
-    _helper_test_data_loader(load_breast_tissue, 106, 9, 6, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_breast_tissue(my_tmp_dir):
+    _helper_test_data_loader(load_breast_tissue, 106, 9, 6, dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_forest_types():
+def test_load_forest_types(my_tmp_dir):
     # Full data set
     _helper_test_data_loader(load_forest_types, 523, 27, 4,
-                             dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Train data set
     _helper_test_data_loader(load_forest_types, 198, 27, 4,
-                             dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Test data set
     _helper_test_data_loader(load_forest_types, 325, 27, 4,
-                             dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_dermatology():
-    _helper_test_data_loader(load_dermatology, 358, 34, 6, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_dermatology(my_tmp_dir):
+    _helper_test_data_loader(load_dermatology, 358, 34, 6, dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_multiple_features():
+def test_load_multiple_features(my_tmp_dir):
     _helper_test_data_loader(load_multiple_features, 2000, 649, 10,
-                             dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_statlog_australian_credit_approval():
+def test_load_statlog_australian_credit_approval(my_tmp_dir):
     _helper_test_data_loader(load_statlog_australian_credit_approval, 690, 14, 2,
-                             dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_breast_cancer_wisconsin_original():
+def test_load_breast_cancer_wisconsin_original(my_tmp_dir):
     _helper_test_data_loader(load_breast_cancer_wisconsin_original, 683, 9, 2,
-                             dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_optdigits():
+def test_load_optdigits(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_optdigits, 5620, 64, 10,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (5620, 8, 8)
     assert dataset.image_format == "HW"
     # Train data set
     dataset = _helper_test_data_loader(load_optdigits, 3823, 64, 10,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (3823, 8, 8)
     assert dataset.image_format == "HW"
     # Test data set
     dataset = _helper_test_data_loader(load_optdigits, 1797, 64, 10,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1797, 8, 8)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_semeion():
+def test_load_semeion(my_tmp_dir):
     dataset = _helper_test_data_loader(load_semeion, 1593, 256, 10,
-                                       dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1593, 16, 16)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_cmu_faces():
+def test_load_cmu_faces(my_tmp_dir):
     dataset = _helper_test_data_loader(load_cmu_faces, 624, 960, [20, 4, 4, 2],
-                                       dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (624, 30, 32)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_gene_expression_cancer_rna_seq():
+def test_load_gene_expression_cancer_rna_seq(my_tmp_dir):
     _helper_test_data_loader(load_gene_expression_cancer_rna_seq, 801, 20531, 5,
-                             dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_sport_articles():
+def test_load_sport_articles(my_tmp_dir):
     _helper_test_data_loader(load_sport_articles, 1000, 55, 2,
-                             dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_wholesale_customers():
+def test_load_wholesale_customers(my_tmp_dir):
     _helper_test_data_loader(load_wholesale_customers, 440, 6, [2, 3],
-                             dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"downloads_path": my_tmp_dir})
 
 
 @pytest.mark.data
-def test_load_reuters21578():
+def test_load_reuters21578(my_tmp_dir):
     # Full data set
     _helper_test_data_loader(load_reuters21578, 8367, 2000, 5,
-                             dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"downloads_path": my_tmp_dir})
     # Lewis train data
     _helper_test_data_loader(load_reuters21578, 5791, 2000, 5,
-                             dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "train", "downloads_path": my_tmp_dir})
     # Lewis test data
     _helper_test_data_loader(load_reuters21578, 2300, 2000, 5,
-                             dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "test", "downloads_path": my_tmp_dir})
     # cgi train data
     _helper_test_data_loader(load_reuters21578, 8091, 2000, 5,
-                             dataloader_params={"subset": "train-cgi", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "train-cgi", "downloads_path": my_tmp_dir})
     # cgi test data
     _helper_test_data_loader(load_reuters21578, 276, 2000, 5,
-                             dataloader_params={"subset": "test-cgi", "downloads_path": TEST_DOWNLOAD_PATH})
+                             dataloader_params={"subset": "test-cgi", "downloads_path": my_tmp_dir})
diff --git a/clustpy/data/tests/test_real_video_data.py b/clustpy/data/tests/test_real_video_data.py
index b5343d5..6df4464 100644
--- a/clustpy/data/tests/test_real_video_data.py
+++ b/clustpy/data/tests/test_real_video_data.py
@@ -2,23 +2,18 @@
 from clustpy.data.tests._helpers_for_tests import _helper_test_data_loader
 from clustpy.data import load_video_weizmann, load_video_keck_gesture
 from clustpy.data.real_video_data import _downsample_frames
-from pathlib import Path
-import os
-import shutil
 import pytest
-
-TEST_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_testfiles_video")
+import shutil
 
 
 @pytest.fixture(autouse=True, scope='function')
-def run_around_tests():
+def my_tmp_dir(tmp_path):
     # Code that will run before the tests
-    if not os.path.isdir(TEST_DOWNLOAD_PATH):
-        os.makedirs(TEST_DOWNLOAD_PATH)
+    tmp_dir = str(tmp_path)
     # Test functions will be run at this point
-    yield
+    yield tmp_dir
     # Code that will run after the tests
-    shutil.rmtree(TEST_DOWNLOAD_PATH)
+    shutil.rmtree(tmp_dir)
 
 
 def test_downsample_frames():
@@ -41,37 +36,41 @@ def test_downsample_frames():
 
 
 @pytest.mark.data
-def test_load_video_weizmann():
-    dataset = _helper_test_data_loader(load_video_weizmann, None, 77760, [10, 9],
-                                       dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})  # N not always 5687
+def test_load_video_weizmann(my_tmp_dir):
+    dataset = _helper_test_data_loader(load_video_weizmann, None, 77760, [2, 2],
+                                       dataloader_params={"use_actions": ["walk", "run"], "use_persons": ["daria", "denis"],
+                                                          "downloads_path": my_tmp_dir})  # N not always the same (5687)
     # Non-flatten
     assert dataset.images.shape[1:] == (3, 144, 180)
     assert dataset.image_format == "CHW"
+    data_full_size = dataset.data.shape[0]
     # Change image size and downsample
     dataset = _helper_test_data_loader(load_video_weizmann, None, 30000, [10, 9],
                                        dataloader_params={"image_size": (100, 100), "frame_sampling_ratio": 0.5,
-                                                          "downloads_path": TEST_DOWNLOAD_PATH})  # N not always 5687
+                                                          "downloads_path": my_tmp_dir})  # N not always the same (5687)
     # Non-flatten
     assert dataset.images.shape[1:] == (3, 100, 100)
     assert dataset.image_format == "CHW"
     # Check downsampling
-    data = dataset.data
-    assert data.shape[0] / 5687 < 0.55 and data.shape[0] / 5687 > 0.49
+    data_subsampled = dataset.data
+    label_subsampled = dataset.target
+    data_subset_size = data_subsampled[(label_subsampled[:, 0] < 2) & (label_subsampled[:, 1] < 2)].shape[0]
+    assert data_subset_size / data_full_size < 0.55 and data_subset_size / data_full_size > 0.49
 
 
 @pytest.mark.largedata
 @pytest.mark.data
-def test_load_video_keck_gesture():
+def test_load_video_keck_gesture(my_tmp_dir):
     dataset = _helper_test_data_loader(load_video_keck_gesture, None, 120000, [15, 4],
                                        dataloader_params={"subset": "all",
-                                                          "downloads_path": TEST_DOWNLOAD_PATH})  # N not always 25457
+                                                          "downloads_path": my_tmp_dir})  # N not always the same (25457)
     # Non-flatten
     assert dataset.images.shape[1:] == (3, 200, 200)
     assert dataset.image_format == "CHW"
     # Test data
     dataset = _helper_test_data_loader(load_video_keck_gesture, None, 120000, [15, 3],
                                        dataloader_params={"subset": "train",
-                                                          "downloads_path": TEST_DOWNLOAD_PATH})  # N not always 11911
+                                                          "downloads_path": my_tmp_dir})  # N not always the same (11911)
     # Non-flatten
     assert dataset.images.shape[1:] == (3, 200, 200)
     assert dataset.image_format == "CHW"
@@ -79,7 +78,7 @@ def test_load_video_keck_gesture():
     dataset = _helper_test_data_loader(load_video_keck_gesture, None, 30000, [15, 4],
                                        dataloader_params={"image_size": (100, 100), "frame_sampling_ratio": 0.5,
                                                           "subset": "test",
-                                                          "downloads_path": TEST_DOWNLOAD_PATH})  # N not always 13546
+                                                          "downloads_path": my_tmp_dir})  # N not always the same (13546)
     # Non-flatten
     assert dataset.images.shape[1:] == (3, 100, 100)
     assert dataset.image_format == "CHW"
diff --git a/clustpy/data/tests/test_real_world_data.py b/clustpy/data/tests/test_real_world_data.py
index 1b9c8ec..c1dba3c 100644
--- a/clustpy/data/tests/test_real_world_data.py
+++ b/clustpy/data/tests/test_real_world_data.py
@@ -1,23 +1,18 @@
 from clustpy.data.tests._helpers_for_tests import _helper_test_data_loader
 from clustpy.data import load_iris, load_wine, load_breast_cancer, load_olivetti_faces, load_newsgroups, load_rcv1, \
     load_imagenet_dog, load_imagenet10, load_coil20, load_coil100, load_webkb
-from pathlib import Path
-import os
-import shutil
 import pytest
-
-TEST_DOWNLOAD_PATH = str(Path.home() / "Downloads/clustpy_testfiles_realworld")
+import shutil
 
 
 @pytest.fixture(autouse=True, scope='function')
-def run_around_tests():
+def my_tmp_dir(tmp_path):
     # Code that will run before the tests
-    if not os.path.isdir(TEST_DOWNLOAD_PATH):
-        os.makedirs(TEST_DOWNLOAD_PATH)
+    tmp_dir = str(tmp_path)
     # Test functions will be run at this point
-    yield
+    yield tmp_dir
     # Code that will run after the tests
-    shutil.rmtree(TEST_DOWNLOAD_PATH)
+    shutil.rmtree(tmp_dir)
 
 
 @pytest.mark.data
@@ -66,31 +61,31 @@ def test_load_rcv1():
 
 @pytest.mark.data
 @pytest.mark.largedata
-def test_load_imagenet_dog():
+def test_load_imagenet_dog(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_imagenet_dog, 20580, 150528, 120,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH,
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir,
                                                           "breeds": None})
     # Non-flatten
     assert dataset.images.shape == (20580, 3, 224, 224)
     assert dataset.image_format == "CHW"
     # Train data set
     dataset = _helper_test_data_loader(load_imagenet_dog, 12000, 150528, 120,
-                                       dataloader_params={"subset": "train", "downloads_path": TEST_DOWNLOAD_PATH,
+                                       dataloader_params={"subset": "train", "downloads_path": my_tmp_dir,
                                                           "breeds": None})
     # Non-flatten
     assert dataset.images.shape == (12000, 3, 224, 224)
     assert dataset.image_format == "CHW"
     # Test data set
     dataset = _helper_test_data_loader(load_imagenet_dog, 8580, 150528, 120,
-                                       dataloader_params={"subset": "test", "downloads_path": TEST_DOWNLOAD_PATH,
+                                       dataloader_params={"subset": "test", "downloads_path": my_tmp_dir,
                                                           "breeds": None})
     # Non-flatten
     assert dataset.images.shape == (8580, 3, 224, 224)
     assert dataset.image_format == "CHW"
     # Test default breeds and different image size
     dataset = _helper_test_data_loader(load_imagenet_dog, 2574, 3072, 15,
-                                       dataloader_params={"subset": "all", "downloads_path": TEST_DOWNLOAD_PATH,
+                                       dataloader_params={"subset": "all", "downloads_path": my_tmp_dir,
                                                           "image_size": (32, 32)})
     # Non-flatten
     assert dataset.images.shape == (2574, 3, 32, 32)
@@ -99,16 +94,16 @@ def test_load_imagenet_dog():
 
 @pytest.mark.data
 @pytest.mark.largedata
-def test_load_imagenet10():
+def test_load_imagenet10(my_tmp_dir):
     # Full data set
     dataset = _helper_test_data_loader(load_imagenet10, 13000, 150528, 10,
-                                       dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (13000, 3, 224, 224)
     assert dataset.image_format == "CHW"
     # Test different image size
     dataset = _helper_test_data_loader(load_imagenet10, 13000, 27648, 10,
-                                       dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH,
+                                       dataloader_params={"downloads_path": my_tmp_dir,
                                                           "use_224_size": False})
     # Non-flatten
     assert dataset.images.shape == (13000, 3, 96, 96)
@@ -116,23 +111,23 @@ def test_load_imagenet10():
 
 
 @pytest.mark.data
-def test_load_coil20():
+def test_load_coil20(my_tmp_dir):
     dataset = _helper_test_data_loader(load_coil20, 1440, 16384, 20,
-                                       dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+                                       dataloader_params={"downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (1440, 128, 128)
     assert dataset.image_format == "HW"
 
 
 @pytest.mark.data
-def test_load_coil100():
-    dataset = _helper_test_data_loader(load_coil100, 7200, 49152, 100, dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
+def test_load_coil100(my_tmp_dir):
+    dataset = _helper_test_data_loader(load_coil100, 7200, 49152, 100, dataloader_params={"downloads_path": my_tmp_dir})
     # Non-flatten
     assert dataset.images.shape == (7200, 3, 128, 128)
     assert dataset.image_format == "CHW"
 
 
 @pytest.mark.data
-def test_load_webkb():
-    _helper_test_data_loader(load_webkb, 1041, 323, [4, 4], dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH})
-    _helper_test_data_loader(load_webkb, 8282, 761, [7, 5], dataloader_params={"downloads_path": TEST_DOWNLOAD_PATH, "use_categories": None, "use_universities": None})
+def test_load_webkb(my_tmp_dir):
+    _helper_test_data_loader(load_webkb, 1041, 323, [4, 4], dataloader_params={"downloads_path": my_tmp_dir})
+    _helper_test_data_loader(load_webkb, 8282, 761, [7, 5], dataloader_params={"downloads_path": my_tmp_dir, "use_categories": None, "use_universities": None})
diff --git a/clustpy/metrics/__init__.py b/clustpy/metrics/__init__.py
index 18be708..b2a5cc2 100644
--- a/clustpy/metrics/__init__.py
+++ b/clustpy/metrics/__init__.py
@@ -8,7 +8,7 @@
     multiple_labelings_pc_jaccard_score, multiple_labelings_pc_precision_score, multiple_labelings_pc_rand_score, \
     multiple_labelings_pc_recall_score
 from .confusion_matrix import ConfusionMatrix
-from .hierarchical_metrics import dendrogram_purity, leaf_purity
+from .hierarchical_metrics import dendrogram_purity, leaf_purity, node_purity
 
 __all__ = ['variation_of_information',
            'unsupervised_clustering_accuracy',
@@ -33,4 +33,5 @@
            'dendrogram_purity',
            'leaf_purity',
            'purity',
-           'cvnn_score']
+           'cvnn_score',
+           'node_purity']
diff --git a/clustpy/metrics/_metrics_utils.py b/clustpy/metrics/_metrics_utils.py
index b69e49b..b24137b 100644
--- a/clustpy/metrics/_metrics_utils.py
+++ b/clustpy/metrics/_metrics_utils.py
@@ -1,9 +1,11 @@
 import numpy as np
+from sklearn.metrics.cluster._supervised import check_clusterings
+from sklearn.utils import check_X_y
 
 
-def _check_number_of_points(labels_true: np.ndarray, labels_pred: np.ndarray) -> bool:
+def _check_labels_arrays(labels_true: np.ndarray, labels_pred: np.ndarray, allow_2d_labels: bool = False) -> (np.ndarray, np.ndarray):
     """
-    Check if the length of the ground truth labels and the prediction labels match.
+    Check that the ground truth labels and the prediction labels are compatible.
     If they do not match throw an exception.
 
     Parameters
@@ -12,14 +14,63 @@ def _check_number_of_points(labels_true: np.ndarray, labels_pred: np.ndarray) ->
         The ground truth labels of the data set
     labels_pred : np.ndarray
         The labels as predicted by a clustering algorithm
+    allow_2d_labels : bool
+        Specifies whether 2d labels (multiple label sets) are allowed (default: False)
 
     Returns
     -------
-    boolean : bool
-        True if execution was successful
+    tuple : (np.ndarray, np.ndarray)
+        The ground truth labels,
+        The predicted labels
     """
-    if labels_pred.shape[0] != labels_true.shape[0]:
-        raise Exception(
-            "Number of objects of the prediction and ground truth are not equal.\nNumber of prediction objects: " + str(
-                labels_pred.shape[0]) + "\nNumber of ground truth objects: " + str(labels_true.shape[0]))
-    return True
\ No newline at end of file
+    labels_true = np.asarray(labels_true).astype(int)
+    labels_pred = np.asarray(labels_pred).astype(int)
+
+    if labels_true.ndim == 1 and labels_pred.ndim == 1:
+        labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
+    elif allow_2d_labels:
+        true_ref = labels_true[:, 0].copy() if labels_true.ndim > 1 else labels_true.copy()
+        pred_ref = labels_pred[:, 0].copy() if labels_pred.ndim > 1 else labels_pred.copy()
+        if labels_true.ndim > 1:
+            labels_true = labels_true.copy()
+            for i in range(labels_true.shape[1]):
+                # Validate each column of 'true' against the reference column of 'pred'
+                labels_true[:, i], _ = check_clusterings(labels_true[:, i], pred_ref)
+        else:
+            labels_true, _ = check_clusterings(labels_true, pred_ref)
+        if labels_pred.ndim > 1:
+            labels_pred = labels_pred.copy()
+            for i in range(labels_pred.shape[1]):
+                # Validate each column of 'pred' against the reference column of 'true' (true_ref was copied beforehand)
+                _, labels_pred[:, i] = check_clusterings(true_ref, labels_pred[:, i])
+        else:
+            _, labels_pred = check_clusterings(true_ref, labels_pred)
+    else:
+        raise ValueError(f"Your labels are not 1d arrays. Shape of labels_true: {labels_true.shape}, shape of labels_pred: {labels_pred.shape}")
+    return labels_true, labels_pred
+
+
+def _check_length_data_and_labels(X: np.ndarray, labels: np.ndarray) -> (np.ndarray, np.ndarray):
+    """
+    Check that the data and the predicted labels are compatible using sklearn's check_X_y.
+    Additionally, a ValueError is raised if the number of distinct labels is 1 or equal to the number of samples.
+
+    Parameters
+    ----------
+    X : np.ndarray
+        The data set
+    labels : np.ndarray
+        The labels as predicted by a clustering algorithm
+
+    Returns
+    -------
+    tuple : (np.ndarray, np.ndarray)
+        The data set,
+        The predicted labels
+    """
+    X, labels = check_X_y(X, labels)
+    labels = labels.astype(int)
+    n_pred_clusters = len(np.unique(labels))
+    if n_pred_clusters == 1 or n_pred_clusters == X.shape[0]:
+        raise ValueError("The number of different labels must be within [2, n_samples -1]")
+    return X, labels
diff --git a/clustpy/metrics/confusion_matrix.py b/clustpy/metrics/confusion_matrix.py
index 1c34179..20c78f5 100644
--- a/clustpy/metrics/confusion_matrix.py
+++ b/clustpy/metrics/confusion_matrix.py
@@ -1,10 +1,10 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy.optimize import linear_sum_assignment
-from clustpy.metrics._metrics_utils import _check_number_of_points
+from clustpy.metrics._metrics_utils import _check_labels_arrays
 
 
-def _rearrange(confusion_matrix: np.ndarray) -> np.ndarray:
+def _rearrange(confusion_matrix: np.ndarray) -> (np.ndarray, np.ndarray):
     """
     Rearrange the confusion matrix in such a way that the sum of the diagonal is maximized.
     Thereby, the best matching combination of labels will be shown.
@@ -20,27 +20,30 @@ def _rearrange(confusion_matrix: np.ndarray) -> np.ndarray:
     Returns
     -------
     rearranged_confusion_matrix : np.ndarray
-        The rearranged confusion matrix.
-        If number of ground truth labels is larger than the number of predicted labels, the resulting confusion matrix will be quadradic with multiple 0 columns.
+        The rearranged confusion matrix
+        (If the number of ground truth labels is larger than the number of predicted labels, the resulting confusion matrix will be square, padded with all-zero columns),
+        The indices regarding the rearrangement
     """
     # Change order using the Hungarian Method
     max_number_labels = max(confusion_matrix.shape)
     rearranged_confusion_matrix = np.zeros((max_number_labels, max_number_labels), dtype=confusion_matrix.dtype)
     # Linear sum assignment tries to minimize the diagonal sum -> use negative confusion_matrix
-    rearranged_confusion_matrix[:confusion_matrix.shape[0], :confusion_matrix.shape[1]] = -confusion_matrix
-    indices = linear_sum_assignment(rearranged_confusion_matrix)
-    # Revert values back to positive range, change order of the columns
-    rearranged_confusion_matrix = -rearranged_confusion_matrix[:, indices[1]]
+    rearranged_confusion_matrix[:confusion_matrix.shape[0], :confusion_matrix.shape[1]] = confusion_matrix
+    indices = linear_sum_assignment(-rearranged_confusion_matrix)
+    # Change order of the columns
+    rearranged_order = indices[1]
+    rearranged_confusion_matrix = rearranged_confusion_matrix[:, rearranged_order]
     rearranged_confusion_matrix = rearranged_confusion_matrix[:confusion_matrix.shape[0], :]
     # If there are more columns than rows sort remaining columns by highest value
     if confusion_matrix.shape[1] > confusion_matrix.shape[0]:
         missing_columns = np.arange(confusion_matrix.shape[0], confusion_matrix.shape[1])
         missing_columns_order = np.argsort(np.max(rearranged_confusion_matrix[:, missing_columns], axis=0))[::-1]
         rearranged_confusion_matrix[:, missing_columns] = rearranged_confusion_matrix[:, missing_columns[missing_columns_order]]
-    return rearranged_confusion_matrix
+        rearranged_order[missing_columns] = rearranged_order[missing_columns[missing_columns_order]]
+    return rearranged_confusion_matrix, rearranged_order
 
 
-def _plot_confusion_matrix(confusion_matrix: np.ndarray, show_text: bool, figsize: tuple, cmap: str, textcolor: str,
+def _plot_confusion_matrix(confusion_matrix: np.ndarray, show_text: bool, row_names : list, column_names : list, figsize: tuple, cmap: str, textcolor: str,
                            vmin: float, vmax: float) -> None:
     """
     Plot the confusion matrix.
@@ -51,6 +54,10 @@ def _plot_confusion_matrix(confusion_matrix: np.ndarray, show_text: bool, figsiz
         The confusion matrix to plot
     show_text : bool
         Show the value in each cell as text
+    row_names : list
+        List containing the names of the rows
+    column_names : list
+        List containing the names of the columns
     figsize : tuple
         Tuple indicating the height and width of the plot
     cmap : str
@@ -66,9 +73,17 @@ def _plot_confusion_matrix(confusion_matrix: np.ndarray, show_text: bool, figsiz
         If None, it will be set as the maximum value within the confusion matrix.
         Used to choose the color from the colormap
     """
+    if len(row_names) != confusion_matrix.shape[0]:
+        raise ValueError("Length of the row names list must match the number of rows (ground turth clusters) in the confusion matrix. Length is {0} and number of rows is {1}".format(len(row_names), confusion_matrix.shape[0]))
+    if len(column_names) != confusion_matrix.shape[1]:
+        raise ValueError("Length of the column names list must match the number of columns (predicted clusters) in the confusion matrix. Length is {0} and number of columns is {1}".format(len(column_names), confusion_matrix.shape[1]))
     fig, ax = plt.subplots(figsize=figsize)
     # Plot confusion matrix using colors
     ax.imshow(confusion_matrix, cmap=cmap, vmin=vmin, vmax=vmax)
+    ax.set_xticks(np.arange(confusion_matrix.shape[1]))
+    ax.set_xticklabels(column_names)
+    ax.set_yticks(np.arange(confusion_matrix.shape[0]))
+    ax.set_yticklabels(row_names)
     # Optional: Add text to the color cells
     if show_text:
         for i in range(confusion_matrix.shape[0]):
@@ -90,8 +105,9 @@ class ConfusionMatrix():
         The ground truth labels of the data set
     labels_pred : np.ndarray
         The labels as predicted by a clustering algorithm
-    shape : tuple
-        Shape of the resulting confusion matrix (default: None)
+    shape : tuple | str | None
+        The desired shape of the confusion matrix.
+        Can be "square" to enforce a square confusion matrix (default: None)
 
     Attributes
     ----------
@@ -99,22 +115,27 @@ class ConfusionMatrix():
         The confusion matrix
     """
 
-    def __init__(self, labels_true: np.ndarray, labels_pred: np.ndarray, shape: tuple=None):
-        _check_number_of_points(labels_true, labels_pred)
-        if np.any(labels_true < 0):
-            labels_true = labels_true.copy()
-            labels_true -= labels_true.min()
-        if np.any(labels_pred < 0):
-            labels_pred = labels_pred.copy()
-            labels_pred -= labels_pred.min()
-        labels_true = labels_true.astype(int)
-        labels_pred = labels_pred.astype(int)
+    def __init__(self, labels_true: np.ndarray, labels_pred: np.ndarray, shape: tuple | str | None=None):
+        labels_true, labels_pred = _check_labels_arrays(labels_true, labels_pred)
+        true_clusters, true_clusters_idx = np.unique(labels_true, return_inverse=True)
+        pred_clusters, pred_clusters_idx = np.unique(labels_pred, return_inverse=True)
+        self.true_clusters = true_clusters
+        self.pred_clusters = pred_clusters
         if shape is None:
-            conf_matrix = np.zeros((labels_true.max() + 1, labels_pred.max() + 1), dtype=int)
+            shape = (len(true_clusters), len(pred_clusters))
         else:
-            assert len(shape) == 2 and shape[0] > labels_true.max() and shape[1] > labels_pred.max(), f"Shape must contain two values such that shape[0] > labels_true.max() and shape[1] > labels_true.max(). Your values: shape = {shape}, labels_true.max() = {labels_true.max()}, labels_pred.max() = {labels_pred.max()}"
-            conf_matrix = np.zeros(shape, dtype=int)
-        np.add.at(conf_matrix, (labels_true, labels_pred), 1)
+            if shape == "square":
+                max_labels = max(len(true_clusters), len(pred_clusters))
+                shape = (max_labels, max_labels)
+            else:
+                assert len(shape) == 2 and shape[0] >= len(true_clusters) and shape[1] >= len(pred_clusters), f"Shape must be 'square' or a tuple containing two values such that shape[0] >= len(np.unique(labels_true)) and shape[1] >= len(np.unique(labels_pred)). Your values: shape = {shape}, len(np.unique(labels_true)) = {len(np.unique(labels_true))}, len(np.unique(labels_pred)) = {len(np.unique(labels_pred))}"
+            # Fill unique label information (self.true_clusters and self.pred_clusters) with -2 placeholders
+            if shape[0] > len(true_clusters):
+                self.true_clusters = np.append(self.true_clusters, [-2] * (shape[0] - len(true_clusters)))
+            if shape[1] > len(pred_clusters):
+                self.pred_clusters = np.append(self.pred_clusters, [-2] * (shape[1] - len(pred_clusters)))
+        conf_matrix = np.zeros(shape, dtype=int)
+        np.add.at(conf_matrix, (true_clusters_idx, pred_clusters_idx), 1)
         self.confusion_matrix = conf_matrix
 
     def __str__(self):
@@ -148,12 +169,14 @@ def rearrange(self, inplace: bool = True) -> np.ndarray:
             The rearranged confusion matrix
             If number of ground truth labels is larer than the number of predicted labels, the resulting confusion matrix will be quadradic with multiple 0 columns.
         """
-        rearranged_confusion_matrix = _rearrange(self.confusion_matrix)
+        rearranged_confusion_matrix, rearranged_order = _rearrange(self.confusion_matrix)
         if inplace:
             self.confusion_matrix = rearranged_confusion_matrix
+            self.pred_clusters = self.pred_clusters[rearranged_order[:len(self.pred_clusters)]]
         return rearranged_confusion_matrix
 
-    def plot(self, show_text: bool = True, figsize: tuple = (10, 10), cmap: str = "YlGn", textcolor: str = "black",
+    def plot(self, show_text: bool = True, ground_truth_names: list | None = None, 
+             figsize: tuple = (10, 10), cmap: str = "YlGn", textcolor: str = "black", 
              vmin: int = 0, vmax: int = None) -> None:
         """
         Plot the confusion matrix.
@@ -162,6 +185,8 @@ def plot(self, show_text: bool = True, figsize: tuple = (10, 10), cmap: str = "Y
         ----------
         show_text : bool
             Show the value in each cell as text (default: True)
+        ground_truth_names : list | None
+            List containing the names of the ground truth clusters. If None, the stored ground truth cluster ids are used (default: None)
         figsize : tuple
             Tuple indicating the height and width of the plot (default: (10, 10))
         cmap : str
@@ -177,4 +202,6 @@ def plot(self, show_text: bool = True, figsize: tuple = (10, 10), cmap: str = "Y
             If None, it will be set as the maximum value within the confusion matrix.
             Used to choose the color from the colormap (default: None)
         """
-        _plot_confusion_matrix(self.confusion_matrix, show_text, figsize, cmap, textcolor, vmin, vmax)
+        if ground_truth_names is None:
+            ground_truth_names = self.true_clusters
+        _plot_confusion_matrix(self.confusion_matrix, show_text, ground_truth_names, self.pred_clusters, figsize, cmap, textcolor, vmin, vmax)
diff --git a/clustpy/metrics/external_clustering_metrics.py b/clustpy/metrics/external_clustering_metrics.py
index 90855cb..5744e4b 100644
--- a/clustpy/metrics/external_clustering_metrics.py
+++ b/clustpy/metrics/external_clustering_metrics.py
@@ -3,7 +3,7 @@
 from clustpy.metrics.confusion_matrix import ConfusionMatrix
 from scipy.special import comb
 from sklearn.metrics import normalized_mutual_info_score as nmi
-from clustpy.metrics._metrics_utils import _check_number_of_points
+from clustpy.metrics._metrics_utils import _check_labels_arrays
 
 
 def variation_of_information(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
@@ -29,21 +29,17 @@ def variation_of_information(labels_true: np.ndarray, labels_pred: np.ndarray) -
     Meilă, Marina. "Comparing clusterings by the variation of information."
     Learning theory and kernel machines. Springer, Berlin, Heidelberg, 2003. 173-187.
     """
-    _check_number_of_points(labels_true, labels_pred)
+    confusion_matrix = ConfusionMatrix(labels_true, labels_pred).confusion_matrix
     n = len(labels_true)
-    cluster_ids_true = np.unique(labels_true)
-    cluster_ids_pred = np.unique(labels_pred)
-    result = 0.0
-    for id_true in cluster_ids_true:
-        points_in_cluster_gt = np.argwhere(labels_true == id_true)[:, 0]
-        p = len(points_in_cluster_gt) / n
-        for id_pred in cluster_ids_pred:
-            points_in_cluster_pred = np.argwhere(labels_pred == id_pred)[:, 0]
-            q = len(points_in_cluster_pred) / n
-            r = len([point for point in points_in_cluster_gt if point in points_in_cluster_pred]) / n
-            if r != 0:
-                result += r * (np.log(r / p) + np.log(r / q))
-    vi = -1 * result
+    p = confusion_matrix.sum(1).reshape((-1, 1)) / n
+    q = confusion_matrix.sum(0).reshape((1, -1)) / n
+    r = confusion_matrix / n
+    # Consider zero entries
+    mask = (r == 0)
+    r[mask] = 1
+    result = r * (np.log(r / p) + np.log(r / q))
+    result[mask] = 0
+    vi = -result.sum()
     return vi
 
 
@@ -72,13 +68,9 @@ def unsupervised_clustering_accuracy(labels_true: np.ndarray, labels_pred: np.nd
     Yang, Yi, et al. "Image clustering using local discriminant models and global integration."
     IEEE Transactions on Image Processing 19.10 (2010): 2761-2773.
     """
-    _check_number_of_points(labels_true, labels_pred)
-    max_label = int(max(labels_pred.max(), labels_true.max()) + 1)
-    match_matrix = np.zeros((max_label, max_label), dtype=np.int64)
-    for i in range(labels_true.shape[0]):
-        match_matrix[int(labels_true[i]), int(labels_pred[i])] -= 1
-    indices = linear_sum_assignment(match_matrix)
-    acc = -np.sum(match_matrix[indices]) / labels_pred.size
+    confusion_matrix = ConfusionMatrix(labels_true, labels_pred, "square").confusion_matrix
+    indices = linear_sum_assignment(-confusion_matrix)
+    acc = np.sum(confusion_matrix[indices]) / len(labels_true)
     return acc
 
 
@@ -110,17 +102,16 @@ def information_theoretic_external_cluster_validity_measure(labels_true: np.ndar
     Byron E. Dom. 2002. "An information-theoretic external cluster-validity measure."
     In Proceedings of the Eighteenth conference on Uncertainty in artificial intelligence (UAI'02).
     """
-    _check_number_of_points(labels_true, labels_pred)
     # Build confusion matrix
-    cm = ConfusionMatrix(labels_true, labels_pred)
+    confusion_matrix = ConfusionMatrix(labels_true, labels_pred).confusion_matrix
     n_points = labels_true.shape[0]
-    n_classes = cm.confusion_matrix.shape[0]
+    n_classes = confusion_matrix.shape[0]
     # Get number of objects per predicted label
-    hks = np.sum(cm.confusion_matrix, axis=0)
+    hks = np.sum(confusion_matrix, axis=0)
     # Calculate Q_0
-    cm_tmp = cm.confusion_matrix.copy()  # Needed if some cells are 0 so log can be calculated
+    cm_tmp = confusion_matrix.copy()  # Needed if some cells are 0 so log can be calculated
     cm_tmp[cm_tmp == 0] = 1  # will later be multiplied by 0, so this does not change the final result
-    empirical_conditional_entropy = cm.confusion_matrix / n_points * np.log(cm_tmp / hks)
+    empirical_conditional_entropy = confusion_matrix / n_points * np.log(cm_tmp / hks)
     empirical_conditional_entropy = - np.sum(
         empirical_conditional_entropy)  # [~np.isnan(empirical_conditional_entropy)])
     sum_binom_coefficient = np.sum([np.log(comb(hk + n_classes - 1, n_classes - 1)) for hk in hks])
@@ -128,7 +119,7 @@ def information_theoretic_external_cluster_validity_measure(labels_true: np.ndar
     if scale:
         # --- Scale Q_0 to (0, 1] ---
         # Get number of objects per ground truth label
-        hcs = np.sum(cm.confusion_matrix, axis=1)
+        hcs = np.sum(confusion_matrix, axis=1)
         # Calculate Q_2
         min_Q_0 = np.sum([np.log(comb(hc + n_classes - 1, n_classes - 1)) for hc in hcs]) / n_points
         entropy_H_C = -np.sum([hc / n_points * np.log(hc / n_points) for hc in hcs])
@@ -164,7 +155,7 @@ def fair_normalized_mutual_information(labels_true: np.ndarray, labels_pred: np.
     Amelio, Alessia, and Clara Pizzuti. "Is normalized mutual information a fair measure for comparing community detection methods?."
     Proceedings of the 2015 IEEE/ACM international conference on advances in social networks analysis and mining 2015. 2015.
     """
-    _check_number_of_points(labels_true, labels_pred)
+    labels_true, labels_pred = _check_labels_arrays(labels_true, labels_pred)
     # Get the normalized mutual information
     my_nmi = nmi(labels_true, labels_pred)
     # Get number of clusters
@@ -199,7 +190,6 @@ def purity(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
     -------
     Manning, Christopher D. An introduction to information retrieval. 2009.
     """
-    _check_number_of_points(labels_true, labels_pred)
     conf_matrix = ConfusionMatrix(labels_true, labels_pred).confusion_matrix
     best_matches = np.max(conf_matrix, axis=0)
     purity = np.sum(best_matches) / labels_true.shape[0]
diff --git a/clustpy/metrics/hierarchical_metrics.py b/clustpy/metrics/hierarchical_metrics.py
index a8ec145..cdc3353 100644
--- a/clustpy/metrics/hierarchical_metrics.py
+++ b/clustpy/metrics/hierarchical_metrics.py
@@ -1,16 +1,45 @@
-from clustpy.hierarchical._cluster_tree import BinaryClusterTree
+from clustpy.hierarchical._cluster_tree import BinaryClusterTree, _ClusterTreeNode
 from clustpy.metrics.confusion_matrix import ConfusionMatrix
-from clustpy.metrics.external_clustering_metrics import purity
-from clustpy.metrics._metrics_utils import _check_number_of_points
+from clustpy.metrics._metrics_utils import _check_labels_arrays
 import numpy as np
 
 
+def node_purity(node: _ClusterTreeNode, labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
+    """
+    Calculate the purity of this node within a Cluster Tree.
+    A node with no assigned points receives a purity score of 0.
+
+    Parameters
+    ----------
+    node: _ClusterTreeNode
+        The node of a clustering tree
+    labels_true : np.ndarray
+        The ground truth labels of the data set
+    labels_pred : np.ndarray
+        The labels as predicted by a clustering algorithm
+
+    Returns
+    -------
+    node_purity : float
+        The node purity
+    """
+    labels_true, labels_pred = _check_labels_arrays(labels_true, labels_pred)
+    samples_in_leaf = np.isin(labels_pred, node.labels)
+    if np.any(samples_in_leaf):
+        sizes_gt_matches = np.unique(labels_true[samples_in_leaf], return_counts=True)[1]
+        node_purity = sizes_gt_matches.max() / samples_in_leaf.sum()
+    else:
+        node_purity = 0.
+    return node_purity
+
+
 def leaf_purity(
     tree: BinaryClusterTree, labels_true: np.ndarray, labels_pred: np.ndarray
 ) -> float:
     """
     Calculates the leaf purity of the tree.
-    Uses labels fromm leafs in the tree to calculate the purity (see clustpy.metrics.purity).
+    The leaf purity is equal to a weighted average of the maximum class purity across all leaves.
+    Uses labels from leaves in the tree to identify the most frequent ground truth class and weights the score by the size of the leaf.
     If each label contains a single label, this is equal to the standard purity metric.
 
     Parameters
@@ -32,12 +61,14 @@ def leaf_purity(
     Mautz, Dominik, Claudia Plant, and Christian Böhm. "Deepect: The deep embedded cluster tree."
     Data Science and Engineering 5 (2020): 419-432.
     """
-    _check_number_of_points(labels_true, labels_pred)
+    cm = ConfusionMatrix(labels_true, labels_pred)
     leaf_nodes, _ = tree.get_leaf_and_split_nodes()
-    labels_pred_adj = -np.ones(labels_pred.shape[0])
-    for i, leaf_node in enumerate(leaf_nodes):
-        labels_pred_adj[np.isin(labels_pred, leaf_node.labels)] = i
-    leaf_purity = purity(labels_true, labels_pred_adj)
+    leaf_purity = 0
+    for leaf_node in leaf_nodes:
+        relevant_columns = np.isin(cm.pred_clusters, leaf_node.labels)
+        column_sum = cm.confusion_matrix[:, relevant_columns].sum(1)
+        leaf_purity += column_sum.max()
+    leaf_purity = leaf_purity / len(labels_true)
     return leaf_purity
 
 
@@ -74,7 +105,7 @@ def dendrogram_purity(
     """
     if labels_pred is None:
         labels_pred = np.arange(labels_true.shape[0])
-    _check_number_of_points(labels_true, labels_pred)
+    labels_true, labels_pred = _check_labels_arrays(labels_true, labels_pred)
     if type(dendrogram) is BinaryClusterTree:
         # Transform ClusterTree to sklearn dendrogram
         dendrogram = dendrogram.export_sklearn_dendrogram()
diff --git a/clustpy/metrics/internal_clustering_metrics.py b/clustpy/metrics/internal_clustering_metrics.py
index 6d46610..d2de64e 100644
--- a/clustpy/metrics/internal_clustering_metrics.py
+++ b/clustpy/metrics/internal_clustering_metrics.py
@@ -1,6 +1,7 @@
 from sklearn.neighbors import NearestNeighbors
 from scipy.spatial.distance import pdist
 import numpy as np
+from clustpy.metrics._metrics_utils import _check_length_data_and_labels
 
 
 def cvnn_score(X: np.ndarray, labels: np.ndarray | int | tuple, n_neighbors: int = 5, metric: str = "euclidean") -> float | np.ndarray:
@@ -55,29 +56,26 @@ def _internal_cvnn_score(X: np.ndarray, labels: np.ndarray, nrbs_indices: np.nda
         tuple : (float, float)
             The cluster spearation and cluster compactness value
         """
+        X, labels = _check_length_data_and_labels(X, labels)
         assert isinstance(labels, np.ndarray), "labels must be of type np.nddary. Your input has type {0}".format(type(labels))
         unique_clusters = np.unique(labels)
         # Calculate neighbor weights
         n_neighbors = nrbs_indices.shape[1]
-        n_neighbors_not_in_cluster = np.zeros(X.shape[0])
-        for k in range(n_neighbors):
-            n_neighbors_not_in_cluster += (labels != labels[nrbs_indices[:, k]])
-        n_neighbors_not_in_cluster /= n_neighbors
+        n_neighbors_not_in_cluster = (labels.reshape((-1, 1)) != labels[nrbs_indices]).mean(1)
         cluster_separation_scores = np.zeros(unique_clusters.shape[0])
         cluster_compactness_scores = np.zeros(unique_clusters.shape[0])
         # Do per-cluster calculations
-        for c in unique_clusters:
+        for i, c in enumerate(unique_clusters):
             in_cluster = (labels == c)
             # Calculate separation (mean of neighbor weights in cluster)
-            cluster_separation_scores[c] = n_neighbors_not_in_cluster[in_cluster].mean()
+            cluster_separation_scores[i] = n_neighbors_not_in_cluster[in_cluster].mean()
             # Calculate compartness (mean of pair-wise distances in cluster)
             X_in_cluster = X[in_cluster]
             if X_in_cluster.shape[0] > 1:
                 cluster_distances = pdist(X_in_cluster, metric=metric)
-                in_cluster_pairs = (X_in_cluster.shape[0] * (X_in_cluster.shape[0] - 1)) / 2
-                cluster_compactness_scores[c] = cluster_distances.sum() / in_cluster_pairs
+                cluster_compactness_scores[i] = cluster_distances.mean()
             else:
-                cluster_compactness_scores[c] = 0
+                cluster_compactness_scores[i] = 0
         # Calculate final CVNN
         cluster_separation_final = cluster_separation_scores.max()
         cluster_compactness_final = cluster_compactness_scores.sum()
@@ -97,7 +95,12 @@ def _internal_cvnn_score(X: np.ndarray, labels: np.ndarray, nrbs_indices: np.nda
             cluster_separations[i] = cluster_separation_l
             cluster_compactnesses[i] = cluster_compactness_l
         # Normalize scores
-        cvnn = cluster_separations / cluster_separations.max() + cluster_compactnesses / cluster_compactnesses.max()
+        max_cluster_separations = cluster_separations.max()
+        max_cluster_compactnesses = cluster_compactnesses.max()
+        if max_cluster_separations != 0 and max_cluster_compactnesses != 0:
+            cvnn = cluster_separations / max_cluster_separations + cluster_compactnesses / max_cluster_compactnesses
+        else:
+            cvnn = 0
     elif isinstance(labels, np.ndarray):
         # Do not normalize scores
         cluster_separation, cluster_compactness = _internal_cvnn_score(X, labels, nrbs_indices, metric)
diff --git a/clustpy/metrics/multipe_labelings_scoring.py b/clustpy/metrics/multipe_labelings_scoring.py
index 0409c08..21b8d18 100644
--- a/clustpy/metrics/multipe_labelings_scoring.py
+++ b/clustpy/metrics/multipe_labelings_scoring.py
@@ -1,5 +1,5 @@
 import numpy as np
-from clustpy.metrics._metrics_utils import _check_number_of_points
+from clustpy.metrics._metrics_utils import _check_labels_arrays
 from clustpy.metrics.pair_counting_scores import PairCountingScores, _f1_score, _recall_score, _precision_score, \
     _rand_score, _jaccard_score
 from sklearn.metrics import normalized_mutual_info_score as nmi
@@ -251,7 +251,7 @@ def _get_multiple_labelings_pair_counting_categories(labels_true: np.ndarray, la
         The number of false negatives,
         The number of true negatives
     """
-    _check_number_of_points(labels_true, labels_pred)
+    labels_true, labels_pred = _check_labels_arrays(labels_true, labels_pred, allow_2d_labels=True)
     if labels_true.ndim == 1:
         labels_true = labels_true.reshape((-1, 1))
     if labels_pred.ndim == 1:
@@ -393,7 +393,7 @@ class MultipleLabelingsConfusionMatrix(ConfusionMatrix):
 
     def __init__(self, labels_true: np.ndarray, labels_pred: np.ndarray, metric: Callable = nmi,
                  remove_noise_spaces: bool = True, metric_params: dict = {}):
-        _check_number_of_points(labels_true, labels_pred)
+        labels_true, labels_pred = _check_labels_arrays(labels_true, labels_pred, allow_2d_labels=True)
         assert type(metric_params) is dict, "metric_params must be a dict"
         assert callable(metric), "metric must be a method"
         # Reshape labels if we have only a single set of labels
@@ -413,8 +413,11 @@ def __init__(self, labels_true: np.ndarray, labels_pred: np.ndarray, metric: Cal
             for j in range(labels_pred.shape[1]):
                 confusion_matrix[i, j] = metric(labels_true[:, i], labels_pred[:, j], **metric_params)
         self.confusion_matrix = confusion_matrix
+        self.true_clusters = np.arange(labels_true.shape[1])
+        self.pred_clusters = np.arange(labels_pred.shape[1])
 
-    def plot(self, show_text: bool = True, figsize: tuple = (10, 10), cmap: str = "YlGn", textcolor: str = "black",
+    def plot(self, show_text: bool = True, ground_truth_names: list | None = None, 
+            figsize: tuple = (10, 10), cmap: str = "YlGn", textcolor: str = "black",
              vmin: float = 0.0, vmax: float = 1.0) -> None:
         """
         Plot the Multiple Labelings Confusion Matrix.
@@ -424,6 +427,8 @@ def plot(self, show_text: bool = True, figsize: tuple = (10, 10), cmap: str = "Y
         ----------
         show_text : bool
             Show the value in each cell as text (default: True)
+        ground_truth_names : list | None
+            List containing the names of the ground truth cluster sets. If None, the indices of the ground truth labelings are used (default: None)
         figsize : tuple
             Tuple indicating the height and width of the plot (default: (10, 10))
         cmap : str
@@ -439,7 +444,9 @@ def plot(self, show_text: bool = True, figsize: tuple = (10, 10), cmap: str = "Y
             If None, it will be set as the maximum value within the confusion matrix.
             Used to choose the color from the colormap (default: 1.0)
         """
-        _plot_confusion_matrix(self.confusion_matrix, show_text, figsize, cmap, textcolor, vmin=vmin, vmax=vmax)
+        if ground_truth_names is None:
+            ground_truth_names = self.true_clusters
+        _plot_confusion_matrix(self.confusion_matrix, show_text, ground_truth_names, self.pred_clusters, figsize, cmap, textcolor, vmin=vmin, vmax=vmax)
 
     def aggregate(self, aggregation_strategy: str = "max") -> float:
         """
@@ -488,9 +495,9 @@ def aggregate(self, aggregation_strategy: str = "max") -> float:
             if aggregation_strategy == "permut-max":
                 # Linear sum assignment tries to minimize the diagonal sum -> use negative confusion_matrix
                 rearranged_confusion_matrix[:self.confusion_matrix.shape[0],
-                :self.confusion_matrix.shape[1]] = -self.confusion_matrix
-                indices = linear_sum_assignment(rearranged_confusion_matrix)
-                rearranged_confusion_matrix = -rearranged_confusion_matrix[:, indices[1]]
+                :self.confusion_matrix.shape[1]] = self.confusion_matrix
+                indices = linear_sum_assignment(-rearranged_confusion_matrix)
+                rearranged_confusion_matrix = rearranged_confusion_matrix[:, indices[1]]
             else:
                 rearranged_confusion_matrix[:self.confusion_matrix.shape[0],
                 :self.confusion_matrix.shape[1]] = self.confusion_matrix
@@ -536,9 +543,9 @@ def is_multi_labelings_n_clusters_correct(labels_true: np.ndarray, labels_pred:
     Parameters
     ----------
     labels_true : np.ndarray
-        The true set of labelings. Shape must match (n_samples, n_subspaces)
+        The true set of labelings. Shape must match (n_samples, n_labelings)
     labels_pred : np.ndarray
-        The predicted set of labelings. Shape must match (n_samples, n_subspaces)
+        The predicted set of labelings. Shape must match (n_samples, n_labelings)
     check_subset : bool
         Boolean defines if it is sufficient if a subset of n_clusters_pred is equal to n_clusters_true (default: True)
     remove_noise_spaces : bool
@@ -549,7 +556,7 @@ def is_multi_labelings_n_clusters_correct(labels_true: np.ndarray, labels_pred:
     is_equal : bool
         Boolean indicating if the number of clusters of labels_true and labels_pred matches
     """
-    _check_number_of_points(labels_true, labels_pred)
+    labels_true, labels_pred = _check_labels_arrays(labels_true, labels_pred, allow_2d_labels=True)
     if labels_true.ndim == 1:
         labels_true = labels_true.reshape((-1, 1))
     if labels_pred.ndim == 1:
diff --git a/clustpy/metrics/pair_counting_scores.py b/clustpy/metrics/pair_counting_scores.py
index ddae0b2..e3dbcff 100644
--- a/clustpy/metrics/pair_counting_scores.py
+++ b/clustpy/metrics/pair_counting_scores.py
@@ -1,4 +1,4 @@
-from clustpy.metrics._metrics_utils import _check_number_of_points
+from clustpy.metrics._metrics_utils import _check_labels_arrays
 import numpy as np
 
 """
@@ -333,9 +333,7 @@ def _get_pair_counting_categories(labels_true: np.ndarray, labels_pred: np.ndarr
         The number of false negatives,
         The number of true negatives
     """
-    _check_number_of_points(labels_true, labels_pred)
-    if labels_true.ndim != 1 or labels_pred.ndim != 1:
-        raise Exception("labels_true and labels_pred labels should just contain a single column.")
+    labels_true, labels_pred = _check_labels_arrays(labels_true, labels_pred)
     n_tp = 0
     n_fp = 0
     n_fn = 0
@@ -384,7 +382,6 @@ class PairCountingScores():
     """
 
     def __init__(self, labels_true: np.ndarray, labels_pred: np.ndarray):
-        _check_number_of_points(labels_true, labels_pred)
         n_tp, n_fp, n_fn, n_tn = _get_pair_counting_categories(labels_true, labels_pred)
         self.n_tp = n_tp
         self.n_fp = n_fp
diff --git a/clustpy/metrics/tests/test_confusion_matrix.py b/clustpy/metrics/tests/test_confusion_matrix.py
index a7d34d1..5f5c37a 100644
--- a/clustpy/metrics/tests/test_confusion_matrix.py
+++ b/clustpy/metrics/tests/test_confusion_matrix.py
@@ -1,7 +1,8 @@
 import numpy as np
 from clustpy.metrics import ConfusionMatrix
-from clustpy.metrics.confusion_matrix import _rearrange
+from clustpy.metrics.confusion_matrix import _rearrange, _plot_confusion_matrix
 from unittest.mock import patch
+import pytest
 
 
 def test_rearrange():
@@ -10,31 +11,34 @@ def test_rearrange():
                                  [0, 0, 0, 50],
                                  [30, 10, 5, 5],
                                  [5, 5, 35, 5]])
-    rearranged_confusion_matrix = _rearrange(confusion_matrix)
+    rearranged_confusion_matrix, rearrange_order = _rearrange(confusion_matrix)
     assert np.array_equal(rearranged_confusion_matrix, np.array([[45, 2, 0, 3],
                                                                  [0, 50, 0, 0],
                                                                  [10, 5, 30, 5],
                                                                  [5, 5, 5, 35]]))
+    assert np.array_equal(rearrange_order, np.array([1, 3, 0, 2]))
     # More prediction labels than ground truth
     confusion_matrix = np.array([[0, 10, 45, 3, 2, 25],
                                  [0, 10, 0, 0, 50, 25],
                                  [30, 10, 10, 5, 5, 25],
                                  [5, 10, 5, 35, 5, 25]])
-    rearranged_confusion_matrix = _rearrange(confusion_matrix)
+    rearranged_confusion_matrix, rearrange_order = _rearrange(confusion_matrix)
     assert np.array_equal(rearranged_confusion_matrix, np.array([[45, 2, 0, 3, 25, 10],
                                                                  [0, 50, 0, 0, 25, 10],
                                                                  [10, 5, 30, 5, 25, 10],
                                                                  [5, 5, 5, 35, 25, 10]]))
+    assert np.array_equal(rearrange_order, np.array([2, 4, 0, 3, 5, 1]))
     # More ground truth labels than prediction
     confusion_matrix = np.array([[0, 3, 2],
                                  [0, 0, 50],
                                  [30, 5, 5],
                                  [5, 35, 5]])
-    rearranged_confusion_matrix = _rearrange(confusion_matrix)
+    rearranged_confusion_matrix, rearrange_order = _rearrange(confusion_matrix)
     assert np.array_equal(rearranged_confusion_matrix, np.array([[0, 2, 0, 3],
                                                                  [0, 50, 0, 0],
                                                                  [0, 5, 30, 5],
                                                                  [0, 5, 5, 35]]))
+    assert np.array_equal(rearrange_order, np.array([3, 2, 0, 1]))
 
 
 """
@@ -60,25 +64,50 @@ def test_confusion_matrix_object():
                             [0, 1, 0, 1]])
     assert np.array_equal(cm.confusion_matrix, expected_cm)
     # Third test
-    labels_true = np.array([0, 1, 2, -3, 0, 1, 2, -3])
+    labels_true = np.array([0, 1, 2, -1, 0, 1, 2, -1])
     labels_pred = np.array([0, 0, -1, -1, 2, 2, -1, 3])
     cm = ConfusionMatrix(labels_true, labels_pred)
-    expected_cm = np.array([[1, 0, 0, 0, 1],
-                            [0, 0, 0, 0, 0],
-                            [0, 0, 0, 0, 0],
-                            [0, 1, 0, 1, 0],
-                            [0, 1, 0, 1, 0],
-                            [2, 0, 0, 0, 0]])
+    expected_cm = np.array([[1, 0, 0, 1],
+                            [0, 1, 1, 0],
+                            [0, 1, 1, 0],
+                            [2, 0, 0, 0]])
     assert np.array_equal(cm.confusion_matrix, expected_cm)
 
 
+
+def test_confusion_matrix_object_with_shape():
+    # First test
+    labels_true = np.array([0, 0, 0, 0, 1, 1, 1, 1])
+    labels_pred = np.array([0, 0, 1, 1, 2, 2, 3, 3])
+    cm = ConfusionMatrix(labels_true, labels_pred, "square")
+    expected_cm = np.array([[2, 2, 0, 0],
+                            [0, 0, 2, 2],
+                            [0, 0, 0, 0],
+                            [0, 0, 0, 0]])
+    assert np.array_equal(cm.confusion_matrix, expected_cm)
+    assert np.array_equal(cm.true_clusters, np.array([0,1,-2,-2]))
+    assert np.array_equal(cm.pred_clusters, np.array([0,1,2,3]))
+    # Second test
+    cm = ConfusionMatrix(labels_pred, labels_true, (5, 6))
+    expected_cm = np.array([[2, 0, 0, 0, 0, 0],
+                            [2, 0, 0, 0, 0, 0],
+                            [0, 2, 0, 0, 0, 0],
+                            [0, 2, 0, 0, 0, 0],
+                            [0, 0, 0, 0, 0, 0]])
+    assert np.array_equal(cm.confusion_matrix, expected_cm)
+    assert np.array_equal(cm.true_clusters, np.array([0,1,2,3,-2]))
+    assert np.array_equal(cm.pred_clusters, np.array([0,1,-2,-2,-2,-2]))
+
+
 def test_confusion_matrix_rearrange():
     labels_true = np.array([0, 1, 2, 3, 0, 1, 2, 3])
-    labels_pred = np.array([0, 0, 1, 1, 2, 2, 3, 3])
+    labels_pred = np.array([-1, -1, 1, 1, 2, 2, 3, 3])
     cm = ConfusionMatrix(labels_true, labels_pred)
     cm_copy = cm.confusion_matrix.copy()
     rearranged_cm = cm.rearrange(inplace=False)
     assert np.array_equal(cm.confusion_matrix, cm_copy)
+    assert np.array_equal(cm.true_clusters, np.array([0, 1, 2, 3]))
+    assert np.array_equal(cm.pred_clusters, np.array([-1, 1, 2, 3]))
     expected_rearranged_cm = np.array([[1, 1, 0, 0],
                                        [1, 1, 0, 0],
                                        [0, 0, 1, 1],
@@ -86,6 +115,20 @@ def test_confusion_matrix_rearrange():
     assert np.array_equal(rearranged_cm, expected_rearranged_cm)
     rearranged_cm = cm.rearrange(inplace=True)
     assert np.array_equal(cm.confusion_matrix, rearranged_cm)
+    assert np.array_equal(cm.pred_clusters, np.array([-1, 2, 1, 3]))
+
+
+@patch("matplotlib.pyplot.show")  # Used to test plots (show will not be called)
+def test_plot_confusion_matrix(mock_fig):
+    cm = np.array([[1, 0, 1, 0],
+                    [1, 0, 1, 0],
+                    [0, 1, 0, 1],
+                    [0, 1, 0, 1]])
+    with pytest.raises(ValueError):
+        _plot_confusion_matrix(cm, True, ["One", "Two"], ["One", "Two", "Three", "Four"], (5,5), "YlGn", "red", "0", "100")
+    with pytest.raises(ValueError):
+        _plot_confusion_matrix(cm, True, ["One", "Two", "Three", "Four"], ["One", "Two"], (5,5), "YlGn", "red", "0", "100")
+    assert None == _plot_confusion_matrix(cm, True, ["One", "Two", "Three", "Four"], ["One", "Two", "Three", "Four"], (5,5), "YlGn", "red", "0", "100")
 
 
 @patch("matplotlib.pyplot.show")  # Used to test plots (show will not be called)
@@ -93,4 +136,4 @@ def test_confusion_matrix_plot(mock_fig):
     labels_true = np.array([0, 0, 0, 0, 1, 1, 1, 1])
     labels_pred = np.array([0, 0, 1, 1, 2, 2, 3, 3])
     cm = ConfusionMatrix(labels_true, labels_pred)
-    assert None == cm.plot()
+    assert None == cm.plot()
\ No newline at end of file
diff --git a/clustpy/metrics/tests/test_external_clustering_metrics.py b/clustpy/metrics/tests/test_external_clustering_metrics.py
index 7327672..0729b78 100644
--- a/clustpy/metrics/tests/test_external_clustering_metrics.py
+++ b/clustpy/metrics/tests/test_external_clustering_metrics.py
@@ -8,14 +8,16 @@ def test_unsupervised_clustering_accuracy():
     l1 = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
     l2 = np.array([1, 1, 2, 2, 3, 3, 4, 4, 0, 0])
     assert unsupervised_clustering_accuracy(l1, l2) == 1.0
-    l2 = np.array([0, 0, 1, 1, 1, 2, 3, 3, 4, 4])
+    l2 = np.array([-1, -1, 1, 1, 1, 2, 3, 3, 4, 4])
     assert unsupervised_clustering_accuracy(l1, l2) == 0.9
     l2 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
     assert unsupervised_clustering_accuracy(l1, l2) == 0.5
     l2 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
     assert unsupervised_clustering_accuracy(l1, l2) == 0.2
-    l2 = np.array([0, 0, 0, 0, 2, 2, 3, 3, 4, 1])
+    l2 = np.array([0, 0, 0, 0, 2, 2, 3, 3, 4, -1])
     assert unsupervised_clustering_accuracy(l1, l2) == 0.7
+    l2 = np.array([4, 4, 4, 1, 2, 2, 3, 3, 0, -1])
+    assert unsupervised_clustering_accuracy(l1, l2) == 0.8
 
 
 def test_variation_of_information():
@@ -23,7 +25,7 @@ def test_variation_of_information():
     l2 = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
     assert variation_of_information(l1, l2) == 0.0
     l1 = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
-    l2 = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
+    l2 = np.array([1, 1, 1, 1, 1, -1, -1, -1, -1, -1])
     assert variation_of_information(l1, l2) == 0.0
     l1 = np.array([1, 1, 1, 1, 0, 0, 0, 0])
     l2 = np.array([0, 0, 1, 1, 1, 1, 1, 1])
@@ -38,10 +40,13 @@ def test_information_theoretic_external_cluster_validity_measure():
     scaled_result_1 = information_theoretic_external_cluster_validity_measure(l1, l2, True)
     assert scaled_result_1 == 1.0
     # Medium cluster result
-    l2 = np.array([0, 0, 1, 1, 1, 2, 3, 3, 3, 4])
+    l2 = np.array([-1, -1, 1, 1, 1, 2, 3, 3, 3, 4])
     non_scaled_result_2 = information_theoretic_external_cluster_validity_measure(l1, l2, False)
     scaled_result_2 = information_theoretic_external_cluster_validity_measure(l1, l2)
     assert scaled_result_2 >= 0 and scaled_result_2 <= 1
+    l2 = np.array([0, 0, 1, 1, 1, 2, 3, 3, 3, 4])
+    assert non_scaled_result_2 == information_theoretic_external_cluster_validity_measure(l1, l2, False)
+    assert scaled_result_2 == information_theoretic_external_cluster_validity_measure(l1, l2)
     # Poor cluster result
     l2 = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 0])
     non_scaled_result_3 = information_theoretic_external_cluster_validity_measure(l1, l2, False)
@@ -56,10 +61,12 @@ def test_fair_normalized_mutual_information():
     l2 = np.array([1, 1, 2, 2, 3, 3, 4, 4, 0, 0])
     fnmi1 = fair_normalized_mutual_information(l1, l2)
     assert fnmi1 == 1.0
-    l2 = np.array([0, 0, 1, 1, 1, 2, 3, 3, 4, 4])
+    l2 = np.array([-1, -1, 1, 1, 1, 2, 3, 3, 4, 4])
     fnmi2 = fair_normalized_mutual_information(l1, l2)
     assert fnmi2 < fnmi1
     assert fnmi2 == nmi(l1, l2)
+    l2 = np.array([0, 0, 1, 1, 1, 2, 3, 3, 4, 4])
+    assert fnmi2 == fair_normalized_mutual_information(l1, l2)
     l2 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
     fnmi3 = fair_normalized_mutual_information(l1, l2)
     assert fnmi3 < fnmi2
@@ -74,7 +81,7 @@ def test_purity():
     l1 = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
     l2 = np.array([1, 1, 2, 2, 3, 3, 4, 4, 0, 0])
     assert purity(l1, l2) == 1.0
-    l2 = np.array([0, 0, 1, 1, 1, 2, 3, 3, 4, 4])
+    l2 = np.array([-1, -1, 1, 1, 1, 2, 3, 3, 4, 4])
     assert purity(l1, l2) == 0.9
     l2 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
     assert purity(l1, l2) == 1.0
diff --git a/clustpy/metrics/tests/test_hierarchical_metrics.py b/clustpy/metrics/tests/test_hierarchical_metrics.py
index 9eed73e..02bf0b7 100644
--- a/clustpy/metrics/tests/test_hierarchical_metrics.py
+++ b/clustpy/metrics/tests/test_hierarchical_metrics.py
@@ -1,9 +1,31 @@
-from clustpy.metrics import dendrogram_purity, leaf_purity
+from clustpy.metrics import dendrogram_purity, leaf_purity, node_purity
 from clustpy.metrics.hierarchical_metrics import _get_parent_matrix
 from clustpy.hierarchical._cluster_tree import BinaryClusterTree
 import numpy as np
 
 
+def test_node_purity():
+    bct = BinaryClusterTree()
+    node_023, node_145 = bct.split_cluster(0)
+    node_03, node_2 = bct.split_cluster(0)
+    node_0, node_3 = bct.split_cluster(0)
+    node_15, node_4 = bct.split_cluster(1)
+    node_1, node_5 = bct.split_cluster(1)
+    l1 = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4])
+    l2 = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 0, 0, 0])
+    assert node_purity(bct.root_node_, l1, l2) == 1/5
+    assert node_purity(node_023, l1, l2) == 1/3
+    assert node_purity(node_145, l1, l2) == 1/2
+    assert node_purity(node_03, l1, l2) == 1/2
+    assert node_purity(node_2, l1, l2) == 1.
+    assert node_purity(node_0, l1, l2) == 1.
+    assert node_purity(node_3, l1, l2) == 1.
+    assert node_purity(node_15, l1, l2) == 1.
+    assert node_purity(node_4, l1, l2) == 1.
+    assert node_purity(node_1, l1, l2) == 1.
+    assert node_purity(node_5, l1, l2) == 0.
+
+
 def test_leaf_purity():
     bct = BinaryClusterTree()
     bct.split_cluster(0)
diff --git a/clustpy/metrics/tests/test_metrics_utils.py b/clustpy/metrics/tests/test_metrics_utils.py
index c73376e..63640ef 100644
--- a/clustpy/metrics/tests/test_metrics_utils.py
+++ b/clustpy/metrics/tests/test_metrics_utils.py
@@ -1,10 +1,34 @@
-from clustpy.metrics.external_clustering_metrics import _check_number_of_points
+from clustpy.metrics._metrics_utils import _check_labels_arrays, _check_length_data_and_labels
 import pytest
 import numpy as np
 
-def test_check_number_of_points():
-    l1 = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
+def test_check_labels_arrays():
+    l1 = np.array([0., 0., 1., 1., 2., 2., 3., 3., 4., 4.])
+    assert l1.dtype == float
     l2 = np.array([0, 0, 1, 1, 1, 2, 3, 3, 4, 4])
-    assert _check_number_of_points(l1, l2) == True
-    with pytest.raises(Exception):
-        _check_number_of_points(l1, l2[1:])
\ No newline at end of file
+    l1, l2 =_check_labels_arrays(l1, l2)
+    assert l1.dtype == int and l2.dtype == int
+    with pytest.raises(ValueError):
+        _check_labels_arrays(l1, l2[1:])
+    l3 = np.c_[l1, l2]
+    with pytest.raises(ValueError):
+        _check_labels_arrays(l1, l3)
+    l1, l3 =_check_labels_arrays(l1, l3, allow_2d_labels = True)
+    assert l1.shape == (10, ) and l3.shape == (10, 2)
+    l3, l1 =_check_labels_arrays(l3, l1, allow_2d_labels = True)
+    assert l1.shape == (10, ) and l3.shape == (10, 2)
+    l3, l4 =_check_labels_arrays(l3, l3, allow_2d_labels = True)
+    assert l3.shape == (10, 2) and l4.shape == (10, 2)
+
+
+def test_check_length_data_and_labels():
+    l1 = np.array([0., 0., 1., 1., 2., 2., 3., 3., 4., 4.])
+    assert l1.dtype == float
+    X = np.array([[0., 2.], [1., 2.], [2., 3.], [3., 4.], [4., 5.], [5., 6.], [6., 7.], [7., 8.], [8., 9.], [9., 10.]])
+    print(X.shape)
+    X, l1 =_check_length_data_and_labels(X, l1)
+    assert X.dtype == float and l1.dtype == int
+    with pytest.raises(ValueError):
+        _check_length_data_and_labels(X, l1[1:])
+    with pytest.raises(ValueError):
+        _check_length_data_and_labels(X, np.array([0] * 10))
diff --git a/clustpy/metrics/tests/test_multiple_labelings_scoring.py b/clustpy/metrics/tests/test_multiple_labelings_scoring.py
index 79eb378..8f18f38 100644
--- a/clustpy/metrics/tests/test_multiple_labelings_scoring.py
+++ b/clustpy/metrics/tests/test_multiple_labelings_scoring.py
@@ -82,9 +82,9 @@ def test_is_multi_labelings_n_clusters_correct():
     labels_true = np.array([[0, 0, 0, 0, 1],
                             [0, 0, -1, 1, 2],
                             [0, 0, 0, 0, 0]]).T
-    labels_pred = np.array([[[0, 0, -1, 0, 1],
+    labels_pred = np.array([[0, 0, -1, 0, 1],
                              [0, 0, 0, 1, 2],
-                             [0, 0, 1, 2, 3]]]).T
+                             [0, 0, 1, 2, 3]]).T
     assert is_multi_labelings_n_clusters_correct(labels_true, labels_pred, check_subset=True,
                                                  remove_noise_spaces=True) == True
     assert is_multi_labelings_n_clusters_correct(labels_true, labels_pred, check_subset=True,
diff --git a/clustpy/utils/checks.py b/clustpy/utils/checks.py
index 76ce289..ef681d8 100644
--- a/clustpy/utils/checks.py
+++ b/clustpy/utils/checks.py
@@ -1,4 +1,4 @@
-from sklearn.utils.estimator_checks import check_estimator
+from sklearn.utils.estimator_checks import estimator_checks_generator
 from sklearn.base import BaseEstimator
 import numpy as np
 from sklearn.utils import check_X_y, check_array, check_random_state
@@ -16,7 +16,7 @@ def check_clustpy_estimator(estimator_obj: BaseEstimator, checks_to_ignore: tupl
     checks_to_ignore : tuple | list
         List containing the names of checks to ignore (default: ("check_complex_data"))
     """
-    all_checks = check_estimator(estimator_obj, True)
+    all_checks = estimator_checks_generator(estimator_obj)
     for estimator, check in all_checks:
         check_name = check.func.__name__
         if not check_name in checks_to_ignore:
diff --git a/clustpy/utils/dip.c b/clustpy/utils/dip.c
index b11f6f3..32420ee 100644
--- a/clustpy/utils/dip.c
+++ b/clustpy/utils/dip.c
@@ -45,6 +45,7 @@ Compile Windows: cc -fPIC -shared -std=c99 -o dip.dll dip.c
 Compile Linux: cc -fPIC -shared -o dip.so dip.c
 */
 
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 #include <Python.h>
 #include <numpy/arrayobject.h>
 
@@ -306,13 +307,13 @@ static PyObject *method_c_diptest(PyObject *self, PyObject *args) {
     return NULL;
   }
   // Convert PyObjects to C arrays
-  c_x = (double*)py_x->data;
-  c_low_high = (int*)py_low_high->data;
-  c_modaltriangle = (int*)py_modaltriangle->data;
-  c_gcm = (int*)py_gcm->data;
-  c_lcm = (int*)py_lcm->data;
-  c_mn = (int*)py_mn->data;
-  c_mj = (int*)py_mj->data;
+  c_x = (double*)PyArray_DATA(py_x);
+  c_low_high = (int*)PyArray_DATA(py_low_high);
+  c_modaltriangle = (int*)PyArray_DATA(py_modaltriangle);
+  c_gcm = (int*)PyArray_DATA(py_gcm);
+  c_lcm = (int*)PyArray_DATA(py_lcm);
+  c_mn = (int*)PyArray_DATA(py_mn);
+  c_mj = (int*)PyArray_DATA(py_mj);
   // Execute C diptest method
   double dip_value = fast_diptest(c_x, c_low_high, c_modaltriangle, c_gcm, c_lcm, c_mn, c_mj, n, debug);
   // Return dip value
@@ -334,5 +335,8 @@ static struct PyModuleDef diptestModule = {
 
 PyMODINIT_FUNC PyInit_dipModule(void) {
   import_array();
+  if (PyErr_Occurred()) {
+      return NULL;
+  }
   return PyModule_Create(&diptestModule);
 };
diff --git a/clustpy/utils/diptest.py b/clustpy/utils/diptest.py
index 2d191d2..b993e62 100644
--- a/clustpy/utils/diptest.py
+++ b/clustpy/utils/diptest.py
@@ -107,15 +107,18 @@ def _dip_c_impl(X: np.ndarray, debug: bool) -> (float, tuple, tuple, np.ndarray,
         The minorant values,
         The majorant values
     """
+    # Ensure X is float64 and contiguous
+    X_input = np.ascontiguousarray(X, dtype=np.float64)
+    n = X_input.shape[0]
     # Create reference numpy arrays
-    modal_interval = np.zeros(2, dtype=np.int32)
-    modal_triangle = -np.ones(3, dtype=np.int32)
-    gcm = np.zeros(X.shape, dtype=np.int32)
-    lcm = np.zeros(X.shape, dtype=np.int32)
-    mj = np.zeros(X.shape, dtype=np.int32)
-    mn = np.zeros(X.shape, dtype=np.int32)
+    modal_interval = np.zeros(2, dtype=np.int32, order='C')
+    modal_triangle = -np.ones(3, dtype=np.int32, order='C')
+    gcm = np.zeros(n, dtype=np.int32, order='C')
+    lcm = np.zeros(n, dtype=np.int32, order='C')
+    mj = np.zeros(n, dtype=np.int32, order='C')
+    mn = np.zeros(n, dtype=np.int32, order='C')
     # Execute C function
-    dip_value = c_diptest(X.astype(np.float64), modal_interval, modal_triangle, gcm, lcm, mn, mj, X.shape[0],
+    dip_value = c_diptest(X_input, modal_interval, modal_triangle, gcm, lcm, mn, mj, n,
                           1 if debug else 0)
     return dip_value, (modal_interval[0], modal_interval[1]), (
         modal_triangle[0], modal_triangle[1], modal_triangle[2]), gcm, lcm, mn, mj
diff --git a/clustpy/utils/evaluation.py b/clustpy/utils/evaluation.py
index 6996048..d95d205 100644
--- a/clustpy/utils/evaluation.py
+++ b/clustpy/utils/evaluation.py
@@ -219,7 +219,7 @@ def evaluate_dataset(X: np.ndarray, evaluation_algorithms: list, evaluation_metr
                                              1]) == 1, "Some names of your metrics do not seem to be unique! Note that metrics must not be named 'runtime' or 'n_clusters'"
     header = pd.MultiIndex.from_product([algo_names, metric_names], names=["algorithm", "metric"])
     value_placeholder = np.zeros((n_repetitions, len(algo_names) * len(metric_names)))
-    df = pd.DataFrame(value_placeholder, columns=header, index=range(n_repetitions))
+    df = pd.DataFrame(value_placeholder, columns=header, index=[str(rep) for rep in range(n_repetitions)])
     for eval_algo in evaluation_algorithms:
         automatically_set_n_clusters = False
         try:
@@ -336,11 +336,11 @@ def evaluate_dataset(X: np.ndarray, evaluation_algorithms: list, evaluation_metr
                                 if X_test is not None and labels_predicted_test is not None:
                                     result_test = eval_metric.method(X_test, labels_true_test, labels_predicted_test, algo_obj,
                                                                      **eval_metric.params)
-                            df.at[rep, (eval_algo.name, eval_metric.name)] = result
+                            df.at[str(rep), (eval_algo.name, eval_metric.name)] = result
                             if not quiet:
                                 print("-- {0}: {1}".format(eval_metric.name, result))
                             if X_test is not None and labels_predicted_test is not None:
-                                df.at[rep, (eval_algo.name, eval_metric.name + "_TEST")] = result_test
+                                df.at[str(rep), (eval_algo.name, eval_metric.name + "_TEST")] = result_test
                                 if not quiet:
                                     print("-- {0} (TEST): {1}".format(eval_metric.name, result_test))
                         except Exception as e:
@@ -348,28 +348,29 @@ def evaluate_dataset(X: np.ndarray, evaluation_algorithms: list, evaluation_metr
                                 print("Metric {0} raised an exception and will be skipped".format(eval_metric.name))
                                 print('Error on line {}'.format(sys.exc_info()[-1].tb_lineno), type(e).__name__, e)
                 if add_runtime:
-                    df.at[rep, (eval_algo.name, "runtime")] = runtime
+                    df.at[str(rep), (eval_algo.name, "runtime")] = runtime
                     if not quiet:
                         print("-- runtime: {0}".format(runtime))
                 if add_n_clusters:
                     n_clusters = _get_n_clusters_from_algo(algo_obj)
-                    df.at[rep, (eval_algo.name, "n_clusters")] = n_clusters
+                    df.at[str(rep), (eval_algo.name, "n_clusters")] = n_clusters
                     if not quiet:
                         print("-- n_clusters: {0}".format(n_clusters))
                 if eval_algo.deterministic:
                     for element in range(1, n_repetitions):
-                        if add_runtime:
-                            df.at[element, (eval_algo.name, "runtime")] = df.at[
-                                0, (eval_algo.name, "runtime")]
-                        if add_n_clusters:
-                            df.at[element, (eval_algo.name, "n_clusters")] = df.at[
-                                0, (eval_algo.name, "n_clusters")]
+
                         for eval_metric in evaluation_metrics:
-                            df.at[element, (eval_algo.name, eval_metric.name)] = df.at[
-                                0, (eval_algo.name, eval_metric.name)]
+                            df.at[str(element), (eval_algo.name, eval_metric.name)] = df.at[
+                                "0", (eval_algo.name, eval_metric.name)]
                             if X_test is not None:
-                                df.at[element, (eval_algo.name, eval_metric.name + "_TEST")] = df.at[
-                                    0, (eval_algo.name, eval_metric.name + "_TEST")]
+                                df.at[str(element), (eval_algo.name, eval_metric.name + "_TEST")] = df.at[
+                                    "0", (eval_algo.name, eval_metric.name + "_TEST")]
+                        if add_runtime:
+                            df.at[str(element), (eval_algo.name, "runtime")] = df.at[
+                                "0", (eval_algo.name, "runtime")]
+                        if add_n_clusters:
+                            df.at[str(element), (eval_algo.name, "n_clusters")] = df.at[
+                                "0", (eval_algo.name, "n_clusters")]
                     break
         except Exception as e:
             if not quiet:
@@ -606,8 +607,8 @@ def _get_data_and_labels_from_evaluation_dataset(data_input: np.ndarray, data_lo
     return X, labels_true, X_test, labels_true_test
 
 
-def evaluation_df_to_latex_table(df: pd.DataFrame | str, relevant_row : str | int = "mean", output_path: str = None, pm_row: str | int | None = "std", 
-                                 bracket_row: str | int | None = None, best_in_bold: bool = True, second_best_underlined: bool = True, 
+def evaluation_df_to_latex_table(df: pd.DataFrame | str, relevant_row : str = "mean", output_path: str = None, pm_row: str | None = "std", 
+                                 bracket_row: str | None = None, best_in_bold: bool = True, second_best_underlined: bool = True, 
                                  third_best_dashed_underlined: bool = False, color_by_value: str = None, higher_is_better: list = None, 
                                  multiplier: int | float | list | None = 100, decimal_places: int = 1, color_min_max: tuple = (5, 70)) -> str:
     """
@@ -621,13 +622,13 @@ def evaluation_df_to_latex_table(df: pd.DataFrame | str, relevant_row : str | in
     ----------
     df : pd.DataFrame | str
         The pandas dataframe. Can also be a string that contains the path to the saved dataframe
-    relevant_row : str | int
+    relevant_row : str
         The name of the row in the df that is used to create the latex table (default: "mean")
     output_path : str
         The path were the resulting latex table text file will be stored (default: None)
-    pm_row : str | int
+    pm_row : str | None
         The name of the row in the df that should be added to the latex table after the value from relevant_row separated by plus-minus (default: "std")
-    bracket_row : str | int
+    bracket_row : str | None
         The name of the row in the df that should be added to the latex table in brackets after the value from relevant_row and, if stated, the value from pm_row (default: None)
     best_in_bold : bool
         Print best value for each combination of dataset and metric in bold.
diff --git a/clustpy/utils/tests/test_evaluation.py b/clustpy/utils/tests/test_evaluation.py
index 4a968a8..0ec76c9 100644
--- a/clustpy/utils/tests/test_evaluation.py
+++ b/clustpy/utils/tests/test_evaluation.py
@@ -139,12 +139,12 @@ def test_evaluate_dataset_with_neural_networks_as_iteration_parameters():
                           labels_true=L, n_repetitions=n_repetitions, add_runtime=False, add_n_clusters=False,
                           save_path=None, random_state=1)
     # Check if scores are equal
-    assert abs(df.at[0, ("DEC1", "nmi")] - df.at[0, ("DEC2", "nmi")]) < 1e-8  # is equal
-    assert abs(df.at[0, ("DEC1", "silhouette")] - df.at[0, ("DEC2", "silhouette")]) < 1e-8  # is equal
-    assert abs(df.at[1, ("DEC1", "nmi")] - df.at[1, ("DEC2", "nmi")]) < 1e-8  # is equal
-    assert abs(df.at[1, ("DEC1", "silhouette")] - df.at[1, ("DEC2", "silhouette")]) < 1e-8  # is equal
-    assert abs(df.at[0, ("DEC1", "nmi")] - df.at[1, ("DEC1", "nmi")]) > 1e-2  # is not equal
-    assert abs(df.at[0, ("DEC1", "silhouette")] - df.at[1, ("DEC1", "silhouette")]) > 1e-2  # is not equal
+    assert abs(df.at["0", ("DEC1", "nmi")] - df.at["0", ("DEC2", "nmi")]) < 1e-8  # is equal
+    assert abs(df.at["0", ("DEC1", "silhouette")] - df.at["0", ("DEC2", "silhouette")]) < 1e-8  # is equal
+    assert abs(df.at["1", ("DEC1", "nmi")] - df.at["1", ("DEC2", "nmi")]) < 1e-8  # is equal
+    assert abs(df.at["1", ("DEC1", "silhouette")] - df.at["1", ("DEC2", "silhouette")]) < 1e-8  # is equal
+    assert abs(df.at["0", ("DEC1", "nmi")] - df.at["1", ("DEC1", "nmi")]) > 1e-2  # is not equal
+    assert abs(df.at["0", ("DEC1", "silhouette")] - df.at["1", ("DEC1", "silhouette")]) > 1e-2  # is not equal
 
 
 @pytest.fixture
@@ -292,14 +292,14 @@ def test_evaluation_df_to_latex_table_single_dataset():
     df = evaluate_dataset(X=X, evaluation_algorithms=algorithms, evaluation_metrics=metrics, labels_true=L,
                           n_repetitions=n_repetitions, add_runtime=False,
                           add_n_clusters=False, save_path="df.csv", random_state=1, aggregation_functions=[np.max, np.std])
-    output_str1 = evaluation_df_to_latex_table(df, 1, "latex1.txt", None, None, False, False, False, None, None, None, 0)
+    output_str1 = evaluation_df_to_latex_table(df, "1", "latex1.txt", None, None, False, False, False, None, None, None, 0)
     output_str1 = output_str1.split("\n")
     assert os.path.isfile("latex1.txt")
     read_file1 = open("latex1.txt", "r").readlines()
     assert len(output_str1) == len(read_file1)
     assert all([output_str1[i] + "\n" == read_file1[i] for i in range(len(output_str1) - 1)] + [output_str1[-1] == read_file1[-1]])
     # Test with input file
-    output_str2 = evaluation_df_to_latex_table("df.csv", 1, "latex2.txt", "std", "max", True, True, True, "red", [True, True, False],
+    output_str2 = evaluation_df_to_latex_table("df.csv", "1", "latex2.txt", "std", "max", True, True, True, "red", [True, True, False],
                                                 100, 2)
     output_str2 = output_str2.split("\n")
     assert os.path.isfile("latex2.txt")
diff --git a/codecov.yml b/codecov.yml
index e004668..eb95c13 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -1,2 +1,22 @@
 ignore:
-  - ".*/tests"  # ignore folders and all its contents
\ No newline at end of file
+  - "**/tests/**/*"
+  - "setup.py"
+  - "**/__init__.py"
+
+coverage:
+  status:
+    project:
+      default:
+        # 'auto' uses the base commit's coverage as the target,
+        # so the check fails only if coverage drops by more than the threshold.
+        target: auto 
+        threshold: 1% # Allows a 1% drop before turning the check red
+    patch:
+      default:
+        target: 85%   # New code in the PR should have at least 85% coverage
+        base: auto
+
+comment:
+  layout: "reach, diff, flags, files"
+  behavior: default
+  require_changes: true  # Only comment if coverage actually changes
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 39a675d..c47f6ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,2 +1,51 @@
 [build-system]
-requires = ["setuptools", "wheel", "Cython>=0.29", "numpy >= 1.15"]
\ No newline at end of file
+requires = ["setuptools", "wheel", "Cython>=3.0", "numpy>=2.0.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "clustpy"
+dynamic = ["version"]
+description = "A Python library for advanced clustering algorithms"
+readme = "README.md"
+authors = [{name = "Collin Leiber", email = "leiber@dbs.ifi.lmu.de"}]
+license = {text = "BSD-3-Clause License"}
+requires-python = ">=3.10"
+dependencies = [
+    "numpy",
+    "scipy",
+    "scikit-learn>=1.6",
+    "matplotlib",
+    "torch",
+    "pandas",
+    "tqdm",
+    "torchvision"
+]
+
+[project.optional-dependencies]
+full = ["Pillow", "nltk", "xlrd", "requests", "opencv-python-headless<4.13"]
+
+[project.urls]
+Homepage = "https://clustpy.readthedocs.io/en/latest/"
+
+[tool.setuptools]
+package-data = {"clustpy" = ["data/datasets/*.data"]}
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["clustpy*"]
+exclude = ["*tests*", "docs*"]
+
+[tool.setuptools.dynamic]
+version = {attr = "clustpy.__version__"}
+
+[tool.pytest.ini_options]
+# Markers migrated from the deleted pytest.ini
+markers = [
+    "data: marks tests concerning data loaders",
+    "largedata: marks tests concerning large data loaders (e.g. image data sets from torchvision)",
+    "timeseriesdata: marks tests concerning dataloader from www.timeseriesclassification.com"
+]
+
+[tool.coverage.run]
+source = ["clustpy"]
+omit = ["*/tests/*"]
\ No newline at end of file
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index aa37366..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[pytest]
-markers =
-    data: marks tests concerning data loaders
-    largedata: marks tests concerning large data loaders (e.g. image data sets from torchvision)
-    timeseriesdata: marks tests concerning dataloader from www.timeseriesclassification.com
diff --git a/setup.py b/setup.py
index c04f376..1d7b16f 100644
--- a/setup.py
+++ b/setup.py
@@ -1,41 +1,11 @@
-from setuptools import setup, find_packages, Extension
-import clustpy
+from setuptools import setup, Extension
 import numpy as np
 
 
-def _load_readme():
-    with open("README.md", "r") as file:
-        readme = file.read()
-    return readme
-
-
 dip_extension = Extension('clustpy.utils.dipModule',
                           include_dirs=[np.get_include()],
                           sources=['clustpy/utils/dip.c'])
 
-setup(
-    name='clustpy',
-    version=clustpy.__version__,
-    packages=find_packages(exclude=["*tests"]),
-    package_data={'clustpy': ['data/datasets/*.data']},
-    url='https://clustpy.readthedocs.io/en/latest/',
-    license='BSD-3-Clause License',
-    author='Collin Leiber',
-    author_email='leiber@dbs.ifi.lmu.de',
-    description='A Python library for advanced clustering algorithms',
-    long_description=_load_readme(),
-    long_description_content_type="text/markdown",
-    python_requires='>=3.10',
-    install_requires=['numpy',
-                      'scipy',
-                      'scikit-learn',
-                      'matplotlib',
-                      'torch',
-                      'pandas',
-                      'tqdm',
-                      'torchvision'],
-    extras_require={
-        'full': ['Pillow', 'nltk', 'xlrd', 'opencv-python', 'requests']
-    },
-    ext_modules=[dip_extension]
-)
+
+if __name__ == "__main__":
+    setup(ext_modules=[dip_extension])