From 8fac667551a73dbd0b47d93188f856f051423fa8 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:20:38 +0100
Subject: [PATCH 01/20] #33 Rebinning function for 1d and 2d tensors (1)
 Implement

---
 tum_esm_utils/rebinning.py | 59 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 tum_esm_utils/rebinning.py

diff --git a/tum_esm_utils/rebinning.py b/tum_esm_utils/rebinning.py
new file mode 100644
index 0000000..532af48
--- /dev/null
+++ b/tum_esm_utils/rebinning.py
@@ -0,0 +1,59 @@
+"""Functions to rebin binned data points
+
+Implements: `rebin_1d`, `rebin_2d`.
+
+This requires you to install this utils library with the optional `modeling` dependency:
+
+```bash
+pip install "tum_esm_utils[modeling]"
+# or
+pdm add "tum_esm_utils[modeling]"
+```"""
+
+from typing import Any
+import numpy as np
+
+
+def _rebin_first_dimension(
+    arr: np.ndarray[Any, Any],
+    new_bin_count: int,
+) -> np.ndarray[Any, Any]:
+    """Internal rebinning function."""
+
+    old_bin_count = len(arr)
+    new_bins = np.zeros(shape=(new_bin_count, *arr.shape[1:]), dtype=np.float64)
+    scale = old_bin_count / new_bin_count
+    for i in range(new_bin_count):
+        start = i * scale
+        end = (i + 1) * scale
+        left = int(np.floor(start))
+        right = int(np.floor(end))
+
+        if left == right:
+            new_bins[i] += arr[left] * (end - start)
+        else:
+            new_bins[i] += arr[left] * (left + 1 - start)
+            for j in range(left + 1, right):
+                new_bins[i] += arr[j]
+            if right < old_bin_count:
+                new_bins[i] += arr[right] * (end - right)
+    return new_bins
+
+
+def rebin_1d(
+    arr: np.ndarray[Any, Any],
+    new_bin_count: int,
+) -> np.ndarray[Any, Any]:
+    """Rebins a 1D array to a new number of bins."""
+    if len(arr.shape) != 1:
+        raise ValueError("Input array must be 1D.")
+    return _rebin_first_dimension(arr, new_bin_count)
+
+
+def rebin_2d(
+    arr: np.ndarray[Any, Any],
+    new_x_bins: int,
+    new_y_bins: int,
+) -> np.ndarray[Any, Any]:
+    """Rebins a 2D array to a new number of bins in the x and y dimensions."""
+    return _rebin_first_dimension(_rebin_first_dimension(arr.T, new_x_bins).T, new_y_bins)

From d7b7a6aa2578bab50057bddca4e7efa2cfa87ff8 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:20:58 +0100
Subject: [PATCH 02/20] #33 Rebinning function for 1d and 2d tensors (2) Test

---
 tests/test_rebinning.py | 115 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 tests/test_rebinning.py

diff --git a/tests/test_rebinning.py b/tests/test_rebinning.py
new file mode 100644
index 0000000..0c97ee2
--- /dev/null
+++ b/tests/test_rebinning.py
@@ -0,0 +1,115 @@
+import time
+import numpy as np
+import pytest
+import tum_esm_utils.rebinning
+
+
+@pytest.mark.order(3)
+@pytest.mark.quick
+def test_rebin_1d() -> None:
+    a = np.array([1, 2, 3, 4])
+
+    b1 = tum_esm_utils.rebinning.rebin_1d(a, 1)
+    b1_expected = np.array([10])
+    assert np.allclose(b1, b1_expected), f"Expected {b1_expected}, got {b1}"
+    assert np.isclose(np.sum(a), np.sum(b1)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b1)}"
+
+    b2 = tum_esm_utils.rebinning.rebin_1d(a, 2)
+    b2_expected = np.array([3, 7])
+    assert np.allclose(b2, b2_expected), f"Expected {b2_expected}, got {b2}"
+    assert np.isclose(np.sum(a), np.sum(b2)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b2)}"
+
+    b3 = tum_esm_utils.rebinning.rebin_1d(a, 3)
+    b3_expected = np.array([1.6666, 3.3333, 5])
+    assert np.allclose(b3, b3_expected, atol=0.001), f"Expected {b3_expected}, got {b3}"
+    assert np.isclose(np.sum(a), np.sum(b3)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b3)}"
+
+    b4 = tum_esm_utils.rebinning.rebin_1d(a, 4)
+    b4_expected = np.array([1, 2, 3, 4])
+    assert np.allclose(b4, b4_expected), f"Expected {b4_expected}, got {b4}"
+    assert np.isclose(np.sum(a), np.sum(b4)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b4)}"
+
+    b5 = tum_esm_utils.rebinning.rebin_1d(a, 5)
+    b5_expected = np.array([0.8, 1.4, 2, 2.6, 3.2])
+    assert np.allclose(b5, b5_expected), f"Expected {b5_expected}, got {b5}"
+    assert np.isclose(np.sum(a), np.sum(b5)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b5)}"
+
+    b6 = tum_esm_utils.rebinning.rebin_1d(a, 6)
+    b6_expected = np.array([0.6666, 1, 1.3333, 2, 2.3333, 2.6666])
+    assert np.allclose(b6, b6_expected, atol=0.001), f"Expected {b6_expected}, got {b6}"
+    assert np.isclose(np.sum(a), np.sum(b6)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b6)}"
+
+
+@pytest.mark.order(3)
+@pytest.mark.quick
+def test_rebin_2d() -> None:
+    a = np.array(
+        [
+            [1, 2, 3, 4],
+            [5, 6, 7, 8],
+            [9, 10, 11, 12],
+            [13, 14, 15, 16],
+            [17, 18, 19, 20],
+            [21, 22, 23, 24],
+        ]
+    )
+
+    b23 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=2, new_y_bins=3)
+    b23_expected = np.array(
+        [
+            [1 + 2 + 5 + 6, 3 + 4 + 7 + 8],
+            [9 + 10 + 13 + 14, 11 + 12 + 15 + 16],
+            [17 + 18 + 21 + 22, 19 + 20 + 23 + 24],
+        ]
+    )
+    assert np.allclose(b23, b23_expected), f"Expected {b23_expected}, got {b23}"
+
+    b13 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=1, new_y_bins=3)
+    b13_expected = np.array(
+        [
+            [1 + 2 + 5 + 6 + 3 + 4 + 7 + 8],
+            [9 + 10 + 13 + 14 + 11 + 12 + 15 + 16],
+            [17 + 18 + 21 + 22 + 19 + 20 + 23 + 24],
+        ]
+    )
+    assert np.allclose(b13, b13_expected), f"Expected {b13_expected}, got {b13}"
+
+    b22 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=2, new_y_bins=2)
+    b22_expected = np.array(
+        [
+            [1 + 2 + 5 + 6 + 9 + 10, 3 + 4 + 7 + 8 + 11 + 12],
+            [13 + 14 + 17 + 18 + 21 + 22, 15 + 16 + 19 + 20 + 23 + 24],
+        ]
+    )
+    assert np.allclose(b22, b22_expected), f"Expected {b22_expected}, got {b22}"
+
+    b12 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=1, new_y_bins=2)
+    b12_expected = np.array(
+        [
+            [1 + 2 + 5 + 6 + 9 + 10 + 3 + 4 + 7 + 8 + 11 + 12],
+            [13 + 14 + 17 + 18 + 21 + 22 + 15 + 16 + 19 + 20 + 23 + 24],
+        ]
+    )
+    assert np.allclose(b12, b12_expected), f"Expected {b12_expected}, got {b12}"
+
+    b11 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=1, new_y_bins=1)
+    b11_expected = np.array([[np.sum(a)]])
+    assert np.allclose(b11, b11_expected), f"Expected {b11_expected}, got {b11}"
+
+    # test rebinning performance
+
+    large_a = np.random.rand(1000, 1000)
+    t1 = time.time()
+    tum_esm_utils.rebinning.rebin_2d(large_a, new_x_bins=100, new_y_bins=100)
+    t2 = time.time()
+    print(f"Rebinning 1000x1000 to 100x100 took {t2 - t1:.4f} seconds.")
+    # Rebinning 1000x1000 to 100x100 took 0.0032 seconds (on an M4 Pro Chip)
+
+    large_a = np.random.rand(100, 100)
+    t1 = time.time()
+    tum_esm_utils.rebinning.rebin_2d(large_a, new_x_bins=1000, new_y_bins=1000)
+    t2 = time.time()
+    print(f"Rebinning 100x100 to 1000x1000 took {t2 - t1:.4f} seconds.")
+    # Rebinning 100x100 to 1000x1000 took 0.0062 seconds (on an M4 Pro Chip)
+
+    # assert False  # uncomment this assert to see the performance prints
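A minimal usage sketch of the rebinning helpers from the two patches above (illustrative only, not part of the patch series; the expected values mirror the assertions in `tests/test_rebinning.py`):

```python
import numpy as np
import tum_esm_utils.rebinning

# fractional bin edges are split proportionally, so the total is preserved
a = np.array([1.0, 2.0, 3.0, 4.0])
b = tum_esm_utils.rebinning.rebin_1d(a, 3)  # ~ [1.667, 3.333, 5.0]
assert np.isclose(np.sum(a), np.sum(b))

# rebin_2d rebins along both axes: a (6, 4) array becomes (3, 2)
m = np.arange(24, dtype=np.float64).reshape(6, 4)
r = tum_esm_utils.rebinning.rebin_2d(m, new_x_bins=2, new_y_bins=3)
assert r.shape == (3, 2)
```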
From 1dd68fac7204351618312e6389f62f1c732affa Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:21:10 +0100
Subject: [PATCH 03/20] #33 Rebinning function for 1d and 2d tensors (3) Add
 numpy to modeling dependencies

---
 pdm.lock                  | 10 +++++-----
 pyproject.toml            |  9 +++++++--
 tum_esm_utils/__init__.py |  7 +++++++
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/pdm.lock b/pdm.lock
index 5ff6766..7acf716 100644
--- a/pdm.lock
+++ b/pdm.lock
@@ -5,7 +5,7 @@
 groups = ["default", "all", "dev", "em27", "modeling", "opus", "plotting", "polars"]
 strategy = ["inherit_metadata"]
 lock_version = "4.5.0"
-content_hash = "sha256:d2b3bc2a264b28c65499ecec63701101208fdd567951e29956d0fb2f00cb072b"
+content_hash = "sha256:1363e6ec1e3d3f69b1c8d96c44e2fd1697192c0680c7b2f3bb8f76be72d3b510"
 
 [[metadata.targets]]
 requires_python = "~=3.10"
@@ -1325,8 +1325,8 @@ files = [
 
 [[package]]
 name = "requests"
-version = "2.32.4"
-requires_python = ">=3.8"
+version = "2.32.5"
+requires_python = ">=3.9"
 summary = "Python HTTP for Humans."
 groups = ["default", "dev"]
 dependencies = [
@@ -1336,8 +1336,8 @@ dependencies = [
     "urllib3<3,>=1.21.1",
 ]
 files = [
-    {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"},
-    {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"},
+    {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"},
+    {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"},
 ]
 
 [[package]]
diff --git a/pyproject.toml b/pyproject.toml
index 24e312a..c127f41 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ authors = [
 ]
 dependencies = [
     "filelock>=3.18.0",
-    "requests>=2.32.3",
+    "requests>=2.32.5",
     "psutil>=7.1.2",
    "pydantic>=2.11.3",
     "pytz>=2025.2",
@@ -68,7 +68,12 @@ all = [
     "scipy>=1.15.2",
     "skyfield>=1.53",
 ]
-modeling = ["polars>=1.16.0", "scipy>=1.15.2", "skyfield>=1.53"]
+modeling = [
+    "polars>=1.16.0",
+    "scipy>=1.15.2",
+    "skyfield>=1.53",
+    "numpy>=2.2.6",
+]
 
 [build-system]
 requires = ["pdm-backend"]
diff --git a/tum_esm_utils/__init__.py b/tum_esm_utils/__init__.py
index a623692..5714a07 100644
--- a/tum_esm_utils/__init__.py
+++ b/tum_esm_utils/__init__.py
@@ -27,6 +27,7 @@
     files as files,
     mathematics as mathematics,
     processes as processes,
+    rebinning as rebinning,
     shell as shell,
     sqlitelock as sqlitelock,
     system as system,
@@ -61,3 +62,9 @@
         from . import column as column
     except ImportError:
         pass
+
+    # requires extra "modeling"
+    try:
+        from . import rebinning as rebinning
+    except ImportError:
+        pass

From 145e9a7854c76093a5265d848536a296ce46d181 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:22:12 +0100
Subject: [PATCH 04/20] #33 Rebinning function for 1d and 2d tensors (4)
 Update API reference

---
 docs/pages/api-reference.md | 35 +++++++++++++++++++++++++++++++++++
 docs/scripts/sync-docs.py   |  1 +
 2 files changed, 36 insertions(+)

diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md
index e3a9144..e13ddbc 100644
--- a/docs/pages/api-reference.md
+++ b/docs/pages/api-reference.md
@@ -1376,6 +1376,41 @@ terminated forcefully after the given timeout (in seconds).
   The list of terminated PIDs.
 
 
+## `tum_esm_utils.rebinning`
+
+Functions to rebin binned data points
+
+Implements: `rebin_1d`, `rebin_2d`.
+
+This requires you to install this utils library with the optional `modeling` dependency:
+
+```bash
+pip install "tum_esm_utils[modeling]"
+# or
+pdm add "tum_esm_utils[modeling]"
+```
+
+
+##### `rebin_1d`
+
+```python
+def rebin_1d(arr: np.ndarray[Any, Any],
+             new_bin_count: int) -> np.ndarray[Any, Any]
+```
+
+Rebins a 1D array to a new number of bins.
+
+
+##### `rebin_2d`
+
+```python
+def rebin_2d(arr: np.ndarray[Any, Any], new_x_bins: int,
+             new_y_bins: int) -> np.ndarray[Any, Any]
+```
+
+Rebins a 2D array to a new number of bins in the x and y dimensions.
+
+
 ## `tum_esm_utils.shell`
 
 Implements custom logging functionality, because the
diff --git a/docs/scripts/sync-docs.py b/docs/scripts/sync-docs.py
index f108afd..c749d27 100644
--- a/docs/scripts/sync-docs.py
+++ b/docs/scripts/sync-docs.py
@@ -35,6 +35,7 @@
     "opus.http_interface",
     "plotting",
     "processes",
+    "rebinning",
     "shell",
     "sqlitelock",
     "system",

From a60f6b4246da46c48fe23636d5dddc6b8c019bbc Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:50:32 +0100
Subject: [PATCH 05/20] #31 More user friendly netcdf file API (1) Implement

---
 tum_esm_utils/netcdf.py | 183 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)
 create mode 100644 tum_esm_utils/netcdf.py

diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py
new file mode 100644
index 0000000..61ec73a
--- /dev/null
+++ b/tum_esm_utils/netcdf.py
@@ -0,0 +1,183 @@
+"""A thin wrapper over the netCDF4 library to make working with NetCDF files easier.
+
+Implements: `NetCDFFile`
+
+This requires you to install this utils library with the optional `netcdf` dependencies:
+
+```bash
+pip install "tum_esm_utils[netcdf]"
+# or
+pdm add "tum_esm_utils[netcdf]"
+```"""
+
+from typing import Any, Literal, Optional
+import os
+import netCDF4 as nc
+
+
+class NetCDFFile:
+    def __init__(
+        self,
+        filepath: str,
+        parallel: bool = False,
+        diskless: bool = True,
+        mode: Literal["w", "a"] = "w",
+    ) -> None:
+        """A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier.
+
+        If writing to a new file, it will first write to a temporary ".tmp.nc" file and rename it to the final
+        filepath when closing the file. This ensures that the final filepath will only exist if the file
+        was written completely. In append mode, the filepath is not changed."""
+
+        self.tmp_filepath = filepath[:-3] + ".tmp.nc"
+        self.filepath = filepath
+        self.mode = mode
+
+        if mode == "w" and os.path.isfile(self.tmp_filepath):
+            os.remove(self.tmp_filepath)
+
+        self.ds = nc.Dataset(
+            self.tmp_filepath if mode == "w" else self.filepath,
+            mode=mode,
+            format="NETCDF4",
+            parallel=parallel,
+            diskless=diskless,
+            persist=True,
+        )
+        self.dimensions: dict[str, nc.Dimension] = {}
+        self.variables: dict[str, nc.Variable[Any]] = {}
+        self.attributes: dict[str, str] = {}
+
+        if mode == "a":
+            for dim_name, dim in self.ds.dimensions.items():
+                self.dimensions[dim_name] = dim
+            for var_name, var in self.ds.variables.items():
+                self.variables[var_name] = var
+            for attr_name in self.ds.ncattrs():
+                self.attributes[attr_name] = self.ds.getncattr(attr_name)
+
+    def create_dimension(self, name: str, size: int) -> None:
+        """Create a new dimension in the NetCDF file.
+
+        Raises:
+            ValueError: If the dimension already exists."""
+
+        if name in self.dimensions:
+            raise ValueError(f"Dimension {name} already exists in the NetCDF file")
+
+        d = self.ds.createDimension(name, size)
+        assert isinstance(d, nc.Dimension)
+        self.dimensions[name] = d
+
+    def create_variable(
+        self,
+        name: str,
+        dimensions: tuple[nc.Dimension | str, ...],
+        units: str,
+        long_name: Optional[str] = None,
+        description: Optional[str] = None,
+        fill_value: Optional[float | int] = None,
+        chunk_dimensions: list[str] = [],
+        datatype: Literal["f4", "f8", "i4", "i8"] = "f4",
+    ) -> None:
+        """Create a new variable in the NetCDF file.
+ + Raises: + ValueError: If the variable already exists or if a dimension is not found.""" + if name in self.variables: + raise ValueError(f"Variable {name} already exists in the NetCDF file") + + object_dimensions: list[nc.Dimension] = [] + for dimension in dimensions: + if isinstance(dimension, str): + if dimension not in self.dimensions: + raise ValueError(f"Dimension {dimension} not found in the NetCDF file") + object_dimensions.append(self.dimensions[dimension]) + else: + if dimension.name not in self.dimensions: + raise ValueError(f"Dimension {dimension.name} not found in the NetCDF file") + object_dimensions.append(dimension) + + chunk_sizes = [dimension.size for dimension in object_dimensions] + for i, dimension in enumerate(object_dimensions): + if dimension.name in chunk_dimensions: + chunk_sizes[i] = 1 + + var: Any = self.ds.createVariable( # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + name, + datatype=datatype, + dimensions=object_dimensions, + zlib=(len(dimensions) > 1) or (name != object_dimensions[0].name), + complevel=2, + fill_value=fill_value, + chunksizes=chunk_sizes if len(chunk_dimensions) > 0 else None, + ) + var.units = units + if long_name is not None: + var.long_name = long_name + if description is not None: + var.description = description + self.variables[name] = var + + def import_dimension( + self, + dimension: nc.Dimension, + new_name: Optional[str] = None, + ) -> None: + """Import a dimension from another NetCDF file. + + Raises: + ValueError: If the dimension already exists.""" + + if dimension.name in self.dimensions: + raise ValueError(f"Dimension {dimension.name} already exists in the NetCDF file") + self.create_dimension(dimension.name if new_name is None else new_name, dimension.size) + + def import_variable( + self, + variable: nc.Variable, # type: ignore + new_name: Optional[str] = None, + ) -> None: + """Import a variable from another NetCDF file. + + Raises: + ValueError: If the variable already exists.""" + + if variable.name in self.variables: + raise ValueError(f"Variable {variable.name} already exists in the NetCDF file") + name = variable.name if new_name is None else new_name + self.create_variable( + name=name, + dimensions=variable.dimensions, + units=str(variable.units), + long_name=variable.long_name if hasattr(variable, "long_name") else None, # pyright: ignore[reportUnknownArgumentType] + description=variable.description if hasattr(variable, "description") else None, # pyright: ignore[reportUnknownArgumentType] + fill_value=float(variable.get_fill_value()), + ) + self.variables[name][:] = variable[:] + + def add_attribute(self, key: str, value: str) -> None: + """Add a global attribute to the NetCDF file. 
+ + Raises: + ValueError: If the attribute already exists.""" + + if key in self.attributes: + raise ValueError(f"Attribute {key} already exists in the NetCDF file") + self.attributes[key] = value + self.ds.setncattr(key, value) + + def close(self) -> None: + """Close the NetCDF file, possibly renaming the temporary file to the final filepath.""" + + self.ds.close() + if self.mode == "w": + if os.path.isfile(self.filepath): + os.remove(self.filepath) + os.rename(self.tmp_filepath, self.filepath) + + del self + + def __getitem__(self, key: str) -> nc.Variable: # type: ignore + """Get a variable from the NetCDF file.""" + return self.variables[key] From fe93b65aaf8545e86e66f6d8ed5e7e5817e78969 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 00:50:46 +0100 Subject: [PATCH 06/20] #31 More user friendly netcdf file API (2) Add dependencies --- pdm.lock | 76 ++++++++++++++++++++++++++++++++++++--- pyproject.toml | 9 ++--- tum_esm_utils/__init__.py | 6 ++++ 3 files changed, 81 insertions(+), 10 deletions(-) diff --git a/pdm.lock b/pdm.lock index 7acf716..c14ac03 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "all", "dev", "em27", "modeling", "opus", "plotting", "polars"] +groups = ["default", "all", "dev", "em27", "modeling", "netcdf", "opus", "plotting", "polars"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:1363e6ec1e3d3f69b1c8d96c44e2fd1697192c0680c7b2f3bb8f76be72d3b510" +content_hash = "sha256:175bedc668865a812f60a5bb238b5ad17a7e13438f1023d331ccc9c7f5169628" [[metadata.targets]] requires_python = "~=3.10" @@ -65,12 +65,55 @@ name = "certifi" version = "2025.6.15" requires_python = ">=3.7" summary = "Python package for providing Mozilla's CA Bundle." 
-groups = ["default", "all", "dev", "modeling"] +groups = ["default", "all", "dev", "modeling", "netcdf"] files = [ {file = "certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057"}, {file = "certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b"}, ] +[[package]] +name = "cftime" +version = "1.6.5" +requires_python = ">=3.10" +summary = "Time-handling functionality from netcdf4-python" +groups = ["all", "netcdf"] +dependencies = [ + "numpy>=1.21.2", +] +files = [ + {file = "cftime-1.6.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8ad81e8cb0eb873b33c3d1e22c6168163fdc64daa8f7aeb4da8092f272575f4d"}, + {file = "cftime-1.6.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12d95c6af852114a13301c5a61e41afdbd1542e72939c1083796f8418b9b8b0e"}, + {file = "cftime-1.6.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2659b7df700e27d9e3671f686ce474dfb5fc274966961edf996acc148dfa094a"}, + {file = "cftime-1.6.5-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:94cebdfcda6a985b8e69aed22d00d6b8aa1f421495adbdcff1d59b3e896d81e2"}, + {file = "cftime-1.6.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:179681b023349a2fe277ceccc89d4fc52c0dd105cb59b7187b5bc5d442875133"}, + {file = "cftime-1.6.5-cp310-cp310-win_amd64.whl", hash = "sha256:d8b9fdecb466879cfe8ca4472b229b6f8d0bb65e4ffd44266ae17484bac2cf38"}, + {file = "cftime-1.6.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:474e728f5a387299418f8d7cb9c52248dcd5d977b2a01de7ec06bba572e26b02"}, + {file = "cftime-1.6.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ab9e80d4de815cac2e2d88a2335231254980e545d0196eb34ee8f7ed612645f1"}, + {file = "cftime-1.6.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ad24a563784e4795cb3d04bd985895b5db49ace2cbb71fcf1321fd80141f9a52"}, + {file = "cftime-1.6.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a3cda6fd12c7fb25eff40a6a857a2bf4d03e8cc71f80485d8ddc65ccbd80f16a"}, + {file = "cftime-1.6.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:28cda78d685397ba23d06273b9c916c3938d8d9e6872a537e76b8408a321369b"}, + {file = "cftime-1.6.5-cp311-cp311-win_amd64.whl", hash = "sha256:93ead088e3a216bdeb9368733a0ef89a7451dfc1d2de310c1c0366a56ad60dc8"}, + {file = "cftime-1.6.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:eef25caed5ebd003a38719bd3ff8847cd52ef2ea56c3ebdb2c9345ba131fc7c5"}, + {file = "cftime-1.6.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c87d2f3b949e45463e559233c69e6a9cf691b2b378c1f7556166adfabbd1c6b0"}, + {file = "cftime-1.6.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:82cb413973cc51b55642b3a1ca5b28db5b93a294edbef7dc049c074b478b4647"}, + {file = "cftime-1.6.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85ba8e7356d239cfe56ef7707ac30feaf67964642ac760a82e507ee3c5db4ac4"}, + {file = "cftime-1.6.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:456039af7907a3146689bb80bfd8edabd074c7f3b4eca61f91b9c2670addd7ad"}, + {file = "cftime-1.6.5-cp312-cp312-win_amd64.whl", hash = "sha256:da84534c43699960dc980a9a765c33433c5de1a719a4916748c2d0e97a071e44"}, + {file = "cftime-1.6.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4aba66fd6497711a47c656f3a732c2d1755ad15f80e323c44a8716ebde39ddd5"}, + {file = "cftime-1.6.5-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:89e7cba699242366e67d6fb5aee579440e791063f92a93853610c91647167c0d"}, + {file = "cftime-1.6.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2f1eb43d7a7b919ec99aee709fb62ef87ef1cf0679829ef93d37cc1c725781e9"}, + {file = "cftime-1.6.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e02a1d80ffc33fe469c7db68aa24c4a87f01da0c0c621373e5edadc92964900b"}, + {file = "cftime-1.6.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18ab754805233cdd889614b2b3b86a642f6d51a57a1ec327c48053f3414f87d8"}, + {file = "cftime-1.6.5-cp313-cp313-win_amd64.whl", hash = "sha256:6c27add8f907f4a4cd400e89438f2ea33e2eb5072541a157a4d013b7dbe93f9c"}, + {file = "cftime-1.6.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c69ce3bdae6a322cbb44e9ebc20770d47748002fb9d68846a1e934f1bd5daf0b"}, + {file = "cftime-1.6.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e62e9f2943e014c5ef583245bf2e878398af131c97e64f8cd47c1d7baef5c4e2"}, + {file = "cftime-1.6.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7da5fdaa4360d8cb89b71b8ded9314f2246aa34581e8105c94ad58d6102d9e4f"}, + {file = "cftime-1.6.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bff865b4ea4304f2744a1ad2b8149b8328b321dd7a2b9746ef926d229bd7cd49"}, + {file = "cftime-1.6.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e552c5d1c8a58f25af7521e49237db7ca52ed2953e974fe9f7c4491e95fdd36c"}, + {file = "cftime-1.6.5-cp314-cp314-win_amd64.whl", hash = "sha256:e645b095dc50a38ac454b7e7f0742f639e7d7f6b108ad329358544a6ff8c9ba2"}, + {file = "cftime-1.6.5.tar.gz", hash = "sha256:8225fed6b9b43fb87683ebab52130450fc1730011150d3092096a90e54d1e81e"}, +] + [[package]] name = "charset-normalizer" version = "3.4.2" @@ -737,6 +780,31 @@ files = [ {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] +[[package]] +name = "netcdf4" +version = "1.7.3" +requires_python = ">=3.10" +summary = "Provides an object-oriented python interface to the netCDF version 4 library" +groups = ["all", "netcdf"] +dependencies = [ + "certifi", + "cftime", + "numpy", +] +files = [ + {file = "netcdf4-1.7.3-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:db761afd3a6b9482df018c4783e0bdf99141a41db1f14c68c89986effb182d57"}, + {file = "netcdf4-1.7.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ad4c2d9b469248d83cbacb70ad9e7d3a6c0ba27febe839c90192147199745ba4"}, + {file = "netcdf4-1.7.3-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6986d039717582071e55ae9c6fbebfe4e5bbbc3af122fc3db0c0c09c4d8955e"}, + {file = "netcdf4-1.7.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:348e79b4f26f2e403fe3c54364e9297e4ef326c7ee12f9be01c037db853d26c0"}, + {file = "netcdf4-1.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:6ab71f5d70e55e8584d168d5158efdb2fd8d350a033d0c27d942c3d399587f54"}, + {file = "netcdf4-1.7.3-cp311-abi3-macosx_13_0_x86_64.whl", hash = "sha256:801c222d8ad35fd7dc7e9aa7ea6373d184bcb3b8ee6b794c5fbecaa5155b1792"}, + {file = "netcdf4-1.7.3-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:83dbfd6f10a0ec785d5296016bd821bbe9f0df780be72fc00a1f0d179d9c5f0f"}, + {file = "netcdf4-1.7.3-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:949e086d4d2612b49e5b95f60119d216c9ceb7b17bc771e9e0fa0e9b9c0a2f9f"}, + {file = "netcdf4-1.7.3-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:0c764ba6f6a1421cab5496097e8a1c4d2e36be2a04880dfd288bb61b348c217e"}, + {file = "netcdf4-1.7.3-cp311-abi3-win_amd64.whl", hash = "sha256:1b6c646fa179fb1e5e8d6e8231bc78cc0311eceaa1241256b5a853f1d04055b9"}, + {file = "netcdf4-1.7.3.tar.gz", hash = "sha256:83f122fc3415e92b1d4904fd6a0898468b5404c09432c34beb6b16c533884673"}, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -793,7 +861,7 @@ name = "numpy" version = "2.2.6" requires_python = ">=3.10" summary = "Fundamental package for array computing in Python" -groups = ["all", "modeling", "opus", "plotting"] +groups = ["all", "modeling", "netcdf", "opus", "plotting"] files = [ {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, diff --git a/pyproject.toml b/pyproject.toml index c127f41..89c7d2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,8 @@ plotting = ["matplotlib>=3.10.7", "numpy>=2.2.5"] em27 = ["polars>=1.16.0"] polars = ["polars>=1.16.0"] # alias for em27 (will be removed with 3.0) opus = ["numpy>=2.2.5", "tenacity>=9.1.2"] +modeling = ["polars>=1.16.0", "scipy>=1.15.2", "skyfield>=1.53", "numpy>=2.2.6"] +netcdf = ["netcdf4>=1.7.3"] all = [ "matplotlib>=3.10.1", "numpy>=2.2.5", @@ -67,12 +69,7 @@ all = [ "tenacity>=9.1.2", "scipy>=1.15.2", "skyfield>=1.53", -] -modeling = [ - "polars>=1.16.0", - "scipy>=1.15.2", - "skyfield>=1.53", - "numpy>=2.2.6", + "netcdf4>=1.7.3", ] [build-system] diff --git a/tum_esm_utils/__init__.py b/tum_esm_utils/__init__.py index 5714a07..4177ce4 100644 --- a/tum_esm_utils/__init__.py +++ b/tum_esm_utils/__init__.py @@ -68,3 +68,9 @@ from . import rebinning as rebinning except ImportError: pass + + # requires extra "netcdf" + try: + from . import netcdf as netcdf + except ImportError: + pass From 4cb95bfcac758ec3365fe41b8c6b7c4df479dbdf Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 01:03:02 +0100 Subject: [PATCH 07/20] #31 More user friendly netcdf file API (3) refine new API --- tests/test_static_types.py | 5 +++- tum_esm_utils/netcdf.py | 52 +++++++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/tests/test_static_types.py b/tests/test_static_types.py index ecd4d9d..5e27721 100644 --- a/tests/test_static_types.py +++ b/tests/test_static_types.py @@ -13,6 +13,7 @@ def _rmdir(path: str) -> None: @pytest.mark.order(1) +@pytest.mark.quick def test_with_mypy() -> None: _rmdir(".mypy_cache/3.*/tum_esm_utils") _rmdir(".mypy_cache/3.*/tests") @@ -21,6 +22,8 @@ def test_with_mypy() -> None: print(f"Checking {path} ...") assert os.system(f"cd {PROJECT_DIR} && {sys.executable} -m mypy {path}") == 0 + @pytest.mark.order(1) +@pytest.mark.quick def test_with_pyright() -> None: - assert os.system(f"cd {PROJECT_DIR} && {sys.executable} -m pyright") == 0 \ No newline at end of file + assert os.system(f"cd {PROJECT_DIR} && {sys.executable} -m pyright") == 0 diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py index 61ec73a..2a94c1d 100644 --- a/tum_esm_utils/netcdf.py +++ b/tum_esm_utils/netcdf.py @@ -21,7 +21,7 @@ def __init__( filepath: str, parallel: bool = False, diskless: bool = True, - mode: Literal["w", "a"] = "w", + mode: Literal["w", "a", "r"] = "w", ) -> None: """A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier. 
@@ -29,6 +29,7 @@ def __init__(
         filepath when closing the file. This ensures that the final filepath will only exist if the file
         was written completely. In append mode, the filepath is not changed."""
 
+        assert filepath.endswith(".nc"), "Only the .nc file extension is supported"
         self.tmp_filepath = filepath[:-3] + ".tmp.nc"
         self.filepath = filepath
         self.mode = mode
@@ -48,7 +49,7 @@ def __init__(
         self.dimensions: dict[str, nc.Dimension] = {}
         self.variables: dict[str, nc.Variable[Any]] = {}
         self.attributes: dict[str, str] = {}
 
-        if mode == "a":
+        if mode in ["a", "r"]:
             for dim_name, dim in self.ds.dimensions.items():
                 self.dimensions[dim_name] = dim
             for var_name, var in self.ds.variables.items():
@@ -60,7 +61,11 @@ def create_dimension(self, name: str, size: int) -> None:
         """Create a new dimension in the NetCDF file.
 
         Raises:
-            ValueError: If the dimension already exists."""
+            ValueError: If the dimension already exists.
+            RuntimeError: If the NetCDF file is not opened in write mode."""
+
+        if self.mode == "r":
+            raise RuntimeError("Cannot create dimension in read-only mode")
 
         if name in self.dimensions:
             raise ValueError(f"Dimension {name} already exists in the NetCDF file")
@@ -79,11 +84,18 @@ def create_variable(
         fill_value: Optional[float | int] = None,
         chunk_dimensions: list[str] = [],
         datatype: Literal["f4", "f8", "i4", "i8"] = "f4",
+        zlib: bool = True,
+        compression_level: int = 2,
     ) -> None:
         """Create a new variable in the NetCDF file.
 
         Raises:
-            ValueError: If the variable already exists or if a dimension is not found."""
+            ValueError: If the variable already exists or if a dimension is not found.
+            RuntimeError: If the NetCDF file is not opened in write mode."""
+
+        if self.mode == "r":
+            raise RuntimeError("Cannot create variable in read-only mode")
+
         if name in self.variables:
             raise ValueError(f"Variable {name} already exists in the NetCDF file")
@@ -103,12 +115,12 @@ def create_variable(
             if dimension.name in chunk_dimensions:
                 chunk_sizes[i] = 1
 
-        var: Any = self.ds.createVariable(  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+        var: Any = self.ds.createVariable(  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
             name,
             datatype=datatype,
             dimensions=object_dimensions,
-            zlib=(len(dimensions) > 1) or (name != object_dimensions[0].name),
-            complevel=2,
+            zlib=zlib and ((len(dimensions) > 1) or (name != object_dimensions[0].name)),
+            complevel=compression_level,  # type: ignore
             fill_value=fill_value,
             chunksizes=chunk_sizes if len(chunk_dimensions) > 0 else None,
         )
@@ -127,7 +139,11 @@ def import_dimension(
         """Import a dimension from another NetCDF file.
 
        Raises:
-            ValueError: If the dimension already exists."""
+            ValueError: If the dimension already exists.
+            RuntimeError: If the NetCDF file is not opened in write mode."""
+
+        if self.mode == "r":
+            raise RuntimeError("Cannot import dimension in read-only mode")
 
         if dimension.name in self.dimensions:
             raise ValueError(f"Dimension {dimension.name} already exists in the NetCDF file")
@@ -135,13 +151,19 @@ def import_dimension(
 
     def import_variable(
         self,
-        variable: nc.Variable,  # type: ignore
+        variable: "nc.Variable[Any]",
         new_name: Optional[str] = None,
+        zlib: bool = True,
+        compression_level: int = 2,
     ) -> None:
         """Import a variable from another NetCDF file.
 
         Raises:
-            ValueError: If the variable already exists."""
+            ValueError: If the variable already exists.
+ RuntimeError: If the NetCDF file is not opened in write mode.""" + + if self.mode == "r": + raise RuntimeError("Cannot import variable in read-only mode") if variable.name in self.variables: raise ValueError(f"Variable {variable.name} already exists in the NetCDF file") @@ -153,6 +175,8 @@ def import_variable( long_name=variable.long_name if hasattr(variable, "long_name") else None, # pyright: ignore[reportUnknownArgumentType] description=variable.description if hasattr(variable, "description") else None, # pyright: ignore[reportUnknownArgumentType] fill_value=float(variable.get_fill_value()), + zlib=zlib, + compression_level=compression_level, ) self.variables[name][:] = variable[:] @@ -160,7 +184,11 @@ def add_attribute(self, key: str, value: str) -> None: """Add a global attribute to the NetCDF file. Raises: - ValueError: If the attribute already exists.""" + ValueError: If the attribute already exists. + RuntimeError: If the NetCDF file is not opened in write mode.""" + + if self.mode == "r": + raise RuntimeError("Cannot add attribute in read-only mode") if key in self.attributes: raise ValueError(f"Attribute {key} already exists in the NetCDF file") @@ -178,6 +206,6 @@ def close(self) -> None: del self - def __getitem__(self, key: str) -> nc.Variable: # type: ignore + def __getitem__(self, key: str) -> "nc.Variable[Any]": """Get a variable from the NetCDF file.""" return self.variables[key] From 38ea88da154590ba818087fed5612c2215a7efdc Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 01:05:48 +0100 Subject: [PATCH 08/20] #31 More user friendly netcdf file API (4) test new API --- tests/test_netcdf.py | 88 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/test_netcdf.py diff --git a/tests/test_netcdf.py b/tests/test_netcdf.py new file mode 100644 index 0000000..8aee2ee --- /dev/null +++ b/tests/test_netcdf.py @@ -0,0 +1,88 @@ +import pytest +import tempfile +import os +import numpy as np +import netCDF4 as nc +import tum_esm_utils.files +from tum_esm_utils.netcdf import NetCDFFile + +PROJECT_DIR = tum_esm_utils.files.get_parent_dir_path(__file__, current_depth=2) + + +@pytest.mark.order(3) +@pytest.mark.quick +def test_netcdffile_create_and_read() -> None: + with tempfile.TemporaryDirectory() as tmpdirname: + # Create a NetCDF file in a temporary directory + filepath = os.path.join(tmpdirname, "test.nc") + ncfile = NetCDFFile(str(filepath), mode="w") + + assert not os.path.exists(filepath) + assert os.path.exists(filepath[:-3] + ".tmp.nc") + + # Create dimensions + ncfile.create_dimension("time", 10) + ncfile.create_dimension("lat", 5) + ncfile.create_dimension("lon", 5) + + # Create variable + ncfile.create_variable( + name="temperature", + dimensions=("time", "lat", "lon"), + units="K", + long_name="Air temperature", + description="Synthetic temperature data", + fill_value=-9999.0, + datatype="f4", + ) + + # Write data + data = np.random.rand(10, 5, 5).astype(np.float32) + ncfile.variables["temperature"][:] = data + + # Add attribute + ncfile.add_attribute("title", "Test NetCDF File") + + # Close file + ncfile.close() + + # Reopen and check contents + ncfile2 = nc.Dataset(str(filepath), mode="a") + assert "temperature" in ncfile2.variables + assert ncfile2.variables["temperature"].shape == (10, 5, 5) + np.testing.assert_array_almost_equal(ncfile2.variables["temperature"][:], data) + assert ncfile2.getncattr("title") == "Test NetCDF File" + ncfile2.close() + + +@pytest.mark.order(3) +@pytest.mark.quick +def 
test_netcdffile_import_dimension_and_variable() -> None:
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # Create source NetCDF file
+        src_filepath = os.path.join(tmpdirname, "src.nc")
+        src_nc = NetCDFFile(str(src_filepath), mode="w")
+        src_nc.create_dimension("x", 4)
+        src_nc.create_variable(
+            name="foo", dimensions=("x",), units="1", fill_value=0, datatype="i4"
+        )
+        src_nc.variables["foo"][:] = np.arange(4)
+        src_nc.close()
+
+        # Open source and target files
+        src_nc = NetCDFFile(str(src_filepath), mode="r")
+        tgt_filepath = os.path.join(tmpdirname, "tgt.nc")
+        tgt_nc = NetCDFFile(str(tgt_filepath), mode="w")
+
+        # Import dimension and variable
+        tgt_nc.import_dimension(src_nc.dimensions["x"])
+        tgt_nc.import_variable(src_nc.variables["foo"])
+        tgt_nc.close()
+        src_nc.close()
+
+        # Check imported data
+        tgt_nc = NetCDFFile(str(tgt_filepath), mode="r")
+        assert "x" in tgt_nc.dimensions
+        assert "foo" in tgt_nc.variables
+        np.testing.assert_array_equal(tgt_nc.variables["foo"][:], np.arange(4))
+        tgt_nc.close()

From 00d7d386957fa547cc25205e98c6e6e300494198 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 01:06:29 +0100
Subject: [PATCH 09/20] #31 More user friendly netcdf file API (5) add to API
 reference

---
 docs/pages/api-reference.md | 139 ++++++++++++++++++++++++++++++++++++
 docs/scripts/sync-docs.py   |   1 +
 2 files changed, 140 insertions(+)

diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md
index e13ddbc..d950fdc 100644
--- a/docs/pages/api-reference.md
+++ b/docs/pages/api-reference.md
@@ -812,6 +812,145 @@ can lead to floating point errors, i.e. `1 % 0.1 == 0.09999999999999998`.
 Using `math.fmod` also does not seem to work correctly with floats.
 
 
+## `tum_esm_utils.netcdf`
+
+A thin wrapper over the netCDF4 library to make working with NetCDF files easier.
+
+Implements: `NetCDFFile`
+
+This requires you to install this utils library with the optional `netcdf` dependencies:
+
+```bash
+pip install "tum_esm_utils[netcdf]"
+# or
+pdm add "tum_esm_utils[netcdf]"
+```
+
+
+### `NetCDFFile` Objects
+
+```python
+class NetCDFFile()
+```
+
+
+##### `__init__`
+
+```python
+def __init__(filepath: str,
+             parallel: bool = False,
+             diskless: bool = True,
+             mode: Literal["w", "a", "r"] = "w") -> None
+```
+
+A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier.
+
+If writing to a new file, it will first write to a temporary ".tmp.nc" file and rename it to the final
+filepath when closing the file. This ensures that the final filepath will only exist if the file
+was written completely. In append mode, the filepath is not changed.
+
+
+##### `create_dimension`
+
+```python
+def create_dimension(name: str, size: int) -> None
+```
+
+Create a new dimension in the NetCDF file.
+
+**Raises**:
+
+- `ValueError` - If the dimension already exists.
+- `RuntimeError` - If the NetCDF file is not opened in write mode.
+
+
+##### `create_variable`
+
+```python
+def create_variable(name: str,
+                    dimensions: tuple[nc.Dimension | str, ...],
+                    units: str,
+                    long_name: Optional[str] = None,
+                    description: Optional[str] = None,
+                    fill_value: Optional[float | int] = None,
+                    chunk_dimensions: list[str] = [],
+                    datatype: Literal["f4", "f8", "i4", "i8"] = "f4",
+                    zlib: bool = True,
+                    compression_level: int = 2) -> None
+```
+
+Create a new variable in the NetCDF file.
+
+**Raises**:
+
+- `ValueError` - If the variable already exists or if a dimension is not found.
+- `RuntimeError` - If the NetCDF file is not opened in write mode.
+ + +##### `import_dimension` + +```python +def import_dimension(dimension: nc.Dimension, + new_name: Optional[str] = None) -> None +``` + +Import a dimension from another NetCDF file. + +**Raises**: + +- `ValueError` - If the dimension already exists. +- `RuntimeError` - If the NetCDF file is not opened in write mode. + + +##### `import_variable` + +```python +def import_variable(variable: "nc.Variable[Any]", + new_name: Optional[str] = None, + zlib: bool = True, + compression_level: int = 2) -> None +``` + +Import a variable from another NetCDF file. + +**Raises**: + +- `ValueError` - If the variable already exists. +- `RuntimeError` - If the NetCDF file is not opened in write mode. + + +##### `add_attribute` + +```python +def add_attribute(key: str, value: str) -> None +``` + +Add a global attribute to the NetCDF file. + +**Raises**: + +- `ValueError` - If the attribute already exists. +- `RuntimeError` - If the NetCDF file is not opened in write mode. + + +##### `close` + +```python +def close() -> None +``` + +Close the NetCDF file, possibly renaming the temporary file to the final filepath. + + +##### `__getitem__` + +```python +def __getitem__(key: str) -> "nc.Variable[Any]" +``` + +Get a variable from the NetCDF file. + + ## `tum_esm_utils.opus` Functions for interacting with OPUS files. diff --git a/docs/scripts/sync-docs.py b/docs/scripts/sync-docs.py index c749d27..2cd5fda 100644 --- a/docs/scripts/sync-docs.py +++ b/docs/scripts/sync-docs.py @@ -30,6 +30,7 @@ "em27", "files", "mathematics", + "netcdf", "opus", "opus.file_interface", "opus.http_interface", From cb63b61aef1b4e2dd7e5e07c1a41f478ae8030de Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 20:39:48 +0100 Subject: [PATCH 10/20] #31 More user friendly netcdf file API (6) refine interface --- tum_esm_utils/netcdf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py index 2a94c1d..b45d346 100644 --- a/tum_esm_utils/netcdf.py +++ b/tum_esm_utils/netcdf.py @@ -21,7 +21,7 @@ def __init__( filepath: str, parallel: bool = False, diskless: bool = True, - mode: Literal["w", "a", "r"] = "w", + mode: Literal["w", "a", "r"] = "r", ) -> None: """A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier. @@ -180,17 +180,17 @@ def import_variable( ) self.variables[name][:] = variable[:] - def add_attribute(self, key: str, value: str) -> None: + def add_attribute(self, key: str, value: str, allow_overwrite: bool = False) -> None: """Add a global attribute to the NetCDF file. Raises: - ValueError: If the attribute already exists. + ValueError: If the attribute already exists and `allow_overwrite` is False. 
RuntimeError: If the NetCDF file is not opened in write mode.""" if self.mode == "r": raise RuntimeError("Cannot add attribute in read-only mode") - if key in self.attributes: + if (not allow_overwrite) and (key in self.attributes): raise ValueError(f"Attribute {key} already exists in the NetCDF file") self.attributes[key] = value self.ds.setncattr(key, value) From f512653f66d310b8683543ba7cc1b51d04d01e96 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 20:40:47 +0100 Subject: [PATCH 11/20] #31 More user friendly netcdf file API (7) add function to compress an existing nc file --- tum_esm_utils/netcdf.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py index b45d346..779a860 100644 --- a/tum_esm_utils/netcdf.py +++ b/tum_esm_utils/netcdf.py @@ -1,6 +1,6 @@ """A thin wrapper over the netCDF4 library to make working with NetCDF files easier. -Implements: `NetCDFFile` +Implements: `NetCDFFile`, `compress_netcdf_file`. This requires you to install this utils library with the optional `netcdf` dependencies: @@ -209,3 +209,40 @@ def close(self) -> None: def __getitem__(self, key: str) -> "nc.Variable[Any]": """Get a variable from the NetCDF file.""" return self.variables[key] + + +def compress_netcdf_file( + source_filepath: str, + destination_filepath: str, + compression_level: int = 2, +) -> None: + """Compress an existing NetCDF file by creating a new one with the specified compression level. This is useful because some NetCDF4 files given to you might not be (very well) compressed. + + Raises: + FileNotFoundError: If the source file does not exist. + FileExistsError: If the destination file already exists. + """ + + if not os.path.isfile(source_filepath): + raise FileNotFoundError(f"Source file {source_filepath} does not exist.") + if os.path.isfile(destination_filepath): + raise FileExistsError(f"Destination file {destination_filepath} already exists.") + + src_nc = NetCDFFile(source_filepath, mode="r") + dest_nc = NetCDFFile(destination_filepath, mode="w") + + # Copy dimensions + for dim in src_nc.dimensions.values(): + dest_nc.import_dimension(dim) + + # Copy variables + for var in src_nc.variables.values(): + dest_nc.import_variable(var, compression_level=compression_level) + + # Copy attributes + for attr_name, attr_value in src_nc.attributes.items(): + dest_nc.add_attribute(attr_name, attr_value) + + src_nc.close() + dest_nc.close() + From 7213d2bd81afd2e39ed76dffe02b5ca9feaf7df5 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 22:51:53 +0100 Subject: [PATCH 12/20] #31 More user friendly netcdf file API (8) add function to remove items from an nc file --- tum_esm_utils/netcdf.py | 49 ++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py index 779a860..e557d5f 100644 --- a/tum_esm_utils/netcdf.py +++ b/tum_esm_utils/netcdf.py @@ -1,6 +1,6 @@ """A thin wrapper over the netCDF4 library to make working with NetCDF files easier. -Implements: `NetCDFFile`, `compress_netcdf_file`. +Implements: `NetCDFFile`, `remove_elements_from_netcdf_file`, `compress_netcdf_file`. 
This requires you to install this utils library with the optional `netcdf` dependencies: @@ -211,12 +211,15 @@ def __getitem__(self, key: str) -> "nc.Variable[Any]": return self.variables[key] -def compress_netcdf_file( +def remove_elements_from_netcdf_file( source_filepath: str, destination_filepath: str, + variables_to_remove: list[str] = [], + dimensions_to_remove: list[str] = [], + attributes_to_remove: list[str] = [], compression_level: int = 2, ) -> None: - """Compress an existing NetCDF file by creating a new one with the specified compression level. This is useful because some NetCDF4 files given to you might not be (very well) compressed. + """Create a new NetCDF file by copying an existing one, but removing specified variables, dimensions, and attributes. This is useful because NetCDF4 does not support removing elements from an existing file. Raises: FileNotFoundError: If the source file does not exist. @@ -231,18 +234,48 @@ def compress_netcdf_file( src_nc = NetCDFFile(source_filepath, mode="r") dest_nc = NetCDFFile(destination_filepath, mode="w") + # check that no variable depends on a dimension to be removed + vars = [v for v in src_nc.variables.values() if v.name not in variables_to_remove] + for var in vars: + for dim_name in var.dimensions: + if dim_name in dimensions_to_remove: + raise ValueError( + f"Cannot remove dimension {dim_name} because it is used by variable {var.name}." + ) + # Copy dimensions - for dim in src_nc.dimensions.values(): - dest_nc.import_dimension(dim) + for dim_name, dim in src_nc.dimensions.items(): + if dim_name not in dimensions_to_remove: + dest_nc.import_dimension(dim) # Copy variables - for var in src_nc.variables.values(): - dest_nc.import_variable(var, compression_level=compression_level) + for var_name, var in src_nc.variables.items(): + if var_name not in variables_to_remove: + dest_nc.import_variable(var, compression_level=compression_level) # Copy attributes for attr_name, attr_value in src_nc.attributes.items(): - dest_nc.add_attribute(attr_name, attr_value) + if attr_name not in attributes_to_remove: + dest_nc.add_attribute(attr_name, attr_value) src_nc.close() dest_nc.close() + +def compress_netcdf_file( + source_filepath: str, + destination_filepath: str, + compression_level: int = 2, +) -> None: + """Compress an existing NetCDF file by creating a new one with the specified compression level. This is useful because some NetCDF4 files given to you might not be (very well) compressed. + + Raises: + FileNotFoundError: If the source file does not exist. + FileExistsError: If the destination file already exists. + """ + + remove_elements_from_netcdf_file( + source_filepath, + destination_filepath, + compression_level=compression_level, + ) From 21f0c5b4641643f1c3dbfdc7a0f7848a7c61ae68 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 22:52:54 +0100 Subject: [PATCH 13/20] #31 More user friendly netcdf file API (9) update API reference --- docs/pages/api-reference.md | 43 +++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md index d950fdc..b98059f 100644 --- a/docs/pages/api-reference.md +++ b/docs/pages/api-reference.md @@ -816,7 +816,7 @@ Using `math.fmod` also does not seem to work correctly with floats. A thin wrapper over the netCDF4 library to make working with NetCDF files easier. -Implements: `NetCDFFile` +Implements: `NetCDFFile`, `remove_elements_from_netcdf_file`, `compress_netcdf_file`. 
This requires you to install this utils library with the optional `netcdf` dependencies: @@ -840,7 +840,7 @@ class NetCDFFile() def __init__(filepath: str, parallel: bool = False, diskless: bool = True, - mode: Literal["w", "a", "r"] = "w") -> None + mode: Literal["w", "a", "r"] = "r") -> None ``` A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier. @@ -922,14 +922,14 @@ Import a variable from another NetCDF file. ##### `add_attribute` ```python -def add_attribute(key: str, value: str) -> None +def add_attribute(key: str, value: str, allow_overwrite: bool = False) -> None ``` Add a global attribute to the NetCDF file. **Raises**: -- `ValueError` - If the attribute already exists. +- `ValueError` - If the attribute already exists and `allow_overwrite` is False. - `RuntimeError` - If the NetCDF file is not opened in write mode. @@ -951,6 +951,41 @@ def __getitem__(key: str) -> "nc.Variable[Any]" Get a variable from the NetCDF file. +##### `remove_elements_from_netcdf_file` + +```python +def remove_elements_from_netcdf_file(source_filepath: str, + destination_filepath: str, + variables_to_remove: list[str] = [], + dimensions_to_remove: list[str] = [], + attributes_to_remove: list[str] = [], + compression_level: int = 2) -> None +``` + +Create a new NetCDF file by copying an existing one, but removing specified variables, dimensions, and attributes. This is useful because NetCDF4 does not support removing elements from an existing file. + +**Raises**: + +- `FileNotFoundError` - If the source file does not exist. +- `FileExistsError` - If the destination file already exists. + + +##### `compress_netcdf_file` + +```python +def compress_netcdf_file(source_filepath: str, + destination_filepath: str, + compression_level: int = 2) -> None +``` + +Compress an existing NetCDF file by creating a new one with the specified compression level. This is useful because some NetCDF4 files given to you might not be (very well) compressed. + +**Raises**: + +- `FileNotFoundError` - If the source file does not exist. +- `FileExistsError` - If the destination file already exists. + + ## `tum_esm_utils.opus` Functions for interacting with OPUS files. From bb5058529ae705baee270e073f8dae5f67dedf8e Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Thu, 20 Nov 2025 00:18:05 +0100 Subject: [PATCH 14/20] Add function `fill_df_time_gaps_with_nans` (1) Implement --- tum_esm_utils/dataframes.py | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tum_esm_utils/dataframes.py diff --git a/tum_esm_utils/dataframes.py b/tum_esm_utils/dataframes.py new file mode 100644 index 0000000..6e60504 --- /dev/null +++ b/tum_esm_utils/dataframes.py @@ -0,0 +1,44 @@ +"""Dataframe-related utility functions. + +Implements: `fill_df_time_gaps_with_nans` + +This requires you to install this utils library with the optional `polars` dependency: + +```bash +pip install "tum_esm_utils[polars]" +# or +pdm add "tum_esm_utils[polars]" +``` +""" + +import datetime +import polars as pl + + +def fill_df_time_gaps_with_nans( + df: pl.DataFrame, + time_col: str, + max_gap_seconds: int, +) -> pl.DataFrame: + """Fill time gaps in a dataframe with NaN rows. This is very useful for plotting dataframes where time gaps should be visible. + + Args: + df: The input dataframe. + time_col: The name of the time column. 
+        max_gap_seconds: The maximum gap in seconds to fill with NaN rows."""
+
+    assert max_gap_seconds > 1, "max_gap_seconds must be greater than 1"
+    gaps_in_df = df.select(
+        time_col,
+        pl.col(time_col).diff().dt.total_seconds().alias(f"{time_col}_diff_seconds"),
+    ).filter(
+        pl.col(f"{time_col}_diff_seconds").gt(max_gap_seconds),
+    )[time_col] - datetime.timedelta(seconds=1)
+    gap_df = pl.DataFrame(
+        {
+            time_col: gaps_in_df,
+            **{c: [None] * len(gaps_in_df) for c in df.columns if c != time_col},
+        }
+    )
+    df_with_gaps = pl.concat([df, gap_df], how="vertical").sort(time_col)
+    return df_with_gaps

From db7c0a8049bc90b42da46da8deeafbe7b4305dbc Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Thu, 20 Nov 2025 00:18:18 +0100
Subject: [PATCH 15/20] Add function `fill_df_time_gaps_with_nans` (2) Update
 API reference

---
 docs/pages/api-reference.md | 31 +++++++++++++++++++++++++++++++
 docs/scripts/sync-docs.py   |  1 +
 2 files changed, 32 insertions(+)

diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md
index b98059f..e96c22a 100644
--- a/docs/pages/api-reference.md
+++ b/docs/pages/api-reference.md
@@ -255,6 +255,37 @@ def load_ggg2020_vmr(filepath: str) -> pl.DataFrame
 Load the Atmospheric profile from a GGG2020 vmr file.
 
 
+## `tum_esm_utils.dataframes`
+
+Dataframe-related utility functions.
+
+Implements: `fill_df_time_gaps_with_nans`
+
+This requires you to install this utils library with the optional `polars` dependency:
+
+```bash
+pip install "tum_esm_utils[polars]"
+# or
+pdm add "tum_esm_utils[polars]"
+```
+
+
+##### `fill_df_time_gaps_with_nans`
+
+```python
+def fill_df_time_gaps_with_nans(df: pl.DataFrame, time_col: str,
+                                max_gap_seconds: int) -> pl.DataFrame
+```
+
+Fill time gaps in a dataframe with NaN rows. This is very useful for plotting dataframes where time gaps should be visible.
+
+**Arguments**:
+
+- `df` - The input dataframe.
+- `time_col` - The name of the time column.
+- `max_gap_seconds` - The maximum gap in seconds to fill with NaN rows.
+
+
 ## `tum_esm_utils.datastructures`
 
 Datastructures not in the standard library.
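A minimal sketch of how `fill_df_time_gaps_with_nans` is meant to be used (illustrative only, not part of the patch; it mirrors the test added in patch 16):

```python
import datetime
import polars as pl
import tum_esm_utils.dataframes

df = pl.DataFrame(
    {
        "utc": [
            datetime.datetime(2024, 1, 1, 0, 0, 0),
            datetime.datetime(2024, 1, 1, 0, 0, 10),  # 10 s gap to the row before
            datetime.datetime(2024, 1, 1, 0, 0, 11),
        ],
        "value": [1, 2, 3],
    }
)

# a null row is inserted one second before the row that follows the gap,
# so line plots show a visible break instead of a connecting line
filled = tum_esm_utils.dataframes.fill_df_time_gaps_with_nans(
    df, time_col="utc", max_gap_seconds=5
)
assert filled.shape[0] == 4
assert filled["value"][1] is None  # the inserted row at 00:00:09
```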
diff --git a/docs/scripts/sync-docs.py b/docs/scripts/sync-docs.py index 2cd5fda..696f7bd 100644 --- a/docs/scripts/sync-docs.py +++ b/docs/scripts/sync-docs.py @@ -25,6 +25,7 @@ "column.astronomy", "column.averaging_kernel", "column.ncep_profiles", + "dataframes", "datastructures", "decorators", "em27", From f1eecddc59a7b3c0f70b82618af84767a94cbc95 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Thu, 20 Nov 2025 00:20:36 +0100 Subject: [PATCH 16/20] Add function `fill_df_time_gaps_with_nans` (3) Test --- tests/test_dataframes.py | 28 ++++++++++++++++++++++++++++ tum_esm_utils/plotting.py | 10 +++++----- 2 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 tests/test_dataframes.py diff --git a/tests/test_dataframes.py b/tests/test_dataframes.py new file mode 100644 index 0000000..b4c386a --- /dev/null +++ b/tests/test_dataframes.py @@ -0,0 +1,28 @@ +import datetime +import pytest +import polars as pl +import tum_esm_utils.dataframes + + +@pytest.mark.order(3) +@pytest.mark.quick +def test_fill_df_time_gaps_with_nans() -> None: + # Create a dataframe with a time gap + df = pl.DataFrame( + { + "utc": [ + datetime.datetime(2024, 1, 1, 0, 0, 0), + datetime.datetime(2024, 1, 1, 0, 0, 10), # gap > 5 seconds + datetime.datetime(2024, 1, 1, 0, 0, 11), + ], + "value": [1, 2, 3], + } + ) + result = tum_esm_utils.dataframes.fill_df_time_gaps_with_nans( + df, time_col="utc", max_gap_seconds=5 + ) + # There should be a NaN row inserted after the first row + assert result.shape[0] == 4 + # The inserted row should have None in 'value' + inserted_row = result.filter(pl.col("utc").eq(datetime.datetime(2024, 1, 1, 0, 0, 9))) + assert inserted_row["value"][0] is None diff --git a/tum_esm_utils/plotting.py b/tum_esm_utils/plotting.py index 9761437..b1ecf17 100644 --- a/tum_esm_utils/plotting.py +++ b/tum_esm_utils/plotting.py @@ -62,7 +62,7 @@ def create_figure( suptitle_y: float = 0.97, padding: float = 2, dpi: int = 250, -) -> Generator[plt.Figure, None, None]: # pyright: ignore[reportPrivateImportUsage] +) -> Generator[plt.Figure, None, None]: # pyright: ignore[reportPrivateImportUsage] """Create a figure for plotting. Usage: @@ -93,13 +93,13 @@ def create_figure( def add_subplot( - fig: plt.Figure, # pyright: ignore[reportPrivateImportUsage] + fig: plt.Figure, # pyright: ignore[reportPrivateImportUsage] position: tuple[int, int, int] | matplotlib.gridspec.SubplotSpec, title: Optional[str] = None, xlabel: Optional[str] = None, ylabel: Optional[str] = None, **kwargs: dict[str, Any], -) -> plt.Axes: # pyright: ignore[reportPrivateImportUsage] +) -> plt.Axes: # pyright: ignore[reportPrivateImportUsage] """Add a subplot to a figure. 
Use a gridspec for more control: @@ -125,7 +125,7 @@ def add_subplot( Raises: ValueError: If the index of the subplot is invalid.""" - axis: plt.Axes # pyright: ignore[reportPrivateImportUsage] + axis: plt.Axes # pyright: ignore[reportPrivateImportUsage] if isinstance(position, matplotlib.gridspec.SubplotSpec): axis = fig.add_subplot(position, **kwargs) else: @@ -147,7 +147,7 @@ def add_subplot( def add_colorpatch_legend( - fig: plt.Figure, # pyright: ignore[reportPrivateImportUsage] + fig: plt.Figure, # pyright: ignore[reportPrivateImportUsage] handles: list[ tuple[ str, From 8984ef52ab380f108c8d6b4d8dd765769e4bfeda Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Thu, 20 Nov 2025 00:32:23 +0100 Subject: [PATCH 17/20] Add functions to convert julian day numbers to datetimes and back (1) Implement --- docs/pages/api-reference.md | 44 ++++++++++++++++++++++++++++++ tum_esm_utils/timing.py | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md index e96c22a..6bfde4c 100644 --- a/docs/pages/api-reference.md +++ b/docs/pages/api-reference.md @@ -2352,6 +2352,50 @@ with timed_section("my_section"): ``` +##### `datetime_to_julian_day_number` + +```python +def datetime_to_julian_day_number(dt: datetime.datetime) -> float +``` + +Convert a datetime to a Julian Day Number (JDN). + +The Julian Day Number is the continuous count of days since the beginning +of the Julian Period on January 1, 4713 BC. This function was validated against +https://ssd.jpl.nasa.gov/tools/jdc/#/cd + +**Arguments**: + +- `dt` - The datetime to convert. + + +**Returns**: + + The Julian Day Number as a float. + + +##### `julian_day_number_to_datetime` + +```python +def julian_day_number_to_datetime(jdn: float) -> datetime.datetime +``` + +Convert a Julian Day Number (JDN) to a datetime. + +The Julian Day Number is the continuous count of days since the beginning +of the Julian Period on January 1, 4713 BC. This function was validated against +https://ssd.jpl.nasa.gov/tools/jdc/#/cd + +**Arguments**: + +- `jdn` - The Julian Day Number to convert. + + +**Returns**: + + The corresponding datetime. + + ## `tum_esm_utils.validators` Implements validator utils for use with pydantic models. diff --git a/tum_esm_utils/timing.py b/tum_esm_utils/timing.py index 321b541..2339d1b 100644 --- a/tum_esm_utils/timing.py +++ b/tum_esm_utils/timing.py @@ -362,3 +362,56 @@ def timed_section(label: str) -> Generator[None, None, None]: yield end = time.time() print(f"{label}: {end - start:6.3f}s") + + +def datetime_to_julian_day_number(dt: datetime.datetime) -> float: + """Convert a datetime to a Julian Day Number (JDN). + + The Julian Day Number is the continuous count of days since the beginning + of the Julian Period on January 1, 4713 BC. This function was validated against + https://ssd.jpl.nasa.gov/tools/jdc/#/cd + + Args: + dt: The datetime to convert. + + Returns: + The Julian Day Number as a float. + """ + + JDN_BASE_DT = datetime.datetime(2000, 1, 1, 12, 0, 0) # JDN 2451545.0 + JDN_BASE_NUM = 2451545.0 + + delta = dt - JDN_BASE_DT + + return JDN_BASE_NUM + delta.days + (delta.seconds + delta.microseconds / 1_000_000) / 86400.0 + + +def julian_day_number_to_datetime(jdn: float) -> datetime.datetime: + """Convert a Julian Day Number (JDN) to a datetime. + + The Julian Day Number is the continuous count of days since the beginning + of the Julian Period on January 1, 4713 BC. 
This function was validated against
+    https://ssd.jpl.nasa.gov/tools/jdc/#/cd
+
+    Args:
+        jdn: The Julian Day Number to convert.
+
+    Returns:
+        The corresponding datetime.
+    """
+
+    JDN_BASE_DT = datetime.datetime(2000, 1, 1, 12, 0, 0)  # JDN 2451545.0
+    JDN_BASE_NUM = 2451545.0
+
+    delta_days = jdn - JDN_BASE_NUM
+    delta_whole_days = int(delta_days)
+    delta_fractional_day = delta_days - delta_whole_days
+
+    delta_seconds = int(delta_fractional_day * 86400)
+    delta_microseconds = int((delta_fractional_day * 86400 - delta_seconds) * 1_000_000)
+
+    return JDN_BASE_DT + datetime.timedelta(
+        days=delta_whole_days,
+        seconds=delta_seconds,
+        microseconds=delta_microseconds,
+    )

From edffc49c1d88d314702c09a8f298ec6d15073244 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Thu, 20 Nov 2025 00:32:33 +0100
Subject: [PATCH 18/20] Add functions to convert julian day numbers to
 datetimes and back (2) Test

---
 tests/test_timing.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/test_timing.py b/tests/test_timing.py
index 81c5a45..388ddee 100644
--- a/tests/test_timing.py
+++ b/tests/test_timing.py
@@ -265,3 +265,21 @@ def test_date_span_intersection() -> None:
         assert tum_esm_utils.timing.date_span_intersection(dt_span_2, dt_span_1) == expected, (
             f"Test case {i}b failed"
         )
+
+
+@pytest.mark.order(3)
+@pytest.mark.quick
+def test_datetime_to_julian_day_number() -> None:
+    test_cases = [
+        (datetime.datetime(2000, 1, 1, 12, 0, 0), 2451545.0),
+        (datetime.datetime(1995, 7, 23, 14, 36), 2449922.1083333),
+        (datetime.datetime(1976, 3, 3, 1, 36), 2442840.5666667),
+        (datetime.datetime(1999, 12, 3, 23, 36), 2451516.4833333),
+        (datetime.datetime(2024, 6, 3, 18, 1), 2460465.2506944),
+    ]
+    for dt, jdn in test_cases:
+        calculated_jdn = tum_esm_utils.timing.datetime_to_julian_day_number(dt)
+        assert abs(calculated_jdn - jdn) < 1e-6, f"Failed for {dt}"
+
+        calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(jdn)
+        assert abs((calculated_dt - dt).total_seconds()) < 1, f"Failed for {jdn}"

From f4e053cf2b41cd2dda986dd40b93991e52be8f44 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Thu, 20 Nov 2025 00:45:14 +0100
Subject: [PATCH 19/20] Add functions to convert julian day numbers to
 datetimes and back (3) Also support MJD and MJD2K

---
 docs/pages/api-reference.md | 29 +++++++++++------
 tum_esm_utils/timing.py     | 62 ++++++++++++++++++++++++++-----------
 2 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md
index 6bfde4c..d6d4ced 100644
--- a/docs/pages/api-reference.md
+++ b/docs/pages/api-reference.md
@@ -2355,18 +2355,22 @@ with timed_section("my_section"):
 ```
 
 
 ##### `datetime_to_julian_day_number`
 
 ```python
-def datetime_to_julian_day_number(dt: datetime.datetime) -> float
+def datetime_to_julian_day_number(
+        dt: datetime.datetime, variant: Literal["JDN", "MJD",
+                                                "MJD2K"]) -> float
 ```
 
-Convert a datetime to a Julian Day Number (JDN).
+Convert a datetime to a Julian Day Number (JDN) or MJD/MJD2K.
 
-The Julian Day Number is the continuous count of days since the beginning
-of the Julian Period on January 1, 4713 BC. This function was validated against
-https://ssd.jpl.nasa.gov/tools/jdc/#/cd
+The Julian Day Number (JDN) is the continuous count of days since the beginning
+of the Julian Period on January 1, 4713 BC. The modified variant MJD starts
+counting from November 17, 1858 at 00:00:00 UTC, and MJD2K starts counting
+from January 1, 2000 at 00:00:00 UTC.
 
**Arguments**:
 
 - `dt` - The datetime to convert.
+- `variant` - The variant of the Julian Day Number ("JDN", "MJD", "MJD2K").
 
 
 **Returns**:
 
@@ -2377,18 +2381,25 @@
 ##### `julian_day_number_to_datetime`
 
 ```python
-def julian_day_number_to_datetime(jdn: float) -> datetime.datetime
+def julian_day_number_to_datetime(
+        jdn: float, variant: Literal["JDN", "MJD",
+                                     "MJD2K"]) -> datetime.datetime
 ```
 
-Convert a Julian Day Number (JDN) to a datetime.
+Convert a Julian Day Number (JDN) or MJD/MJD2K to a datetime.
+
+The Julian Day Number (JDN) is the continuous count of days since the beginning
+of the Julian Period on January 1, 4713 BC. The modified variant MJD starts
+counting from November 17, 1858 at 00:00:00 UTC, and MJD2K starts counting
+from January 1, 2000 at 00:00:00 UTC.
 
-The Julian Day Number is the continuous count of days since the beginning
-of the Julian Period on January 1, 4713 BC. This function was validated against
+This function was validated against
 https://ssd.jpl.nasa.gov/tools/jdc/#/cd
 
 **Arguments**:
 
 - `jdn` - The Julian Day Number to convert.
+- `variant` - The variant of the Julian Day Number ("JDN", "MJD", "MJD2K").
 
 **Returns**:
 
   The corresponding datetime.
 
diff --git a/tum_esm_utils/timing.py b/tum_esm_utils/timing.py
index 2339d1b..3f439ca 100644
--- a/tum_esm_utils/timing.py
+++ b/tum_esm_utils/timing.py
@@ -4,7 +4,7 @@
 `clear_alarm`, `wait_for_condition`, `ExponentialBackoff`"""
 
 import os
-from typing import Any, Callable, Generator, Optional
+from typing import Any, Callable, Generator, Literal, Optional
 import contextlib
 import datetime
 import re
@@ -364,53 +364,79 @@ def timed_section(label: str) -> Generator[None, None, None]:
     print(f"{label}: {end - start:6.3f}s")
 
 
-def datetime_to_julian_day_number(dt: datetime.datetime) -> float:
-    """Convert a datetime to a Julian Day Number (JDN).
+_JDN_BASE_DTS = {
+    "JDN": datetime.datetime(2000, 1, 1, 12, 0, 0),
+    "MJD": datetime.datetime(1858, 11, 17, 0, 0, 0),
+    "MJD2K": datetime.datetime(2000, 1, 1, 0, 0, 0),
+}
+_JDN_BASE_NUMS = {
+    "JDN": 2451545.0,
+    "MJD": 0.0,
+    "MJD2K": 0.0,
+}
 
-    The Julian Day Number is the continuous count of days since the beginning
-    of the Julian Period on January 1, 4713 BC. This function was validated against
-    https://ssd.jpl.nasa.gov/tools/jdc/#/cd
+
+def datetime_to_julian_day_number(
+    dt: datetime.datetime,
+    variant: Literal["JDN", "MJD", "MJD2K"],
+) -> float:
+    """Convert a datetime to a Julian Day Number (JDN) or MJD/MJD2K.
+
+    The Julian Day Number (JDN) is the continuous count of days since the beginning
+    of the Julian Period on January 1, 4713 BC. The modified variant MJD starts
+    counting from November 17, 1858 at 00:00:00 UTC, and MJD2K starts counting
+    from January 1, 2000 at 00:00:00 UTC.
 
     Args:
         dt: The datetime to convert.
+        variant: The variant of the Julian Day Number ("JDN", "MJD", "MJD2K").
 
     Returns:
         The Julian Day Number as a float.
""" - JDN_BASE_DT = datetime.datetime(2000, 1, 1, 12, 0, 0) # JDN 2451545.0 - JDN_BASE_NUM = 2451545.0 + assert variant in _JDN_BASE_DTS, f"Invalid variant: {variant}" + base_dt = _JDN_BASE_DTS[variant] + base_num = _JDN_BASE_NUMS[variant] - delta = dt - JDN_BASE_DT + delta = dt - base_dt + return base_num + delta.days + (delta.seconds + delta.microseconds / 1_000_000) / 86400.0 - return JDN_BASE_NUM + delta.days + (delta.seconds + delta.microseconds / 1_000_000) / 86400.0 +def julian_day_number_to_datetime( + jdn: float, + variant: Literal["JDN", "MJD", "MJD2K"], +) -> datetime.datetime: + """Convert a Julian Day Number (JDN) or MJD/MJD2K to a datetime. -def julian_day_number_to_datetime(jdn: float) -> datetime.datetime: - """Convert a Julian Day Number (JDN) to a datetime. + The Julian Day Number (JDN) is the continuous count of days since the beginning + of the Julian Period on January 1, 4713 BC. THe modified variant MJD starts + counting from November 17, 1858 at 00:00:00 UTC, and MJD2K starts counting + from January 1, 2000 at 00:00:00 UTC. - The Julian Day Number is the continuous count of days since the beginning - of the Julian Period on January 1, 4713 BC. This function was validated against + This function was validated against https://ssd.jpl.nasa.gov/tools/jdc/#/cd Args: jdn: The Julian Day Number to convert. + variant: The variant of the Julian Day Number ("JDN", "MJD", "MJD2K"). Returns: The corresponding datetime. """ - JDN_BASE_DT = datetime.datetime(2000, 1, 1, 12, 0, 0) # JDN 2451545.0 - JDN_BASE_NUM = 2451545.0 + assert variant in _JDN_BASE_DTS, f"Invalid variant: {variant}" + base_dt = _JDN_BASE_DTS[variant] + base_num = _JDN_BASE_NUMS[variant] - delta_days = jdn - JDN_BASE_NUM + delta_days = jdn - base_num delta_whole_days = int(delta_days) delta_fractional_day = delta_days - delta_whole_days delta_seconds = int(delta_fractional_day * 86400) delta_microseconds = int((delta_fractional_day * 86400 - delta_seconds) * 1_000_000) - return JDN_BASE_DT + datetime.timedelta( + return base_dt + datetime.timedelta( days=delta_whole_days, seconds=delta_seconds, microseconds=delta_microseconds, From 8f74d97d6ad421b1f473cd5f407fca5e75307fd2 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Thu, 20 Nov 2025 00:45:22 +0100 Subject: [PATCH 20/20] Add functions to convert julian day numbers to datetimes and back (4) Also test MJD and MJD2K --- tests/test_timing.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/test_timing.py b/tests/test_timing.py index 388ddee..9a28d62 100644 --- a/tests/test_timing.py +++ b/tests/test_timing.py @@ -278,8 +278,22 @@ def test_datetime_to_julian_day_number() -> None: (datetime.datetime(2024, 6, 3, 18, 1), 2460465.2506944), ] for dt, jdn in test_cases: - calculated_jdn = tum_esm_utils.timing.datetime_to_julian_day_number(dt) + # JDN + calculated_jdn = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="JDN") + calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(jdn, variant="JDN") assert abs(calculated_jdn - jdn) < 1e-6, f"Failed for {dt}" - - calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(jdn) assert abs((calculated_dt - dt).total_seconds()) < 1, f"Failed for {jdn}" + + # MJD + mjd = jdn - 2400000.5 + calculated_mjd = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="MJD") + calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(mjd, variant="MJD") + assert abs(calculated_mjd - mjd) < 1e-6, f"Failed for {dt}" + assert 
abs((calculated_dt - dt).total_seconds()) < 1, f"Failed for {mjd}" + + # MJD2K + mjd2k = jdn - 2451544.5 + calculated_mjd2k = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="MJD2K") + calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(mjd2k, variant="MJD2K") + assert abs(calculated_mjd2k - mjd2k) < 1e-6, f"Failed for {dt}" + assert abs((calculated_dt - dt).total_seconds()) < 1, f"Failed for {mjd2k}"
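To make the fixed offsets between the three variants concrete, here is a short worked sketch using the functions from these patches; the offset constants are the standard definitions already used in the test above (MJD = JDN - 2400000.5, MJD2K = JDN - 2451544.5):

```python
import datetime
import tum_esm_utils.timing

# J2000.0 epoch: 2000-01-01 12:00:00 UTC is JDN 2451545.0 by definition
dt = datetime.datetime(2000, 1, 1, 12, 0, 0)
jdn = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="JDN")
assert jdn == 2451545.0

# the variants are constant offsets of the JDN:
#   MJD   = JDN - 2400000.5  (epoch 1858-11-17 00:00:00 UTC)
#   MJD2K = JDN - 2451544.5  (epoch 2000-01-01 00:00:00 UTC)
mjd = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="MJD")
mjd2k = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="MJD2K")
assert mjd == 51544.5
assert mjd2k == 0.5

# round trip: near 2.45e6 a float64 resolves steps of about 5e-10 days
# (roughly 40 microseconds), which is why the tests above allow a
# 1-second tolerance on reconstructed datetimes
assert tum_esm_utils.timing.julian_day_number_to_datetime(jdn, variant="JDN") == dt
```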