From 8fac667551a73dbd0b47d93188f856f051423fa8 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:20:38 +0100
Subject: [PATCH 01/20] #33 Rebinning function for 1d and 2d tensors (1)
 Implement

---
 tum_esm_utils/rebinning.py | 59 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 tum_esm_utils/rebinning.py

diff --git a/tum_esm_utils/rebinning.py b/tum_esm_utils/rebinning.py
new file mode 100644
index 0000000..532af48
--- /dev/null
+++ b/tum_esm_utils/rebinning.py
@@ -0,0 +1,59 @@
+"""Functions to rebin binned data points
+
+Implements: `rebin_1d`, `rebin_2d`.
+
+This requires you to install this utils library with the optional `modeling` dependency:
+
+```bash
+pip install "tum_esm_utils[modeling]"
+# or
+pdm add "tum_esm_utils[modeling]"
+```"""
+
+from typing import Any
+import numpy as np
+
+
+def _rebin_first_dimension(
+    arr: np.ndarray[Any, Any],
+    new_bin_count: int,
+) -> np.ndarray[Any, Any]:
+    """Internal rebinning function."""
+
+    old_bin_count = len(arr)
+    new_bins = np.zeros(shape=(new_bin_count, *arr.shape[1:]), dtype=np.float64)
+    scale = old_bin_count / new_bin_count
+    for i in range(new_bin_count):
+        start = i * scale
+        end = (i + 1) * scale
+        left = int(np.floor(start))
+        right = int(np.floor(end))
+
+        if left == right:
+            new_bins[i] += arr[left] * (end - start)
+        else:
+            new_bins[i] += arr[left] * (left + 1 - start)
+            for j in range(left + 1, right):
+                new_bins[i] += arr[j]
+            if right < old_bin_count:
+                new_bins[i] += arr[right] * (end - right)
+    return new_bins
+
+
+def rebin_1d(
+    arr: np.ndarray[Any, Any],
+    new_bin_count: int,
+) -> np.ndarray[Any, Any]:
+    """Rebins a 1D array to a new number of bins."""
+    if len(arr.shape) != 1:
+        raise ValueError("Input array must be 1D.")
+    return _rebin_first_dimension(arr, new_bin_count)
+
+
+def rebin_2d(
+    arr: np.ndarray[Any, Any],
+    new_x_bins: int,
+    new_y_bins: int,
+) -> np.ndarray[Any, Any]:
+    """Rebins a 2D array to a new number of bins in the x and y dimensions."""
+    return _rebin_first_dimension(_rebin_first_dimension(arr.T, new_x_bins).T, new_y_bins)

From d7b7a6aa2578bab50057bddca4e7efa2cfa87ff8 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:20:58 +0100
Subject: [PATCH 02/20] #33 Rebinning function for 1d and 2d tensors (2) Test

---
 tests/test_rebinning.py | 115 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 tests/test_rebinning.py

diff --git a/tests/test_rebinning.py b/tests/test_rebinning.py
new file mode 100644
index 0000000..0c97ee2
--- /dev/null
+++ b/tests/test_rebinning.py
@@ -0,0 +1,115 @@
+import time
+import numpy as np
+import pytest
+import tum_esm_utils.rebinning
+
+
+@pytest.mark.order(3)
+@pytest.mark.quick
+def test_rebin_1d() -> None:
+    a = np.array([1, 2, 3, 4])
+
+    b1 = tum_esm_utils.rebinning.rebin_1d(a, 1)
+    b1_expected = np.array([10])
+    assert np.allclose(b1, b1_expected), f"Expected {b1_expected}, got {b1}"
+    assert np.isclose(np.sum(a), np.sum(b1)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b1)}"
+
+    b2 = tum_esm_utils.rebinning.rebin_1d(a, 2)
+    b2_expected = np.array([3, 7])
+    assert np.allclose(b2, b2_expected), f"Expected {b2_expected}, got {b2}"
+    assert np.isclose(np.sum(a), np.sum(b2)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b2)}"
+
+    b3 = tum_esm_utils.rebinning.rebin_1d(a, 3)
+    b3_expected = np.array([1.6666, 3.3333, 5])
+    assert np.allclose(b3, b3_expected, atol=0.001), f"Expected {b3_expected}, got {b3}"
+    assert np.isclose(np.sum(a), np.sum(b3)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b3)}"
+
+    b4 = tum_esm_utils.rebinning.rebin_1d(a, 4)
+    b4_expected = np.array([1, 2, 3, 4])
+    assert np.allclose(b4, b4_expected), f"Expected {b4_expected}, got {b4}"
+    assert np.isclose(np.sum(a), np.sum(b4)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b4)}"
+
+    b5 = tum_esm_utils.rebinning.rebin_1d(a, 5)
+    b5_expected = np.array([0.8, 1.4, 2, 2.6, 3.2])
+    assert np.allclose(b5, b5_expected), f"Expected {b5_expected}, got {b5}"
+    assert np.isclose(np.sum(a), np.sum(b5)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b5)}"
+
+    b6 = tum_esm_utils.rebinning.rebin_1d(a, 6)
+    b6_expected = np.array([0.6666, 1, 1.3333, 2, 2.3333, 2.6666])
+    assert np.allclose(b6, b6_expected, atol=0.001), f"Expected {b6_expected}, got {b6}"
+    assert np.isclose(np.sum(a), np.sum(b6)), f"Sum mismatch: {np.sum(a)} vs {np.sum(b6)}"
+
+
+@pytest.mark.order(3)
+@pytest.mark.quick
+def test_rebin_2d() -> None:
+    a = np.array(
+        [
+            [1, 2, 3, 4],
+            [5, 6, 7, 8],
+            [9, 10, 11, 12],
+            [13, 14, 15, 16],
+            [17, 18, 19, 20],
+            [21, 22, 23, 24],
+        ]
+    )
+
+    b23 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=2, new_y_bins=3)
+    b23_expected = np.array(
+        [
+            [1 + 2 + 5 + 6, 3 + 4 + 7 + 8],
+            [9 + 10 + 13 + 14, 11 + 12 + 15 + 16],
+            [17 + 18 + 21 + 22, 19 + 20 + 23 + 24],
+        ]
+    )
+    assert np.allclose(b23, b23_expected), f"Expected {b23_expected}, got {b23}"
+
+    b13 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=1, new_y_bins=3)
+    b13_expected = np.array(
+        [
+            [1 + 2 + 5 + 6 + 3 + 4 + 7 + 8],
+            [9 + 10 + 13 + 14 + 11 + 12 + 15 + 16],
+            [17 + 18 + 21 + 22 + 19 + 20 + 23 + 24],
+        ]
+    )
+    assert np.allclose(b13, b13_expected), f"Expected {b13_expected}, got {b13}"
+
+    b22 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=2, new_y_bins=2)
+    b22_expected = np.array(
+        [
+            [1 + 2 + 5 + 6 + 9 + 10, 3 + 4 + 7 + 8 + 11 + 12],
+            [13 + 14 + 17 + 18 + 21 + 22, 15 + 16 + 19 + 20 + 23 + 24],
+        ]
+    )
+    assert np.allclose(b22, b22_expected), f"Expected {b22_expected}, got {b22}"
+
+    b12 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=1, new_y_bins=2)
+    b12_expected = np.array(
+        [
+            [1 + 2 + 5 + 6 + 9 + 10 + 3 + 4 + 7 + 8 + 11 + 12],
+            [13 + 14 + 17 + 18 + 21 + 22 + 15 + 16 + 19 + 20 + 23 + 24],
+        ]
+    )
+    assert np.allclose(b12, b12_expected), f"Expected {b12_expected}, got {b12}"
+
+    b11 = tum_esm_utils.rebinning.rebin_2d(a, new_x_bins=1, new_y_bins=1)
+    b11_expected = np.array([[np.sum(a)]])
+    assert np.allclose(b11, b11_expected), f"Expected {b11_expected}, got {b11}"
+
+    # test rebinning performance
+
+    large_a = np.random.rand(1000, 1000)
+    t1 = time.time()
+    tum_esm_utils.rebinning.rebin_2d(large_a, new_x_bins=100, new_y_bins=100)
+    t2 = time.time()
+    print(f"Rebinning 1000x1000 to 100x100 took {t2 - t1:.4f} seconds.")
+    # Rebinning 1000x1000 to 100x100 took 0.0032 seconds (on an M4 Pro Chip)
+
+    large_a = np.random.rand(100, 100)
+    t1 = time.time()
+    tum_esm_utils.rebinning.rebin_2d(large_a, new_x_bins=1000, new_y_bins=1000)
+    t2 = time.time()
+    print(f"Rebinning 100x100 to 1000x1000 took {t2 - t1:.4f} seconds.")
+    # Rebinning 100x100 to 1000x1000 took 0.0062 seconds (on an M4 Pro Chip)
+
+    # assert False  # uncomment this assert to see the performance prints
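A minimal usage sketch of the rebinning helpers from the two patches above (illustrative only, not part of the patch series; the expected values mirror the assertions in `tests/test_rebinning.py`):

```python
import numpy as np
import tum_esm_utils.rebinning

# fractional bin edges are split proportionally, so the total is preserved
a = np.array([1.0, 2.0, 3.0, 4.0])
b = tum_esm_utils.rebinning.rebin_1d(a, 3)  # ~ [1.667, 3.333, 5.0]
assert np.isclose(np.sum(a), np.sum(b))

# rebin_2d rebins along both axes: a (6, 4) array becomes (3, 2)
m = np.arange(24, dtype=np.float64).reshape(6, 4)
r = tum_esm_utils.rebinning.rebin_2d(m, new_x_bins=2, new_y_bins=3)
assert r.shape == (3, 2)
```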
From 1dd68fac7204351618312e6389f62f1c732affa Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:21:10 +0100
Subject: [PATCH 03/20] #33 Rebinning function for 1d and 2d tensors (3) Add
 numpy to modeling dependencies

---
 pdm.lock                  | 10 +++++-----
 pyproject.toml            |  9 +++++++--
 tum_esm_utils/__init__.py |  7 +++++++
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/pdm.lock b/pdm.lock
index 5ff6766..7acf716 100644
--- a/pdm.lock
+++ b/pdm.lock
@@ -5,7 +5,7 @@
 groups = ["default", "all", "dev", "em27", "modeling", "opus", "plotting", "polars"]
 strategy = ["inherit_metadata"]
 lock_version = "4.5.0"
-content_hash = "sha256:d2b3bc2a264b28c65499ecec63701101208fdd567951e29956d0fb2f00cb072b"
+content_hash = "sha256:1363e6ec1e3d3f69b1c8d96c44e2fd1697192c0680c7b2f3bb8f76be72d3b510"
 
 [[metadata.targets]]
 requires_python = "~=3.10"
@@ -1325,8 +1325,8 @@ files = [
 
 [[package]]
 name = "requests"
-version = "2.32.4"
-requires_python = ">=3.8"
+version = "2.32.5"
+requires_python = ">=3.9"
 summary = "Python HTTP for Humans."
 groups = ["default", "dev"]
 dependencies = [
@@ -1336,8 +1336,8 @@ dependencies = [
     "urllib3<3,>=1.21.1",
 ]
 files = [
-    {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"},
-    {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"},
+    {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"},
+    {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"},
 ]
 
 [[package]]
diff --git a/pyproject.toml b/pyproject.toml
index 24e312a..c127f41 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ authors = [
 ]
 dependencies = [
     "filelock>=3.18.0",
-    "requests>=2.32.3",
+    "requests>=2.32.5",
     "psutil>=7.1.2",
    "pydantic>=2.11.3",
     "pytz>=2025.2",
@@ -68,7 +68,12 @@ all = [
     "scipy>=1.15.2",
     "skyfield>=1.53",
 ]
-modeling = ["polars>=1.16.0", "scipy>=1.15.2", "skyfield>=1.53"]
+modeling = [
+    "polars>=1.16.0",
+    "scipy>=1.15.2",
+    "skyfield>=1.53",
+    "numpy>=2.2.6",
+]
 
 [build-system]
 requires = ["pdm-backend"]
diff --git a/tum_esm_utils/__init__.py b/tum_esm_utils/__init__.py
index a623692..5714a07 100644
--- a/tum_esm_utils/__init__.py
+++ b/tum_esm_utils/__init__.py
@@ -27,6 +27,7 @@
     files as files,
     mathematics as mathematics,
     processes as processes,
+    rebinning as rebinning,
     shell as shell,
     sqlitelock as sqlitelock,
     system as system,
@@ -61,3 +62,9 @@
         from . import column as column
     except ImportError:
         pass
+
+    # requires extra "modeling"
+    try:
+        from . import rebinning as rebinning
+    except ImportError:
+        pass

From 145e9a7854c76093a5265d848536a296ce46d181 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:22:12 +0100
Subject: [PATCH 04/20] #33 Rebinning function for 1d and 2d tensors (4)
 Update API reference

---
 docs/pages/api-reference.md | 35 +++++++++++++++++++++++++++++++++++
 docs/scripts/sync-docs.py   |  1 +
 2 files changed, 36 insertions(+)

diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md
index e3a9144..e13ddbc 100644
--- a/docs/pages/api-reference.md
+++ b/docs/pages/api-reference.md
@@ -1376,6 +1376,41 @@ terminated forcefully after the given timeout (in seconds).
   The list of terminated PIDs.
 
 
+## `tum_esm_utils.rebinning`
+
+Functions to rebin binned data points
+
+Implements: `rebin_1d`, `rebin_2d`.
+
+This requires you to install this utils library with the optional `modeling` dependency:
+
+```bash
+pip install "tum_esm_utils[modeling]"
+# or
+pdm add "tum_esm_utils[modeling]"
+```
+
+
+##### `rebin_1d`
+
+```python
+def rebin_1d(arr: np.ndarray[Any, Any],
+             new_bin_count: int) -> np.ndarray[Any, Any]
+```
+
+Rebins a 1D array to a new number of bins.
+
+
+##### `rebin_2d`
+
+```python
+def rebin_2d(arr: np.ndarray[Any, Any], new_x_bins: int,
+             new_y_bins: int) -> np.ndarray[Any, Any]
+```
+
+Rebins a 2D array to a new number of bins in the x and y dimensions.
+
+
 ## `tum_esm_utils.shell`
 
 Implements custom logging functionality, because the
diff --git a/docs/scripts/sync-docs.py b/docs/scripts/sync-docs.py
index f108afd..c749d27 100644
--- a/docs/scripts/sync-docs.py
+++ b/docs/scripts/sync-docs.py
@@ -35,6 +35,7 @@
     "opus.http_interface",
     "plotting",
     "processes",
+    "rebinning",
     "shell",
     "sqlitelock",
     "system",

From a60f6b4246da46c48fe23636d5dddc6b8c019bbc Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 00:50:32 +0100
Subject: [PATCH 05/20] #31 More user friendly netcdf file API (1) Implement

---
 tum_esm_utils/netcdf.py | 183 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)
 create mode 100644 tum_esm_utils/netcdf.py

diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py
new file mode 100644
index 0000000..61ec73a
--- /dev/null
+++ b/tum_esm_utils/netcdf.py
@@ -0,0 +1,183 @@
+"""A thin wrapper over the netCDF4 library to make working with NetCDF files easier.
+
+Implements: `NetCDFFile`
+
+This requires you to install this utils library with the optional `netcdf` dependencies:
+
+```bash
+pip install "tum_esm_utils[netcdf]"
+# or
+pdm add "tum_esm_utils[netcdf]"
+```"""
+
+from typing import Any, Literal, Optional
+import os
+import netCDF4 as nc
+
+
+class NetCDFFile:
+    def __init__(
+        self,
+        filepath: str,
+        parallel: bool = False,
+        diskless: bool = True,
+        mode: Literal["w", "a"] = "w",
+    ) -> None:
+        """A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier.
+
+        If writing to a new file, it will first write to a temporary ".tmp.nc" file and rename it to the final
+        filepath when closing the file. This ensures that the final filepath will only exist if the file
+        was written completely. In append mode, the filepath is not changed."""
+
+        self.tmp_filepath = filepath[:-3] + ".tmp.nc"
+        self.filepath = filepath
+        self.mode = mode
+
+        if mode == "w" and os.path.isfile(self.tmp_filepath):
+            os.remove(self.tmp_filepath)
+
+        self.ds = nc.Dataset(
+            self.tmp_filepath if mode == "w" else self.filepath,
+            mode=mode,
+            format="NETCDF4",
+            parallel=parallel,
+            diskless=diskless,
+            persist=True,
+        )
+        self.dimensions: dict[str, nc.Dimension] = {}
+        self.variables: dict[str, nc.Variable[Any]] = {}
+        self.attributes: dict[str, str] = {}
+
+        if mode == "a":
+            for dim_name, dim in self.ds.dimensions.items():
+                self.dimensions[dim_name] = dim
+            for var_name, var in self.ds.variables.items():
+                self.variables[var_name] = var
+            for attr_name in self.ds.ncattrs():
+                self.attributes[attr_name] = self.ds.getncattr(attr_name)
+
+    def create_dimension(self, name: str, size: int) -> None:
+        """Create a new dimension in the NetCDF file.
+
+        Raises:
+            ValueError: If the dimension already exists."""
+
+        if name in self.dimensions:
+            raise ValueError(f"Dimension {name} already exists in the NetCDF file")
+
+        d = self.ds.createDimension(name, size)
+        assert isinstance(d, nc.Dimension)
+        self.dimensions[name] = d
+
+    def create_variable(
+        self,
+        name: str,
+        dimensions: tuple[nc.Dimension | str, ...],
+        units: str,
+        long_name: Optional[str] = None,
+        description: Optional[str] = None,
+        fill_value: Optional[float | int] = None,
+        chunk_dimensions: list[str] = [],
+        datatype: Literal["f4", "f8", "i4", "i8"] = "f4",
+    ) -> None:
+        """Create a new variable in the NetCDF file.
+ + Raises: + ValueError: If the variable already exists or if a dimension is not found.""" + if name in self.variables: + raise ValueError(f"Variable {name} already exists in the NetCDF file") + + object_dimensions: list[nc.Dimension] = [] + for dimension in dimensions: + if isinstance(dimension, str): + if dimension not in self.dimensions: + raise ValueError(f"Dimension {dimension} not found in the NetCDF file") + object_dimensions.append(self.dimensions[dimension]) + else: + if dimension.name not in self.dimensions: + raise ValueError(f"Dimension {dimension.name} not found in the NetCDF file") + object_dimensions.append(dimension) + + chunk_sizes = [dimension.size for dimension in object_dimensions] + for i, dimension in enumerate(object_dimensions): + if dimension.name in chunk_dimensions: + chunk_sizes[i] = 1 + + var: Any = self.ds.createVariable( # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + name, + datatype=datatype, + dimensions=object_dimensions, + zlib=(len(dimensions) > 1) or (name != object_dimensions[0].name), + complevel=2, + fill_value=fill_value, + chunksizes=chunk_sizes if len(chunk_dimensions) > 0 else None, + ) + var.units = units + if long_name is not None: + var.long_name = long_name + if description is not None: + var.description = description + self.variables[name] = var + + def import_dimension( + self, + dimension: nc.Dimension, + new_name: Optional[str] = None, + ) -> None: + """Import a dimension from another NetCDF file. + + Raises: + ValueError: If the dimension already exists.""" + + if dimension.name in self.dimensions: + raise ValueError(f"Dimension {dimension.name} already exists in the NetCDF file") + self.create_dimension(dimension.name if new_name is None else new_name, dimension.size) + + def import_variable( + self, + variable: nc.Variable, # type: ignore + new_name: Optional[str] = None, + ) -> None: + """Import a variable from another NetCDF file. + + Raises: + ValueError: If the variable already exists.""" + + if variable.name in self.variables: + raise ValueError(f"Variable {variable.name} already exists in the NetCDF file") + name = variable.name if new_name is None else new_name + self.create_variable( + name=name, + dimensions=variable.dimensions, + units=str(variable.units), + long_name=variable.long_name if hasattr(variable, "long_name") else None, # pyright: ignore[reportUnknownArgumentType] + description=variable.description if hasattr(variable, "description") else None, # pyright: ignore[reportUnknownArgumentType] + fill_value=float(variable.get_fill_value()), + ) + self.variables[name][:] = variable[:] + + def add_attribute(self, key: str, value: str) -> None: + """Add a global attribute to the NetCDF file. 
+ + Raises: + ValueError: If the attribute already exists.""" + + if key in self.attributes: + raise ValueError(f"Attribute {key} already exists in the NetCDF file") + self.attributes[key] = value + self.ds.setncattr(key, value) + + def close(self) -> None: + """Close the NetCDF file, possibly renaming the temporary file to the final filepath.""" + + self.ds.close() + if self.mode == "w": + if os.path.isfile(self.filepath): + os.remove(self.filepath) + os.rename(self.tmp_filepath, self.filepath) + + del self + + def __getitem__(self, key: str) -> nc.Variable: # type: ignore + """Get a variable from the NetCDF file.""" + return self.variables[key] From fe93b65aaf8545e86e66f6d8ed5e7e5817e78969 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 00:50:46 +0100 Subject: [PATCH 06/20] #31 More user friendly netcdf file API (2) Add dependencies --- pdm.lock | 76 ++++++++++++++++++++++++++++++++++++--- pyproject.toml | 9 ++--- tum_esm_utils/__init__.py | 6 ++++ 3 files changed, 81 insertions(+), 10 deletions(-) diff --git a/pdm.lock b/pdm.lock index 7acf716..c14ac03 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "all", "dev", "em27", "modeling", "opus", "plotting", "polars"] +groups = ["default", "all", "dev", "em27", "modeling", "netcdf", "opus", "plotting", "polars"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:1363e6ec1e3d3f69b1c8d96c44e2fd1697192c0680c7b2f3bb8f76be72d3b510" +content_hash = "sha256:175bedc668865a812f60a5bb238b5ad17a7e13438f1023d331ccc9c7f5169628" [[metadata.targets]] requires_python = "~=3.10" @@ -65,12 +65,55 @@ name = "certifi" version = "2025.6.15" requires_python = ">=3.7" summary = "Python package for providing Mozilla's CA Bundle." 
-groups = ["default", "all", "dev", "modeling"] +groups = ["default", "all", "dev", "modeling", "netcdf"] files = [ {file = "certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057"}, {file = "certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b"}, ] +[[package]] +name = "cftime" +version = "1.6.5" +requires_python = ">=3.10" +summary = "Time-handling functionality from netcdf4-python" +groups = ["all", "netcdf"] +dependencies = [ + "numpy>=1.21.2", +] +files = [ + {file = "cftime-1.6.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8ad81e8cb0eb873b33c3d1e22c6168163fdc64daa8f7aeb4da8092f272575f4d"}, + {file = "cftime-1.6.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12d95c6af852114a13301c5a61e41afdbd1542e72939c1083796f8418b9b8b0e"}, + {file = "cftime-1.6.5-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2659b7df700e27d9e3671f686ce474dfb5fc274966961edf996acc148dfa094a"}, + {file = "cftime-1.6.5-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:94cebdfcda6a985b8e69aed22d00d6b8aa1f421495adbdcff1d59b3e896d81e2"}, + {file = "cftime-1.6.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:179681b023349a2fe277ceccc89d4fc52c0dd105cb59b7187b5bc5d442875133"}, + {file = "cftime-1.6.5-cp310-cp310-win_amd64.whl", hash = "sha256:d8b9fdecb466879cfe8ca4472b229b6f8d0bb65e4ffd44266ae17484bac2cf38"}, + {file = "cftime-1.6.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:474e728f5a387299418f8d7cb9c52248dcd5d977b2a01de7ec06bba572e26b02"}, + {file = "cftime-1.6.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ab9e80d4de815cac2e2d88a2335231254980e545d0196eb34ee8f7ed612645f1"}, + {file = "cftime-1.6.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ad24a563784e4795cb3d04bd985895b5db49ace2cbb71fcf1321fd80141f9a52"}, + {file = "cftime-1.6.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a3cda6fd12c7fb25eff40a6a857a2bf4d03e8cc71f80485d8ddc65ccbd80f16a"}, + {file = "cftime-1.6.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:28cda78d685397ba23d06273b9c916c3938d8d9e6872a537e76b8408a321369b"}, + {file = "cftime-1.6.5-cp311-cp311-win_amd64.whl", hash = "sha256:93ead088e3a216bdeb9368733a0ef89a7451dfc1d2de310c1c0366a56ad60dc8"}, + {file = "cftime-1.6.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:eef25caed5ebd003a38719bd3ff8847cd52ef2ea56c3ebdb2c9345ba131fc7c5"}, + {file = "cftime-1.6.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c87d2f3b949e45463e559233c69e6a9cf691b2b378c1f7556166adfabbd1c6b0"}, + {file = "cftime-1.6.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:82cb413973cc51b55642b3a1ca5b28db5b93a294edbef7dc049c074b478b4647"}, + {file = "cftime-1.6.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85ba8e7356d239cfe56ef7707ac30feaf67964642ac760a82e507ee3c5db4ac4"}, + {file = "cftime-1.6.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:456039af7907a3146689bb80bfd8edabd074c7f3b4eca61f91b9c2670addd7ad"}, + {file = "cftime-1.6.5-cp312-cp312-win_amd64.whl", hash = "sha256:da84534c43699960dc980a9a765c33433c5de1a719a4916748c2d0e97a071e44"}, + {file = "cftime-1.6.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4aba66fd6497711a47c656f3a732c2d1755ad15f80e323c44a8716ebde39ddd5"}, + {file = "cftime-1.6.5-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:89e7cba699242366e67d6fb5aee579440e791063f92a93853610c91647167c0d"}, + {file = "cftime-1.6.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2f1eb43d7a7b919ec99aee709fb62ef87ef1cf0679829ef93d37cc1c725781e9"}, + {file = "cftime-1.6.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e02a1d80ffc33fe469c7db68aa24c4a87f01da0c0c621373e5edadc92964900b"}, + {file = "cftime-1.6.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18ab754805233cdd889614b2b3b86a642f6d51a57a1ec327c48053f3414f87d8"}, + {file = "cftime-1.6.5-cp313-cp313-win_amd64.whl", hash = "sha256:6c27add8f907f4a4cd400e89438f2ea33e2eb5072541a157a4d013b7dbe93f9c"}, + {file = "cftime-1.6.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c69ce3bdae6a322cbb44e9ebc20770d47748002fb9d68846a1e934f1bd5daf0b"}, + {file = "cftime-1.6.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e62e9f2943e014c5ef583245bf2e878398af131c97e64f8cd47c1d7baef5c4e2"}, + {file = "cftime-1.6.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7da5fdaa4360d8cb89b71b8ded9314f2246aa34581e8105c94ad58d6102d9e4f"}, + {file = "cftime-1.6.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bff865b4ea4304f2744a1ad2b8149b8328b321dd7a2b9746ef926d229bd7cd49"}, + {file = "cftime-1.6.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e552c5d1c8a58f25af7521e49237db7ca52ed2953e974fe9f7c4491e95fdd36c"}, + {file = "cftime-1.6.5-cp314-cp314-win_amd64.whl", hash = "sha256:e645b095dc50a38ac454b7e7f0742f639e7d7f6b108ad329358544a6ff8c9ba2"}, + {file = "cftime-1.6.5.tar.gz", hash = "sha256:8225fed6b9b43fb87683ebab52130450fc1730011150d3092096a90e54d1e81e"}, +] + [[package]] name = "charset-normalizer" version = "3.4.2" @@ -737,6 +780,31 @@ files = [ {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] +[[package]] +name = "netcdf4" +version = "1.7.3" +requires_python = ">=3.10" +summary = "Provides an object-oriented python interface to the netCDF version 4 library" +groups = ["all", "netcdf"] +dependencies = [ + "certifi", + "cftime", + "numpy", +] +files = [ + {file = "netcdf4-1.7.3-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:db761afd3a6b9482df018c4783e0bdf99141a41db1f14c68c89986effb182d57"}, + {file = "netcdf4-1.7.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ad4c2d9b469248d83cbacb70ad9e7d3a6c0ba27febe839c90192147199745ba4"}, + {file = "netcdf4-1.7.3-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6986d039717582071e55ae9c6fbebfe4e5bbbc3af122fc3db0c0c09c4d8955e"}, + {file = "netcdf4-1.7.3-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:348e79b4f26f2e403fe3c54364e9297e4ef326c7ee12f9be01c037db853d26c0"}, + {file = "netcdf4-1.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:6ab71f5d70e55e8584d168d5158efdb2fd8d350a033d0c27d942c3d399587f54"}, + {file = "netcdf4-1.7.3-cp311-abi3-macosx_13_0_x86_64.whl", hash = "sha256:801c222d8ad35fd7dc7e9aa7ea6373d184bcb3b8ee6b794c5fbecaa5155b1792"}, + {file = "netcdf4-1.7.3-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:83dbfd6f10a0ec785d5296016bd821bbe9f0df780be72fc00a1f0d179d9c5f0f"}, + {file = "netcdf4-1.7.3-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:949e086d4d2612b49e5b95f60119d216c9ceb7b17bc771e9e0fa0e9b9c0a2f9f"}, + {file = "netcdf4-1.7.3-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:0c764ba6f6a1421cab5496097e8a1c4d2e36be2a04880dfd288bb61b348c217e"}, + {file = "netcdf4-1.7.3-cp311-abi3-win_amd64.whl", hash = "sha256:1b6c646fa179fb1e5e8d6e8231bc78cc0311eceaa1241256b5a853f1d04055b9"}, + {file = "netcdf4-1.7.3.tar.gz", hash = "sha256:83f122fc3415e92b1d4904fd6a0898468b5404c09432c34beb6b16c533884673"}, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -793,7 +861,7 @@ name = "numpy" version = "2.2.6" requires_python = ">=3.10" summary = "Fundamental package for array computing in Python" -groups = ["all", "modeling", "opus", "plotting"] +groups = ["all", "modeling", "netcdf", "opus", "plotting"] files = [ {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, diff --git a/pyproject.toml b/pyproject.toml index c127f41..89c7d2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,8 @@ plotting = ["matplotlib>=3.10.7", "numpy>=2.2.5"] em27 = ["polars>=1.16.0"] polars = ["polars>=1.16.0"] # alias for em27 (will be removed with 3.0) opus = ["numpy>=2.2.5", "tenacity>=9.1.2"] +modeling = ["polars>=1.16.0", "scipy>=1.15.2", "skyfield>=1.53", "numpy>=2.2.6"] +netcdf = ["netcdf4>=1.7.3"] all = [ "matplotlib>=3.10.1", "numpy>=2.2.5", @@ -67,12 +69,7 @@ all = [ "tenacity>=9.1.2", "scipy>=1.15.2", "skyfield>=1.53", -] -modeling = [ - "polars>=1.16.0", - "scipy>=1.15.2", - "skyfield>=1.53", - "numpy>=2.2.6", + "netcdf4>=1.7.3", ] [build-system] diff --git a/tum_esm_utils/__init__.py b/tum_esm_utils/__init__.py index 5714a07..4177ce4 100644 --- a/tum_esm_utils/__init__.py +++ b/tum_esm_utils/__init__.py @@ -68,3 +68,9 @@ from . import rebinning as rebinning except ImportError: pass + + # requires extra "netcdf" + try: + from . import netcdf as netcdf + except ImportError: + pass From 4cb95bfcac758ec3365fe41b8c6b7c4df479dbdf Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 01:03:02 +0100 Subject: [PATCH 07/20] #31 More user friendly netcdf file API (3) refine new API --- tests/test_static_types.py | 5 +++- tum_esm_utils/netcdf.py | 52 +++++++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/tests/test_static_types.py b/tests/test_static_types.py index ecd4d9d..5e27721 100644 --- a/tests/test_static_types.py +++ b/tests/test_static_types.py @@ -13,6 +13,7 @@ def _rmdir(path: str) -> None: @pytest.mark.order(1) +@pytest.mark.quick def test_with_mypy() -> None: _rmdir(".mypy_cache/3.*/tum_esm_utils") _rmdir(".mypy_cache/3.*/tests") @@ -21,6 +22,8 @@ def test_with_mypy() -> None: print(f"Checking {path} ...") assert os.system(f"cd {PROJECT_DIR} && {sys.executable} -m mypy {path}") == 0 + @pytest.mark.order(1) +@pytest.mark.quick def test_with_pyright() -> None: - assert os.system(f"cd {PROJECT_DIR} && {sys.executable} -m pyright") == 0 \ No newline at end of file + assert os.system(f"cd {PROJECT_DIR} && {sys.executable} -m pyright") == 0 diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py index 61ec73a..2a94c1d 100644 --- a/tum_esm_utils/netcdf.py +++ b/tum_esm_utils/netcdf.py @@ -21,7 +21,7 @@ def __init__( filepath: str, parallel: bool = False, diskless: bool = True, - mode: Literal["w", "a"] = "w", + mode: Literal["w", "a", "r"] = "w", ) -> None: """A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier. 
@@ -29,6 +29,7 @@ def __init__(
         filepath when closing the file. This ensures that the final filepath will only exist if the file
         was written completely. In append mode, the filepath is not changed."""
 
+        assert filepath.endswith(".nc"), "Only the .nc file extension is supported"
         self.tmp_filepath = filepath[:-3] + ".tmp.nc"
         self.filepath = filepath
         self.mode = mode
@@ -48,7 +49,7 @@ def __init__(
         self.dimensions: dict[str, nc.Dimension] = {}
         self.variables: dict[str, nc.Variable[Any]] = {}
         self.attributes: dict[str, str] = {}
 
-        if mode == "a":
+        if mode in ["a", "r"]:
             for dim_name, dim in self.ds.dimensions.items():
                 self.dimensions[dim_name] = dim
             for var_name, var in self.ds.variables.items():
@@ -60,7 +61,11 @@ def create_dimension(self, name: str, size: int) -> None:
         """Create a new dimension in the NetCDF file.
 
         Raises:
-            ValueError: If the dimension already exists."""
+            ValueError: If the dimension already exists.
+            RuntimeError: If the NetCDF file is not opened in write mode."""
+
+        if self.mode == "r":
+            raise RuntimeError("Cannot create dimension in read-only mode")
 
         if name in self.dimensions:
             raise ValueError(f"Dimension {name} already exists in the NetCDF file")
@@ -79,11 +84,18 @@ def create_variable(
         fill_value: Optional[float | int] = None,
         chunk_dimensions: list[str] = [],
         datatype: Literal["f4", "f8", "i4", "i8"] = "f4",
+        zlib: bool = True,
+        compression_level: int = 2,
     ) -> None:
         """Create a new variable in the NetCDF file.
 
         Raises:
-            ValueError: If the variable already exists or if a dimension is not found."""
+            ValueError: If the variable already exists or if a dimension is not found.
+            RuntimeError: If the NetCDF file is not opened in write mode."""
+
+        if self.mode == "r":
+            raise RuntimeError("Cannot create variable in read-only mode")
+
         if name in self.variables:
             raise ValueError(f"Variable {name} already exists in the NetCDF file")
@@ -103,12 +115,12 @@ def create_variable(
             if dimension.name in chunk_dimensions:
                 chunk_sizes[i] = 1
 
-        var: Any = self.ds.createVariable(  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+        var: Any = self.ds.createVariable(  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
             name,
             datatype=datatype,
             dimensions=object_dimensions,
-            zlib=(len(dimensions) > 1) or (name != object_dimensions[0].name),
-            complevel=2,
+            zlib=zlib and ((len(dimensions) > 1) or (name != object_dimensions[0].name)),
+            complevel=compression_level,  # type: ignore
             fill_value=fill_value,
             chunksizes=chunk_sizes if len(chunk_dimensions) > 0 else None,
         )
@@ -127,7 +139,11 @@ def import_dimension(
         """Import a dimension from another NetCDF file.
 
        Raises:
-            ValueError: If the dimension already exists."""
+            ValueError: If the dimension already exists.
+            RuntimeError: If the NetCDF file is not opened in write mode."""
+
+        if self.mode == "r":
+            raise RuntimeError("Cannot import dimension in read-only mode")
 
         if dimension.name in self.dimensions:
             raise ValueError(f"Dimension {dimension.name} already exists in the NetCDF file")
@@ -135,13 +151,19 @@ def import_dimension(
 
     def import_variable(
         self,
-        variable: nc.Variable,  # type: ignore
+        variable: "nc.Variable[Any]",
         new_name: Optional[str] = None,
+        zlib: bool = True,
+        compression_level: int = 2,
     ) -> None:
         """Import a variable from another NetCDF file.
 
         Raises:
-            ValueError: If the variable already exists."""
+            ValueError: If the variable already exists.
+ RuntimeError: If the NetCDF file is not opened in write mode.""" + + if self.mode == "r": + raise RuntimeError("Cannot import variable in read-only mode") if variable.name in self.variables: raise ValueError(f"Variable {variable.name} already exists in the NetCDF file") @@ -153,6 +175,8 @@ def import_variable( long_name=variable.long_name if hasattr(variable, "long_name") else None, # pyright: ignore[reportUnknownArgumentType] description=variable.description if hasattr(variable, "description") else None, # pyright: ignore[reportUnknownArgumentType] fill_value=float(variable.get_fill_value()), + zlib=zlib, + compression_level=compression_level, ) self.variables[name][:] = variable[:] @@ -160,7 +184,11 @@ def add_attribute(self, key: str, value: str) -> None: """Add a global attribute to the NetCDF file. Raises: - ValueError: If the attribute already exists.""" + ValueError: If the attribute already exists. + RuntimeError: If the NetCDF file is not opened in write mode.""" + + if self.mode == "r": + raise RuntimeError("Cannot add attribute in read-only mode") if key in self.attributes: raise ValueError(f"Attribute {key} already exists in the NetCDF file") @@ -178,6 +206,6 @@ def close(self) -> None: del self - def __getitem__(self, key: str) -> nc.Variable: # type: ignore + def __getitem__(self, key: str) -> "nc.Variable[Any]": """Get a variable from the NetCDF file.""" return self.variables[key] From 38ea88da154590ba818087fed5612c2215a7efdc Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 01:05:48 +0100 Subject: [PATCH 08/20] #31 More user friendly netcdf file API (4) test new API --- tests/test_netcdf.py | 88 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/test_netcdf.py diff --git a/tests/test_netcdf.py b/tests/test_netcdf.py new file mode 100644 index 0000000..8aee2ee --- /dev/null +++ b/tests/test_netcdf.py @@ -0,0 +1,88 @@ +import pytest +import tempfile +import os +import numpy as np +import netCDF4 as nc +import tum_esm_utils.files +from tum_esm_utils.netcdf import NetCDFFile + +PROJECT_DIR = tum_esm_utils.files.get_parent_dir_path(__file__, current_depth=2) + + +@pytest.mark.order(3) +@pytest.mark.quick +def test_netcdffile_create_and_read() -> None: + with tempfile.TemporaryDirectory() as tmpdirname: + # Create a NetCDF file in a temporary directory + filepath = os.path.join(tmpdirname, "test.nc") + ncfile = NetCDFFile(str(filepath), mode="w") + + assert not os.path.exists(filepath) + assert os.path.exists(filepath[:-3] + ".tmp.nc") + + # Create dimensions + ncfile.create_dimension("time", 10) + ncfile.create_dimension("lat", 5) + ncfile.create_dimension("lon", 5) + + # Create variable + ncfile.create_variable( + name="temperature", + dimensions=("time", "lat", "lon"), + units="K", + long_name="Air temperature", + description="Synthetic temperature data", + fill_value=-9999.0, + datatype="f4", + ) + + # Write data + data = np.random.rand(10, 5, 5).astype(np.float32) + ncfile.variables["temperature"][:] = data + + # Add attribute + ncfile.add_attribute("title", "Test NetCDF File") + + # Close file + ncfile.close() + + # Reopen and check contents + ncfile2 = nc.Dataset(str(filepath), mode="a") + assert "temperature" in ncfile2.variables + assert ncfile2.variables["temperature"].shape == (10, 5, 5) + np.testing.assert_array_almost_equal(ncfile2.variables["temperature"][:], data) + assert ncfile2.getncattr("title") == "Test NetCDF File" + ncfile2.close() + + +@pytest.mark.order(3) +@pytest.mark.quick +def 
test_netcdffile_import_dimension_and_variable() -> None:
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # Create source NetCDF file
+        src_filepath = os.path.join(tmpdirname, "src.nc")
+        src_nc = NetCDFFile(str(src_filepath), mode="w")
+        src_nc.create_dimension("x", 4)
+        src_nc.create_variable(
+            name="foo", dimensions=("x",), units="1", fill_value=0, datatype="i4"
+        )
+        src_nc.variables["foo"][:] = np.arange(4)
+        src_nc.close()
+
+        # Open source and target files
+        src_nc = NetCDFFile(str(src_filepath), mode="r")
+        tgt_filepath = os.path.join(tmpdirname, "tgt.nc")
+        tgt_nc = NetCDFFile(str(tgt_filepath), mode="w")
+
+        # Import dimension and variable
+        tgt_nc.import_dimension(src_nc.dimensions["x"])
+        tgt_nc.import_variable(src_nc.variables["foo"])
+        tgt_nc.close()
+        src_nc.close()
+
+        # Check imported data
+        tgt_nc = NetCDFFile(str(tgt_filepath), mode="r")
+        assert "x" in tgt_nc.dimensions
+        assert "foo" in tgt_nc.variables
+        np.testing.assert_array_equal(tgt_nc.variables["foo"][:], np.arange(4))
+        tgt_nc.close()

From 00d7d386957fa547cc25205e98c6e6e300494198 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Wed, 19 Nov 2025 01:06:29 +0100
Subject: [PATCH 09/20] #31 More user friendly netcdf file API (5) add to API
 reference

---
 docs/pages/api-reference.md | 139 ++++++++++++++++++++++++++++++++++++
 docs/scripts/sync-docs.py   |   1 +
 2 files changed, 140 insertions(+)

diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md
index e13ddbc..d950fdc 100644
--- a/docs/pages/api-reference.md
+++ b/docs/pages/api-reference.md
@@ -812,6 +812,145 @@ can lead to floating point errors, i.e. `1 % 0.1 == 0.09999999999999998`.
 Using `math.fmod` also does not seem to work correctly with floats.
 
 
+## `tum_esm_utils.netcdf`
+
+A thin wrapper over the netCDF4 library to make working with NetCDF files easier.
+
+Implements: `NetCDFFile`
+
+This requires you to install this utils library with the optional `netcdf` dependencies:
+
+```bash
+pip install "tum_esm_utils[netcdf]"
+# or
+pdm add "tum_esm_utils[netcdf]"
+```
+
+
+### `NetCDFFile` Objects
+
+```python
+class NetCDFFile()
+```
+
+
+##### `__init__`
+
+```python
+def __init__(filepath: str,
+             parallel: bool = False,
+             diskless: bool = True,
+             mode: Literal["w", "a", "r"] = "w") -> None
+```
+
+A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier.
+
+If writing to a new file, it will first write to a temporary ".tmp.nc" file and rename it to the final
+filepath when closing the file. This ensures that the final filepath will only exist if the file
+was written completely. In append mode, the filepath is not changed.
+
+
+##### `create_dimension`
+
+```python
+def create_dimension(name: str, size: int) -> None
+```
+
+Create a new dimension in the NetCDF file.
+
+**Raises**:
+
+- `ValueError` - If the dimension already exists.
+- `RuntimeError` - If the NetCDF file is not opened in write mode.
+
+
+##### `create_variable`
+
+```python
+def create_variable(name: str,
+                    dimensions: tuple[nc.Dimension | str, ...],
+                    units: str,
+                    long_name: Optional[str] = None,
+                    description: Optional[str] = None,
+                    fill_value: Optional[float | int] = None,
+                    chunk_dimensions: list[str] = [],
+                    datatype: Literal["f4", "f8", "i4", "i8"] = "f4",
+                    zlib: bool = True,
+                    compression_level: int = 2) -> None
+```
+
+Create a new variable in the NetCDF file.
+
+**Raises**:
+
+- `ValueError` - If the variable already exists or if a dimension is not found.
+- `RuntimeError` - If the NetCDF file is not opened in write mode.
+ + +##### `import_dimension` + +```python +def import_dimension(dimension: nc.Dimension, + new_name: Optional[str] = None) -> None +``` + +Import a dimension from another NetCDF file. + +**Raises**: + +- `ValueError` - If the dimension already exists. +- `RuntimeError` - If the NetCDF file is not opened in write mode. + + +##### `import_variable` + +```python +def import_variable(variable: "nc.Variable[Any]", + new_name: Optional[str] = None, + zlib: bool = True, + compression_level: int = 2) -> None +``` + +Import a variable from another NetCDF file. + +**Raises**: + +- `ValueError` - If the variable already exists. +- `RuntimeError` - If the NetCDF file is not opened in write mode. + + +##### `add_attribute` + +```python +def add_attribute(key: str, value: str) -> None +``` + +Add a global attribute to the NetCDF file. + +**Raises**: + +- `ValueError` - If the attribute already exists. +- `RuntimeError` - If the NetCDF file is not opened in write mode. + + +##### `close` + +```python +def close() -> None +``` + +Close the NetCDF file, possibly renaming the temporary file to the final filepath. + + +##### `__getitem__` + +```python +def __getitem__(key: str) -> "nc.Variable[Any]" +``` + +Get a variable from the NetCDF file. + + ## `tum_esm_utils.opus` Functions for interacting with OPUS files. diff --git a/docs/scripts/sync-docs.py b/docs/scripts/sync-docs.py index c749d27..2cd5fda 100644 --- a/docs/scripts/sync-docs.py +++ b/docs/scripts/sync-docs.py @@ -30,6 +30,7 @@ "em27", "files", "mathematics", + "netcdf", "opus", "opus.file_interface", "opus.http_interface", From cb63b61aef1b4e2dd7e5e07c1a41f478ae8030de Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 20:39:48 +0100 Subject: [PATCH 10/20] #31 More user friendly netcdf file API (6) refine interface --- tum_esm_utils/netcdf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py index 2a94c1d..b45d346 100644 --- a/tum_esm_utils/netcdf.py +++ b/tum_esm_utils/netcdf.py @@ -21,7 +21,7 @@ def __init__( filepath: str, parallel: bool = False, diskless: bool = True, - mode: Literal["w", "a", "r"] = "w", + mode: Literal["w", "a", "r"] = "r", ) -> None: """A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier. @@ -180,17 +180,17 @@ def import_variable( ) self.variables[name][:] = variable[:] - def add_attribute(self, key: str, value: str) -> None: + def add_attribute(self, key: str, value: str, allow_overwrite: bool = False) -> None: """Add a global attribute to the NetCDF file. Raises: - ValueError: If the attribute already exists. + ValueError: If the attribute already exists and `allow_overwrite` is False. 
RuntimeError: If the NetCDF file is not opened in write mode.""" if self.mode == "r": raise RuntimeError("Cannot add attribute in read-only mode") - if key in self.attributes: + if (not allow_overwrite) and (key in self.attributes): raise ValueError(f"Attribute {key} already exists in the NetCDF file") self.attributes[key] = value self.ds.setncattr(key, value) From f512653f66d310b8683543ba7cc1b51d04d01e96 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 20:40:47 +0100 Subject: [PATCH 11/20] #31 More user friendly netcdf file API (7) add function to compress an existing nc file --- tum_esm_utils/netcdf.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py index b45d346..779a860 100644 --- a/tum_esm_utils/netcdf.py +++ b/tum_esm_utils/netcdf.py @@ -1,6 +1,6 @@ """A thin wrapper over the netCDF4 library to make working with NetCDF files easier. -Implements: `NetCDFFile` +Implements: `NetCDFFile`, `compress_netcdf_file`. This requires you to install this utils library with the optional `netcdf` dependencies: @@ -209,3 +209,40 @@ def close(self) -> None: def __getitem__(self, key: str) -> "nc.Variable[Any]": """Get a variable from the NetCDF file.""" return self.variables[key] + + +def compress_netcdf_file( + source_filepath: str, + destination_filepath: str, + compression_level: int = 2, +) -> None: + """Compress an existing NetCDF file by creating a new one with the specified compression level. This is useful because some NetCDF4 files given to you might not be (very well) compressed. + + Raises: + FileNotFoundError: If the source file does not exist. + FileExistsError: If the destination file already exists. + """ + + if not os.path.isfile(source_filepath): + raise FileNotFoundError(f"Source file {source_filepath} does not exist.") + if os.path.isfile(destination_filepath): + raise FileExistsError(f"Destination file {destination_filepath} already exists.") + + src_nc = NetCDFFile(source_filepath, mode="r") + dest_nc = NetCDFFile(destination_filepath, mode="w") + + # Copy dimensions + for dim in src_nc.dimensions.values(): + dest_nc.import_dimension(dim) + + # Copy variables + for var in src_nc.variables.values(): + dest_nc.import_variable(var, compression_level=compression_level) + + # Copy attributes + for attr_name, attr_value in src_nc.attributes.items(): + dest_nc.add_attribute(attr_name, attr_value) + + src_nc.close() + dest_nc.close() + From 7213d2bd81afd2e39ed76dffe02b5ca9feaf7df5 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 22:51:53 +0100 Subject: [PATCH 12/20] #31 More user friendly netcdf file API (8) add function to remove items from an nc file --- tum_esm_utils/netcdf.py | 49 ++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/tum_esm_utils/netcdf.py b/tum_esm_utils/netcdf.py index 779a860..e557d5f 100644 --- a/tum_esm_utils/netcdf.py +++ b/tum_esm_utils/netcdf.py @@ -1,6 +1,6 @@ """A thin wrapper over the netCDF4 library to make working with NetCDF files easier. -Implements: `NetCDFFile`, `compress_netcdf_file`. +Implements: `NetCDFFile`, `remove_elements_from_netcdf_file`, `compress_netcdf_file`. 
This requires you to install this utils library with the optional `netcdf` dependencies: @@ -211,12 +211,15 @@ def __getitem__(self, key: str) -> "nc.Variable[Any]": return self.variables[key] -def compress_netcdf_file( +def remove_elements_from_netcdf_file( source_filepath: str, destination_filepath: str, + variables_to_remove: list[str] = [], + dimensions_to_remove: list[str] = [], + attributes_to_remove: list[str] = [], compression_level: int = 2, ) -> None: - """Compress an existing NetCDF file by creating a new one with the specified compression level. This is useful because some NetCDF4 files given to you might not be (very well) compressed. + """Create a new NetCDF file by copying an existing one, but removing specified variables, dimensions, and attributes. This is useful because NetCDF4 does not support removing elements from an existing file. Raises: FileNotFoundError: If the source file does not exist. @@ -231,18 +234,48 @@ def compress_netcdf_file( src_nc = NetCDFFile(source_filepath, mode="r") dest_nc = NetCDFFile(destination_filepath, mode="w") + # check that no variable depends on a dimension to be removed + vars = [v for v in src_nc.variables.values() if v.name not in variables_to_remove] + for var in vars: + for dim_name in var.dimensions: + if dim_name in dimensions_to_remove: + raise ValueError( + f"Cannot remove dimension {dim_name} because it is used by variable {var.name}." + ) + # Copy dimensions - for dim in src_nc.dimensions.values(): - dest_nc.import_dimension(dim) + for dim_name, dim in src_nc.dimensions.items(): + if dim_name not in dimensions_to_remove: + dest_nc.import_dimension(dim) # Copy variables - for var in src_nc.variables.values(): - dest_nc.import_variable(var, compression_level=compression_level) + for var_name, var in src_nc.variables.items(): + if var_name not in variables_to_remove: + dest_nc.import_variable(var, compression_level=compression_level) # Copy attributes for attr_name, attr_value in src_nc.attributes.items(): - dest_nc.add_attribute(attr_name, attr_value) + if attr_name not in attributes_to_remove: + dest_nc.add_attribute(attr_name, attr_value) src_nc.close() dest_nc.close() + +def compress_netcdf_file( + source_filepath: str, + destination_filepath: str, + compression_level: int = 2, +) -> None: + """Compress an existing NetCDF file by creating a new one with the specified compression level. This is useful because some NetCDF4 files given to you might not be (very well) compressed. + + Raises: + FileNotFoundError: If the source file does not exist. + FileExistsError: If the destination file already exists. + """ + + remove_elements_from_netcdf_file( + source_filepath, + destination_filepath, + compression_level=compression_level, + ) From 21f0c5b4641643f1c3dbfdc7a0f7848a7c61ae68 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Wed, 19 Nov 2025 22:52:54 +0100 Subject: [PATCH 13/20] #31 More user friendly netcdf file API (9) update API reference --- docs/pages/api-reference.md | 43 +++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md index d950fdc..b98059f 100644 --- a/docs/pages/api-reference.md +++ b/docs/pages/api-reference.md @@ -816,7 +816,7 @@ Using `math.fmod` also does not seem to work correctly with floats. A thin wrapper over the netCDF4 library to make working with NetCDF files easier. -Implements: `NetCDFFile` +Implements: `NetCDFFile`, `remove_elements_from_netcdf_file`, `compress_netcdf_file`. 
This requires you to install this utils library with the optional `netcdf` dependencies: @@ -840,7 +840,7 @@ class NetCDFFile() def __init__(filepath: str, parallel: bool = False, diskless: bool = True, - mode: Literal["w", "a", "r"] = "w") -> None + mode: Literal["w", "a", "r"] = "r") -> None ``` A simple wrapper around netCDF4.Dataset to make the interaction with NetCDF files easier. @@ -922,14 +922,14 @@ Import a variable from another NetCDF file. ##### `add_attribute` ```python -def add_attribute(key: str, value: str) -> None +def add_attribute(key: str, value: str, allow_overwrite: bool = False) -> None ``` Add a global attribute to the NetCDF file. **Raises**: -- `ValueError` - If the attribute already exists. +- `ValueError` - If the attribute already exists and `allow_overwrite` is False. - `RuntimeError` - If the NetCDF file is not opened in write mode. @@ -951,6 +951,41 @@ def __getitem__(key: str) -> "nc.Variable[Any]" Get a variable from the NetCDF file. +##### `remove_elements_from_netcdf_file` + +```python +def remove_elements_from_netcdf_file(source_filepath: str, + destination_filepath: str, + variables_to_remove: list[str] = [], + dimensions_to_remove: list[str] = [], + attributes_to_remove: list[str] = [], + compression_level: int = 2) -> None +``` + +Create a new NetCDF file by copying an existing one, but removing specified variables, dimensions, and attributes. This is useful because NetCDF4 does not support removing elements from an existing file. + +**Raises**: + +- `FileNotFoundError` - If the source file does not exist. +- `FileExistsError` - If the destination file already exists. + + +##### `compress_netcdf_file` + +```python +def compress_netcdf_file(source_filepath: str, + destination_filepath: str, + compression_level: int = 2) -> None +``` + +Compress an existing NetCDF file by creating a new one with the specified compression level. This is useful because some NetCDF4 files given to you might not be (very well) compressed. + +**Raises**: + +- `FileNotFoundError` - If the source file does not exist. +- `FileExistsError` - If the destination file already exists. + + ## `tum_esm_utils.opus` Functions for interacting with OPUS files. From bb5058529ae705baee270e073f8dae5f67dedf8e Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Thu, 20 Nov 2025 00:18:05 +0100 Subject: [PATCH 14/20] Add function `fill_df_time_gaps_with_nans` (1) Implement --- tum_esm_utils/dataframes.py | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tum_esm_utils/dataframes.py diff --git a/tum_esm_utils/dataframes.py b/tum_esm_utils/dataframes.py new file mode 100644 index 0000000..6e60504 --- /dev/null +++ b/tum_esm_utils/dataframes.py @@ -0,0 +1,44 @@ +"""Dataframe-related utility functions. + +Implements: `fill_df_time_gaps_with_nans` + +This requires you to install this utils library with the optional `polars` dependency: + +```bash +pip install "tum_esm_utils[polars]" +# or +pdm add "tum_esm_utils[polars]" +``` +""" + +import datetime +import polars as pl + + +def fill_df_time_gaps_with_nans( + df: pl.DataFrame, + time_col: str, + max_gap_seconds: int, +) -> pl.DataFrame: + """Fill time gaps in a dataframe with NaN rows. This is very useful for plotting dataframes where time gaps should be visible. + + Args: + df: The input dataframe. + time_col: The name of the time column. 
+        max_gap_seconds: The maximum gap in seconds to fill with NaN rows."""
+
+    assert max_gap_seconds > 1, "max_gap_seconds must be greater than 1"
+    gaps_in_df = df.select(
+        time_col,
+        pl.col(time_col).diff().dt.total_seconds().alias(f"{time_col}_diff_seconds"),
+    ).filter(
+        pl.col(f"{time_col}_diff_seconds").gt(max_gap_seconds),
+    )[time_col] - datetime.timedelta(seconds=1)
+    gap_df = pl.DataFrame(
+        {
+            time_col: gaps_in_df,
+            **{c: [None] * len(gaps_in_df) for c in df.columns if c != time_col},
+        }
+    )
+    df_with_gaps = pl.concat([df, gap_df], how="vertical").sort(time_col)
+    return df_with_gaps

From db7c0a8049bc90b42da46da8deeafbe7b4305dbc Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Thu, 20 Nov 2025 00:18:18 +0100
Subject: [PATCH 15/20] Add function `fill_df_time_gaps_with_nans` (2) Update
 API reference

---
 docs/pages/api-reference.md | 31 +++++++++++++++++++++++++++++++
 docs/scripts/sync-docs.py   |  1 +
 2 files changed, 32 insertions(+)

diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md
index b98059f..e96c22a 100644
--- a/docs/pages/api-reference.md
+++ b/docs/pages/api-reference.md
@@ -255,6 +255,37 @@ def load_ggg2020_vmr(filepath: str) -> pl.DataFrame
 Load the Atmospheric profile from a GGG2020 vmr file.
 
 
+## `tum_esm_utils.dataframes`
+
+Dataframe-related utility functions.
+
+Implements: `fill_df_time_gaps_with_nans`
+
+This requires you to install this utils library with the optional `polars` dependency:
+
+```bash
+pip install "tum_esm_utils[polars]"
+# or
+pdm add "tum_esm_utils[polars]"
+```
+
+
+##### `fill_df_time_gaps_with_nans`
+
+```python
+def fill_df_time_gaps_with_nans(df: pl.DataFrame, time_col: str,
+                                max_gap_seconds: int) -> pl.DataFrame
+```
+
+Fill time gaps in a dataframe with NaN rows. This is very useful for plotting dataframes where time gaps should be visible.
+
+**Arguments**:
+
+- `df` - The input dataframe.
+- `time_col` - The name of the time column.
+- `max_gap_seconds` - The maximum gap in seconds to fill with NaN rows.
+
+
 ## `tum_esm_utils.datastructures`
 
 Datastructures not in the standard library.
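A minimal sketch of how `fill_df_time_gaps_with_nans` is meant to be used (illustrative only, not part of the patch; it mirrors the test added in patch 16):

```python
import datetime
import polars as pl
import tum_esm_utils.dataframes

df = pl.DataFrame(
    {
        "utc": [
            datetime.datetime(2024, 1, 1, 0, 0, 0),
            datetime.datetime(2024, 1, 1, 0, 0, 10),  # 10 s gap to the row before
            datetime.datetime(2024, 1, 1, 0, 0, 11),
        ],
        "value": [1, 2, 3],
    }
)

# a null row is inserted one second before the row that follows the gap,
# so line plots show a visible break instead of a connecting line
filled = tum_esm_utils.dataframes.fill_df_time_gaps_with_nans(
    df, time_col="utc", max_gap_seconds=5
)
assert filled.shape[0] == 4
assert filled["value"][1] is None  # the inserted row at 00:00:09
```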
diff --git a/docs/scripts/sync-docs.py b/docs/scripts/sync-docs.py index 2cd5fda..696f7bd 100644 --- a/docs/scripts/sync-docs.py +++ b/docs/scripts/sync-docs.py @@ -25,6 +25,7 @@ "column.astronomy", "column.averaging_kernel", "column.ncep_profiles", + "dataframes", "datastructures", "decorators", "em27", From f1eecddc59a7b3c0f70b82618af84767a94cbc95 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Thu, 20 Nov 2025 00:20:36 +0100 Subject: [PATCH 16/20] Add function `fill_df_time_gaps_with_nans` (3) Test --- tests/test_dataframes.py | 28 ++++++++++++++++++++++++++++ tum_esm_utils/plotting.py | 10 +++++----- 2 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 tests/test_dataframes.py diff --git a/tests/test_dataframes.py b/tests/test_dataframes.py new file mode 100644 index 0000000..b4c386a --- /dev/null +++ b/tests/test_dataframes.py @@ -0,0 +1,28 @@ +import datetime +import pytest +import polars as pl +import tum_esm_utils.dataframes + + +@pytest.mark.order(3) +@pytest.mark.quick +def test_fill_df_time_gaps_with_nans() -> None: + # Create a dataframe with a time gap + df = pl.DataFrame( + { + "utc": [ + datetime.datetime(2024, 1, 1, 0, 0, 0), + datetime.datetime(2024, 1, 1, 0, 0, 10), # gap > 5 seconds + datetime.datetime(2024, 1, 1, 0, 0, 11), + ], + "value": [1, 2, 3], + } + ) + result = tum_esm_utils.dataframes.fill_df_time_gaps_with_nans( + df, time_col="utc", max_gap_seconds=5 + ) + # There should be a NaN row inserted after the first row + assert result.shape[0] == 4 + # The inserted row should have None in 'value' + inserted_row = result.filter(pl.col("utc").eq(datetime.datetime(2024, 1, 1, 0, 0, 9))) + assert inserted_row["value"][0] is None diff --git a/tum_esm_utils/plotting.py b/tum_esm_utils/plotting.py index 9761437..b1ecf17 100644 --- a/tum_esm_utils/plotting.py +++ b/tum_esm_utils/plotting.py @@ -62,7 +62,7 @@ def create_figure( suptitle_y: float = 0.97, padding: float = 2, dpi: int = 250, -) -> Generator[plt.Figure, None, None]: # pyright: ignore[reportPrivateImportUsage] +) -> Generator[plt.Figure, None, None]: # pyright: ignore[reportPrivateImportUsage] """Create a figure for plotting. Usage: @@ -93,13 +93,13 @@ def create_figure( def add_subplot( - fig: plt.Figure, # pyright: ignore[reportPrivateImportUsage] + fig: plt.Figure, # pyright: ignore[reportPrivateImportUsage] position: tuple[int, int, int] | matplotlib.gridspec.SubplotSpec, title: Optional[str] = None, xlabel: Optional[str] = None, ylabel: Optional[str] = None, **kwargs: dict[str, Any], -) -> plt.Axes: # pyright: ignore[reportPrivateImportUsage] +) -> plt.Axes: # pyright: ignore[reportPrivateImportUsage] """Add a subplot to a figure. 
Use a gridspec for more control: @@ -125,7 +125,7 @@ def add_subplot( Raises: ValueError: If the index of the subplot is invalid.""" - axis: plt.Axes # pyright: ignore[reportPrivateImportUsage] + axis: plt.Axes # pyright: ignore[reportPrivateImportUsage] if isinstance(position, matplotlib.gridspec.SubplotSpec): axis = fig.add_subplot(position, **kwargs) else: @@ -147,7 +147,7 @@ def add_subplot( def add_colorpatch_legend( - fig: plt.Figure, # pyright: ignore[reportPrivateImportUsage] + fig: plt.Figure, # pyright: ignore[reportPrivateImportUsage] handles: list[ tuple[ str, From 8984ef52ab380f108c8d6b4d8dd765769e4bfeda Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Thu, 20 Nov 2025 00:32:23 +0100 Subject: [PATCH 17/20] Add functions to convert julian day numbers to datetimes and back (1) Implement --- docs/pages/api-reference.md | 44 ++++++++++++++++++++++++++++++ tum_esm_utils/timing.py | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md index e96c22a..6bfde4c 100644 --- a/docs/pages/api-reference.md +++ b/docs/pages/api-reference.md @@ -2352,6 +2352,50 @@ with timed_section("my_section"): ``` +##### `datetime_to_julian_day_number` + +```python +def datetime_to_julian_day_number(dt: datetime.datetime) -> float +``` + +Convert a datetime to a Julian Day Number (JDN). + +The Julian Day Number is the continuous count of days since the beginning +of the Julian Period on January 1, 4713 BC. This function was validated against +https://ssd.jpl.nasa.gov/tools/jdc/#/cd + +**Arguments**: + +- `dt` - The datetime to convert. + + +**Returns**: + + The Julian Day Number as a float. + + +##### `julian_day_number_to_datetime` + +```python +def julian_day_number_to_datetime(jdn: float) -> datetime.datetime +``` + +Convert a Julian Day Number (JDN) to a datetime. + +The Julian Day Number is the continuous count of days since the beginning +of the Julian Period on January 1, 4713 BC. This function was validated against +https://ssd.jpl.nasa.gov/tools/jdc/#/cd + +**Arguments**: + +- `jdn` - The Julian Day Number to convert. + + +**Returns**: + + The corresponding datetime. + + ## `tum_esm_utils.validators` Implements validator utils for use with pydantic models. diff --git a/tum_esm_utils/timing.py b/tum_esm_utils/timing.py index 321b541..2339d1b 100644 --- a/tum_esm_utils/timing.py +++ b/tum_esm_utils/timing.py @@ -362,3 +362,56 @@ def timed_section(label: str) -> Generator[None, None, None]: yield end = time.time() print(f"{label}: {end - start:6.3f}s") + + +def datetime_to_julian_day_number(dt: datetime.datetime) -> float: + """Convert a datetime to a Julian Day Number (JDN). + + The Julian Day Number is the continuous count of days since the beginning + of the Julian Period on January 1, 4713 BC. This function was validated against + https://ssd.jpl.nasa.gov/tools/jdc/#/cd + + Args: + dt: The datetime to convert. + + Returns: + The Julian Day Number as a float. + """ + + JDN_BASE_DT = datetime.datetime(2000, 1, 1, 12, 0, 0) # JDN 2451545.0 + JDN_BASE_NUM = 2451545.0 + + delta = dt - JDN_BASE_DT + + return JDN_BASE_NUM + delta.days + (delta.seconds + delta.microseconds / 1_000_000) / 86400.0 + + +def julian_day_number_to_datetime(jdn: float) -> datetime.datetime: + """Convert a Julian Day Number (JDN) to a datetime. + + The Julian Day Number is the continuous count of days since the beginning + of the Julian Period on January 1, 4713 BC. 
This function was validated against
+    https://ssd.jpl.nasa.gov/tools/jdc/#/cd
+
+    Args:
+        jdn: The Julian Day Number to convert.
+
+    Returns:
+        The corresponding datetime.
+    """
+
+    JDN_BASE_DT = datetime.datetime(2000, 1, 1, 12, 0, 0)  # JDN 2451545.0
+    JDN_BASE_NUM = 2451545.0
+
+    delta_days = jdn - JDN_BASE_NUM
+    delta_whole_days = int(delta_days)
+    delta_fractional_day = delta_days - delta_whole_days
+
+    delta_seconds = int(delta_fractional_day * 86400)
+    delta_microseconds = int((delta_fractional_day * 86400 - delta_seconds) * 1_000_000)
+
+    return JDN_BASE_DT + datetime.timedelta(
+        days=delta_whole_days,
+        seconds=delta_seconds,
+        microseconds=delta_microseconds,
+    )

From edffc49c1d88d314702c09a8f298ec6d15073244 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Thu, 20 Nov 2025 00:32:33 +0100
Subject: [PATCH 18/20] Add functions to convert julian day numbers to
 datetimes and back (2) Test

---
 tests/test_timing.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/test_timing.py b/tests/test_timing.py
index 81c5a45..388ddee 100644
--- a/tests/test_timing.py
+++ b/tests/test_timing.py
@@ -265,3 +265,21 @@ def test_date_span_intersection() -> None:
         assert tum_esm_utils.timing.date_span_intersection(dt_span_2, dt_span_1) == expected, (
             f"Test case {i}b failed"
         )
+
+
+@pytest.mark.order(3)
+@pytest.mark.quick
+def test_datetime_to_julian_day_number() -> None:
+    test_cases = [
+        (datetime.datetime(2000, 1, 1, 12, 0, 0), 2451545.0),
+        (datetime.datetime(1995, 7, 23, 14, 36), 2449922.1083333),
+        (datetime.datetime(1976, 3, 3, 1, 36), 2442840.5666667),
+        (datetime.datetime(1999, 12, 3, 23, 36), 2451516.4833333),
+        (datetime.datetime(2024, 6, 3, 18, 1), 2460465.2506944),
+    ]
+    for dt, jdn in test_cases:
+        calculated_jdn = tum_esm_utils.timing.datetime_to_julian_day_number(dt)
+        assert abs(calculated_jdn - jdn) < 1e-6, f"Failed for {dt}"
+
+        calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(jdn)
+        assert abs((calculated_dt - dt).total_seconds()) < 1, f"Failed for {jdn}"

From f4e053cf2b41cd2dda986dd40b93991e52be8f44 Mon Sep 17 00:00:00 2001
From: Moritz Makowski
Date: Thu, 20 Nov 2025 00:45:14 +0100
Subject: [PATCH 19/20] Add functions to convert julian day numbers to
 datetimes and back (3) Also support MJD and MJD2K

---
 docs/pages/api-reference.md | 29 +++++++++++------
 tum_esm_utils/timing.py     | 62 ++++++++++++++++++++++++++-----------
 2 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/docs/pages/api-reference.md b/docs/pages/api-reference.md
index 6bfde4c..d6d4ced 100644
--- a/docs/pages/api-reference.md
+++ b/docs/pages/api-reference.md
@@ -2355,18 +2355,22 @@ with timed_section("my_section"):
 ```
 
 
 ##### `datetime_to_julian_day_number`
 
 ```python
-def datetime_to_julian_day_number(dt: datetime.datetime) -> float
+def datetime_to_julian_day_number(
+        dt: datetime.datetime, variant: Literal["JDN", "MJD",
+                                                "MJD2K"]) -> float
 ```
 
-Convert a datetime to a Julian Day Number (JDN).
+Convert a datetime to a Julian Day Number (JDN) or MJD/MJD2K.
 
-The Julian Day Number is the continuous count of days since the beginning
-of the Julian Period on January 1, 4713 BC. This function was validated against
-https://ssd.jpl.nasa.gov/tools/jdc/#/cd
+The Julian Day Number (JDN) is the continuous count of days since the beginning
+of the Julian Period on January 1, 4713 BC. The modified variant MJD starts
+counting from November 17, 1858 at 00:00:00 UTC, and MJD2K starts counting
+from January 1, 2000 at 00:00:00 UTC.
 
**Arguments**:
 
 - `dt` - The datetime to convert.
+- `variant` - The variant of the Julian Day Number ("JDN", "MJD", "MJD2K").
 
 
 **Returns**:
 
@@ -2377,18 +2381,25 @@
 ##### `julian_day_number_to_datetime`
 
 ```python
-def julian_day_number_to_datetime(jdn: float) -> datetime.datetime
+def julian_day_number_to_datetime(
+        jdn: float, variant: Literal["JDN", "MJD",
+                                     "MJD2K"]) -> datetime.datetime
 ```
 
-Convert a Julian Day Number (JDN) to a datetime.
+Convert a Julian Day Number (JDN) or MJD/MJD2K to a datetime.
+
+The Julian Day Number (JDN) is the continuous count of days since the beginning
+of the Julian Period on January 1, 4713 BC. The modified variant MJD starts
+counting from November 17, 1858 at 00:00:00 UTC, and MJD2K starts counting
+from January 1, 2000 at 00:00:00 UTC.
 
-The Julian Day Number is the continuous count of days since the beginning
-of the Julian Period on January 1, 4713 BC. This function was validated against
+This function was validated against
 https://ssd.jpl.nasa.gov/tools/jdc/#/cd
 
 **Arguments**:
 
 - `jdn` - The Julian Day Number to convert.
+- `variant` - The variant of the Julian Day Number ("JDN", "MJD", "MJD2K").
 
 **Returns**:
 
   The corresponding datetime.
 
diff --git a/tum_esm_utils/timing.py b/tum_esm_utils/timing.py
index 2339d1b..3f439ca 100644
--- a/tum_esm_utils/timing.py
+++ b/tum_esm_utils/timing.py
@@ -4,7 +4,7 @@
 `clear_alarm`, `wait_for_condition`, `ExponentialBackoff`"""
 
 import os
-from typing import Any, Callable, Generator, Optional
+from typing import Any, Callable, Generator, Literal, Optional
 import contextlib
 import datetime
 import re
@@ -364,53 +364,79 @@ def timed_section(label: str) -> Generator[None, None, None]:
     print(f"{label}: {end - start:6.3f}s")
 
 
-def datetime_to_julian_day_number(dt: datetime.datetime) -> float:
-    """Convert a datetime to a Julian Day Number (JDN).
+_JDN_BASE_DTS = {
+    "JDN": datetime.datetime(2000, 1, 1, 12, 0, 0),
+    "MJD": datetime.datetime(1858, 11, 17, 0, 0, 0),
+    "MJD2K": datetime.datetime(2000, 1, 1, 0, 0, 0),
+}
+_JDN_BASE_NUMS = {
+    "JDN": 2451545.0,
+    "MJD": 0.0,
+    "MJD2K": 0.0,
+}
 
-    The Julian Day Number is the continuous count of days since the beginning
-    of the Julian Period on January 1, 4713 BC. This function was validated against
-    https://ssd.jpl.nasa.gov/tools/jdc/#/cd
+
+def datetime_to_julian_day_number(
+    dt: datetime.datetime,
+    variant: Literal["JDN", "MJD", "MJD2K"],
+) -> float:
+    """Convert a datetime to a Julian Day Number (JDN) or MJD/MJD2K.
+
+    The Julian Day Number (JDN) is the continuous count of days since the beginning
+    of the Julian Period on January 1, 4713 BC. The modified variant MJD starts
+    counting from November 17, 1858 at 00:00:00 UTC, and MJD2K starts counting
+    from January 1, 2000 at 00:00:00 UTC.
 
     Args:
         dt: The datetime to convert.
+        variant: The variant of the Julian Day Number ("JDN", "MJD", "MJD2K").
 
     Returns:
         The Julian Day Number as a float.
""" - JDN_BASE_DT = datetime.datetime(2000, 1, 1, 12, 0, 0) # JDN 2451545.0 - JDN_BASE_NUM = 2451545.0 + assert variant in _JDN_BASE_DTS, f"Invalid variant: {variant}" + base_dt = _JDN_BASE_DTS[variant] + base_num = _JDN_BASE_NUMS[variant] - delta = dt - JDN_BASE_DT + delta = dt - base_dt + return base_num + delta.days + (delta.seconds + delta.microseconds / 1_000_000) / 86400.0 - return JDN_BASE_NUM + delta.days + (delta.seconds + delta.microseconds / 1_000_000) / 86400.0 +def julian_day_number_to_datetime( + jdn: float, + variant: Literal["JDN", "MJD", "MJD2K"], +) -> datetime.datetime: + """Convert a Julian Day Number (JDN) or MJD/MJD2K to a datetime. -def julian_day_number_to_datetime(jdn: float) -> datetime.datetime: - """Convert a Julian Day Number (JDN) to a datetime. + The Julian Day Number (JDN) is the continuous count of days since the beginning + of the Julian Period on January 1, 4713 BC. THe modified variant MJD starts + counting from November 17, 1858 at 00:00:00 UTC, and MJD2K starts counting + from January 1, 2000 at 00:00:00 UTC. - The Julian Day Number is the continuous count of days since the beginning - of the Julian Period on January 1, 4713 BC. This function was validated against + This function was validated against https://ssd.jpl.nasa.gov/tools/jdc/#/cd Args: jdn: The Julian Day Number to convert. + variant: The variant of the Julian Day Number ("JDN", "MJD", "MJD2K"). Returns: The corresponding datetime. """ - JDN_BASE_DT = datetime.datetime(2000, 1, 1, 12, 0, 0) # JDN 2451545.0 - JDN_BASE_NUM = 2451545.0 + assert variant in _JDN_BASE_DTS, f"Invalid variant: {variant}" + base_dt = _JDN_BASE_DTS[variant] + base_num = _JDN_BASE_NUMS[variant] - delta_days = jdn - JDN_BASE_NUM + delta_days = jdn - base_num delta_whole_days = int(delta_days) delta_fractional_day = delta_days - delta_whole_days delta_seconds = int(delta_fractional_day * 86400) delta_microseconds = int((delta_fractional_day * 86400 - delta_seconds) * 1_000_000) - return JDN_BASE_DT + datetime.timedelta( + return base_dt + datetime.timedelta( days=delta_whole_days, seconds=delta_seconds, microseconds=delta_microseconds, From 8f74d97d6ad421b1f473cd5f407fca5e75307fd2 Mon Sep 17 00:00:00 2001 From: Moritz Makowski Date: Thu, 20 Nov 2025 00:45:22 +0100 Subject: [PATCH 20/20] Add functions to convert julian day numbers to datetimes and back (4) Also test MJD and MJD2K --- tests/test_timing.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/test_timing.py b/tests/test_timing.py index 388ddee..9a28d62 100644 --- a/tests/test_timing.py +++ b/tests/test_timing.py @@ -278,8 +278,22 @@ def test_datetime_to_julian_day_number() -> None: (datetime.datetime(2024, 6, 3, 18, 1), 2460465.2506944), ] for dt, jdn in test_cases: - calculated_jdn = tum_esm_utils.timing.datetime_to_julian_day_number(dt) + # JDN + calculated_jdn = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="JDN") + calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(jdn, variant="JDN") assert abs(calculated_jdn - jdn) < 1e-6, f"Failed for {dt}" - - calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(jdn) assert abs((calculated_dt - dt).total_seconds()) < 1, f"Failed for {jdn}" + + # MJD + mjd = jdn - 2400000.5 + calculated_mjd = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="MJD") + calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(mjd, variant="MJD") + assert abs(calculated_mjd - mjd) < 1e-6, f"Failed for {dt}" + assert 
abs((calculated_dt - dt).total_seconds()) < 1, f"Failed for {mjd}" + + # MJD2K + mjd2k = jdn - 2451544.5 + calculated_mjd2k = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="MJD2K") + calculated_dt = tum_esm_utils.timing.julian_day_number_to_datetime(mjd2k, variant="MJD2K") + assert abs(calculated_mjd2k - mjd2k) < 1e-6, f"Failed for {dt}" + assert abs((calculated_dt - dt).total_seconds()) < 1, f"Failed for {mjd2k}"
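To make the fixed offsets between the three variants concrete, here is a short worked sketch using the functions from these patches; the offset constants are the standard definitions already used in the test above (MJD = JDN - 2400000.5, MJD2K = JDN - 2451544.5):

```python
import datetime
import tum_esm_utils.timing

# J2000.0 epoch: 2000-01-01 12:00:00 UTC is JDN 2451545.0 by definition
dt = datetime.datetime(2000, 1, 1, 12, 0, 0)
jdn = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="JDN")
assert jdn == 2451545.0

# the variants are constant offsets of the JDN:
#   MJD   = JDN - 2400000.5  (epoch 1858-11-17 00:00:00 UTC)
#   MJD2K = JDN - 2451544.5  (epoch 2000-01-01 00:00:00 UTC)
mjd = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="MJD")
mjd2k = tum_esm_utils.timing.datetime_to_julian_day_number(dt, variant="MJD2K")
assert mjd == 51544.5
assert mjd2k == 0.5

# round trip: near 2.45e6 a float64 resolves steps of about 5e-10 days
# (roughly 40 microseconds), which is why the tests above allow a
# 1-second tolerance on reconstructed datetimes
assert tum_esm_utils.timing.julian_day_number_to_datetime(jdn, variant="JDN") == dt
```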