diff --git a/Changelog.rst b/Changelog.rst index 3ab3931c6..73bf1fb76 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -1,3 +1,13 @@ +Version NEXTVERSION +------------------- + +**2025-12-??** + +* Reduce the time taken to import `cfdm` + (https://github.com/NCAS-CMS/cfdm/issues/361) + +---- + Version 1.12.3.1 ---------------- diff --git a/cfdm/__init__.py b/cfdm/__init__.py index 54e43089e..31373db6f 100644 --- a/cfdm/__init__.py +++ b/cfdm/__init__.py @@ -40,174 +40,12 @@ import logging import sys -from packaging.version import Version - from . import core __date__ = core.__date__ __cf_version__ = core.__cf_version__ __version__ = core.__version__ -_requires = core._requires + ( - "cftime", - "netCDF4", - "dask", - "scipy", - "h5netcdf", - "zarr", - "s3fs", - "uritools", - "cfunits", -) - -_error0 = f"cfdm requires the modules {', '.join(_requires)}. " - -# Check the version of cftime -try: - import cftime -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "1.6.4" - if Version(cftime.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad cftime version: cfdm requires cftime>={_minimum_vn}. " - f"Got {cftime.__version__} at {cftime.__file__}" - ) - -# Check the version of netCDF4 -try: - import netCDF4 -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "1.7.2" - if Version(netCDF4.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad netCDF4 version: cfdm requires netCDF4>={_minimum_vn}. " - f"Got {netCDF4.__version__} at {netCDF4.__file__}" - ) - -# Check the version of h5netcdf -try: - import h5netcdf -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "1.3.0" - if Version(h5netcdf.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad h5netcdf version: cfdm requires h5netcdf>={_minimum_vn}. " - f"Got {h5netcdf.__version__} at {h5netcdf.__file__}" - ) - -# Check the version of h5py -try: - import h5py -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "3.12.0" - if Version(h5py.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad h5py version: cfdm requires h5py>={_minimum_vn}. " - f"Got {h5py.__version__} at {h5py.__file__}" - ) - -# Check the version of zarr -try: - import zarr -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "3.0.8" - if Version(zarr.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad zarr version: cfdm requires zarr>={_minimum_vn}. " - f"Got {zarr.__version__} at {zarr.__file__}" - ) - -# Check the version of s3fs -try: - import s3fs -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "2024.6.0" - if Version(s3fs.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad s3fs version: cfdm requires s3fs>={_minimum_vn}. " - f"Got {s3fs.__version__} at {s3fs.__file__}" - ) - -# Check the version of scipy -try: - import scipy -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "1.10.0" - if Version(scipy.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad scipy version: cfdm requires scipy>={_minimum_vn}. 
" - f"Got {scipy.__version__} at {scipy.__file__}" - ) - -# Check the version of dask -try: - import dask -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "2025.5.1" - if Version(dask.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad dask version: cfdm requires dask>={_minimum_vn}. " - f"Got {dask.__version__} at {dask.__file__}" - ) - -# Check the version of distributed -try: - import distributed -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "2025.5.1" - if Version(distributed.__version__) < Version(_minimum_vn): - raise ValueError( - "Bad distributed version: cfdm requires " - f"distributed>={_minimum_vn}. " - f"Got {distributed.__version__} at {distributed.__file__}" - ) - -# Check the version of uritools -try: - import uritools -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "4.0.3" - if Version(uritools.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad uritools version: cfdm requires uritools>={_minimum_vn}. " - f"Got {uritools.__version__} at {uritools.__file__}" - ) - -# Check the version of cfunits -try: - import cfunits -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "3.3.7" - if Version(cfunits.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad cfunits version: cfdm requires cfunits>={_minimum_vn}. " - f"Got {cfunits.__version__} at {cfunits.__file__}" - ) - -del _minimum_vn - from .constants import masked # Internal ones passed on so they can be used in cf-python (see diff --git a/cfdm/constants.py b/cfdm/constants.py index 511a9f5aa..1f359483d 100644 --- a/cfdm/constants.py +++ b/cfdm/constants.py @@ -1,42 +1,6 @@ -import logging -import sys from enum import Enum import numpy as np -from dask import config -from dask.utils import parse_bytes - -_CHUNKSIZE = "128 MiB" -config.set({"array.chunk-size": _CHUNKSIZE}) -"""A dictionary of useful constants. - -Whilst the dictionary may be modified directly, it is safer to -retrieve and set the values with the dedicated get-and-set functions. - -:Keys: - - ATOL: `float` - The value of absolute tolerance for testing numerically tolerant - equality. - - RTOL: `float` - The value of relative tolerance for testing numerically tolerant - equality. - - LOG_LEVEL: `str` - The minimal level of seriousness for which log messages are - shown. See `cfdm.log_level`. - - CHUNKSIZE: `int` - The Dask chunk size (in bytes). See `cfdm.chunksize`. - -""" -CONSTANTS = { - "ATOL": sys.float_info.epsilon, - "RTOL": sys.float_info.epsilon, - "LOG_LEVEL": logging.getLevelName(logging.getLogger().level), - "CHUNKSIZE": parse_bytes(_CHUNKSIZE), -} # -------------------------------------------------------------------- diff --git a/cfdm/constructs.py b/cfdm/constructs.py index ddd272fc2..c8fa90d14 100644 --- a/cfdm/constructs.py +++ b/cfdm/constructs.py @@ -2,14 +2,15 @@ from itertools import zip_longest from re import Pattern -from . import core, mixin +from .core import Constructs as core_Constructs from .core.functions import deepcopy from .decorators import _manage_log_level_via_verbosity +from .mixin import Container logger = logging.getLogger(__name__) -class Constructs(mixin.Container, core.Constructs): +class Constructs(Container, core_Constructs): """A container for metadata constructs. 
The container has similarities to a `dict` in that it presents the diff --git a/cfdm/core/__init__.py b/cfdm/core/__init__.py index 6b19f7037..7c8f87333 100644 --- a/cfdm/core/__init__.py +++ b/cfdm/core/__init__.py @@ -15,52 +15,12 @@ __cf_version__ = "1.12" __version__ = "1.12.3.1" -from platform import python_version - -_requires = ("numpy", "packaging") -_error0 = f"cfdm.core requires the modules {', '.join(_requires)}. " - -# Check the version of packaging -try: - import packaging - from packaging.version import Version -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "20.0" - if Version(packaging.__version__) < Version(_minimum_vn): - raise RuntimeError( - f"Bad packaging version: cf requires packaging>={_minimum_vn}. " - f"Got {packaging.__version__} at {packaging.__file__}" - ) - -# Check the version of python -_minimum_vn = "3.10.0" -if Version(python_version()) < Version(_minimum_vn): - raise ValueError( - f"Bad python version: cfdm.core requires python>={_minimum_vn}. " - f"Got {python_version()}" - ) - -# Check the version of numpy -try: - import numpy as np -except ImportError as error1: - raise ImportError(_error0 + str(error1)) -else: - _minimum_vn = "2.0.0" - if Version(np.__version__) < Version(_minimum_vn): - raise ValueError( - f"Bad numpy version: cfdm.core requires numpy>={_minimum_vn}. " - f"Got {np.__version__} at {np.__file__}" - ) - -del _minimum_vn +# Count the number of docstrings (first element), and the number which +# have docstring substitutions applied to them (second element). +_docstring_substitutions = [0, 0] from .constructs import Constructs - from .functions import CF, environment - from .data import Data, Array, NumpyArray from .bounds import Bounds diff --git a/cfdm/core/cellconnectivity.py b/cfdm/core/cellconnectivity.py index c9d9a2760..fcdb3ffd8 100644 --- a/cfdm/core/cellconnectivity.py +++ b/cfdm/core/cellconnectivity.py @@ -105,7 +105,7 @@ def construct_type(self): def del_connectivity(self, default=ValueError()): """Remove the connectivity. - {{{cell connectivity type}} + {{cell connectivity type}} .. versionadded:: (cfdm) 1.11.0.0 @@ -149,7 +149,7 @@ def del_connectivity(self, default=ValueError()): def has_connectivity(self): """Whether the connectivity type has been set. - {{{cell connectivity type}} + {{cell connectivity type}} .. versionadded:: (cfdm) 1.11.0.0 @@ -233,7 +233,7 @@ def get_connectivity(self, default=ValueError()): def set_connectivity(self, connectivity): """Set the connectivity type. - {{{cell connectivity type}} + {{cell connectivity type}} .. versionadded:: (cfdm) 1.11.0.0 diff --git a/cfdm/core/data/abstract/array.py b/cfdm/core/data/abstract/array.py index 314068c9c..48975f025 100644 --- a/cfdm/core/data/abstract/array.py +++ b/cfdm/core/data/abstract/array.py @@ -1,8 +1,8 @@ from functools import reduce from operator import mul -from ...abstract import Container -from ...utils import cached_property +from cfdm.core.abstract import Container +from cfdm.core.utils import cached_property class Array(Container): diff --git a/cfdm/core/data/data.py b/cfdm/core/data/data.py index 8983bbb5f..27fff1afe 100644 --- a/cfdm/core/data/data.py +++ b/cfdm/core/data/data.py @@ -1,6 +1,7 @@ import numpy as np -from .. 
import abstract +from cfdm.core import abstract + from .abstract import Array from .numpyarray import NumpyArray diff --git a/cfdm/core/docstring/docstring.py b/cfdm/core/docstring/docstring.py index 5865c8520..8bdcf3576 100644 --- a/cfdm/core/docstring/docstring.py +++ b/cfdm/core/docstring/docstring.py @@ -9,13 +9,7 @@ Replacement text may not contain other non-special substitutions. -Keys must be a `str` or `re.Pattern` object: - -* If a key is a `str` then the corresponding value must be a string. - -* If a key is a `re.Pattern` object then the corresponding value must - be a string or a callable, as accepted by the `re.Pattern.sub` - method. +A key and its corresponding value must both be `str`. .. versionaddedd:: (cfdm) 1.8.7.0 @@ -95,7 +89,10 @@ "{{init data: data_like, optional}}": """data: data_like, optional Set the data. - {{data_like}} + A data_like object is any object that can be converted + to a `Data` object, i.e. `numpy` array_like objects, + `Data` objects, and {{package}} instances that contain + `Data` objects. The data also may be set after initialisation with the `set_data` method.""", diff --git a/cfdm/core/functions.py b/cfdm/core/functions.py index e3e1423b8..3ebf22e40 100644 --- a/cfdm/core/functions.py +++ b/cfdm/core/functions.py @@ -1,9 +1,4 @@ -import os -import platform -import sys -from pickle import dumps, loads - -from . import __cf_version__, __file__, __version__ +from . import __cf_version__ def environment(display=True, paths=True): @@ -46,9 +41,15 @@ def environment(display=True, paths=True): cfdm.core: 1.12.2.0 """ + import os + import platform + import sys + import numpy as np import packaging + from . import __file__, __version__ + dependency_version_paths_mapping = { "Platform": (platform.platform(), ""), "Python": (platform.python_version(), sys.executable), @@ -104,4 +105,6 @@ def deepcopy(x): b) be "not slower, sometimes much faster" than `copy.deepcopy`. """ + from pickle import dumps, loads + return loads(dumps(x)) diff --git a/cfdm/core/meta/docstringrewrite.py b/cfdm/core/meta/docstringrewrite.py index 3bd78cf39..3a1c7b12e 100644 --- a/cfdm/core/meta/docstringrewrite.py +++ b/cfdm/core/meta/docstringrewrite.py @@ -1,8 +1,12 @@ import inspect +from re import compile -from ..functions import CF +# Count the number of docstrings (first element of +# '_docstring_substitutions'), and the number which have docstring +# substitutions applied to them (second element). +from .. import _docstring_substitutions -_VN = CF() +base = compile("{{.*?}}") class DocstringRewriteMeta(type): @@ -28,7 +32,6 @@ class DocstringRewriteMeta(type): # Based on # http://www.jesshamrick.com/2013/04/17/rewriting-python-docstrings-with-a-metaclass/ - def __new__(cls, class_name, parents, attrs): """Combines docstring substitutions across the inheritance tree. @@ -62,14 +65,6 @@ def __new__(cls, class_name, parents, attrs): if class_docstring_rewrite is not None: docstring_rewrite.update(class_docstring_rewrite(None)) - special = DocstringRewriteMeta._docstring_special_substitutions() - for key in special: - if key in docstring_rewrite: - raise ValueError( - f"Can't use {key!r} as a user-defined " - "docstring substitution." 
- ) - # ------------------------------------------------------------ # Find the package depth # ------------------------------------------------------------ @@ -380,9 +375,6 @@ def __new__(cls, class_name, parents, attrs): # ------------------------------------------------------------ return super().__new__(cls, class_name, parents, attrs) - # ---------------------------------------------------------------- - # Private methods - # ---------------------------------------------------------------- @classmethod def _docstring_special_substitutions(cls): """Return the special docstring substitutions. @@ -412,7 +404,7 @@ def _docstring_special_substitutions(cls): The special docstring substitution identifiers. """ - return ("{{class}}", "{{class_lower}}", "{{package}}", "{{VN}}") + return ("{{class}}", "{{class_lower}}", "{{package}}") @staticmethod def _docstring_substitutions(cls): @@ -434,13 +426,7 @@ def _docstring_substitutions(cls): then the latter will *not* be replaced. This restriction is to prevent the possibility of infinite recursion. - A key must be either a `str` or a `re.Pattern` object. - - If a key is a `str` then the corresponding value must be a string. - - If a key is a `re.Pattern` object then the corresponding value - must be a string or a callable, as accepted by the - `re.Pattern.sub` method. + A key and its corresponding value must both be `str`. .. versionadded:: (cfdm) 1.8.7.0 @@ -594,61 +580,93 @@ def _docstring_update( config, class_docstring=None, ): - """Performs docstring substitutions on a method at import time. + """Perform docstring substitutions. + + Docstring substitutions are applied to a class or method at + import time. .. versionadded:: (cfdm) 1.8.7.0 :Parameters: package_name: `str` + The name of the package containing the class or + method. class_name: `str` + The name of the class. - f: class method + class_name_lower: `str` + The lower case name of the class. - method_name: `str` + f: class method or `None` + The method, or `None` if a class docstring is being + updated. + + method_name: `str` or `None` + The method name, or `None` if a class docstring is + being updated. config: `dict` + A dictionary containing the general docstring + substitutions. + + class_docstring, `str` or `None` + If docstring of a class, or `None` if a method + docstring is being updated. + + :Returns: + + `str` or `None` + The updated docstring, or `None` if there is no + docstring. 
""" + _docstring_substitutions[0] += 1 + if class_docstring is not None: doc = class_docstring else: doc = f.__doc__ - if doc is None or "{{" not in doc: - return doc - # ------------------------------------------------------------ - # Do general substitutions first - # ------------------------------------------------------------ - for key, value in config.items(): - # Substitute the key for the value - try: - # Compiled regular expression substitution - doc = key.sub(value, doc) - except AttributeError: - # String substitution - doc = doc.replace(key, value) + if doc is None: + return - # ------------------------------------------------------------ - # Now do special substitutions - # ------------------------------------------------------------ - # Insert the name of the package - doc = doc.replace("{{package}}", package_name) + substitutions = base.findall(doc) + if substitutions: + _docstring_substitutions[1] += 1 - # Insert the name of the class containing this method - doc = doc.replace("{{class}}", class_name) + # Remove duplicates + substitutions = set(substitutions) - # Insert the lower case name of the class containing this method - doc = doc.replace("{{class_lower}}", class_name_lower) + # Special substitutions + if "{{package}}" in substitutions: + # Insert the name of the package + doc = doc.replace("{{package}}", package_name) - # Insert the CF version - doc = doc.replace("{{VN}}", _VN) + if "{{class}}" in substitutions: + # Insert the name of the class + doc = doc.replace("{{class}}", class_name) + + if "{{class_lower}}" in substitutions: + # Insert the lower case name of the class + doc = doc.replace("{{class_lower}}", class_name_lower) + + # General substitutions + for key in substitutions: + value = config.get(key) + if value is None: + continue + + # Do special substitutions on the value + value = value.replace("{{package}}", package_name) + value = value.replace("{{class}}", class_name) + value = value.replace("{{class_lower}}", class_name_lower) + + doc = doc.replace(key, value) - # ---------------------------------------------------------------- - # Set the rewritten docstring on the method - # ---------------------------------------------------------------- - if class_docstring is None: - f.__doc__ = doc + if class_docstring is None: + # Set the rewritten docstring on the method + f.__doc__ = doc return doc diff --git a/cfdm/data/__init__.py b/cfdm/data/__init__.py index 3914c1d19..496075ddf 100644 --- a/cfdm/data/__init__.py +++ b/cfdm/data/__init__.py @@ -1,4 +1,7 @@ -from .abstract import Array, CompressedArray, MeshArray, RaggedArray +from .abstract import Array +from .abstract import CompressedArray +from .abstract import MeshArray +from .abstract import RaggedArray from .subarray import ( BiLinearSubarray, @@ -13,6 +16,7 @@ RaggedSubarray, ) + from .subarray.abstract import MeshSubarray, Subarray, SubsampledSubarray from .aggregatedarray import AggregatedArray diff --git a/cfdm/data/abstract/array.py b/cfdm/data/abstract/array.py index 8f5fa8d4b..711a45186 100644 --- a/cfdm/data/abstract/array.py +++ b/cfdm/data/abstract/array.py @@ -1,5 +1,6 @@ -from ... import core -from ...mixin import Container +from cfdm import core +from cfdm.mixin import Container + from .. 
import mixin diff --git a/cfdm/data/abstract/filearray.py b/cfdm/data/abstract/filearray.py index b0cd1a781..99fec389b 100644 --- a/cfdm/data/abstract/filearray.py +++ b/cfdm/data/abstract/filearray.py @@ -1,12 +1,9 @@ from copy import deepcopy from os import sep from os.path import join -from urllib.parse import urlparse -from s3fs import S3FileSystem -from uritools import isuri, urisplit +from cfdm.functions import abspath, dirname -from ...functions import abspath, dirname from . import Array @@ -419,7 +416,10 @@ def get_storage_options( and "endpoint_url" not in client_kwargs ): if parsed_filename is None: + from urllib.parse import urlparse + if filename is None: + try: filename = self.get_filename(normalise=False) except AttributeError: @@ -461,6 +461,8 @@ def open(self, func, *args, **kwargs): the data within the file. """ + from urllib.parse import urlparse + filename = self.get_filename(normalise=True) url = urlparse(filename) if url.scheme == "file": @@ -468,6 +470,8 @@ def open(self, func, *args, **kwargs): filename = abspath(filename, uri=False) elif url.scheme == "s3": # Create an openable S3 file object + from s3fs import S3FileSystem + storage_options = self.get_storage_options( create_endpoint_url=True, parsed_filename=url ) @@ -532,6 +536,8 @@ def replace_directory(self, old=None, new=None, normalise=False): filename = a.get_filename(normalise=normalise) if old or new: if normalise: + from uritools import isuri, urisplit + if not old: raise ValueError( "When 'normalise' is True and 'new' is a non-empty " diff --git a/cfdm/data/aggregatedarray.py b/cfdm/data/aggregatedarray.py index 77f75ea2c..0afbf070f 100644 --- a/cfdm/data/aggregatedarray.py +++ b/cfdm/data/aggregatedarray.py @@ -2,7 +2,6 @@ from itertools import accumulate, product import numpy as np -from uritools import isuri, uricompose from ..functions import dirname from . import abstract @@ -704,6 +703,7 @@ def to_dask_array(self, chunks="auto"): import dask.array as da from dask.array.core import getter from dask.base import tokenize + from uritools import isuri, uricompose name = (f"{self.__class__.__name__}-{tokenize(self)}",) diff --git a/cfdm/data/creation.py b/cfdm/data/creation.py index 976874f5a..6fb9ca2e9 100644 --- a/cfdm/data/creation.py +++ b/cfdm/data/creation.py @@ -1,8 +1,6 @@ """Functions used during the creation of `Data` objects.""" -import dask.array as da import numpy as np -from dask.base import is_dask_collection def to_dask(array, chunks, **from_array_options): @@ -51,6 +49,8 @@ def to_dask(array, chunks, **from_array_options): dask.array """ + from dask.base import is_dask_collection + if is_dask_collection(array): return array @@ -65,6 +65,8 @@ def to_dask(array, chunks, **from_array_options): except TypeError: return array.to_dask_array() + import dask.array as da + if type(array).__module__.split(".")[0] == "xarray": data = getattr(array, "data", None) if data is not None: diff --git a/cfdm/data/data.py b/cfdm/data/data.py index 8a42fa8f5..23480aa72 100644 --- a/cfdm/data/data.py +++ b/cfdm/data/data.py @@ -6,12 +6,7 @@ from numbers import Integral from os.path import commonprefix -import dask.array as da import numpy as np -from dask.base import collections_to_expr, is_dask_collection, tokenize -from dask.optimization import cull -from netCDF4 import default_fillvals -from scipy.sparse import issparse from .. import core from ..constants import masked @@ -428,6 +423,8 @@ def __init__( return # Is the array a sparse array? 
+ from scipy.sparse import issparse + sparse_array = issparse(array) # Is the array data in memory? @@ -470,6 +467,8 @@ def __init__( pass # Deterministic name + from dask.base import is_dask_collection + self._custom["has_deterministic_name"] = not is_dask_collection(array) if self._is_abstract_Array_subclass(array): @@ -711,6 +710,8 @@ def __getitem__(self, indices): # can't do a normal dask subspace # Subspace axes which have list/1-d array indices + import dask.array as da + for axis in axes_with_list_indices: dx = da.take(dx, indices[axis], axis=axis) @@ -2067,6 +2068,8 @@ def _modify_dask_graph( updated = True if updated: + import dask.array as da + # The Dask graph was modified, so recast the dictionary # representation as a Dask array. dx = self.to_dask_array( @@ -2569,6 +2572,8 @@ def _update_deterministic(self, other): "has_deterministic_name", False ) except AttributeError: + from dask.base import is_dask_collection + custom["has_deterministic_name"] = not is_dask_collection( other ) @@ -2618,6 +2623,8 @@ def array(self): 2000-12-01 00:00:00 """ + from scipy.sparse import issparse + a = self.compute().copy() if issparse(a): a = a.toarray() @@ -3034,6 +3041,8 @@ def mask(self): (12, 73, 96) """ + import dask.array as da + mask_data_obj = self.copy(array=False) dx = self.to_dask_array( @@ -3278,6 +3287,8 @@ def sparse_array(self): True """ + from scipy.sparse import issparse + array = self.compute() if issparse(array): return array.copy() @@ -3374,6 +3385,8 @@ def all(self, axis=None, keepdims=True, split_every=None): False """ + import dask.array as da + d = self.copy(array=False) dx = self.to_dask_array( _force_mask_hardness=False, _force_to_memory=True @@ -3439,6 +3452,8 @@ def any(self, axis=None, keepdims=True, split_every=None): False """ + import dask.array as da + d = self.copy(array=False) dx = self.to_dask_array( _force_mask_hardness=False, _force_to_memory=True @@ -3648,6 +3663,8 @@ def apply_masking( mask |= dx > valid_max if mask is not None: + import dask.array as da + dx = da.ma.masked_where(mask, dx) CFA = self._CFA else: @@ -3835,6 +3852,8 @@ def compressed(self, inplace=False): [9] """ + import dask.array as da + d = _inplace_enabled_define_and_cleanup(self) dx = d.to_dask_array(_force_mask_hardness=True, _force_to_memory=True) @@ -4027,6 +4046,8 @@ def concatenate( if n_data == 1: return data0 + import dask.array as da + conformed_data = [data0] for data1 in data[1:]: # Turn any scalar array into a 1-d array @@ -4307,6 +4328,9 @@ def cull_graph(self, inplace=True): 0): } """ + import dask.array as da + from dask.optimization import cull + d = _inplace_enabled_define_and_cleanup(self) dx = d.to_dask_array( @@ -4464,6 +4488,8 @@ def empty( [ False True] #uninitialised """ + import dask.array as da + dx = da.empty(shape, dtype=dtype, chunks=chunks) return cls(dx, units=units, calendar=calendar) @@ -4546,6 +4572,8 @@ def equals( ) # pragma: no cover return False + import dask.array as da + self_dx = self.to_dask_array(_force_mask_hardness=False) other_dx = other.to_dask_array(_force_mask_hardness=False) @@ -4743,6 +4771,8 @@ def filled(self, fill_value=None, inplace=False): if fill_value is None: fill_value = d.get_fill_value(None) if fill_value is None: # still... 
+ from netCDF4 import default_fillvals + fill_value = default_fillvals.get(d.dtype.str[1:]) if fill_value is None and d.dtype.kind in ("SU"): fill_value = default_fillvals.get("S1", None) @@ -5010,6 +5040,8 @@ def full( [False False] """ + import dask.array as da + if dtype is None: # Need to explicitly set the default because dtype is not # a named keyword of da.full @@ -5298,6 +5330,8 @@ def get_deterministic_name(self): True """ + from dask.base import tokenize + if not self.has_deterministic_name(): raise ValueError() @@ -5887,6 +5921,8 @@ def masked_values(self, value, rtol=None, atol=None, inplace=False): [1.0 -- 2.0 -- 3.0] """ + import dask.array as da + d = _inplace_enabled_define_and_cleanup(self) if rtol is None: @@ -5944,6 +5980,8 @@ def masked_where(self, condition, inplace=False): [1 -- 3 -- 5] """ + import dask.array as da + d = _inplace_enabled_define_and_cleanup(self) array = cfdm_where(d.array, condition, masked, None, d.hardmask) @@ -6010,6 +6048,8 @@ def max( <{{repr}}Data(1, 1): [[11]] K> """ + import dask.array as da + d = _inplace_enabled_define_and_cleanup(self) d = collapse( da.max, @@ -6066,6 +6106,8 @@ def min(self, axes=None, squeeze=False, split_every=None, inplace=False): <{{repr}}Data(1, 1): [[0]] K> """ + import dask.array as da + d = _inplace_enabled_define_and_cleanup(self) d = collapse( da.min, @@ -6118,6 +6160,8 @@ def ones(cls, shape, dtype=None, units=None, calendar=None, chunks="auto"): [ True True] """ + import dask.array as da + dx = da.ones(shape, dtype=dtype, chunks=chunks) return cls(dx, units=units, calendar=calendar) @@ -6173,6 +6217,8 @@ def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): [3 4 5 -- --]] """ + import dask.array as da + if not 0 <= axis < self.ndim: raise ValueError( f"'axis' must be a valid dimension position. Got {axis}" @@ -6445,6 +6491,8 @@ def replace_filenames(self, filenames): `None` """ + import dask.array as da + filenames = np.ma.filled(filenames, "") if self.numblocks != filenames.shape: raise ValueError( @@ -6879,6 +6927,8 @@ def sum(self, axes=None, squeeze=False, split_every=None, inplace=False): <{{repr}}Data(1, 1): [[97.0]] K> """ + import dask.array as da + d = _inplace_enabled_define_and_cleanup(self) d = collapse( da.sum, @@ -7025,6 +7075,7 @@ def todict( # change in the future, in which case this method could break # and need refactoring (e.g. # https://github.com/dask/dask/pull/11736#discussion_r1954752842). 
+ from dask.base import collections_to_expr if optimize_graph is not None: _DEPRECATION_ERROR_KWARGS( @@ -7124,6 +7175,8 @@ def transpose(self, axes=None, inplace=False): (19, 73, 96) """ + import dask.array as da + d = _inplace_enabled_define_and_cleanup(self) ndim = d.ndim @@ -7235,6 +7288,8 @@ def unique(self, inplace=False): [1 2 3 --] """ + import dask.array as da + d = _inplace_enabled_define_and_cleanup(self) original_shape = self.shape @@ -7301,6 +7356,8 @@ def zeros( [False False] """ + import dask.array as da + dx = da.zeros(shape, dtype=dtype, chunks=chunks) return cls(dx, units=units, calendar=calendar) diff --git a/cfdm/data/fragment/fragmentfilearray.py b/cfdm/data/fragment/fragmentfilearray.py index f8bc37cac..0e4e9c07a 100644 --- a/cfdm/data/fragment/fragmentfilearray.py +++ b/cfdm/data/fragment/fragmentfilearray.py @@ -1,8 +1,7 @@ from os.path import join -from uritools import urisplit +from cfdm.functions import abspath -from ...functions import abspath from ..abstract import FileArray from ..mixin import IndexMixin from .mixin import FragmentArrayMixin @@ -225,6 +224,8 @@ def get_filename(self, normalise=False, default=AttributeError()): ) if normalise: + from uritools import urisplit + uri = urisplit(filename) # Convert the file name to an absolute URI diff --git a/cfdm/data/fragment/mixin/fragmentarraymixin.py b/cfdm/data/fragment/mixin/fragmentarraymixin.py index 9130fc5ab..d18cdc60d 100644 --- a/cfdm/data/fragment/mixin/fragmentarraymixin.py +++ b/cfdm/data/fragment/mixin/fragmentarraymixin.py @@ -2,7 +2,8 @@ import numpy as np -from ....units import Units +from cfdm.units import Units + from ...netcdfindexer import netcdf_indexer diff --git a/cfdm/data/fullarray.py b/cfdm/data/fullarray.py index bc1b34d5f..35da1bfef 100644 --- a/cfdm/data/fullarray.py +++ b/cfdm/data/fullarray.py @@ -1,6 +1,7 @@ import numpy as np -from ..functions import indices_shape, parse_indices +from cfdm.functions import indices_shape, parse_indices + from .abstract import Array from .mixin import IndexMixin from .mixin.arraymixin import array_implements diff --git a/cfdm/data/h5netcdfarray.py b/cfdm/data/h5netcdfarray.py index 30a7367b8..7e1b45de7 100644 --- a/cfdm/data/h5netcdfarray.py +++ b/cfdm/data/h5netcdfarray.py @@ -1,7 +1,5 @@ import logging -import h5netcdf - from . import abstract from .locks import netcdf_lock from .mixin import IndexMixin @@ -211,6 +209,6 @@ def open(self, **kwargs): within the file. 
""" - return super().open( - h5netcdf.File, mode="r", decode_vlen_strings=True, **kwargs - ) + from h5netcdf import File + + return super().open(File, mode="r", decode_vlen_strings=True, **kwargs) diff --git a/cfdm/data/locks.py b/cfdm/data/locks.py index f2309c14d..d159e4a65 100644 --- a/cfdm/data/locks.py +++ b/cfdm/data/locks.py @@ -1,4 +1,3 @@ -from dask.utils import SerializableLock +from threading import Lock -# Global lock for netCDFfile access -netcdf_lock = SerializableLock() +netcdf_lock = Lock() diff --git a/cfdm/data/mixin/arraymixin.py b/cfdm/data/mixin/arraymixin.py index 60615f9f0..b8eefb7e9 100644 --- a/cfdm/data/mixin/arraymixin.py +++ b/cfdm/data/mixin/arraymixin.py @@ -1,7 +1,8 @@ from copy import deepcopy import numpy as np -from cfunits import Units + +from cfdm.units import Units class ArrayMixin: diff --git a/cfdm/data/mixin/compressedarraymixin.py b/cfdm/data/mixin/compressedarraymixin.py index c61a1304a..5b97fe515 100644 --- a/cfdm/data/mixin/compressedarraymixin.py +++ b/cfdm/data/mixin/compressedarraymixin.py @@ -1,6 +1,3 @@ -import dask.array as da - - class CompressedArrayMixin: """Mixin class for compressed arrays. @@ -45,6 +42,8 @@ def _lock_file_read(self, array): except AttributeError: pass else: + import dask.array as da + array = da.from_array(array, chunks=chunks, lock=True) return array @@ -74,10 +73,12 @@ def to_dask_array(self, chunks="auto"): """ from functools import partial + import dask.array as da from dask import config - from dask.array.core import getter from dask.base import tokenize + getter = da.core.getter + from ..utils import normalize_chunks name = (f"{self.__class__.__name__}-{tokenize(self)}",) diff --git a/cfdm/data/mixin/filearraymixin.py b/cfdm/data/mixin/filearraymixin.py index 123a80c70..c62cca613 100644 --- a/cfdm/data/mixin/filearraymixin.py +++ b/cfdm/data/mixin/filearraymixin.py @@ -1,9 +1,6 @@ from copy import deepcopy -from urllib.parse import urlparse -from s3fs import S3FileSystem - -from ...functions import abspath +from cfdm.functions import abspath class DeprecationError(Exception): @@ -284,6 +281,8 @@ def get_storage_options( and "endpoint_url" not in client_kwargs ): if parsed_filename is None: + from urllib.parse import urlparse + if filename is None: try: filename = self.get_filename() @@ -328,6 +327,8 @@ def open(self, func, *args, **kwargs): """ # Loop round the files, returning as soon as we find one that # works. + from urllib.parse import urlparse + filenames = self.get_filenames() for filename, address in zip(filenames, self.get_addresses()): url = urlparse(filename) @@ -335,6 +336,8 @@ def open(self, func, *args, **kwargs): # Convert a file URI into an absolute local path filename = abspath(filename, uri=False) elif url.scheme == "s3": + from s3fs import S3FileSystem + # Create an openable S3 file object storage_options = self.get_storage_options( create_endpoint_url=True, parsed_filename=url diff --git a/cfdm/data/mixin/indexmixin.py b/cfdm/data/mixin/indexmixin.py index 739fe9068..c2cdfcb0f 100644 --- a/cfdm/data/mixin/indexmixin.py +++ b/cfdm/data/mixin/indexmixin.py @@ -1,10 +1,8 @@ from numbers import Integral import numpy as np -from dask.array.slicing import normalize_index -from dask.base import is_dask_collection -from ...functions import indices_shape, parse_indices +from cfdm.functions import indices_shape, parse_indices class IndexMixin: @@ -103,6 +101,8 @@ def __getitem__(self, index): The subspaced data. 
""" + from dask.base import is_dask_collection + shape0 = self.shape index0 = self.index(conform=False) reference_shape = list(self.reference_shape) @@ -375,6 +375,8 @@ def index(self, conform=True): # 2) Converting, where possible, sequences of integers to # slices. This helps when the parent class can't cope with # indices that are sequences of integers. + from dask.array.slicing import normalize_index + ind = list(ind) for n, (i, size) in enumerate(zip(ind[:], self.original_shape)): if isinstance(i, slice): diff --git a/cfdm/data/netcdf4array.py b/cfdm/data/netcdf4array.py index 57a796c4d..5f515da71 100644 --- a/cfdm/data/netcdf4array.py +++ b/cfdm/data/netcdf4array.py @@ -1,5 +1,3 @@ -import netCDF4 - from . import abstract from .locks import netcdf_lock from .mixin import IndexMixin @@ -221,4 +219,6 @@ def open(self): address of the data within the file. """ + import netCDF4 + return super().open(netCDF4.Dataset, mode="r") diff --git a/cfdm/data/netcdfindexer.py b/cfdm/data/netcdfindexer.py index dd873a374..8a2d4cd0a 100644 --- a/cfdm/data/netcdfindexer.py +++ b/cfdm/data/netcdfindexer.py @@ -3,20 +3,20 @@ Portions of this code were adapted from the `netCDF4` Python library, which carries the following MIT License: -Copyright 2008 Jeffrey Whitaker + Copyright 2008 Jeffrey Whitaker -https://opensource.org/license/mit + https://opensource.org/license/mit -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. """ @@ -25,9 +25,6 @@ from numbers import Integral import numpy as np -from dask.array.slicing import normalize_index -from netCDF4 import chartostring, default_fillvals -from netCDF4.utils import _safecast logger = logging.getLogger(__name__) @@ -275,6 +272,8 @@ def __getitem__(self, index): elif data.dtype.kind in "OSU": kind = data.dtype.kind if kind == "S": + from netCDF4 import chartostring + data = chartostring(data) # Assume that object arrays are arrays of strings @@ -365,6 +364,8 @@ def _check_safecast(self, attr, dtype, attributes): except ValueError: safe = False else: + from netCDF4.utils import _safecast + safe = _safecast(att, atta) if not safe: @@ -392,6 +393,8 @@ def _default_FillValue(self, dtype): The default ``_FillValue``. """ + from netCDF4 import default_fillvals + if dtype.kind in "OS": return default_fillvals["S1"] @@ -426,6 +429,8 @@ def _index(self, index, data=None): if index is Ellipsis: return data[...] 
+ from dask.array.slicing import normalize_index + index = normalize_index(index, data.shape) # Find the positions of any list/1-d array indices (which by diff --git a/cfdm/data/subarray/abstract/subsampledsubarray.py b/cfdm/data/subarray/abstract/subsampledsubarray.py index 4ff535035..d4d879793 100644 --- a/cfdm/data/subarray/abstract/subsampledsubarray.py +++ b/cfdm/data/subarray/abstract/subsampledsubarray.py @@ -1,6 +1,7 @@ import numpy as np -from ....core.utils import cached_property +from cfdm.core.utils import cached_property + from .subarray import Subarray diff --git a/cfdm/data/subarray/cellconnectivitysubarray.py b/cfdm/data/subarray/cellconnectivitysubarray.py index 4fde29184..8dc83b3ff 100644 --- a/cfdm/data/subarray/cellconnectivitysubarray.py +++ b/cfdm/data/subarray/cellconnectivitysubarray.py @@ -1,6 +1,5 @@ import numpy as np -from ...functions import integer_dtype from .abstract import MeshSubarray @@ -30,6 +29,8 @@ def __getitem__(self, indices): .. versionadded:: (cfdm) 1.11.0.0 """ + from cfdm.functions import integer_dtype + start_index = self.start_index shape = self.shape start = 0 diff --git a/cfdm/data/subarray/mixin/pointtopology.py b/cfdm/data/subarray/mixin/pointtopology.py index 9d83ac6cc..4567b3ecc 100644 --- a/cfdm/data/subarray/mixin/pointtopology.py +++ b/cfdm/data/subarray/mixin/pointtopology.py @@ -1,7 +1,5 @@ import numpy as np -from ....functions import integer_dtype - class PointTopology: """Mixin class for point topology array compressed by UGRID. @@ -24,6 +22,8 @@ def __getitem__(self, indices): from scipy.sparse import csr_array + from cfdm.functions import integer_dtype + start_index = self.start_index node_connectivity = self._select_data(check_mask=False) diff --git a/cfdm/data/subsampledarray.py b/cfdm/data/subsampledarray.py index f1b0fff9e..97c720516 100644 --- a/cfdm/data/subsampledarray.py +++ b/cfdm/data/subsampledarray.py @@ -4,7 +4,8 @@ import numpy as np -from ..core.utils import cached_property +from cfdm.core.utils import cached_property + from .abstract import CompressedArray from .mixin import CompressedArrayMixin from .netcdfindexer import netcdf_indexer diff --git a/cfdm/data/utils.py b/cfdm/data/utils.py index fc0185da3..f77ba7dfb 100644 --- a/cfdm/data/utils.py +++ b/cfdm/data/utils.py @@ -3,12 +3,9 @@ from functools import lru_cache, partial from itertools import product -import cftime -import dask.array as da import numpy as np -from dask.core import flatten -from ..units import Units +from cfdm.units import Units _default_calendar = "standard" @@ -54,6 +51,8 @@ def allclose(x, y, masked_equal=True, rtol=None, atol=None): *atol* tolerance. """ + import dask.array as da + if rtol is None or atol is None: raise ValueError( "Must provide numeric values for the rtol and atol keywords" @@ -64,6 +63,8 @@ def allclose(x, y, masked_equal=True, rtol=None, atol=None): # Dask's internal algorithms require these to be set as parameters. def allclose(a_blocks, b_blocks, rtol=rtol, atol=atol): """Run `ma.allclose` across multiple blocks over two arrays.""" + from dask.core import flatten + result = True # Handle scalars, including 0-d arrays, for which a_blocks and # b_blocks will have the corresponding type and hence not be iterable. 
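The two hunks above show the pattern that this diff applies across the whole package: module-level imports of heavy dependencies (`dask`, `scipy`, `cftime`, `netCDF4`, `h5netcdf`, `s3fs`, `uritools`) are moved inside the functions and methods that actually use them, so that ``import cfdm`` no longer pays for them up front. Because Python caches imported modules in `sys.modules`, only the first call to each function pays the one-off import cost. A minimal, self-contained sketch of the idea, where `lazy_mean` is a hypothetical helper and not part of cfdm::

    import sys


    def lazy_mean(values):
        """Mean via dask, importing it only when first needed."""
        # Deferred import: a real import the first time, a cheap
        # sys.modules lookup on every call after that
        import dask.array as da

        return float(da.from_array(values, chunks=2).mean().compute())


    assert "dask" not in sys.modules  # no import cost paid yet
    print(lazy_mean([1.0, 2.0, 3.0, 4.0]))  # first call triggers the import

A trade-off of this style is that an `ImportError` for a missing dependency now surfaces at first use rather than at ``import cfdm`` time.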
@@ -336,6 +337,8 @@ def convert_to_reftime(a, units=None, first_value=None): if first_value is not None: x = first_value else: + import cftime + x = cftime.DatetimeGregorian(1970, 1, 1) x_since = "days since " + "-".join(map(str, (x.year, x.month, x.day))) @@ -453,6 +456,8 @@ def first_non_missing_value(a, cached=None, method="index"): return if method == "mask": + import dask.array as da + mask = da.ma.getmaskarray(a) if not a.ndim: # Scalar data @@ -789,6 +794,8 @@ def dt2rt(array, units_out): [-- 685.5] """ + import cftime + isscalar = not np.ndim(array) array = cftime.date2num( @@ -842,6 +849,8 @@ def rt2dt(array, units_in): # mask return np.ma.masked_all((), dtype=object) + import cftime + units = units_in.units calendar = getattr(units_in, "calendar", "standard") @@ -886,6 +895,8 @@ def st2datetime(date_string, calendar=None): `cftime.datetime` """ + import cftime + if date_string.count("-") != 2: raise ValueError( "Input date-time string must contain at least a year, a month " diff --git a/cfdm/data/zarrarray.py b/cfdm/data/zarrarray.py index 194228903..cfa1ccf5c 100644 --- a/cfdm/data/zarrarray.py +++ b/cfdm/data/zarrarray.py @@ -1,6 +1,5 @@ from . import abstract from .mixin import IndexMixin -from .netcdfindexer import netcdf_indexer class ZarrArray(IndexMixin, abstract.FileArray): @@ -27,6 +26,8 @@ def _get_array(self, index=None): The subspace. """ + from .netcdfindexer import netcdf_indexer + if index is None: index = self.index() diff --git a/cfdm/docstring/docstring.py b/cfdm/docstring/docstring.py index 0ddcd32e3..8892b8ffe 100644 --- a/cfdm/docstring/docstring.py +++ b/cfdm/docstring/docstring.py @@ -9,13 +9,7 @@ Replacement text may not contain other non-special substitutions. -Keys must be a `str` or `re.Pattern` object: - -* If a key is a `str` then the corresponding value must be a string. - -* If a key is a `re.Pattern` object then the corresponding value must - be a string or a callable, as accepted by the `re.Pattern.sub` - method. +A key and its corresponding value must both be `str`. .. versionaddedd:: (cfdm) 1.8.7.0 @@ -974,7 +968,7 @@ parts are not set. Can't be used with the *update* parameter.""", # update - "{{update: (sequence of) `str`, optional}": """update: (sequence of) `str`, optional + "{{update: (sequence of) `str`, optional}}": """update: (sequence of) `str`, optional Add these original file names to those already stored. The original file names of any constituent parts are not updated. Can't be used with the *define* diff --git a/cfdm/functions.py b/cfdm/functions.py index 799cb7b93..4c515f987 100644 --- a/cfdm/functions.py +++ b/cfdm/functions.py @@ -1,5 +1,6 @@ import logging import os +import sys from copy import deepcopy from functools import total_ordering from math import isnan @@ -10,13 +11,9 @@ from os.path import join import numpy as np -from dask import config as _config -from dask.base import is_dask_collection -from dask.utils import parse_bytes -from uritools import uricompose, urisplit from . import __cf_version__, __file__, __version__, core -from .constants import CONSTANTS, ValidLogLevels +from .constants import ValidLogLevels from .core import DocstringRewriteMeta from .core.docstring import ( _docstring_substitution_definitions as _core_docstring_substitution_definitions, @@ -222,13 +219,6 @@ def _configuration(_Configuration, **kwargs): values are specified. """ - old = {name.lower(): val for name, val in CONSTANTS.items()} - - # Filter out 'None' kwargs from configuration() defaults. 
Note that this - # does not filter out '0' or 'True' values, which is important as the user - # might be trying to set those, as opposed to None emerging as default. - kwargs = {name: val for name, val in kwargs.items() if val is not None} - # Note values are the functions not the keyword arguments of same name: reset_mapping = { "new_atol": atol, @@ -237,6 +227,17 @@ def _configuration(_Configuration, **kwargs): "new_chunksize": chunksize, } + # Make sure that the constants dictionary is fully populated + for func in reset_mapping.values(): + func() + + old = ConstantAccess.constants(copy=True) + + # Filter out 'None' kwargs from configuration() defaults. Note that this + # does not filter out '0' or 'True' values, which is important as the user + # might be trying to set those, as opposed to None emerging as default. + kwargs = {name: val for name, val in kwargs.items() if val is not None} + old_values = {} try: @@ -582,6 +583,8 @@ def abspath(path, uri=None): ValueError: Can't set uri=False for path='http:///file.nc' """ + from uritools import uricompose, urisplit + u = urisplit(path) scheme = u.scheme path = u.path @@ -728,6 +731,8 @@ def dirname(path, normalise=False, uri=None, isdir=False, sep=False): '/data' """ + from uritools import uricompose, urisplit + u = urisplit(path) scheme = u.scheme path = u.path @@ -1465,44 +1470,52 @@ def copy(self): class ConstantAccess(metaclass=DocstringRewriteMeta): - '''Base class to act as a function accessing package-wide constants. + """Base class to act as a function accessing package-wide constants. Subclasses must implement or inherit a method called `_parse` as - follows: + follows:: def _parse(cls, arg): - """Parse a new constant value. + '''Parse a new constant value. - :Parameter: + :Parameter: - cls: - This class. + cls: + This class. - arg: - The given new constant value. + arg: + The given new constant value. - :Returns: + :Returns: - A version of the new constant value suitable for - insertion into the `CONSTANTS` dictionary. + A version of the new constant value suitable for + insertion into the `_constants` dictionary. - """ + ''' - ''' + """ - # Define the dictionary that stores the constant values - _CONSTANTS = CONSTANTS + # Define the dictionary that stores all constant values. + # + # Sublasses must re-define this as an empty dictionary (unless + # it's OK for the child to modify the parent's dictionary). 
+ _constants = {} - # Define the `Constant` object that contains a constant value + # Define the `Constant` class that contains a constant value _Constant = Constant - # Define the key of the _CONSTANTS dictionary that contains the + # Define the key of the `_constants` dictionary that contains the # constant value _name = None + # Define the default value of the constant + _default = None + def __new__(cls, *arg): """Return a `Constant` instance during class creation.""" - old = cls._CONSTANTS[cls._name] + name = cls._name + constants = cls.constants(copy=False) + old = constants.setdefault(name, cls._default) if arg: arg = arg[0] try: @@ -1511,7 +1524,7 @@ def __new__(cls, *arg): except AttributeError: pass - cls._CONSTANTS[cls._name] = cls._parse(cls, arg) + constants[name] = cls._parse(cls, arg) return cls._Constant(old, _func=cls) @@ -1546,6 +1559,15 @@ def __docstring_package_depth__(self): """ return 0 + @classmethod + def constants(cls, copy=True): + """See docstring to `ConstantAccess`.""" + out = cls._constants + if copy: + out = out.copy() + + return out + class atol(ConstantAccess): """The numerical equality tolerance on absolute differences. @@ -1608,7 +1630,8 @@ class atol(ConstantAccess): """ - _name = "ATOL" + _name = "atol" + _default = sys.float_info.epsilon def _parse(cls, arg): """Parse a new constant value. @@ -1626,7 +1649,7 @@ def _parse(cls, arg): :Returns: A version of the new constant value suitable for - insertion into the `CONSTANTS` dictionary. + insertion into the `_constants` dictionary. """ return float(arg) @@ -1693,7 +1716,8 @@ class rtol(ConstantAccess): """ - _name = "RTOL" + _name = "rtol" + _default = sys.float_info.epsilon def _parse(cls, arg): """Parse a new constant value. @@ -1710,8 +1734,8 @@ def _parse(cls, arg): :Returns: - A version of the new constant value suitable for insertion - into the `CONSTANTS` dictionary. + A version of the new constant value suitable for + insertion into the `_constants` dictionary. """ return float(arg) @@ -1778,7 +1802,8 @@ class chunksize(ConstantAccess): """ - _name = "CHUNKSIZE" + _name = "chunksize" + _default = 134217728 # 134217728 = 128 MiB def _parse(cls, arg): """Parse a new constant value. @@ -1795,12 +1820,16 @@ def _parse(cls, arg): :Returns: - A version of the new constant value suitable for insertion - into the `CONSTANTS` dictionary. + A version of the new constant value suitable for + insertion into the `_constants` dictionary. """ - _config.set({"array.chunk-size": arg}) - return parse_bytes(arg) + from dask import config + from dask.utils import parse_bytes + + arg = parse_bytes(arg) + config.set({"array.chunk-size": arg}) + return arg class log_level(ConstantAccess): @@ -1875,7 +1904,8 @@ class log_level(ConstantAccess): """ - _name = "LOG_LEVEL" + _name = "log_level" + _default = logging.getLevelName(logging.getLogger().level) # Define the valid log levels _ValidLogLevels = ValidLogLevels @@ -1906,8 +1936,8 @@ def _parse(cls, arg): :Returns: - A version of the new constant value suitable for insertion - into the `CONSTANTS` dictionary. + A version of the new constant value suitable for + insertion into the `_constants` dictionary. 
""" # Ensuring it is a valid level specifier to set & use, either @@ -2230,6 +2260,8 @@ def indices_shape(indices, full_shape, keepdims=True): [] """ + from dask.base import is_dask_collection + shape = [] # i = 0 for index, full_size in zip(indices, full_shape): diff --git a/cfdm/mixin/fielddomain.py b/cfdm/mixin/fielddomain.py index 06b84bd5d..ed5363c57 100644 --- a/cfdm/mixin/fielddomain.py +++ b/cfdm/mixin/fielddomain.py @@ -1,5 +1,4 @@ import logging -import re from ..decorators import _manage_log_level_via_verbosity @@ -480,6 +479,8 @@ def _unique_construct_names(self): 'domainaxis2': 'key%domainaxis2'} """ + import re + key_to_name = {} ignore = self.constructs._ignore @@ -517,6 +518,8 @@ def _unique_domain_axis_identities(self): 'domainaxis2': 'time(1)'} """ + import re + key_to_name = {} name_to_keys = {} diff --git a/cfdm/mixin/netcdf.py b/cfdm/mixin/netcdf.py index 426c05754..eb02eaed5 100644 --- a/cfdm/mixin/netcdf.py +++ b/cfdm/mixin/netcdf.py @@ -1,8 +1,6 @@ from numbers import Integral from re import split -from dask.utils import parse_bytes - from ..core.functions import deepcopy from ..functions import _DEPRECATION_ERROR_METHOD @@ -2729,6 +2727,8 @@ def nc_set_dataset_chunksizes(self, chunksizes): ] if chunksizes != "contiguous": + from dask.utils import parse_bytes + try: chunksizes = parse_bytes(chunksizes) except ValueError: diff --git a/cfdm/read_write/abstract/readwrite.py b/cfdm/read_write/abstract/readwrite.py index d9558583d..afc61557e 100644 --- a/cfdm/read_write/abstract/readwrite.py +++ b/cfdm/read_write/abstract/readwrite.py @@ -1,8 +1,8 @@ from collections.abc import Iterable -from ...cfdmimplementation import implementation -from ...core import DocstringRewriteMeta -from ...docstring import _docstring_substitution_definitions +from cfdm.cfdmimplementation import implementation +from cfdm.core import DocstringRewriteMeta +from cfdm.docstring import _docstring_substitution_definitions class ReadWrite(metaclass=DocstringRewriteMeta): diff --git a/cfdm/read_write/netcdf/constants.py b/cfdm/read_write/netcdf/constants.py index e29b04dce..a0e1c972e 100644 --- a/cfdm/read_write/netcdf/constants.py +++ b/cfdm/read_write/netcdf/constants.py @@ -1,4 +1,4 @@ -from ...quantization import Quantization +from cfdm.quantization import Quantization CODE0 = { # Physically meaningful and corresponding to constructs diff --git a/cfdm/read_write/netcdf/flatten/flatten.py b/cfdm/read_write/netcdf/flatten/flatten.py index 1ca120f25..2a4c16c5e 100644 --- a/cfdm/read_write/netcdf/flatten/flatten.py +++ b/cfdm/read_write/netcdf/flatten/flatten.py @@ -13,9 +13,7 @@ """ -import hashlib import logging -import re import warnings from .config import ( @@ -147,6 +145,7 @@ def parse_attribute(name, attribute): The parsed string. """ + import re def subst(s): """Substitute tokens for WORD and SEP.""" @@ -1616,6 +1615,8 @@ def generate_flattened_name(self, input_group, orig_name): The new valid name of the dimension or variable. 
""" + import hashlib + # If element is at root: no change if self.parent(input_group) is None: new_name = orig_name diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 40bb277ad..6942847f2 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1,6 +1,5 @@ import logging import operator -import re import struct import subprocess import tempfile @@ -14,17 +13,12 @@ from typing import Any from uuid import uuid4 -import netCDF4 import numpy as np -from dask.array.core import normalize_chunks -from dask.base import tokenize -from packaging.version import Version -from s3fs import S3FileSystem -from uritools import urisplit - -from ...data.netcdfindexer import netcdf_indexer -from ...decorators import _manage_log_level_via_verbosity -from ...functions import abspath, is_log_level_debug, is_log_level_detail + +from cfdm.data.netcdfindexer import netcdf_indexer +from cfdm.decorators import _manage_log_level_via_verbosity +from cfdm.functions import abspath, is_log_level_debug, is_log_level_detail + from .. import IORead from ..exceptions import DatasetTypeError, ReadError from .constants import ( @@ -516,6 +510,8 @@ def file_open(self, dataset, flatten=True, verbose=None): >>> r.file_open('file.nc') """ + from uritools import urisplit + g = self.read_vars netcdf_backend = g["netcdf_backend"] @@ -539,6 +535,7 @@ def file_open(self, dataset, flatten=True, verbose=None): # -------------------------------------------------------- # A file in an S3 object store # -------------------------------------------------------- + from dask.base import tokenize # Create an openable S3 file object fs_key = tokenize(("s3", storage_options)) @@ -547,6 +544,8 @@ def file_open(self, dataset, flatten=True, verbose=None): if file_system is None: # An S3 file system with these options does not exist, # so create one. + from s3fs import S3FileSystem + file_system = S3FileSystem(**storage_options) file_systems[fs_key] = file_system @@ -598,6 +597,8 @@ def file_open(self, dataset, flatten=True, verbose=None): # If the file has a group structure then flatten it (CF>=1.8) # ------------------------------------------------------------ if flatten and self._dataset_has_groups(nc): + import netCDF4 + # Create a diskless, non-persistent container for the # flattened file flat_file = tempfile.NamedTemporaryFile( @@ -648,6 +649,8 @@ def _open_netCDF4(self, filename): `netCDF4.Dataset` """ + import netCDF4 + nc = netCDF4.Dataset(filename, "r") self.read_vars["file_opened_with"] = "netCDF4" return nc @@ -825,6 +828,10 @@ def dataset_type(cls, dataset, allowed_dataset_types): * `None` for anything else. """ + import re + + from uritools import urisplit + # Assume that non-local URIs are netCDF or zarr u = urisplit(dataset) if u.scheme not in (None, "file"): @@ -909,6 +916,8 @@ def default_netCDF_fill_value(self, ncvar): 9.969209968386869e+36 """ + import netCDF4 + data_type = self.read_vars["variables"][ncvar].dtype.str[-2:] return netCDF4.default_fillvals[data_type] @@ -1074,6 +1083,10 @@ def read( The field or domain constructs in the file. """ + import re + + from packaging.version import Version + debug = is_log_level_debug(logger) # ------------------------------------------------------------ @@ -7231,6 +7244,8 @@ def _parse_cell_methods(self, cell_methods_string, field_ncvar=None): ... 
't: mean over ENSO years)') """ + import re + if field_ncvar: attribute = {field_ncvar + ":cell_methods": cell_methods_string} @@ -9475,6 +9490,7 @@ def _parse_x( # ============================================================ # Thanks to Alan Iwi for creating these regular expressions # ============================================================ + import re def subst(s): """Substitutes WORD and SEP tokens for regular expressions. @@ -11363,6 +11379,8 @@ def _dask_chunks(self, array, ncvar, compressed, construct_type=None): # storage-aligned: (50, 100, 150, 20, 5) 75000000 # -------------------------------------------------------- # 1) Initialise the Dask chunk shape + from dask.array.core import normalize_chunks + dask_chunks = normalize_chunks( "auto", shape=array.shape, dtype=array.dtype ) @@ -11732,6 +11750,8 @@ def _cache_data_elements(self, data, ncvar): # collapse (by concatenation) the outermost (fastest # varying) dimension. E.g. [['a','b','c']] becomes # ['abc'] + import netCDF4 + if dtype.kind == "U": value = value.astype("S") @@ -11932,7 +11952,7 @@ def _set_quantization(self, parent, ncvar): :Returns: - `None`ppp + `None` """ g = self.read_vars diff --git a/cfdm/read_write/netcdf/netcdfwrite.py b/cfdm/read_write/netcdf/netcdfwrite.py index a8f946546..7ffee840e 100644 --- a/cfdm/read_write/netcdf/netcdfwrite.py +++ b/cfdm/read_write/netcdf/netcdfwrite.py @@ -1,20 +1,13 @@ import copy import logging import os -import re -import dask.array as da -import netCDF4 import numpy as np -from dask import config as dask_config -from dask.array.core import normalize_chunks -from dask.utils import parse_bytes -from packaging.version import Version -from uritools import uricompose, urisplit - -from ...data.dask_utils import cfdm_to_memory -from ...decorators import _manage_log_level_via_verbosity -from ...functions import abspath, dirname, integer_dtype + +from cfdm.data.dask_utils import cfdm_to_memory +from cfdm.decorators import _manage_log_level_via_verbosity +from cfdm.functions import abspath, dirname, integer_dtype + from .. import IOWrite from .constants import ( CF_QUANTIZATION_PARAMETER_LIMITS, @@ -2824,6 +2817,8 @@ def _write_netcdf_variable( if quantize_on_write: # Set "implemention" to this version of the netCDF-C # library + import netCDF4 + self.implementation.set_parameter( q, "implementation", @@ -3190,6 +3185,8 @@ def _write_data( # ------------------------------------------------------------ # Still here? The write a normal (non-aggregation) variable # ------------------------------------------------------------ + import dask.array as da + if compressed: # Write data in its compressed form data = data.source().source() @@ -3368,6 +3365,8 @@ def _write_field_or_domain( `None` """ + import re + g = self.write_vars ncdim_size_to_spanning_constructs = [] seen = g["seen"] @@ -4469,6 +4468,8 @@ def _write_global_attributes(self, fields): `None` """ + import re + g = self.write_vars # ------------------------------------------------------------ @@ -4663,6 +4664,8 @@ def file_open(self, filename, mode, fmt, fields): A `netCDF4.Dataset` object for the file. """ + import netCDF4 + if fields and mode == "w": filename = os.path.abspath(filename) for f in fields: @@ -4969,6 +4972,8 @@ def write( See `cfdm.write` for examples. 
""" + from packaging.version import Version + logger.info(f"Writing to {fmt}") # pragma: no cover # Expand file name @@ -5109,6 +5114,8 @@ def write( # Parse the 'dataset_chunks' parameter if dataset_chunks != "contiguous": + from dask.utils import parse_bytes + try: self.write_vars["dataset_chunks"] = parse_bytes(dataset_chunks) except (ValueError, AttributeError): @@ -5313,6 +5320,8 @@ def _file_io_iteration( group, ): """Perform a file-writing iteration with the given settings.""" + from packaging.version import Version + # ------------------------------------------------------------ # Initiate file IO with given write variables # ------------------------------------------------------------ @@ -5678,6 +5687,9 @@ def _chunking_parameters(self, data, ncdimensions): d_dtype = d.dtype dtype = g["datatype"].get(d_dtype, d_dtype) + from dask import config as dask_config + from dask.array.core import normalize_chunks + with dask_config.set({"array.chunk-size": dataset_chunks}): chunksizes = normalize_chunks("auto", shape=d.shape, dtype=dtype) @@ -6196,6 +6208,8 @@ def _cfa_fragment_array_variables(self, data, cfvar): out = {"map": type(data)(aggregation_shape)} if data.nc_get_aggregation_fragment_type() == "uri": + from uritools import uricompose, urisplit + # -------------------------------------------------------- # Create 'uris' and 'idenftifiers' arrays # -------------------------------------------------------- @@ -6340,6 +6354,8 @@ def _cfa_fragment_array_variables(self, data, cfvar): # dimensions with one value per fragment. If a chunk has # more than one unique value then the fragment's value is # missing data. + import dask.array as da + dx = data.to_dask_array( _force_mask_hardness=False, _force_to_memory=False ) diff --git a/cfdm/read_write/read.py b/cfdm/read_write/read.py index 350a46e99..6899ada3e 100644 --- a/cfdm/read_write/read.py +++ b/cfdm/read_write/read.py @@ -4,10 +4,9 @@ from os import walk from os.path import expanduser, expandvars, isdir, join -from uritools import urisplit +from cfdm.decorators import _manage_log_level_via_verbosity +from cfdm.functions import abspath, is_log_level_info -from ..decorators import _manage_log_level_via_verbosity -from ..functions import abspath, is_log_level_info from .abstract import ReadWrite from .exceptions import DatasetTypeError from .netcdf import NetCDFRead @@ -342,6 +341,8 @@ def _datasets(self): return + from uritools import urisplit + if followlinks and not recursive: raise ValueError( f"Can only set followlinks={followlinks!r} when " diff --git a/cfdm/test/test_docstring.py b/cfdm/test/test_docstring.py index 10b022d4e..bf7e91fca 100644 --- a/cfdm/test/test_docstring.py +++ b/cfdm/test/test_docstring.py @@ -279,6 +279,11 @@ def test_docstring_docstring_substitutions(self): self.assertIsInstance(d, dict) self.assertIn("{{repr}}", d) + # Check that the special substitutions have not been + # overwritten + for key in x._docstring_special_substitutions(): + self.assertNotIn(key, d) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cfdm/units.py b/cfdm/units.py index fbcbf565b..c5d598d2b 100644 --- a/cfdm/units.py +++ b/cfdm/units.py @@ -1,15 +1,7 @@ -from ctypes.util import find_library - from cfunits import Units as cfUnits from .core.meta import DocstringRewriteMeta -_libpath = find_library("udunits2") -if _libpath is None: - raise FileNotFoundError( - "cfdm UNIDATA UDUNITS-2. Can't find the 'udunits2' library." 
- ) - class Units(metaclass=DocstringRewriteMeta): """Store, combine, compare, and convert physical units.