diff --git a/argopy/extensions/canyon_b.py b/argopy/extensions/canyon_b.py
index 056a3cbde..d75fc51ad 100644
--- a/argopy/extensions/canyon_b.py
+++ b/argopy/extensions/canyon_b.py
@@ -38,7 +38,8 @@ def decorator(func):
     delayed = None
 
 from ..errors import InvalidDatasetStructure, DataNotFound
-from ..utils import path2assets, to_list, point_in_polygon
+from ..utils import to_list, point_in_polygon
+from argopy.utils.assets import Asset
 from . import register_argo_accessor, ArgoAccessorExtension
 
 
@@ -170,9 +171,6 @@ def __init__(self, *args, **kwargs):
         if self._argo.N_POINTS == 0:
             raise DataNotFound("Empty dataset, no data to transform !")
 
-        self.path2coef = Path(path2assets).joinpath(
-            "canyon-b"
-        )  # Path to CANYON-B assets
 
     def get_param_attrs(self, param: str) -> dict:
         """
@@ -447,22 +445,16 @@ def load_weights(self, param: str) -> pd.DataFrame:
 
         Returns
         -------
-        pd.DataFrame
+        :class:`pandas.DataFrame`
             DataFrame containing the neural network weights for the specified parameter.
         """
         if param in ["AT", "pCO2", "NO3", "PO4", "SiOH4"]:
-            weights = pd.read_csv(
-                self.path2coef.joinpath(f"wgts_{param}.txt"), header=None, sep="\t"
-            )
+            weights = Asset.load(f"canyon-b:wgts_{param}.txt", header=None, sep="\t")
         elif param == "DIC":
-            weights = pd.read_csv(
-                self.path2coef.joinpath("wgts_CT.txt"), header=None, sep="\t"
-            )
+            weights = Asset.load("canyon-b:wgts_CT.txt", header=None, sep="\t")
         else:
-            weights = pd.read_csv(
-                self.path2coef.joinpath("wgts_pH.txt"), header=None, sep="\t"
-            )
+            weights = Asset.load("canyon-b:wgts_pH.txt", header=None, sep="\t")
         return weights
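Reviewer note (not part of the patch): the CANYON-B weight files live under the "canyon-b" sub-folder of the static assets, hence the "canyon-b:" prefix in the calls above (the same prefix is exercised by test_utils_assets.py further down). A sketch of how the new loader resolves such a name, mirroring Asset._load in argopy/utils/assets.py below:

    import importlib
    from pathlib import Path

    # Resolve the static assets folder, as Asset.__init__ does:
    path2assets = importlib.util.find_spec("argopy.static.assets").submodule_search_locations[0]

    # 'canyon-b:wgts_AT.txt' -> <assets>/canyon-b/wgts_AT.txt
    parts = "canyon-b:wgts_AT.txt".strip().split(":")
    path = Path(path2assets).joinpath(*parts)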
diff --git a/argopy/extensions/canyon_med.py b/argopy/extensions/canyon_med.py
index 72f78084d..dc5443695 100644
--- a/argopy/extensions/canyon_med.py
+++ b/argopy/extensions/canyon_med.py
@@ -5,7 +5,8 @@
 from typing import Union, List
 
 from ..errors import InvalidDatasetStructure, DataNotFound
-from ..utils import path2assets, to_list
+from ..utils import to_list
+from argopy.utils.assets import Asset
 from . import register_argo_accessor, ArgoAccessorExtension
 
 
@@ -78,7 +79,6 @@ def __init__(self, *args, **kwargs):
             raise DataNotFound("Empty dataset, no data to transform !")
 
         self.n_list = 5
-        self.path2coef = Path(path2assets).joinpath("canyon-med")
         self._input = None  # Private CANYON-MED input dataframe
 
     @property
@@ -186,50 +186,21 @@ def isin_medsea(row):
 
     def load_normalisation_factors(self, param, subset="F"):
         suff = self.param2suff(param)
-        moy_sub = pd.read_table(
-            self.path2coef.joinpath("moy_%s_%s.txt" % (suff, subset)),
-            sep=" {3}",
-            header=None,
-            engine="python",
-        ).values
-        std_sub = pd.read_table(
-            self.path2coef.joinpath("std_%s_%s.txt" % (suff, subset)),
-            sep=" {3}",
-            header=None,
-            engine="python",
-        ).values
+        moy_sub = Asset.load(f"canyon-med:moy_{suff}_{subset}.txt", sep=" {3}", header=None, engine="python").values
+        std_sub = Asset.load(f"canyon-med:std_{suff}_{subset}.txt", sep=" {3}", header=None, engine="python").values
+
         return moy_sub, std_sub
 
     def load_weights(self, param, subset, i):
         suff = self.param2suff(param)
-        b1 = pd.read_csv(
-            self.path2coef.joinpath("poids_%s_b1_%s_%i.txt" % (suff, subset, i)),
-            header=None,
-        )
-        b2 = pd.read_csv(
-            self.path2coef.joinpath("poids_%s_b2_%s_%i.txt" % (suff, subset, i)),
-            header=None,
-        )
-        b3 = pd.read_csv(
-            self.path2coef.joinpath("poids_%s_b3_%s_%i.txt" % (suff, subset, i)),
-            header=None,
-        )
-        IW = pd.read_csv(
-            self.path2coef.joinpath("poids_%s_IW_%s_%i.txt" % (suff, subset, i)),
-            sep=r"\s+",
-            header=None,
-        )
-        LW1 = pd.read_csv(
-            self.path2coef.joinpath("poids_%s_LW1_%s_%i.txt" % (suff, subset, i)),
-            sep=r"\s+",
-            header=None,
-        )
-        LW2 = pd.read_csv(
-            self.path2coef.joinpath("poids_%s_LW2_%s_%i.txt" % (suff, subset, i)),
-            sep=r"\s+",
-            header=None,
-        )
+        b1 = Asset.load(f"canyon-med:poids_{suff}_b1_{subset}_{i}.txt", header=None)
+        b2 = Asset.load(f"canyon-med:poids_{suff}_b2_{subset}_{i}.txt", header=None)
+        b3 = Asset.load(f"canyon-med:poids_{suff}_b3_{subset}_{i}.txt", header=None)
+
+        IW = Asset.load(f"canyon-med:poids_{suff}_IW_{subset}_{i}.txt", header=None, sep=r"\s+")
+        LW1 = Asset.load(f"canyon-med:poids_{suff}_LW1_{subset}_{i}.txt", header=None, sep=r"\s+")
+        LW2 = Asset.load(f"canyon-med:poids_{suff}_LW2_{subset}_{i}.txt", header=None, sep=r"\s+")
 
         # Using float128 arrays avoid the error or warning "overflow encountered in exp" raised by the
         # activation function
diff --git a/argopy/plot/plot.py b/argopy/plot/plot.py
old mode 100644
new mode 100755
index e0a6257fd..816d2ac57
--- a/argopy/plot/plot.py
+++ b/argopy/plot/plot.py
@@ -8,15 +8,12 @@
 #
 import warnings
 import logging
-import os
-import json
 from copy import copy
 import xarray as xr
 import pandas as pd
 import numpy as np
 from typing import Union
-import importlib
 
 from ..options import OPTIONS
 from ..utils.loggers import warnUnless
@@ -25,6 +22,7 @@
 from ..utils.lists import subsample_list
 from ..utils.casting import to_list
 from ..errors import InvalidDatasetStructure
+from argopy.utils.assets import Asset
 from .utils import STYLE, has_seaborn, has_mpl, has_cartopy, has_ipython, has_ipywidgets
 from .utils import axes_style, latlongrid, land_feature
@@ -50,13 +48,6 @@
 
 log = logging.getLogger("argopy.plot.plot")
 
-path2assets = importlib.util.find_spec(
-    "argopy.static.assets"
-).submodule_search_locations[0]
-
-with open(os.path.join(path2assets, "data_types.json"), "r") as f:
-    DATA_TYPES = json.load(f)
-
 
 def guess_cmap(hue: str) -> str | None:
     """Try to guess the ArgoColors colormap name to use as a function of the variable to plot
@@ -756,6 +747,8 @@ def scatter_plot(
     """
     warnUnless(has_mpl, "requires matplotlib installed")
 
+
+    # Deprecation of the legacy 'this_param' / 'this_y' arguments
     if 'this_param' in kwargs:
         warnings.warn(
@@ -779,8 +772,8 @@ def scatter_plot(
         )
         y = kwargs['this_y']  # Safe fallback on new argument
 
-    if param in DATA_TYPES["data"]["str"]:
-        raise ValueError("scatter_plot does not support parameter of string type (yet !)")
+    if param in Asset.load('data_types')["data"]["str"]:
+        raise ValueError("scatter_plot does not support parameters of string type (yet!)")
 
     # Transform the 'cmap' argument into a mpl.colors.Colormap instance
     a_color = None
diff --git a/argopy/related/argo_documentation.py b/argopy/related/argo_documentation.py
index e209d13ee..3cb557a9d 100644
--- a/argopy/related/argo_documentation.py
+++ b/argopy/related/argo_documentation.py
@@ -1,17 +1,14 @@
-import os
-import json
 import pandas as pd
 from functools import lru_cache
 import requests
 
 from ..stores import httpstore, memorystore
 from ..options import OPTIONS
-from .utils import path2assets
+from argopy.utils.assets import Asset
 
 
 # Load the ADMT documentation catalogue:
-with open(os.path.join(path2assets, "admt_documentation_catalogue.json"), "rb") as f:
-    ADMT_CATALOGUE = json.load(f)['data']['catalogue']
+ADMT_CATALOGUE = Asset.load('admt_documentation_catalogue')['data']['catalogue']
 
 
 class ArgoDocs:
diff --git a/argopy/related/reference_tables.py b/argopy/related/reference_tables.py
index 892329a59..2a4770b89 100644
--- a/argopy/related/reference_tables.py
+++ b/argopy/related/reference_tables.py
@@ -1,14 +1,13 @@
 import pandas as pd
 from functools import lru_cache
 import collections
-from pathlib import Path
 
-from ..stores import httpstore, filestore
+from ..stores import httpstore
 from ..options import OPTIONS
-from ..utils import path2assets
+from argopy.utils.assets import Asset
 
 
-VALID_REF = filestore(cache=True).open_json(Path(path2assets).joinpath("nvs_reference_tables.json"))['data']['valid_ref']
+VALID_REF = Asset.load('nvs_reference_tables')['data']['valid_ref']
 
 
 class ArgoNVSReferenceTables:
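Reviewer note (not part of the patch): ADMT_CATALOGUE and VALID_REF above are still read at module import time, and scatter_plot now calls Asset.load('data_types') on every invocation. This stays cheap because the loader methods are wrapped in functools.lru_cache on a singleton (see argopy/utils/assets.py below), so each asset file is parsed at most once per session:

    from argopy.utils.assets import Asset

    a = Asset.load("data_types")
    b = Asset.load("data_types")
    assert a is b  # second call is a cache hit returning the same parsed object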
diff --git a/argopy/related/utils.py b/argopy/related/utils.py
index ad75d4f81..52ec93519 100644
--- a/argopy/related/utils.py
+++ b/argopy/related/utils.py
@@ -1,14 +1,10 @@
-import importlib
-import os
-import json
 import logging
+
+from argopy.utils.assets import Asset
 from . import ArgoNVSReferenceTables
 
 log = logging.getLogger("argopy.related.utils")
 
-path2assets = importlib.util.find_spec(
-    "argopy.static.assets"
-).submodule_search_locations[0]
 
 
 def load_dict(ptype):
@@ -21,8 +17,7 @@ def load_dict(ptype):
             profilers = dict(sorted(profilers.items()))
             return profilers
         except Exception:
-            with open(os.path.join(path2assets, "profilers.json"), "rb") as f:
-                jsdata = json.load(f)
+            jsdata = Asset.load('profilers')
             log.debug(
                 "Failed to load the ArgoNVSReferenceTables R08 for profiler types, fall back on static assets last updated on %s"
                 % jsdata["last_update"]
@@ -37,8 +32,7 @@ def load_dict(ptype):
             institutions = dict(sorted(institutions.items()))
             return institutions
         except Exception:
-            with open(os.path.join(path2assets, "institutions.json"), "rb") as f:
-                jsdata = json.load(f)
+            jsdata = Asset.load('institutions')
             log.debug(
                 "Failed to load the ArgoNVSReferenceTables R04 for institutions name, fall back on static assets last updated on %s"
                 % jsdata["last_update"]
diff --git a/argopy/tests/test_utils_assets.py b/argopy/tests/test_utils_assets.py
new file mode 100644
index 000000000..8f0be72a0
--- /dev/null
+++ b/argopy/tests/test_utils_assets.py
@@ -0,0 +1,22 @@
+import pandas as pd
+import pytest
+from argopy.utils.assets import Asset
+
+
+class Test_Asset():
+    assets = ['gdac_servers.json', 'data_types', 'schema:argo.sensor.schema.json', 'schema:argo.float.schema']
+    assets_id = [f"{a}" for a in assets]
+    @pytest.mark.parametrize("asset", assets, indirect=False, ids=assets_id)
+    def test_load_json(self, asset):
+        data = Asset.load(asset)
+        assert isinstance(data, dict)
+
+    assets = ['canyon-b:wgts_AT.txt']
+    assets_id = [f"{a}" for a in assets]
+    @pytest.mark.parametrize("asset", assets, indirect=False, ids=assets_id)
+    def test_load_csv(self, asset):
+        data = Asset.load(asset)
+        assert isinstance(data, pd.DataFrame)
+
+        data = Asset.load(asset, header=None, sep="\t")
+        assert isinstance(data, pd.DataFrame)
diff --git a/argopy/tests/test_utils_locals.py b/argopy/tests/test_utils_locals.py
index 9c34a1d38..4ad32bfd1 100644
--- a/argopy/tests/test_utils_locals.py
+++ b/argopy/tests/test_utils_locals.py
@@ -1,10 +1,8 @@
 import os
-
-import pandas as pd
 import pytest
 import io
 import argopy
-from argopy.utils.locals import modified_environ, Asset
+from argopy.utils.locals import modified_environ
 
 
 @pytest.mark.parametrize("conda", [False, True],
@@ -22,22 +20,3 @@ def test_modified_environ():
     assert os.environ['DUMMY_ENV_ARGOPY'] == 'toto'
     assert os.environ['DUMMY_ENV_ARGOPY'] == 'initial'
     os.environ.pop('DUMMY_ENV_ARGOPY')
-
-
-class Test_Asset():
-    assets = ['gdac_servers.json', 'data_types', 'schema:argo.sensor.schema.json', 'schema:argo.float.schema']
-    assets_id = [f"{a}" for a in assets]
-    @pytest.mark.parametrize("asset", assets, indirect=False, ids=assets_id)
-    def test_load_json(self, asset):
-        data = Asset.load(asset)
-        assert isinstance(data, dict)
-
-    assets = ['canyon-b:wgts_AT.txt']
-    assets_id = [f"{a}" for a in assets]
-    @pytest.mark.parametrize("asset", assets, indirect=False, ids=assets_id)
-    def test_load_csv(self, asset):
-        data = Asset.load(asset)
-        assert isinstance(data, pd.DataFrame)
-
-        data = Asset.load(asset, header=None, sep="\t")
-        assert isinstance(data, pd.DataFrame)
diff --git a/argopy/utils/__init__.py b/argopy/utils/__init__.py
index 59e999960..956ebe0b0 100644
--- a/argopy/utils/__init__.py
+++ b/argopy/utils/__init__.py
@@ -43,7 +43,6 @@
     modified_environ,
     get_sys_info,  # noqa: F401
     netcdf_and_hdf5_versions,  # noqa: F401
-    Asset,
 )
 from .monitors import monitor_status, badge, fetch_status  # noqa: F401
 from .geo import (
@@ -69,9 +68,6 @@
 from . import optical_modeling
 from .carbonate import calculate_uncertainties, error_propagation
 
-import importlib
-path2assets = importlib.util.find_spec('argopy.static.assets').submodule_search_locations[0]
-
 
 __all__ = (
     # Checkers:
@@ -121,12 +117,10 @@
     # Accessories classes (specific objects):
     "Registry",
     "float_wmo",
-    # Locals (environments, versions, systems):
-    "path2assets",
+    # Locals (environments, versions, systems, assets):
     "show_versions",
     "show_options",
     "modified_environ",
-    "Asset",
     # Monitors
     "monitor_status",
    # Geo (space/time data utilities)
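Reviewer note (not part of the patch): with Asset and path2assets gone from the public argopy.utils namespace, downstream imports must be updated; a sketch of the migration:

    # Before this patch:
    from argopy.utils import Asset, path2assets
    # After this patch:
    from argopy.utils.assets import Asset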
diff --git a/argopy/utils/assets.py b/argopy/utils/assets.py
new file mode 100644
index 000000000..2adacf8de
--- /dev/null
+++ b/argopy/utils/assets.py
@@ -0,0 +1,139 @@
+from typing import Any, Literal
+import pandas as pd
+from pathlib import Path
+import importlib
+import json
+from functools import lru_cache
+
+from argopy.errors import DataNotFound
+
+
+class Asset:
+    """Internal asset loader
+
+    Assets are read from the ``argopy.static.assets`` sub-module, using
+    :func:`json.load` for JSON documents and :func:`pandas.read_csv` for csv/txt resources.
+
+    Notes
+    -----
+    This is a **singleton** class: a single instance is created during a session,
+    whatever the number of calls made. This avoids creating too many, and
+    unnecessary, loader instances.
+
+    Examples
+    --------
+    .. code-block:: python
+        :caption: Examples of asset files loading
+
+        Asset.load('data_types')
+        Asset.load('data_types.json')
+        Asset.load('schema:argo.float.schema')
+        Asset.load('canyon-b:wgts_AT.txt', header=None, sep="\t")
+    """
+
+    _instance: "Asset | None" = None
+    _initialized: bool = False
+
+    def __new__(cls, *args: Any, **kwargs: Any) -> "Asset":
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self, *args, **kwargs) -> None:
+        if not self._initialized:
+            path2assets = importlib.util.find_spec(
+                "argopy.static.assets"
+            ).submodule_search_locations[0]
+            self._path = Path(path2assets)
+            self._initialized = True
+
+    @lru_cache
+    def _read_csv(self, path, **kwargs):
+        """Return a :class:`pandas.DataFrame` from a path to a csv resource
+
+        Parameters
+        ----------
+        path: str
+            Path to the csv resource, passed to :func:`pandas.read_csv`
+
+        Returns
+        -------
+        :class:`pandas.DataFrame`
+        """
+        with open(path, 'r') as of:
+            df = pd.read_csv(of, **kwargs)
+        return df
+
+    @lru_cache
+    def _open_json(self, path, errors: Literal['raise', 'silent', 'ignore'] = 'raise', **kwargs) -> Any:
+        """Open and process a json document from a path
+
+        Steps performed:
+
+        1. The document is opened from ``path`` with :func:`open`, then
+        2. parsed with :func:`json.load`.
+
+        Parameters
+        ----------
+        path: str
+            Path to the json resource
+        errors: str, default: ``raise``
+            Define how to handle an empty document:
+
+            - ``raise`` (default): raise a :class:`DataNotFound` error
+            - ``ignore`` or ``silent``: do not stop processing, return None
+        kwargs: dict
+
+            - the ``js_opts`` key dictionary is passed to :func:`json.load`
+
+        Returns
+        -------
+        Any
+        """
+        js_opts = {}
+        if "js_opts" in kwargs:
+            js_opts.update(kwargs["js_opts"])
+
+        with open(path, 'r') as of:
+            js = json.load(of, **js_opts)
+
+        if len(js) == 0:
+            if errors == "raise":
+                raise DataNotFound("No data returned by %s" % path)
+            else:
+                return None
+
+        return js
+
+    def _load(self, name: str, **kwargs) -> dict | pd.DataFrame:
+        suffix = Path(name).suffix
+        if suffix in [".csv", ".txt"]:
+            load = self._read_csv
+        else:
+            load = self._open_json
+            if suffix != ".json":  # eg: '.schema'
+                name = f"{name}.json"
+
+        parts = name.strip().split(":")
+        return load(self._path.joinpath(*parts), **kwargs)
+
+    @classmethod
+    def load(cls, name: str = None, **kwargs) -> Any:
+        """Load an asset file
+
+        Parameters
+        ----------
+        name: str
+            The *name* of the asset file to load.
+            If no suffix is indicated, it is assumed to be a JSON file with a `.json` extension.
+            If the asset is in a sub-folder, use a colon ':' as separator (eg: 'schema:argo.float.schema')
+        **kwargs:
+            All other arguments are passed down to the loading method.
+
+        Notes
+        -----
+        If the asset `name` has a `.txt` or `.csv` suffix, :func:`pandas.read_csv` is used.
+
+        For all other asset `name`, :func:`json.load` is used by default.
+        """
+        return cls()._load(name=name, **kwargs)
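Reviewer note (not part of the patch): a minimal usage sketch of the new loader, mirroring the class docstring and test_utils_assets.py above; JSON assets come back as dict, csv/txt assets as pandas.DataFrame:

    from argopy.utils.assets import Asset

    servers = Asset.load("gdac_servers")             # '.json' suffix is implied
    schema = Asset.load("schema:argo.float.schema")  # ':' selects a sub-folder
    wgts = Asset.load("canyon-b:wgts_AT.txt", header=None, sep="\t")  # -> DataFrame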
diff --git a/argopy/utils/casting.py b/argopy/utils/casting.py
index 08087ca8b..71ee5f9c2 100644
--- a/argopy/utils/casting.py
+++ b/argopy/utils/casting.py
@@ -1,22 +1,15 @@
 import sys
-import os
 import numpy as np
 import pandas as pd
 import xarray as xr
-import importlib
-import json
 import logging
 from copy import deepcopy
 
+from argopy.utils.assets import Asset
 
-log = logging.getLogger("argopy.utils.casting")
-
-path2assets = importlib.util.find_spec(
-    "argopy.static.assets"
-).submodule_search_locations[0]
 
-with open(os.path.join(path2assets, "data_types.json"), "r") as f:
-    DATA_TYPES = json.load(f)
+log = logging.getLogger("argopy.utils.casting")
+
+DATA_TYPES = Asset.load('data_types')['data']
 
 
 def cast_Argo_variable_type(ds: xr.Dataset, overwrite=True) -> xr.Dataset:
@@ -61,14 +54,14 @@ def cast_this_da(da, v):
     # print("Casting %s ..." % da.name)
     da.attrs["casted"] = 0
 
-    if v in DATA_TYPES["data"]["str"] and da.dtype == "O":  # Object
+    if v in DATA_TYPES["str"] and da.dtype == "O":  # Object
         try:
             da = cast_this(da, str, exception_to_raise=UnicodeDecodeError)
         except UnicodeDecodeError:
             da = da.str.decode(encoding="unicode_escape")
             da = cast_this(da, str)
 
-    if v in DATA_TYPES["data"]["int"]:  # and da.dtype == 'O':  # Object
+    if v in DATA_TYPES["int"]:  # and da.dtype == 'O':  # Object
         if "conventions" in da.attrs:
             convname = "conventions"
         elif "convention" in da.attrs:
@@ -90,7 +83,7 @@ def cast_this_da(da, v):
             da = cast_this(da, float)
             da = cast_this(da, int)
 
-    if v in DATA_TYPES["data"]["datetime"] and da.dtype == "O":  # Object
+    if v in DATA_TYPES["datetime"] and da.dtype == "O":  # Object
         if (
             "conventions" in da.attrs
             and da.attrs["conventions"] == "YYYYMMDDHHMISS"
diff --git a/argopy/utils/lists.py b/argopy/utils/lists.py
index 08174969d..54bd70acb 100644
--- a/argopy/utils/lists.py
+++ b/argopy/utils/lists.py
@@ -1,14 +1,9 @@
 import sys
 import warnings
-import importlib
-import os
-import json
-from ..options import OPTIONS
 from typing import List, Union
 
-path2assets = importlib.util.find_spec(
-    "argopy.static.assets"
-).submodule_search_locations[0]
+from argopy.options import OPTIONS
+from argopy.utils.assets import Asset
 
 
 def subsample_list(original_list, N):
@@ -284,9 +279,7 @@ def list_bgc_s_variables() -> List[str]:
     :meth:`argopy.utils.list_radiometry_variables`
     :meth:`argopy.utils.list_radiometry_parameters`,
     """
-    with open(os.path.join(path2assets, "variables_bgc_synthetic.json"), "r") as f:
-        vlist = json.load(f)
-    return vlist["data"]["variables"]
+    return Asset.load('variables_bgc_synthetic')["data"]["variables"]
 
 
 def list_bgc_s_parameters() -> List[str]:
@@ -414,9 +407,7 @@ def list_gdac_servers() -> List[str]:
     :class:`argopy.gdacfs`, :meth:`argopy.utils.check_gdac_path`, :meth:`argopy.utils.shortcut2gdac`
     """
-    with open(os.path.join(path2assets, "gdac_servers.json"), "r") as f:
-        vlist = json.load(f)
-    return vlist["data"]["paths"]
+    return Asset.load('gdac_servers')["data"]["paths"]
 
 
 def shortcut2gdac(short: str = None) -> Union[str, dict]:
@@ -437,9 +428,7 @@ def shortcut2gdac(short: str = None) -> Union[str, dict]:
     :func:`argopy.utils.list_gdac_servers`, :class:`argopy.gdacfs`, :meth:`argopy.utils.check_gdac_path`
     """
-    with open(os.path.join(path2assets, "gdac_servers.json"), "r") as f:
-        vlist = json.load(f)
-    shortcuts = vlist["data"]["shortcuts"]
+    shortcuts = Asset.load('gdac_servers')["data"]["shortcuts"]
 
     if short is not None:
         if short.lower().strip() in shortcuts.keys():
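Reviewer note (not part of the patch): casting.py now keeps only the 'data' mapping of the asset at import time, so lookups drop one level of indexing; a sketch of the equivalence, assuming the data_types.json layout implied by the old and new lookups:

    from argopy.utils.assets import Asset

    DATA_TYPES = Asset.load("data_types")["data"]
    # Lookups that used to read DATA_TYPES["data"]["str"] now read DATA_TYPES["str"]:
    def is_string_variable(v: str) -> bool:
        return v in DATA_TYPES["str"]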
diff --git a/argopy/utils/locals.py b/argopy/utils/locals.py
index e7aed0295..ec21a1e8d 100644
--- a/argopy/utils/locals.py
+++ b/argopy/utils/locals.py
@@ -9,10 +9,7 @@
 import copy
 import shutil
 import json
-from typing import Any
 import importlib
-from pathlib import Path
-import pandas as pd
 
 from argopy.options import OPTIONS
 
@@ -319,78 +316,3 @@ def show_options(file=sys.stdout):  # noqa: C901
     opts = dict(sorted(opts.items()))
     for k, v in opts.items():
         print(f"{k}: {v}", file=file)
-
-
-class Asset:
-    """Internal asset loader
-
-    Assets are loaded using an instance of :class:`argopy.stores.filestore`.
-
-    Notes
-    -----
-    This is **single-instance** class, whereby a single instance will be created during a session, whatever the number of calls is made. This avoids to create too many, and unnecessary, instances of file stores.
-
-    Examples
-    --------
-    .. code-block:: python
-        :caption: Examples of asset files loading
-
-        Asset.load('data_types')
-        Asset.load('data_types.json')
-        Asset.load('schema:argo.float.schema')
-        Asset.load('canyon-b:wgts_AT.txt', header=None, sep="\t")
-    """
-
-    _fs: Any = None
-    _instance: "Asset | None" = None
-    _initialized: bool = False
-
-    def __new__(cls, *args: Any, **kwargs: Any) -> "Asset":
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
-        return cls._instance
-
-    def __init__(self, *args, **kwargs) -> None:
-        if not self._initialized:
-            from argopy.stores import filestore
-
-            self._fs = filestore(cache=True, cachedir=OPTIONS["cachedir"])
-            path2assets = importlib.util.find_spec(
-                "argopy.static.assets"
-            ).submodule_search_locations[0]
-            self._path = Path(path2assets)
-            self._initialized = True
-
-    def _load(self, name: str, **kwargs) -> dict | pd.DataFrame:
-        suffix = Path(name).suffix
-        if suffix in [".csv", ".txt"]:
-            load = self._fs.read_csv
-        else:
-            load = self._fs.open_json
-            if suffix != ".json":  # eg: '.schema'
-                name = f"{name}.json"
-
-        name = name.strip()
-        name = name.split(":")
-        return load(self._path.joinpath(*name), **kwargs)
-
-    @classmethod
-    def load(cls, name: str = None, **kwargs) -> Any:
-        """Load an asset file
-
-        Parameters
-        ----------
-        name: str
-            The *name* of the asset file to load.
-            If no suffix is indicated, it is assumed to be a JSON file with a `.json` extension.
-            If the asset is in sub-folders, use semicolons ':' as separator (eg: 'schema:argo.float.schema')
-        **kwargs:
-            All other arguments are passed down to the loading method.
-
-        Notes
-        -----
-        If the asset `name` has a `.txt` or `.csv` suffix, the :meth:`argopy.stores.filestore.read_csv` is used.
-
-        For all other asset `name`, the :meth:`argopy.stores.filestore.load_json` is used by default.
-        """
-        return cls()._load(name=name, **kwargs)