diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1cc74ac..90446c0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ --- repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -12,57 +12,54 @@ repos: - id: check-ast - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.2 + rev: v0.14.9 hooks: - id: ruff args: [--fix] - repo: https://github.com/psf/black - rev: 24.8.0 + rev: 25.12.0 hooks: - id: black language_version: python3 + additional_dependencies: ["black[jupyter]"] - repo: https://github.com/keewis/blackdoc - rev: v0.3.9 + rev: v0.4.6 hooks: - id: blackdoc additional_dependencies: ["black[jupyter]"] - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v4.0.0-alpha.8" - hooks: - - id: prettier - types_or: [yaml, html, css, scss, javascript, json] # markdown to avoid conflicts with mdformat + files: \.(rst)$ - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.1 hooks: - id: codespell types_or: [python, markdown, rst] additional_dependencies: [tomli] - - repo: https://github.com/asottile/pyupgrade - rev: v3.17.0 - hooks: - - id: pyupgrade + # - repo: https://github.com/asottile/pyupgrade + # rev: v3.20.0 + # hooks: + # - id: pyupgrade - repo: https://github.com/MarcoGorelli/madforhooks rev: 0.4.1 hooks: # - id: conda-env-sorter # conflicts with prettier - id: check-execution-order - repo: https://github.com/executablebooks/mdformat - rev: 0.7.17 + rev: 1.0.0 hooks: - id: mdformat additional_dependencies: [mdformat-gfm, mdformat-black] exclude: ^.github/ # this avoid the hook to wrongly reformats md issue template files - repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.1 hooks: - id: nbstripout args: [--keep-output] - repo: https://github.com/nbQA-dev/nbQA - rev: 1.8.7 + rev: 1.9.1 hooks: - id: nbqa-black - - id: nbqa-ruff - args: [--fix, "--ignore=E402,B018"] + # - id: nbqa-ruff + # args: [--fix, "--ignore=E402,B018"] - repo: https://github.com/pre-commit/pygrep-hooks rev: "v1.10.0" hooks: diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 573a804..6983dd1 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -58,7 +58,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -\[INSERT CONTACT METHOD\]. +[INSERT CONTACT METHOD]. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the diff --git a/README.md b/README.md index ffff247..fc5878e 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ | Code Quality | [![Codefactor](https://www.codefactor.io/repository/github/ghiggi/radar_api/badge?style=flat)](https://www.codefactor.io/repository/github/ghiggi/radar_api) [![Codebeat](https://codebeat.co/badges/57498d71-f042-473f-bb8e-9b45e50572d8?style=flat)](https://codebeat.co/projects/github-com-ghiggi-radar_api-main) [![Codacy](https://app.codacy.com/project/badge/Grade/bee842cb10004ad8bb9288256f2fc8af?style=flat)](https://app.codacy.com/gh/ghiggi/radar_api/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![Codescene](https://codescene.io/projects/63299/status-badges/average-code-health?style=flat)](https://codescene.io/projects/63299) | | License | [![License](https://img.shields.io/github/license/ghiggi/radar_api?style=flat)](https://github.com/ghiggi/radar_api/blob/main/LICENSE) | | Community | [![Discourse](https://img.shields.io/badge/Slack-radar_api-green.svg?logo=slack&style=flat)](https://openradar.discourse.group/) [![GitHub Discussions](https://img.shields.io/badge/GitHub-Discussions-green?logo=github&style=flat)](https://github.com/ghiggi/radar_api/discussions) | -| Citation | [![DOI](https://zenodo.org/badge/922589509.svg?style=flat)](https://doi.org/10.5281/zenodo.14743651) | +| Citation | [![DOI](https://zenodo.org/badge/922589509.svg?style=flat)](https://doi.org/10.5281/zenodo.14743651) | [**Documentation: https://radar-api.readthedocs.io**](https://radar-api.readthedocs.io/) diff --git a/pyproject.toml b/pyproject.toml index b0feb92..253f7f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,6 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", @@ -46,7 +45,7 @@ dependencies = [ "pandas", "tqdm", ] -requires-python = ">=3.10" +requires-python = ">=3.11" dynamic = ["version"] [tool.setuptools] @@ -60,6 +59,7 @@ write_to = "radar_api/_version.py" [project.optional-dependencies] dev = ["pre-commit", "loghub", + "xarray", "xradar", "black[jupyter]", "blackdoc", "codespell", "ruff", "pytest", "pytest-cov", "pytest-mock", "pytest-check", "pytest-sugar", "pytest-watcher", "deepdiff", @@ -186,9 +186,8 @@ ignore = [ "B006", # {} defaults in function arguments "PT011", # pytest raised error must be checked if match the expected error msg "PERF203", - "UP038", + "PLC0415", # import not at the top-level file # "PD011", # suggest values --> to_numpy - "PD901", "PD013", # suggest melt instead of stack "PLW2901", "PLW0603", diff --git a/radar_api/__init__.py b/radar_api/__init__.py index ef51240..5693de6 100644 --- a/radar_api/__init__.py +++ b/radar_api/__init__.py @@ -39,6 +39,7 @@ from radar_api.info import group_filepaths from radar_api.io import ( available_networks, + available_products, available_radars, ) from radar_api.readers import ( @@ -52,17 +53,18 @@ __all__ = [ - "available_radars", "available_networks", + "available_products", + "available_radars", "config", "define_configs", - "read_configs", + "download_files", "find_files", "group_filepaths", - "open_datatree", "open_dataset", + "open_datatree", "open_pyart", - "download_files", + "read_configs", ] # Get version diff --git a/radar_api/checks.py b/radar_api/checks.py index 5029559..9a96b7f 100644 --- a/radar_api/checks.py +++ b/radar_api/checks.py @@ -27,7 +27,6 @@ import datetime import os import pathlib -import sys import numpy as np @@ -37,9 +36,7 @@ def get_current_utc_time(): """Return current UTC time.""" - if sys.version_info >= (3, 11): - return datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - return datetime.datetime.utcnow() + return datetime.datetime.now(datetime.UTC).replace(tzinfo=None) def check_protocol(protocol): @@ -85,7 +82,7 @@ def check_radar(radar, network): if not isinstance(radar, str): raise TypeError("Specify 'radar' as a string.") check_network(network) - valid_radars = available_radars() + valid_radars = available_radars(only_online=False) if radar not in valid_radars: raise ValueError(f"Invalid {network} radar {radar}. Available radars: {valid_radars}") return radar @@ -98,12 +95,29 @@ def check_network(network): if not isinstance(network, str): raise TypeError("Specify 'network' as a string.") - valid_networks = available_networks() + valid_networks = available_networks(only_online=False) if network not in valid_networks: raise ValueError(f"Invalid network {network}. Available networks: {valid_networks}") return network +def check_product(network, product=None): + """Check product validity. + + If only one product available for that network, return that. + """ + from radar_api.io import available_products + + valid_products = available_products(network, only_online=False) + if product is None: + if len(valid_products) == 1: + return valid_products[0] + raise ValueError(f"{network} has {valid_products} products available. 'product' must be specified.") + if product not in valid_products: + raise ValueError(f"Invalid product {product} for {network}. Available products: {valid_products}") + return product + + def check_time(time): """Check time validity. diff --git a/radar_api/configs.py b/radar_api/configs.py index 14c5031..8582911 100644 --- a/radar_api/configs.py +++ b/radar_api/configs.py @@ -27,6 +27,7 @@ """RADAR-API configurations settings.""" import os +import radar_api from radar_api.utils.yaml import read_yaml, write_yaml @@ -75,6 +76,11 @@ def define_configs( print(f"The RADAR-API config file has been {action_msg} successfully!") + # Now read the config file and set it as the active configuration + # - This avoid the need to restart a python session to take effect ! + config_dict = read_configs() + radar_api.config.update(config_dict) + def read_configs() -> dict[str, str]: """Reads the RADAR-API configuration file and returns a dictionary with the configuration settings. @@ -110,8 +116,6 @@ def read_configs() -> dict[str, str]: ####--------------------------------------------------------------------------. def _get_config_key(key): """Return the config key.""" - import radar_api - value = radar_api.config.get(key, None) if value is None: raise ValueError(f"The '{key}' is not specified in the RADAR-API configuration file.") @@ -120,8 +124,6 @@ def _get_config_key(key): def get_base_dir(base_dir=None): """Return the RADAR-API base directory.""" - import radar_api - if base_dir is None: base_dir = radar_api.config.get("base_dir") if base_dir is None: diff --git a/radar_api/download.py b/radar_api/download.py index 2df9825..c5b30db 100644 --- a/radar_api/download.py +++ b/radar_api/download.py @@ -41,6 +41,7 @@ check_base_dir, check_download_protocol, check_network, + check_product, check_radar, check_start_end_time, ) @@ -115,13 +116,13 @@ def _select_missing_fpaths(local_fpaths, bucket_fpaths): return local_fpaths, bucket_fpaths -def define_local_filepath(filename, network, radar, base_dir=None): +def define_local_filepath(filename, network, product, radar, base_dir=None): """Define filepath where to save file locally on disk.""" base_dir = get_base_dir(base_dir) base_dir = check_base_dir(base_dir) # Get directory pattern - directory_pattern = get_directory_pattern(protocol="local", network=network) - info_dict = get_info_from_filepath(filename, network=network) + directory_pattern = get_directory_pattern(protocol="local", network=network, product=product) + info_dict = get_info_from_filepath(filename, network=network, product=product) time = info_dict["start_time"] # Define local directory path parser = Parser(directory_pattern) @@ -133,10 +134,16 @@ def define_local_filepath(filename, network, radar, base_dir=None): return filepath -def _get_local_from_bucket_fpaths(base_dir, network, radar, bucket_fpaths): +def _get_local_from_bucket_fpaths(base_dir, network, product, radar, bucket_fpaths): """Convert cloud bucket filepaths to local storage filepaths.""" fpaths = [ - define_local_filepath(filename=os.path.basename(fpath), network=network, radar=radar, base_dir=base_dir) + define_local_filepath( + filename=os.path.basename(fpath), + network=network, + product=product, + radar=radar, + base_dir=base_dir, + ) for fpath in bucket_fpaths ] return fpaths @@ -236,6 +243,7 @@ def download_files( radar, start_time, end_time, + product=None, n_threads=20, force_download=False, check_data_integrity=True, @@ -256,6 +264,11 @@ def download_files( network : str The name of the radar network. See `radar_api.available_network()` for available radar networks. + product: str + The product acronym. The default is None. + It must be specified if for a given network, multiple products are available + through radar_api. + See `radar_api.available_products(network)` for available products. start_time : datetime.datetime The start (inclusive) time of the interval period for retrieving the filepaths. end_time : datetime.datetime @@ -294,6 +307,7 @@ def download_files( base_dir = check_base_dir(base_dir) network = check_network(network) radar = check_radar(radar=radar, network=network) + product = check_product(network=network, product=product) start_time, end_time = check_start_end_time(start_time, end_time) # Initialize timing @@ -323,6 +337,7 @@ def download_files( fs_args=fs_args, radar=radar, network=network, + product=product, start_time=start_time, end_time=end_time, base_dir=None, @@ -339,6 +354,7 @@ def download_files( base_dir=base_dir, network=network, radar=radar, + product=product, bucket_fpaths=bucket_fpaths, ) diff --git a/radar_api/etc/network/FMI.yaml b/radar_api/etc/network/FMI/PVOL.yaml similarity index 96% rename from radar_api/etc/network/FMI.yaml rename to radar_api/etc/network/FMI/PVOL.yaml index 8bc0c82..b9f48c2 100644 --- a/radar_api/etc/network/FMI.yaml +++ b/radar_api/etc/network/FMI/PVOL.yaml @@ -1,5 +1,6 @@ network: "FMI" description: "Finnish Meteorological Institute radars" +public_data: True cloud_directory_pattern: "s3://fmi-opendata-radar-volume-hdf5/{time:%Y}/{time:%m}/{time:%d}/{radar:s}" local_directory_pattern: "{base_dir}/FMI/{time:%Y}/{time:%m}/{time:%d}/{time:%H}/{radar:s}" filename_patterns: diff --git a/radar_api/etc/network/IDEAM.yaml b/radar_api/etc/network/IDEAM/IDEAM_L2.yaml similarity index 96% rename from radar_api/etc/network/IDEAM.yaml rename to radar_api/etc/network/IDEAM/IDEAM_L2.yaml index 8946f16..1b5414c 100644 --- a/radar_api/etc/network/IDEAM.yaml +++ b/radar_api/etc/network/IDEAM/IDEAM_L2.yaml @@ -1,5 +1,6 @@ network: "IDEAM" description: "Colombian weather radar network" +public_data: True cloud_directory_pattern: "s3://s3-radaresideam/l2_data/{time:%Y}/{time:%m}/{time:%d}/{radar:s}" local_directory_pattern: "{base_dir}/IDEAM/{time:%Y}/{time:%m}/{time:%d}/{time:%H}/{radar:s}" filename_patterns: diff --git a/radar_api/etc/network/MCH_CSCS/POL.yaml b/radar_api/etc/network/MCH_CSCS/POL.yaml new file mode 100644 index 0000000..e4e5dca --- /dev/null +++ b/radar_api/etc/network/MCH_CSCS/POL.yaml @@ -0,0 +1,10 @@ +network: MCH_CSCS +description: MeteoSwiss Rad4Alp network +public_data: False +cloud_directory_pattern: null +local_directory_pattern: "/store_new/mch/msrad/radar/swiss/data/{time:%Y}/{time:%y}{time:%j}/ML{radar:1s}{time:%y}{time:%j}.zip" +filename_patterns: + - "{radar_acronym:3s}{start_time:%y%j%H%M}0U.{volume_identifier:3s}" +pyart_reader: read_metranet +xradar_reader: null +xradar_engine: null diff --git a/radar_api/etc/network/MCH_LTE/HYM.yaml b/radar_api/etc/network/MCH_LTE/HYM.yaml new file mode 100644 index 0000000..b273ba8 --- /dev/null +++ b/radar_api/etc/network/MCH_LTE/HYM.yaml @@ -0,0 +1,10 @@ +network: MCH_LTE +description: MeteoSwiss Rad4Alp network +public_data: False +cloud_directory_pattern: null +local_directory_pattern: "{base_dir}/MCH/{time:%Y}/{time:%m}/{time:%d}/{time:%H}/YM{radar:1s}" +filename_patterns: + - "YM{radar_acronym:1s}{start_time:%y%j%H%M}0L.8{sweep_identifier:2s}" +pyart_reader: read_metranet +xradar_reader: null +xradar_engine: null diff --git a/radar_api/etc/network/MCH_LTE/HZT.yaml b/radar_api/etc/network/MCH_LTE/HZT.yaml new file mode 100644 index 0000000..584dff3 --- /dev/null +++ b/radar_api/etc/network/MCH_LTE/HZT.yaml @@ -0,0 +1,10 @@ +network: MCH_LTE +description: MeteoSwiss Rad4Alp network +public_data: False +cloud_directory_pattern: null +local_directory_pattern: "{base_dir}/MCH/{time:%Y}/{time:%m}/{time:%d}/{time:%H}/HZT" +filename_patterns: + - "HZT{start_time:%y%j%H%M}0L.{volume_identifier:3s}" # volume_identifier=800 +pyart_reader: read_metranet +xradar_reader: null +xradar_engine: null diff --git a/radar_api/etc/network/MCH_LTE/POL.yaml b/radar_api/etc/network/MCH_LTE/POL.yaml new file mode 100644 index 0000000..b2af30e --- /dev/null +++ b/radar_api/etc/network/MCH_LTE/POL.yaml @@ -0,0 +1,10 @@ +network: MCH_LTE +description: MeteoSwiss Rad4Alp network +public_data: False +cloud_directory_pattern: null +local_directory_pattern: "{base_dir}/MCH/{time:%Y}/{time:%m}/{time:%d}/{time:%H}/ML{radar:1s}" +filename_patterns: + - "ML{radar_acronym:1s}{start_time:%y%j%H%M}0U.0{sweep_identifier:2s}" +pyart_reader: read_metranet +xradar_reader: null +xradar_engine: null diff --git a/radar_api/etc/network/NEXRAD.yaml b/radar_api/etc/network/NEXRAD/NEXRAD_L2.yaml similarity index 85% rename from radar_api/etc/network/NEXRAD.yaml rename to radar_api/etc/network/NEXRAD/NEXRAD_L2.yaml index d38c1d3..591b393 100644 --- a/radar_api/etc/network/NEXRAD.yaml +++ b/radar_api/etc/network/NEXRAD/NEXRAD_L2.yaml @@ -1,6 +1,7 @@ network: "NEXRAD" description: "NOAA NEXRAD radar network" -cloud_directory_pattern: "s3://noaa-nexrad-level2/{time:%Y}/{time:%m}/{time:%d}/{radar:s}" +public_data: True +cloud_directory_pattern: "s3://unidata-nexrad-level2/{time:%Y}/{time:%m}/{time:%d}/{radar:s}" local_directory_pattern: "{base_dir}/NEXRAD/{time:%Y}/{time:%m}/{time:%d}/{time:%H}/{radar:s}" filename_patterns: - "{radar_acronym:4s}{start_time:%Y%m%d_%H%M%S}_V0{version:1s}" diff --git a/radar_api/etc/radar/MCH_CSCS/A.yaml b/radar_api/etc/radar/MCH_CSCS/A.yaml new file mode 100644 index 0000000..d5fc7e8 --- /dev/null +++ b/radar_api/etc/radar/MCH_CSCS/A.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Albis" +start_time: "2017-01-01 00:00:00" +end_time: "" +latitude: 47.284332 +longitude: 8.512 +altitude: 938 +radar_band: C diff --git a/radar_api/etc/radar/MCH_CSCS/D.yaml b/radar_api/etc/radar/MCH_CSCS/D.yaml new file mode 100644 index 0000000..a668817 --- /dev/null +++ b/radar_api/etc/radar/MCH_CSCS/D.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Dole" +start_time: "2017-01-01 00:00:00" +end_time: "" +latitude: 46.425114 +longitude: 6.099415 +altitude: 1682 +radar_band: C diff --git a/radar_api/etc/radar/MCH_CSCS/L.yaml b/radar_api/etc/radar/MCH_CSCS/L.yaml new file mode 100644 index 0000000..4f26f42 --- /dev/null +++ b/radar_api/etc/radar/MCH_CSCS/L.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Lema" +start_time: "2017-01-01 00:00:00" +end_time: "" +latitude: 46.04076 +longitude: 8.833217 +altitude: 1626 +radar_band: C diff --git a/radar_api/etc/radar/MCH_CSCS/P.yaml b/radar_api/etc/radar/MCH_CSCS/P.yaml new file mode 100644 index 0000000..4034f1b --- /dev/null +++ b/radar_api/etc/radar/MCH_CSCS/P.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Plaine Morte" +start_time: "2018-01-01 00:00:00" +end_time: "" +latitude: 46.370647 +longitude: 7.486552 +altitude: 2937 +radar_band: C diff --git a/radar_api/etc/radar/MCH_CSCS/W.yaml b/radar_api/etc/radar/MCH_CSCS/W.yaml new file mode 100644 index 0000000..b7443dd --- /dev/null +++ b/radar_api/etc/radar/MCH_CSCS/W.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Weissfluhgipfel" +start_time: "2019-01-01 00:00:00" +end_time: "" +latitude: 46.834972 +longitude: 9.794458 +altitude: 2850 +radar_band: C diff --git a/radar_api/etc/radar/MCH_LTE/A.yaml b/radar_api/etc/radar/MCH_LTE/A.yaml new file mode 100644 index 0000000..d5fc7e8 --- /dev/null +++ b/radar_api/etc/radar/MCH_LTE/A.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Albis" +start_time: "2017-01-01 00:00:00" +end_time: "" +latitude: 47.284332 +longitude: 8.512 +altitude: 938 +radar_band: C diff --git a/radar_api/etc/radar/MCH_LTE/D.yaml b/radar_api/etc/radar/MCH_LTE/D.yaml new file mode 100644 index 0000000..a668817 --- /dev/null +++ b/radar_api/etc/radar/MCH_LTE/D.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Dole" +start_time: "2017-01-01 00:00:00" +end_time: "" +latitude: 46.425114 +longitude: 6.099415 +altitude: 1682 +radar_band: C diff --git a/radar_api/etc/radar/MCH_LTE/L.yaml b/radar_api/etc/radar/MCH_LTE/L.yaml new file mode 100644 index 0000000..4f26f42 --- /dev/null +++ b/radar_api/etc/radar/MCH_LTE/L.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Lema" +start_time: "2017-01-01 00:00:00" +end_time: "" +latitude: 46.04076 +longitude: 8.833217 +altitude: 1626 +radar_band: C diff --git a/radar_api/etc/radar/MCH_LTE/P.yaml b/radar_api/etc/radar/MCH_LTE/P.yaml new file mode 100644 index 0000000..4034f1b --- /dev/null +++ b/radar_api/etc/radar/MCH_LTE/P.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Plaine Morte" +start_time: "2018-01-01 00:00:00" +end_time: "" +latitude: 46.370647 +longitude: 7.486552 +altitude: 2937 +radar_band: C diff --git a/radar_api/etc/radar/MCH_LTE/W.yaml b/radar_api/etc/radar/MCH_LTE/W.yaml new file mode 100644 index 0000000..b7443dd --- /dev/null +++ b/radar_api/etc/radar/MCH_LTE/W.yaml @@ -0,0 +1,8 @@ +icao: "" +radar_name: "Weissfluhgipfel" +start_time: "2019-01-01 00:00:00" +end_time: "" +latitude: 46.834972 +longitude: 9.794458 +altitude: 2850 +radar_band: C diff --git a/radar_api/filter.py b/radar_api/filter.py index fdc9409..32cede6 100644 --- a/radar_api/filter.py +++ b/radar_api/filter.py @@ -27,6 +27,7 @@ """This module provides files filtering functions.""" import datetime +from radar_api.checks import check_product, check_start_end_time from radar_api.info import get_info_from_filepath @@ -40,8 +41,8 @@ def is_file_within_time(start_time, end_time, file_start_time, file_end_time): # - Case 2 # s e # | | - # -------- - is_case2 = file_start_time >= start_time and file_end_time < end_time + # (--)---------(--) + is_case2 = file_start_time >= start_time and file_end_time <= end_time # - Case 3 # s e # | | @@ -51,12 +52,12 @@ def is_file_within_time(start_time, end_time, file_start_time, file_end_time): return is_case1 or is_case2 or is_case3 -def filter_file(fpath, network, start_time, end_time): +def filter_file(fpath, network, product, start_time, end_time): """Utility function to select a file is matching the specified time periods.""" # Filter by start_time if start_time is not None and end_time is not None: # Retrieve info - info_dict = get_info_from_filepath(fpath, network=network, ignore_errors=True) + info_dict = get_info_from_filepath(fpath, network=network, product=product, ignore_errors=True) # If no start_time info, return None --> filtered out if "start_time" not in info_dict: return None @@ -75,16 +76,23 @@ def filter_file(fpath, network, start_time, end_time): def filter_files( fpaths, network, + product=None, start_time=None, end_time=None, ): """Utility function to select filepaths between time periods.""" + product = check_product(network, product=product) + + if start_time is not None and end_time is not None: + start_time, end_time = check_start_end_time(start_time, end_time) + if isinstance(fpaths, str): fpaths = [fpaths] fpaths = [ filter_file( fpath, network=network, + product=product, start_time=start_time, end_time=end_time, ) diff --git a/radar_api/info.py b/radar_api/info.py index 9119ca2..13ebd4a 100644 --- a/radar_api/info.py +++ b/radar_api/info.py @@ -33,7 +33,8 @@ import numpy as np from trollsift import Parser -from radar_api.io import get_network_filename_patterns +from radar_api.checks import check_product +from radar_api.io import get_product_filename_patterns # TODO: Create a class all such methods that depend on the filename_patterns and network @@ -41,6 +42,7 @@ FILE_KEYS = [ "radar_acronym", "volume_identifier", + "sweep_identifier", "start_time", "end_time", "version", @@ -64,6 +66,7 @@ DEFAULT_FILE_KEY = { "radar_acronym": "", "volume_identifier": "", + "sweep_identifier": "", "start_time": None, "end_time": None, "version": "", @@ -77,9 +80,9 @@ ########################## -def parse_filename(filename, network): +def parse_filename(filename, network, product): """Try to parse the filename based on the radar network.""" - filename_patterns = get_network_filename_patterns(network) + filename_patterns = get_product_filename_patterns(network, product) pattern_identified = False for pattern in filename_patterns: try: @@ -95,10 +98,10 @@ def parse_filename(filename, network): return info_dict -def get_info_from_filename(filename, network, ignore_errors=False): +def get_info_from_filename(filename, network, product, ignore_errors=False): """Retrieve file information dictionary from filename.""" # Try to parse the filename - info_dict = parse_filename(filename, network=network) + info_dict = parse_filename(filename, network=network, product=product) # Raise error if the filename can't be parsed if len(info_dict) == 0 and not ignore_errors: @@ -115,25 +118,27 @@ def get_info_from_filename(filename, network, ignore_errors=False): return info_dict -def get_info_from_filepath(filepath, network, ignore_errors=False): +def get_info_from_filepath(filepath, network, product, ignore_errors=False): """Retrieve file information dictionary from filepath.""" if not isinstance(filepath, str): raise TypeError("'filepath' must be a string.") filename = os.path.basename(filepath) - return get_info_from_filename(filename, network=network, ignore_errors=ignore_errors) + return get_info_from_filename(filename, network=network, product=product, ignore_errors=ignore_errors) -def get_key_from_filepath(filepath, key, network, ignore_errors=False): +def get_key_from_filepath(filepath, key, network, product, ignore_errors=False): """Extract specific key information from a list of filepaths.""" - return get_info_from_filepath(filepath, network=network, ignore_errors=ignore_errors)[key] + return get_info_from_filepath(filepath, network=network, product=product, ignore_errors=ignore_errors)[key] -def get_key_from_filepaths(filepaths, key, network, ignore_errors=False): +def get_key_from_filepaths(filepaths, key, network, product=None, ignore_errors=False): """Extract specific key information from a list of filepaths.""" + product = check_product(network, product=product) if isinstance(filepaths, str): filepaths = [filepaths] return [ - get_key_from_filepath(filepath, key=key, network=network, ignore_errors=ignore_errors) for filepath in filepaths + get_key_from_filepath(filepath, key=key, network=network, product=product, ignore_errors=ignore_errors) + for filepath in filepaths ] @@ -143,26 +148,49 @@ def get_key_from_filepaths(filepaths, key, network, ignore_errors=False): ######################################### -def get_start_time_from_filepaths(filepaths, network, ignore_errors=False): +def get_start_time_from_filepaths(filepaths, network, product, ignore_errors=False): """Infer files ``start_time`` from filenames.""" - return get_key_from_filepaths(filepaths, key="start_time", network=network, ignore_errors=ignore_errors) + return get_key_from_filepaths( + filepaths, + key="start_time", + network=network, + product=product, + ignore_errors=ignore_errors, + ) -def get_end_time_from_filepaths(filepaths, network, ignore_errors=False): +def get_end_time_from_filepaths(filepaths, network, product, ignore_errors=False): """Infer files ``end_time`` from filenames.""" - return get_key_from_filepaths(filepaths, key="end_time", network=network, ignore_errors=ignore_errors) + return get_key_from_filepaths( + filepaths, + key="end_time", + network=network, + product=product, + ignore_errors=ignore_errors, + ) -def get_start_end_time_from_filepaths(filepaths, network, ignore_errors=False): +def get_start_end_time_from_filepaths(filepaths, network, product=None, ignore_errors=False): """Infer files ``start_time`` and ``end_time`` from filenames.""" - list_start_time = get_start_time_from_filepaths(filepaths, network=network, ignore_errors=ignore_errors) - list_end_time = get_end_time_from_filepaths(filepaths, network=network, ignore_errors=ignore_errors) + product = check_product(network, product=product) + list_start_time = get_start_time_from_filepaths( + filepaths, + network=network, + product=product, + ignore_errors=ignore_errors, + ) + list_end_time = get_end_time_from_filepaths( + filepaths, + network=network, + product=product, + ignore_errors=ignore_errors, + ) return np.array(list_start_time), np.array(list_end_time) -def get_version_from_filepath(filepath, network, integer=True): +def get_version_from_filepath(filepath, network, product, integer=True): """Infer file ``version`` from filenames.""" - version = get_key_from_filepath(filepath, key="version", network=network) + version = get_key_from_filepath(filepath, key="version", network=network, product=product) if version == "": return None if integer: @@ -170,11 +198,14 @@ def get_version_from_filepath(filepath, network, integer=True): return version -def get_version_from_filepaths(filepaths, network, integer=True): +def get_version_from_filepaths(filepaths, network, product=None, integer=True): """Infer files ``version`` from filenames.""" + product = check_product(network, product=product) if isinstance(filepaths, str): filepaths = [filepaths] - return [get_version_from_filepath(filepath, integer=integer, network=network) for filepath in filepaths] + return [ + get_version_from_filepath(filepath, integer=integer, network=network, product=product) for filepath in filepaths + ] ####--------------------------------------------------------------------------. @@ -228,7 +259,7 @@ def get_time_component(time, component): return str(func_dict[component](time)) -def _get_groups_value(groups, filepath, network): +def _get_groups_value(groups, filepath, network, product): """Return the value associated to the groups keys. If multiple keys are specified, the value returned is a string of format: ``//...`` @@ -237,7 +268,7 @@ def _get_groups_value(groups, filepath, network): returns a :py:class:`datetime.datetime` object. """ single_key = len(groups) == 1 - info_dict = get_info_from_filepath(filepath, network=network) + info_dict = get_info_from_filepath(filepath, network=network, product=product) start_time = info_dict["start_time"] list_key_values = [] for key in groups: @@ -251,7 +282,7 @@ def _get_groups_value(groups, filepath, network): return "/".join(list_key_values) -def group_filepaths(filepaths, network, groups=None): +def group_filepaths(filepaths, network, product=None, groups=None): """ Group filepaths in a dictionary if groups are specified. @@ -262,7 +293,9 @@ def group_filepaths(filepaths, network, groups=None): groups: list or str The group keys by which to group the filepaths. Valid group keys are - ``start_time``, ``end_time``, ``version``, ``volume_identifier``, ``radar_acronym``, ``extension``, + ``start_time``, ``end_time``, ``version``, + ``volume_identifier``, ``sweep_identifier``, + ``radar_acronym``, ``extension``, ``year``, ``month``, ``day``, ``doy``, ``dow``, ``hour``, ``minute``, ``second``, ``month_name``, ``quarter``, ``season``. The time components are extracted from ``start_time`` ! @@ -276,11 +309,14 @@ def group_filepaths(filepaths, network, groups=None): or the original input filepaths (if ``groups=None``) """ + product = check_product(network, product=product) + if groups is None: return filepaths groups = check_groups(groups) filepaths_dict = defaultdict(list) _ = [ - filepaths_dict[_get_groups_value(groups, filepath, network=network)].append(filepath) for filepath in filepaths + filepaths_dict[_get_groups_value(groups, filepath, network=network, product=product)].append(filepath) + for filepath in filepaths ] return dict(filepaths_dict) diff --git a/radar_api/io.py b/radar_api/io.py index 976979a..029ba95 100644 --- a/radar_api/io.py +++ b/radar_api/io.py @@ -50,9 +50,17 @@ def get_network_radars_config_path(network): return path -def get_network_config_filepath(network): - """Get filepath of the network configuration file.""" - filepath = os.path.join(get_network_config_path(), f"{network}.yaml") +def get_products_config_path(network): + """Get directory path with the network configuration files.""" + from radar_api import _root_path + + path = os.path.join(_root_path, "radar_api", "etc", "network", network) + return path + + +def get_product_config_filepath(network, product): + """Get filepath of the network product configuration file.""" + filepath = os.path.join(get_network_config_path(), network, f"{product}.yaml") return filepath @@ -62,14 +70,41 @@ def get_radar_config_filepath(network, radar): return filepath -def available_networks(): +def available_networks(only_online=False): """Get list of available networks.""" network_config_path = get_network_config_path() - networks_config_filenames = os.listdir(network_config_path) - networks = [fname.split(".")[0] for fname in networks_config_filenames] + # Select only directory and not hiddend directories + networks = [ + entry.name for entry in os.scandir(network_config_path) if entry.is_dir() and not entry.name.startswith(".") + ] + # If only_online=True, check if there are available_products online + if only_online: + networks = [network for network in networks if len(available_products(network, only_online=only_online)) > 0] return sorted(networks) +def available_products(network, only_online=False): + """Get list of available products for a given network.""" + network = check_network(network) + products_config_path = get_products_config_path(network) + + # Select only yaml files and remove hidden files + products = [ + os.path.splitext(entry.name)[0] + for entry in os.scandir(products_config_path) + if entry.is_file() and not entry.name.startswith(".") and entry.name.endswith(".yaml") + ] + + # If only_online=True, return products where cloud_directory_pattern is specified + if only_online: + products = [ + product + for product in products + if get_product_info(network, product=product).get("cloud_directory_pattern", None) is not None + ] + return sorted(products) + + def _get_network_radars(network, start_time=None, end_time=None): radars_config_path = get_network_radars_config_path(network) radars_config_filenames = os.listdir(radars_config_path) @@ -82,10 +117,10 @@ def _get_network_radars(network, start_time=None, end_time=None): return radars -def available_radars(network=None, start_time=None, end_time=None): +def available_radars(network=None, start_time=None, end_time=None, only_online=False): """Get list of available radars.""" if network is None: - networks = available_networks() + networks = available_networks(only_online=only_online) list_radars = [ _get_network_radars(network=network, start_time=start_time, end_time=end_time) for network in networks ] @@ -97,10 +132,10 @@ def available_radars(network=None, start_time=None, end_time=None): return sorted(radars) -def get_network_info(network): +def get_product_info(network, product): """Get network information.""" - network_config_path = get_network_config_filepath(network) - info_dict = read_yaml(network_config_path) + product_config_path = get_product_config_filepath(network, product) + info_dict = read_yaml(product_config_path) return info_dict @@ -194,10 +229,10 @@ def is_radar_available(network, radar, start_time=None, end_time=None): ) -def get_network_database(network): +def get_network_database(network, only_online=False): """Retrieve the radar network database.""" list_info = [] - for radar in available_radars(network=network): + for radar in available_radars(network=network, only_online=only_online): try: radar_info_path = get_radar_config_filepath(network=network, radar=radar) radar_info = read_yaml(radar_info_path) @@ -211,23 +246,25 @@ def get_network_database(network): return pd.DataFrame(list_info) -def get_database(): +def get_database(only_online=False): """Retrieve the RADAR-API database.""" - list_df = [get_network_database(network) for network in available_networks()] + list_df = [get_network_database(network) for network in available_networks(only_online=only_online)] return pd.concat(list_df) -def get_network_filename_patterns(network): +def get_product_filename_patterns(network, product): """Get radar filenames patterns.""" - return get_network_info(network)["filename_patterns"] + return get_product_info(network, product)["filename_patterns"] -def get_directory_pattern(protocol, network): +def get_directory_pattern(protocol, network, product): """Get directory pattern.""" if protocol in ["s3", "gcs"]: - directory_pattern = get_network_info(network)["cloud_directory_pattern"] + directory_pattern = get_product_info(network, product)["cloud_directory_pattern"] else: - directory_pattern = get_network_info(network)["local_directory_pattern"] + directory_pattern = get_product_info(network, product)["local_directory_pattern"] + if directory_pattern is None: + raise NotImplementedError(f"protocol {protocol} is not implemented for {network}.") return directory_pattern diff --git a/radar_api/readers.py b/radar_api/readers.py index 5b499fc..e0584bd 100644 --- a/radar_api/readers.py +++ b/radar_api/readers.py @@ -28,7 +28,8 @@ import fsspec -from radar_api.io import get_network_info +from radar_api.checks import check_product +from radar_api.io import get_product_info def get_simplecache_file(filepath): @@ -68,28 +69,42 @@ def wrapper(*args, **kwargs): return decorator -def get_xradar_datatree_reader(network): +def get_xradar_datatree_reader(network, product=None): """Return the xradar datatree reader.""" import xradar.io - func = getattr(xradar.io, get_network_info(network)["xradar_reader"]) + product = check_product(network, product=product) + xradar_reader_name = get_product_info(network, product)["xradar_reader"] + if xradar_reader_name is None: + raise NotImplementedError(f"No xradar reader is yet available for {product} product of network {network}.") + func = getattr(xradar.io, xradar_reader_name) return func -def get_pyart_reader(network): +def get_pyart_reader(network, product=None): """Return the pyart reader.""" + import pyart.aux_io import pyart.io + product = check_product(network, product=product) + pyart_reader_name = get_product_info(network, product)["pyart_reader"] + if pyart_reader_name is None: + raise NotImplementedError(f"No pyart reader is yet available for {product} product of network {network}.") + try: - func = getattr(pyart.io, get_network_info(network)["pyart_reader"]) + func = getattr(pyart.io, pyart_reader_name) except AttributeError: - func = getattr(pyart.aux_io, get_network_info(network)["pyart_reader"]) + try: + func = getattr(pyart.aux_io, pyart_reader_name) + except AttributeError: + raise NotImplementedError(f"The pyart reader {pyart_reader_name} is not available in your pyart library.") return func -def get_xradar_engine(network): +def get_xradar_engine(network, product=None): """Return the xradar engine.""" - return get_network_info(network)["xradar_engine"] + product = check_product(network, product=product) + return get_product_info(network, product)["xradar_engine"] def _prepare_file(filepath): @@ -99,29 +114,29 @@ def _prepare_file(filepath): @check_software_availability(software="xradar", conda_package="xradar") -def open_datatree(filepath, network, **kwargs): +def open_datatree(filepath, network, product=None, **kwargs): """Open a file into an xarray DataTree object using xradar.""" filepath = _prepare_file(filepath) - open_datatree = get_xradar_datatree_reader(network) + open_datatree = get_xradar_datatree_reader(network, product) dt = open_datatree(filepath, **kwargs) return dt @check_software_availability(software="xradar", conda_package="xradar") -def open_dataset(filepath, network, sweep, **kwargs): +def open_dataset(filepath, network, sweep, product=None, **kwargs): """Open a file into an xarray Dataset object using xradar.""" import xarray as xr filepath = _prepare_file(filepath) - engine = get_xradar_engine(network) + engine = get_xradar_engine(network, product) ds = xr.open_dataset(filepath, group=sweep, engine=engine, **kwargs) return ds @check_software_availability(software="pyart", conda_package="arm_pyart") -def open_pyart(filepath, network, **kwargs): +def open_pyart(filepath, network, product=None, **kwargs): """Open a file into a pyart object.""" filepath = _prepare_file(filepath) - pyart_reader = get_pyart_reader(network) + pyart_reader = get_pyart_reader(network, product) pyart_obj = pyart_reader(filepath, **kwargs) return pyart_obj diff --git a/radar_api/search.py b/radar_api/search.py index 93d3903..6007028 100644 --- a/radar_api/search.py +++ b/radar_api/search.py @@ -24,6 +24,8 @@ # SOFTWARE. """This module provides functions for searching files on local disk and cloud buckets.""" import datetime +import os +import zipfile import pandas as pd from trollsift import Parser @@ -31,6 +33,7 @@ from radar_api.checks import ( check_base_dir, check_network, + check_product, check_protocol, check_radar, check_start_end_time, @@ -54,7 +57,7 @@ def get_pattern_shortest_time_component(directory_pattern): if any(s in directory_pattern for s in ["{time:%m}", "{time:%b}", "{time:%B}"]): return "MS" if any(s in directory_pattern for s in ["{time:%Y}", "{time:%y}"]): - return "Y" # Y-DEC + return "YE" # Y-DEC raise NotImplementedError @@ -65,7 +68,7 @@ def get_list_timesteps(start_time, end_time, freq): end = pd.to_datetime(end_time) # Round start_time and end_time to the frequency resolution - # "Y" --> set month and days to 01 and zero out hour, minute, second + # "YE" --> set month and days to 01 and zero out hour, minute, second # "MS" --> set day to 01 and zero out hour, minute, second # "D" -> zero out hour, minute, second # "h" -> zero out minute, second @@ -85,7 +88,7 @@ def get_list_timesteps(start_time, end_time, freq): new_start_year = start.year start = pd.to_datetime(datetime.datetime(new_start_year, new_start_month, 1)) end = pd.to_datetime(datetime.datetime(end.year, end.month, 1)) - elif freq == "Y": # Y-DEC + elif freq == "YE": # Y-DEC start = pd.to_datetime(datetime.datetime(start.year - 1, 1, 1)) end = pd.to_datetime(datetime.datetime(end.year, 12, 31)) else: @@ -97,10 +100,10 @@ def get_list_timesteps(start_time, end_time, freq): return timesteps -def get_directories_paths(start_time, end_time, network, radar, protocol, base_dir): +def get_directories_paths(start_time, end_time, network, product, radar, protocol, base_dir): """Returns a list of the directory paths to scan.""" # Get directory pattern - directory_pattern = get_directory_pattern(protocol, network) + directory_pattern = get_directory_pattern(protocol, network, product) # Identify frequency freq = get_pattern_shortest_time_component(directory_pattern) # Create list of time directories @@ -111,12 +114,22 @@ def get_directories_paths(start_time, end_time, network, radar, protocol, base_d return paths +def _list_files_within_zip(zip_filepath): + """Return the paths of files within a zip file.""" + with zipfile.ZipFile(zip_filepath, "r") as zf: + filenames = zf.namelist() + filepaths = [os.path.join(zip_filepath, fname) for fname in filenames] + return filepaths + + def _try_list_files(fs, dir_path): + """Return filepaths within a given directory (or zip file).""" try: - fpaths = fs.ls(dir_path) + if not dir_path.endswith(".zip"): + return fs.ls(dir_path) + return _list_files_within_zip(dir_path) except Exception: - fpaths = [] - return fpaths + return [] def find_files( @@ -126,6 +139,7 @@ def find_files( end_time, base_dir=None, protocol="s3", + product=None, fs_args={}, verbose=False, ): @@ -153,6 +167,10 @@ def find_files( radar : str The name of the radar. Use `radar_api.available_radars()` to retrieve the available satellites. + product: str + The product acronym. The default is None. + It must be specified if for a given network, multiple products are available + through radar_api. network : str The name of the radar network. See `radar_api.available_network()` for available radar networks. @@ -182,6 +200,7 @@ def find_files( base_dir = check_base_dir(base_dir) network = check_network(network) radar = check_radar(radar=radar, network=network) + product = check_product(network=network, product=product) start_time, end_time = check_start_end_time(start_time, end_time) # Get filesystem @@ -194,6 +213,7 @@ def find_files( end_time=end_time, network=network, radar=radar, + product=product, protocol=protocol, base_dir=base_dir, ) @@ -219,8 +239,10 @@ def find_files( fpaths = [fpath for fpath in fpaths if not fpath.endswith("_MDM")] # Add bucket prefix fpaths = [bucket_prefix + fpath for fpath in fpaths] - # Filter files if necessary - fpaths = filter_files(fpaths, network=network, start_time=start_time, end_time=end_time) + # Filter files + # - Keep only files with expected filename structure + # - Subset by time + fpaths = filter_files(fpaths, network=network, product=product, start_time=start_time, end_time=end_time) list_fpaths += fpaths # Flat the list of filepaths and return it diff --git a/radar_api/tests/test_checks.py b/radar_api/tests/test_checks.py index ba113a9..04faf27 100644 --- a/radar_api/tests/test_checks.py +++ b/radar_api/tests/test_checks.py @@ -39,6 +39,7 @@ check_date, check_download_protocol, check_network, + check_product, check_protocol, check_radar, check_start_end_time, @@ -154,6 +155,23 @@ def test_check_radar() -> None: check_radar("DUMMY", network="NEXRAD") +def test_check_product() -> None: + """Test check_product().""" + # Check if for a network only one product available, return that + assert check_product(network="NEXRAD", product=None) == "NEXRAD_L2" + + # Check raise error for network with more than one product available, when product=None + with pytest.raises(ValueError): + check_product(network="MCH_LTE", product=None) + + # Check valid product + assert check_product(network="NEXRAD", product="NEXRAD_L2") == "NEXRAD_L2" + + # Check invalid product + with pytest.raises(ValueError): + check_product(network="MCH_LTE", product="INVALID") + + def test_check_time() -> None: """Test that time is returned a `datetime.datetime` object from varying inputs.""" # Test a string diff --git a/radar_api/tests/test_download.py b/radar_api/tests/test_download.py index 7459487..d445dd6 100644 --- a/radar_api/tests/test_download.py +++ b/radar_api/tests/test_download.py @@ -127,22 +127,25 @@ def test_define_local_filepath(tmp_path): """Test the define_local_filepath function.""" filename = "KTLX19910605_162126.gz" network = "NEXRAD" + product = "NEXRAD_L2" radar = "KTLX" base_dir = os.path.join(tmp_path, "RADAR") os.makedirs(base_dir, exist_ok=True) - res = define_local_filepath(filename=filename, network=network, radar=radar, base_dir=base_dir) + res = define_local_filepath(filename=filename, network=network, product=product, radar=radar, base_dir=base_dir) assert res == os.path.join(base_dir, network, "1991", "06", "05", "16", radar, filename) def test_find_files_on_cloud_bucket(tmp_path): """Test the find_files function on the s3 cloud bucket.""" base_dir = tmp_path - radar = "KTLX" network = "NEXRAD" + product = "NEXRAD_L2" + radar = "KTLX" start_time = "1991-06-05T16:20:00" end_time = "1991-06-05T16:22:00" # download only the first file available ... filepaths = download_files( network=network, + product=product, radar=radar, start_time=start_time, end_time=end_time, diff --git a/radar_api/tests/test_info.py b/radar_api/tests/test_info.py index 725d584..dba41c2 100644 --- a/radar_api/tests/test_info.py +++ b/radar_api/tests/test_info.py @@ -47,10 +47,13 @@ ) SAMPLE_FILES = { - # : [] - "FMI": ["202101010100_fiika_PVOL.h5"], - "NEXRAD": ["KFSX19960701_044028.gz", "KABR20100101_000618_V03", "KLIX20211220_160243_V06"], - "IDEAM": ["9100SAN-20240202-105624-PPIVol-0d1c.nc", "BAR240201135316.RAWMUAK"], + # : {"product": , "files": []} + "FMI": {"product": "PVOL", "files": ["202101010100_fiika_PVOL.h5"]}, + "NEXRAD": { + "product": "NEXRAD_L2", + "files": ["KFSX19960701_044028.gz", "KABR20100101_000618_V03", "KLIX20211220_160243_V06"], + }, + "IDEAM": {"product": "IDEAM_L2", "files": ["9100SAN-20240202-105624-PPIVol-0d1c.nc", "BAR240201135316.RAWMUAK"]}, } @@ -68,6 +71,7 @@ "end_time": None, "radar_acronym": "fiika", "volume_identifier": "PVOL", + "sweep_identifier": "", "extension": "h5", "version": "", }, @@ -83,6 +87,7 @@ "end_time": None, "radar_acronym": "KFSX", "volume_identifier": "", + "sweep_identifier": "", "extension": "gz", "version": "", }, @@ -96,6 +101,7 @@ "end_time": None, "radar_acronym": "KABR", "volume_identifier": "", + "sweep_identifier": "", "extension": "gz", "version": "3", }, @@ -108,6 +114,7 @@ "end_time": None, "radar_acronym": "KABR", "volume_identifier": "", + "sweep_identifier": "", "extension": "", "version": "6", }, @@ -123,6 +130,7 @@ "end_time": None, "radar_acronym": "9100SAN", "volume_identifier": "0d1c", + "sweep_identifier": "", "extension": "nc", "version": "", }, @@ -136,6 +144,7 @@ "end_time": None, "radar_acronym": "BAR", "volume_identifier": "MUAK", + "sweep_identifier": "", "extension": "", "version": "", }, @@ -153,20 +162,24 @@ def _generate_test_params(sample_dict): - """Generate (network, filename, expected_info) for all samples in SAMPLE_FILES_INFO_DICT.""" + """Generate (network, product, filename, expected_info) for all samples in SAMPLE_FILES_INFO_DICT.""" for network, file_info_list in sample_dict.items(): + product = SAMPLE_FILES[network]["product"] for filename, expected_info in file_info_list: test_id = f"{network}-{filename}" - yield pytest.param(network, filename, expected_info, id=test_id) + yield pytest.param(network, product, filename, expected_info, id=test_id) ####------------------------------------------------------------------------ -@pytest.mark.parametrize(("network", "filename", "expected_info"), _generate_test_params(SAMPLE_FILES_INFO_DICT)) -def test_get_info_from_filename(network, filename, expected_info): +@pytest.mark.parametrize( + ("network", "product", "filename", "expected_info"), + _generate_test_params(SAMPLE_FILES_INFO_DICT), +) +def test_get_info_from_filename(network, product, filename, expected_info): """Test get_info_from_filename returns the correct parsed info for known filenames.""" - parsed_info = get_info_from_filename(filename, network) + parsed_info = get_info_from_filename(filename, network, product) # Check each key in expected_info for key, expected_val in expected_info.items(): assert parsed_info.get(key) == expected_val, ( @@ -179,56 +192,64 @@ def test_get_info_from_filename(network, filename, expected_info): def test_get_info_from_invalid_filename(): """Test get_info_from_filename raise error or return empty dictionary for unknown filenames.""" + product = SAMPLE_FILES["NEXRAD"]["product"] with pytest.raises(ValueError): - get_info_from_filename("invalid_filename", network="NEXRAD") + get_info_from_filename("invalid_filename", network="NEXRAD", product=product) # Assert that if ignore_errors = True, return empty dictionary - assert get_info_from_filename("invalid_filename", network="NEXRAD", ignore_errors=True) == {} + assert get_info_from_filename("invalid_filename", network="NEXRAD", product=product, ignore_errors=True) == {} def test_get_info_from_invalid_filepath(): """Test get_info_from_filepath raise error with invalid filepaths.""" + product = SAMPLE_FILES["NEXRAD"]["product"] # Invalid filename with pytest.raises(ValueError): - get_info_from_filepath("invalid_filename", network="NEXRAD") + get_info_from_filepath("invalid_filename", network="NEXRAD", product=product) # Filepath not a string with pytest.raises(TypeError): - get_info_from_filepath(123, network="NEXRAD") + get_info_from_filepath(123, network="NEXRAD", product=product) # Assert that if ignore_errors = True, return empty dictionary - assert get_info_from_filepath("invalid_filename", network="NEXRAD", ignore_errors=True) == {} + assert get_info_from_filepath("invalid_filename", network="NEXRAD", product=product, ignore_errors=True) == {} -@pytest.mark.parametrize(("network", "filename", "expected_info"), _generate_test_params(SAMPLE_FILES_INFO_DICT)) -def test_get_key_from_filepath_valid(network, filename, expected_info): +@pytest.mark.parametrize( + ("network", "product", "filename", "expected_info"), + _generate_test_params(SAMPLE_FILES_INFO_DICT), +) +def test_get_key_from_filepath_valid(network, product, filename, expected_info): """Test get_key_from_filepath returns the requested key value.""" for key, expected_val in expected_info.items(): # Some keys might be None in expected_info, skip them or test them if relevant - returned_val = get_key_from_filepath(filename, key=key, network=network) + returned_val = get_key_from_filepath(filename, key=key, network=network, product=product) assert returned_val == expected_val, f"For key '{key}', expected {expected_val} but got {returned_val}" def test_get_key_from_filepath_missing_key(): """Test get_key_from_filepath raises KeyError if requested key is not present.""" + product = SAMPLE_FILES["NEXRAD"]["product"] with pytest.raises(KeyError): - get_key_from_filepath("KFSX19960701_044028.gz", key="non_existent", network="NEXRAD") + get_key_from_filepath("KFSX19960701_044028.gz", key="non_existent", network="NEXRAD", product=product) def test_get_key_from_filepaths(): """Test get_key_from_filepaths returns a list of the requested key value.""" filepaths = ["202101010100_fiika_PVOL.h5"] + product = SAMPLE_FILES["FMI"]["product"] # Test input a list return a list - assert isinstance(get_key_from_filepaths(filepaths, network="FMI", key="start_time"), list) + assert isinstance(get_key_from_filepaths(filepaths, network="FMI", product=product, key="start_time"), list) # Test input a string return still a list - assert isinstance(get_key_from_filepaths(filepaths[0], network="FMI", key="start_time"), list) + assert isinstance(get_key_from_filepaths(filepaths[0], network="FMI", product=product, key="start_time"), list) def test_get_start_time_from_filepaths() -> None: """Test that the start time is correctly extracted from filepaths.""" # We'll use the FMI sample as an example filenames = ["202101010100_fiika_PVOL.h5"] - times = get_start_time_from_filepaths(filenames, network="FMI") + product = SAMPLE_FILES["FMI"]["product"] + times = get_start_time_from_filepaths(filenames, network="FMI", product=product) # The function returns a list, so check the first item assert len(times) == 1 assert times[0] == datetime.datetime(2021, 1, 1, 1, 0) @@ -238,7 +259,8 @@ def test_get_end_time_from_filepaths() -> None: """Test that the end time is correctly extracted from filepaths.""" # We'll use the FMI sample as an example (without end_time in filename) filenames = ["202101010100_fiika_PVOL.h5"] - times = get_end_time_from_filepaths(filenames, network="FMI") + product = SAMPLE_FILES["FMI"]["product"] + times = get_end_time_from_filepaths(filenames, network="FMI", product=product) assert len(times) == 1 assert times[0] is None, f"Expected None end_time for {filenames[0]}" @@ -246,7 +268,8 @@ def test_get_end_time_from_filepaths() -> None: def test_get_start_end_time_from_filepaths() -> None: """Test get_start_end_time_from_filepaths returns numpy arrays.""" filenames = ["202101010100_fiika_PVOL.h5"] - start_arr, end_arr = get_start_end_time_from_filepaths(filenames, network="FMI") + product = SAMPLE_FILES["FMI"]["product"] + start_arr, end_arr = get_start_end_time_from_filepaths(filenames, network="FMI", product=product) assert len(start_arr) == 1 assert len(end_arr) == 1 assert start_arr[0] == datetime.datetime(2021, 1, 1, 1, 0) @@ -258,10 +281,13 @@ def test_get_start_end_time_from_filepaths() -> None: def test_get_versions_from_filepaths() -> None: """Test that the version is correctly extracted from filepaths.""" filenames = ["KFSX19960701_044028.gz", "KABR20100101_000618_V03.gz", "KABR20100101_000618_V06"] - output_version = get_version_from_filepaths(filenames, network="NEXRAD") + product = SAMPLE_FILES["NEXRAD"]["product"] + output_version = get_version_from_filepaths(filenames, network="NEXRAD", product=product) assert output_version == [None, 3, 6] - assert get_version_from_filepaths(filenames[0], network="NEXRAD") == [None] # input str output list + assert get_version_from_filepaths(filenames[0], network="NEXRAD", product=product) == [ + None, + ] # input str output list def test_check_groups(): @@ -304,32 +330,44 @@ def test_get_season(): @pytest.mark.parametrize("network", NETWORKS) def test_group_filepaths(network): """Test group_filepaths function.""" - filepaths = SAMPLE_FILES[network] + filepaths = SAMPLE_FILES[network]["files"] + product = SAMPLE_FILES[network]["product"] # Test groups = None - assert group_filepaths(filepaths, None) == filepaths + assert group_filepaths(filepaths, network=network, product=product, groups=None) == filepaths # Test all time keys pass for key in TIME_KEYS: - assert isinstance(group_filepaths(filepaths, network=network, groups=key), dict) + assert isinstance(group_filepaths(filepaths, network=network, product=product, groups=key), dict) # Test multiple groups - assert isinstance(group_filepaths([filepaths[0]], network=network, groups=["radar_acronym", "year", "month"]), dict) + assert isinstance( + group_filepaths([filepaths[0]], network=network, product=product, groups=["radar_acronym", "year", "month"]), + dict, + ) # Test all file keys pass for key in FILE_KEYS: - assert isinstance(group_filepaths(filepaths, network=network, groups=key), dict) + assert isinstance(group_filepaths(filepaths, network=network, product=product, groups=key), dict) def test_group_filepaths_by_time(): """Test group_filepaths by time.""" network = "NEXRAD" + product = SAMPLE_FILES[network]["product"] dummy_filepath = "KABR20100101_000618_V03" # Test single group - assert group_filepaths([dummy_filepath], network=network, groups="year") == {"2010": [dummy_filepath]} + assert group_filepaths([dummy_filepath], network=network, product=product, groups="year") == { + "2010": [dummy_filepath], + } # Test multiple groups - assert group_filepaths([dummy_filepath], network=network, groups=["radar_acronym", "year", "month"]) == { + assert group_filepaths( + [dummy_filepath], + network=network, + product=product, + groups=["radar_acronym", "year", "month"], + ) == { "KABR/2010/1": [dummy_filepath], } diff --git a/radar_api/tests/test_io.py b/radar_api/tests/test_io.py index efdc61e..efec162 100644 --- a/radar_api/tests/test_io.py +++ b/radar_api/tests/test_io.py @@ -34,15 +34,16 @@ from radar_api.io import ( available_networks, + available_products, available_radars, get_bucket_prefix, get_directory_pattern, get_filesystem, - get_network_config_filepath, get_network_config_path, - get_network_filename_patterns, - get_network_info, get_network_radars_config_path, + get_product_config_filepath, + get_product_filename_patterns, + get_product_info, get_radar_config_filepath, get_radar_end_time, get_radar_info, @@ -69,11 +70,13 @@ def test_get_network_radars_config_path(): @pytest.mark.parametrize("network", NETWORKS) -def test_get_network_config_filepath(network): - """Test get_network_config_filepath returns the correct .yaml file path.""" - filepath = get_network_config_filepath(network) - assert os.path.isfile(filepath) - assert filepath.endswith(f"{network}.yaml") +def test_get_product_config_filepath(network): + """Test get_product_config_filepath returns the correct .yaml file path.""" + products = available_products(network=network) + for product in products: + filepath = get_product_config_filepath(network, product) + assert os.path.isfile(filepath) + assert filepath.endswith(f"{product}.yaml") @pytest.mark.parametrize("network", NETWORKS) @@ -87,25 +90,29 @@ def test_get_radar_config_filepath(network): def test_available_networks(): """Test available_networks.""" - nets = available_networks() - assert "NEXRAD" in nets + networks = available_networks() + assert isinstance(networks, list) + assert len(networks) > 0 + assert "NEXRAD" in networks def test_available_radars_all_networks(): """Test available_radars().""" radars = available_radars() assert isinstance(radars, list) + assert len(radars) > 0 def test_available_radars_single_network(): """Test available_radars(network).""" radars = available_radars("NEXRAD") assert isinstance(radars, list) + assert len(radars) > 0 -def test_get_network_info(): - """Test get_network_info returns the correct dict from NEXRAD.yaml.""" - info = get_network_info("NEXRAD") +def test_get_product_info(): + """Test get_product_info returns the correct dict from NEXRAD.yaml.""" + info = get_product_info("NEXRAD", product="NEXRAD_L2") assert info["xradar_reader"] == "open_nexradlevel2_datatree" assert info["pyart_reader"] == "read_nexrad_archive" @@ -162,24 +169,33 @@ def test_is_radar_available(start_time, end_time, expected): @pytest.mark.parametrize("network", NETWORKS) -def test_get_network_filename_patterns(network): - """Test get_network_filename_patterns returns the test pattern.""" - patterns = get_network_filename_patterns(network) - assert isinstance(patterns, list) +def test_get_product_filename_patterns(network): + """Test get_product_filename_patterns returns the test pattern.""" + products = available_products(network=network) + for product in products: + patterns = get_product_filename_patterns(network, product=product) + assert isinstance(patterns, list) @pytest.mark.parametrize("network", NETWORKS) def test_get_directory_pattern_cloud(network): """Test get_directory_pattern for a cloud protocol (e.g. s3).""" - pattern = get_directory_pattern(protocol="s3", network=network) - assert isinstance(pattern, str) + products = available_products(network=network) + for product in products: + try: + pattern = get_directory_pattern(protocol="s3", network=network, product=product) + except NotImplementedError: + pytest.skip(f"protocol s3 is not implemented for product {product} of {network} network") + assert isinstance(pattern, str) @pytest.mark.parametrize("network", NETWORKS) def test_get_directory_pattern_local(network): """Test get_directory_pattern for local protocol.""" - pattern = get_directory_pattern(protocol="file", network=network) - assert isinstance(pattern, str) + products = available_products(network=network) + for product in products: + pattern = get_directory_pattern(protocol="file", network=network, product=product) + assert isinstance(pattern, str) def test_get_filesystem_s3(): diff --git a/radar_api/tests/test_readers.py b/radar_api/tests/test_readers.py index a82bd2c..ddb18f5 100644 --- a/radar_api/tests/test_readers.py +++ b/radar_api/tests/test_readers.py @@ -42,7 +42,7 @@ def test_get_simplecache_file(): """Test file simple caching with fsspec.""" - filepath = "s3://noaa-nexrad-level2/2023/01/01/KABR/KABR20230101_000142_V06" + filepath = "s3://unidata-nexrad-level2/2023/01/01/KABR/KABR20230101_000142_V06" file = get_simplecache_file(filepath) assert isinstance(file, str) diff --git a/radar_api/tests/test_search.py b/radar_api/tests/test_search.py index b3994da..cd2e91b 100644 --- a/radar_api/tests/test_search.py +++ b/radar_api/tests/test_search.py @@ -69,7 +69,7 @@ def test_year_component(self): """Test get_pattern_shortest_time_component returns 'Y' for pattern with {time:%Y}.""" pattern = "some_path/{time:%Y}" freq = get_pattern_shortest_time_component(pattern) - assert freq == "Y" + assert freq == "YE" def test_not_implemented(self): """Test get_pattern_shortest_time_component raises NotImplementedError if unknown format.""" @@ -145,7 +145,7 @@ def test_freq_y(self): """Test get_list_timesteps with freq='Y' (yearly).""" start_time = "2024-06-10" end_time = "2026-02-01" - times = get_list_timesteps(start_time, end_time, freq="Y") + times = get_list_timesteps(start_time, end_time, freq="YE") # freq='Y' (YE-DEC) means shift start year back by 1 => 2023, assert list(times) == [ pd.Timestamp("2023-12-31"), @@ -167,10 +167,11 @@ def test_directories_s3_day(self): end_time = "2023-07-02T01:00:00" network = "NEXRAD" radar = "KFSD" + product = "NEXRAD_L2" protocol = "s3" base_dir = None - # directory_pattern for protocol="s3" is 's3://noaa-nexrad-level2/{time:%Y}/{time:%m}/{time:%d}/{radar:s}' + # directory_pattern for protocol="s3" is 's3://unidata-nexrad-level2/{time:%Y}/{time:%m}/{time:%d}/{radar:s}' # => The last time component is {time:%d} => freq='D' # => We'll generate times for 2023-06-30, 2023-07-01, 2023-07-02 @@ -178,15 +179,16 @@ def test_directories_s3_day(self): start_time=start_time, end_time=end_time, network=network, + product=product, radar=radar, protocol=protocol, base_dir=base_dir, ) expected = [ # Because the function does start_time - 1 day, then floors to day ... - "s3://noaa-nexrad-level2/2023/06/30/KFSD", - "s3://noaa-nexrad-level2/2023/07/01/KFSD", - "s3://noaa-nexrad-level2/2023/07/02/KFSD", + "s3://unidata-nexrad-level2/2023/06/30/KFSD", + "s3://unidata-nexrad-level2/2023/07/01/KFSD", + "s3://unidata-nexrad-level2/2023/07/02/KFSD", ] assert paths == expected diff --git a/radar_api/utils/xradar.py b/radar_api/utils/xradar.py index d209d20..06c3163 100644 --- a/radar_api/utils/xradar.py +++ b/radar_api/utils/xradar.py @@ -48,8 +48,9 @@ def _get_sweep_dataset(radar_obj, sweep): fields = list(radar_obj.fields) for field_name in fields: arr = _get_field_array(radar_obj, sweep, field_name) - dims = radar_obj.fields[field_name]["coordinates"].split(" ")[1:] - dict_da[field_name] = xr.DataArray(arr, dims=dims) + if "coordinates" in radar_obj.fields[field_name]: + dims = radar_obj.fields[field_name]["coordinates"].split(" ")[1:] + dict_da[field_name] = xr.DataArray(arr, dims=dims) ds = xr.Dataset(dict_da) # Add coords coords_dict = { @@ -98,3 +99,64 @@ def get_nexrad_datatree_from_pyart(radar_obj): dt[coord] = value return dt + + +def _get_sweep_dataset_mch(radar_obj, sweep): + dict_da = {} + fields = list(radar_obj.fields) + for field_name in fields: + arr = _get_field_array(radar_obj, sweep=0, field_name=field_name) + if "coordinates" in radar_obj.fields[field_name]: + dims = radar_obj.fields[field_name]["coordinates"].split(" ")[1:] + dict_da[field_name] = xr.DataArray(arr, dims=dims) + ds = xr.Dataset(dict_da) + # Add coords + coords_dict = { + "azimuth": ("azimuth", radar_obj.get_azimuth(0)), + "elevation": ("azimuth", radar_obj.get_elevation(0)), + "range": ("range", radar_obj.range["data"][: ds.sizes["range"]]), + "time": ("azimuth", radar_obj.time["data"][radar_obj.get_slice(0)]), + } + # Add other coordinates + coords_dict.update(_get_radar_location(radar_obj)) + coords_dict["sweep_number"] = sweep + coords_dict["sweep_mode"] = radar_obj.sweep_mode["data"][0] + coords_dict["sweep_fixed_angle"] = radar_obj.fixed_angle["data"][0] + + ds = ds.assign_coords(coords_dict) + ds["time"].attrs["units"] = radar_obj.time["units"] + + # Decode time + ds = xr.decode_cf(ds, decode_times=True) + return ds + + +def get_mch_datatree_from_pyart(radar_obj): + """Convert a pyart object to xradar datatree.""" + # Define renaming dictionary to CF-Radials2 + # --> https://github.com/openradar/xradar/blob/830d86b1c6290f1dce0e73c60a1d3b819735f906/xradar/model.py#L385 + # --> Currently set same range for all sweeps ! + # --> Currently do not copy metadata and variable attributes ! + dict_var_naming = { + "reflectivity": "DBZH", + "differential_reflectivity": "ZDR", + "uncorrected_cross_correlation_ratio": "RHOHV", + "uncorrected_differential_phase": "PHIDP", + "spectrum_width": "WRADH", + "velocity": "VRADH", + # reflectivity_hh_clut + # reflectivity_vv + # signal_to_noise_ratio + } + dict_ds = {} + for sweep in radar_obj.sweep_number["data"]: + sweep_name = f"sweep_{sweep}" + ds = _get_sweep_dataset_mch(radar_obj, sweep=sweep) + rename_dict = {k: v for k, v in dict_var_naming.items() if k in ds} + dict_ds[sweep_name] = ds.rename(rename_dict) + dt = xr.DataTree.from_dict(dict_ds) + # Add geolocation + for coord, value in _get_radar_location(radar_obj).items(): + dt[coord] = value + + return dt