diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..a32e2b4
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+  "python-envs.defaultEnvManager": "ms-python.python:conda",
+  "python-envs.defaultPackageManager": "ms-python.python:conda",
+  "python-envs.pythonProjects": []
+}
diff --git a/README.md b/README.md
index a31a1ec..eba4580 100644
--- a/README.md
+++ b/README.md
@@ -20,9 +20,9 @@ Host will be `http://localhost:8000`.
 
 2. **Create Conda virtual environment:**
 
-   ```bash
-   $ conda env create -f environment.yml
-   ```
+    ```bash
+    $ conda env create -f environment.yml
+    ```
 
 ### Dependency management
 
@@ -47,7 +47,9 @@ aodn_cloud_optimised = { git = "https://github.com/aodn/aodn_cloud_optimised.git
 ```
 
 ### Error in dependencies
+
 You may try to clean the cache by using the following command
+
 ```commandline
 poetry cache clear --all PyPI
 poetry env remove --all
@@ -59,16 +61,18 @@ poetry install
 
 1. **Activate Conda virtual environment:**
 
-   ```bash
-   $ conda activate data-access-service
-   ```
+    ```bash
+    $ conda activate data-access-service
+    ```
 
 2. **Install dependencies using Poetry:**
-
-   ```bash
-   # after cloning the repo with git clone command
-   $ cd data-access-service
-   $ poetry install
-   ```
+
+    ```bash
+    # after cloning the repo with git clone command
+    $ cd data-access-service
+    $ poetry install
+    ```
+
 ```bash
 # You should not need to install lib locally, if your python version is correct.
 # https://arrow.apache.org/install/
@@ -91,19 +95,20 @@ sudo apt install -y -V libparquet-dev # For Apache Parquet C++
 sudo apt install -y -V libparquet-glib-dev # For Apache Parquet GLib (C)
 sudo apt install -y ninja-build
-   ```
+    ```
 
 3. **Run the app:**
-   In project root folder, create a '.env' file, which contains your API key, e.g.:
-   ```
-   API_KEY="your_actual_api_key_here"
-   ```
+    In the project root folder, create a `.env` file containing your API key, e.g.:
+
+    ```
+    API_KEY="your_actual_api_key_here"
+    ```
 
     Host will be `http://localhost:5000` and default profile is DEV
 
-   ```bash
-   $ python -m data_access_service.server
-   ```
+    ```bash
+    $ python -m data_access_service.server
+    ```
 
 ### Code formatting
@@ -149,33 +154,41 @@ PROFILE=edge
 | Get values for indexing | /api/v1/das/data/{uuid}/{key}/indexing_values | key=radar_CoffsHarbour_wind_delayed_qc.zarr, start_date=2023-12-25T14:30:00, end_date=2024-02-25T14:30:00 | |
 | Get buoys and its locations that contains data in a time window | /api/v1/das/data/feature-collection/wave-buoy | start_date=2023-12-25T14:30:00, end_date=2024-02-25T14:30:00 | ALL |
 | Gets SSWMD,WPFM,WSSH from a buoy in a time window | /api/v1/das/data/feature-collection/wave-buoy/{buoy_name} | start_date=2023-12-25T14:30:00, end_date=2024-02-25T14:30:00 | ALL |
+| Gets latest available wave buoy date | /api/v1/das/data/feature-collection/wave-buoy/latest | | ALL |
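+
+For example, once the server is running you can exercise the new endpoint with `curl`. This is a minimal sketch only: the `X-API-Key` header name is an assumption (check `api_key_auth` in `data_access_service/core/routes.py` for the header your deployment actually expects), and the host/port should match your profile:
+
+```shell
+curl -H "X-API-Key: $API_KEY" \
+  "http://localhost:5000/api/v1/das/data/feature-collection/wave-buoy/latest"
+```
+
+The response body is a JSON-encoded date string such as `"2025-01-15"`, i.e. the latest `TIME` value available in the wave buoy dataset.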
 
 ### Running Tests
 
 To run the tests for the project:
+
 ```shell
 poetry run python -m unittest discover
 ```
+
 This will discover and run all the test cases in your project.
 
 If you have "ModuleNotFoundError" or similar issues, you may need to install dependencies before running the tests:
+
 ```shell
 poetry install
 ```
 
 # Batch jobs
+
 Another part of this project is to run batch jobs for dataset subsetting.
 
 ### Local running testing
+
 - If you want to test the batch job codes locally, (running in your local machine)
-Please export aws environment variables first (or use profile etc..)
-(for example, if use edge, please go to aws access portal, and pick AODN-Edge -> AodnAdminAccess)
-and also export `AWS_BATCH_JOB_ID` (please go to batch console to copy an existing job id).
-After several exporting, make sure your terminal is at the root folder of this project. Then please run:
+  First, export your AWS credentials as environment variables (or use a configured AWS profile). For example, to use the Edge environment, go to the AWS access portal and pick AODN-Edge -> AodnAdminAccess.
+  Also export `AWS_BATCH_JOB_ID`; copy the ID of an existing job from the AWS Batch console. A sketch of the required exports is shown below.
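+
+  For example, a minimal sketch of the exports (the credential values below are placeholders for your own temporary credentials, and the job ID must be copied from a real job in the AWS Batch console):
+
+  ```shell
+  export AWS_ACCESS_KEY_ID="..."
+  export AWS_SECRET_ACCESS_KEY="..."
+  export AWS_SESSION_TOKEN="..."
+  export AWS_BATCH_JOB_ID="..."
+  ```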
+  Once everything is exported, make sure your terminal is at the root folder of this project, then run:
+
 ```shell
 ./entry_point.py
 ```
 
 ### aws running testing
+
 - If you want to test the batch job codes in AWS, (running in AWS Batch), please :
   1. Build the docker image and push it to ECR (havier-test-ecr). Please do it by following the instructions in the havier-test-ecr repo by clicking button "View push commands" at the top right corner.
   2. Open the ogc-api project locally
diff --git a/data_access_service/core/api.py b/data_access_service/core/api.py
index 33d0357..60e0479 100644
--- a/data_access_service/core/api.py
+++ b/data_access_service/core/api.py
@@ -476,6 +476,15 @@ def fetch_wave_buoy_data(self, buoy_name: str, start_date: str, end_date: str):
 
         return feature
 
+    def fetch_wave_buoy_latest_date(self):
+        result = self.memconn.execute(
+            """SELECT
+                MAX(TIME) AS TIME
+            FROM wave_buoy_realtime_nonqc"""
+        ).df()
+        DATE_FORMAT = "%Y-%m-%d"
+        return result["TIME"].item().strftime(DATE_FORMAT)
+
     def fetch_wave_buoy_sites(self, start_date: str, end_date: str):
         result = self.memconn.execute(
             f"""SELECT
diff --git a/data_access_service/core/routes.py b/data_access_service/core/routes.py
index 0a64380..05fd107 100644
--- a/data_access_service/core/routes.py
+++ b/data_access_service/core/routes.py
@@ -294,6 +294,17 @@ async def get_feature_collection_of_items_with_data_between_dates(
     )
 
 
+@router.get(
+    "/data/feature-collection/wave-buoy/latest", dependencies=[Depends(api_key_auth)]
+)
+async def get_latest_wave_buoy_date(request: Request):
+    api_instance = get_api_instance(request)
+    return Response(
+        content=json.dumps(api_instance.fetch_wave_buoy_latest_date()),
+        media_type="application/json",
+    )
+
+
 @router.get(
     "/data/feature-collection/wave-buoy/{buoy_name}",
     dependencies=[Depends(api_key_auth)],
diff --git a/tests/core/test_api.py b/tests/core/test_api.py
index f76d28c..3c17ade 100644
--- a/tests/core/test_api.py
+++ b/tests/core/test_api.py
@@ -6,7 +6,7 @@ import pandas as pd
 import numpy as np
 from pathlib import Path
 
-from unittest.mock import patch
+from unittest.mock import patch, MagicMock
 
 from aodn_cloud_optimised import DataQuery
 
@@ -214,3 +214,88 @@ def test_normalize_to_0_360_if_needed(self, get_metadata):
         with self.assertRaises(TypeError):
             api.normalize_to_0_360_if_needed(uuid, key, 370)
             api.normalize_to_0_360_if_needed(uuid, key, -370.0)
+
+    def test_fetch_wave_buoy_latest_date(self):
+        api = API()
+        mock_df = pd.DataFrame({"TIME": [pd.Timestamp("2025-01-15 12:30:00")]})
+        api.memconn = MagicMock()
+        api.memconn.execute.return_value.df.return_value = mock_df
+
+        result = api.fetch_wave_buoy_latest_date()
+
+        self.assertEqual(result, "2025-01-15")
+        api.memconn.execute.assert_called_once()
+
+    def test_fetch_wave_buoy_sites(self):
+        api = API()
+        mock_df = pd.DataFrame(
+            {
+                "site_name": ["Brisbane", "Sydney"],
+                "TIME": [
+                    pd.Timestamp("2025-01-10 08:00:00"),
+                    pd.Timestamp("2025-01-11 09:00:00"),
+                ],
+                "LATITUDE": [-27.47, -33.87],
+                "LONGITUDE": [153.03, 151.21],
+            }
+        )
+        api.memconn = MagicMock()
+        api.memconn.execute.return_value.df.return_value = mock_df
+
+        result = api.fetch_wave_buoy_sites("2025-01-10", "2025-01-12")
+
+        self.assertEqual(result["type"], "FeatureCollection")
+        self.assertEqual(len(result["features"]), 2)
+
+        feature0 = result["features"][0]
+        self.assertEqual(feature0["type"], "Feature")
+        self.assertEqual(feature0["properties"]["buoy"], "Brisbane")
+        self.assertEqual(feature0["properties"]["date"], "2025-01-10")
+        self.assertEqual(feature0["geometry"]["type"], "Point")
+        self.assertEqual(feature0["geometry"]["coordinates"], [153.03, -27.47])
+
+        feature1 = result["features"][1]
+        self.assertEqual(feature1["properties"]["buoy"], "Sydney")
+        self.assertEqual(feature1["properties"]["date"], "2025-01-11")
+        self.assertEqual(feature1["geometry"]["coordinates"], [151.21, -33.87])
+
+    def test_fetch_wave_buoy_data(self):
+        api = API()
+        position_df = pd.DataFrame({"LATITUDE": [-27.47], "LONGITUDE": [153.03]})
+        data_df = pd.DataFrame(
+            {
+                "TIME": [
+                    pd.Timestamp("2025-01-10 08:00:00"),
+                    pd.Timestamp("2025-01-10 09:00:00"),
+                ],
+                "SSWMD": [180.0, np.nan],
+                "WPFM": [0.08, 0.09],
+                "WPMH": [np.nan, 5.0],
+                "WHTH": [1.2, 1.3],
+                "WSSH": [np.nan, np.nan],
+            }
+        )
+        api.memconn = MagicMock()
+        api.memconn.execute.return_value.df.side_effect = [position_df, data_df]
+
+        result = api.fetch_wave_buoy_data("Brisbane", "2025-01-10", "2025-01-11")
+
+        self.assertEqual(result["type"], "Feature")
+        self.assertEqual(result["geometry"]["coordinates"], [153.03, -27.47])
+
+        # SSWMD: first row has value, second is NaN
+        self.assertEqual(len(result["properties"]["SSWMD"]), 1)
+        self.assertAlmostEqual(result["properties"]["SSWMD"][0][1], 180.0)
+
+        # WPFM: both rows have values
+        self.assertEqual(len(result["properties"]["WPFM"]), 2)
+
+        # WPMH: only second row has value
+        self.assertEqual(len(result["properties"]["WPMH"]), 1)
+        self.assertAlmostEqual(result["properties"]["WPMH"][0][1], 5.0)
+
+        # WHTH: both rows have values
+        self.assertEqual(len(result["properties"]["WHTH"]), 2)
+
+        # WSSH: both NaN, so empty
+        self.assertEqual(len(result["properties"]["WSSH"]), 0)