diff --git a/data_access_service/tasks/subset_zarr.py b/data_access_service/tasks/subset_zarr.py
index 6be8dba4..bb598990 100644
--- a/data_access_service/tasks/subset_zarr.py
+++ b/data_access_service/tasks/subset_zarr.py
@@ -160,7 +160,14 @@ def __get_zarr_dataset_for_(self, key: str) -> xarray.Dataset | None:
         for k, val_range in conditions.items():
             print("forming condition for key", k, "with range", val_range)
             if is_dim(key=k, dataset=dataset):
-                dim_conditions[k] = slice(val_range[0], val_range[1])
+                # dim_conditions[k] = slice(val_range[0], val_range[1])
+                dim_conditions = form_dim_conditions(
+                    existing_conditions=dim_conditions,
+                    key=k,
+                    min_value=val_range[0],
+                    max_value=val_range[1],
+                    dataset=dataset,
+                )
             elif is_var(key=k, dataset=dataset):
                 mask = form_mask(
                     existing_mask=mask,
@@ -525,3 +532,27 @@ def form_mask(
         return var_mask
     else:
         return existing_mask & var_mask
+
+
+def form_dim_conditions(
+    existing_conditions: dict[str, slice] | None,
+    key: str,
+    min_value: any,
+    max_value: any,
+    dataset: xarray.Dataset,
+) -> dict[str, slice]:
+    # Work out whether the dimension coordinate is ascending or descending
+    slice_from = min_value
+    slice_to = max_value
+
+    # If descending, swap the bounds so the label-based slice still selects the range
+    if dataset[key][0] > dataset[key][-1]:
+        slice_from = max_value
+        slice_to = min_value
+
+    dim_condition = {key: slice(slice_from, slice_to)}
+    if existing_conditions is None:
+        return dim_condition
+    else:
+        existing_conditions.update(dim_condition)
+        return existing_conditions
diff --git a/tests/tasks/test_subset_zarr.py b/tests/tasks/test_subset_zarr.py
index 1c7ef0bd..a651ff58 100644
--- a/tests/tasks/test_subset_zarr.py
+++ b/tests/tasks/test_subset_zarr.py
@@ -94,6 +94,74 @@ def test_zarr_processor(
             # Delete temp output folder as the name always same for testing
             shutil.rmtree(config.get_temp_folder("888"), ignore_errors=True)
 
+    @patch("aodn_cloud_optimised.lib.DataQuery.REGION", REGION)
+    def test_zarr_descending_dims(
+        self,
+        aws_clients,
+        upload_test_case_to_s3,
+        mock_get_fs_token_paths,
+    ):
+        s3_client, _, _ = aws_clients
+        config = Config.get_config()
+        helper = AWSHelper()
+
+        api = API()
+        api.initialize_metadata()
+
+        with patch("fsspec.core.get_fs_token_paths", mock_get_fs_token_paths):
+            # Patch fsspec to fix an issue where we cannot pass the storage_options correctly
+            with patch.object(AWSHelper, "send_email") as mock_send_email:
+
+                key = "radar_CoffsHarbour_wind_delayed_qc.zarr"
+                no_ext_key = key.replace(".zarr", "")
+                try:
+                    zarr_processor = ZarrProcessor(
+                        api,
+                        uuid="ffe8f19c-de4a-4362-89be-7605b2dd6b8c",
+                        job_id="job_id_888",
+                        keys=[key],
+                        start_date_str="03-2012",
+                        end_date_str="04-2012",
+                        multi_polygon='{"type":"MultiPolygon","coordinates":[[[[-180,90],[-180,-90],[180,-90],[180,90],[-180,90]]]]}',
+                        recipient="example@test.com",
+                        collection_title="Test Ocean Data Collection",
+                        full_metadata_link="https://metadata.imas.utas.edu.au/.../test-uuid-123",
+                        suggested_citation="Cite data as: Mazor, T., Watermeyer, K., Hobley, T., Grinter, V., Holden, R., MacDonald, K. and Ferns, L. (2023). Statewide Marine Habitat Map.",
+                    )
+
+                    zarr_processor.process()
+
+                    # This is a zarr dataset; the subset result should be written to S3 as a single NetCDF file
+                    files = helper.list_all_s3_objects(
+                        config.get_csv_bucket_name(),
+                        "",
+                    )
+
+                    assert (
+                        "job_id_888/radar_CoffsHarbour_wind_delayed_qc.nc" in files
+                    ), "didn't find expected output file"
+
+                    # Use a temporary directory to download the object from S3
+                    with tempfile.TemporaryDirectory() as tmpdirname:
+                        temp_file_path = Path(tmpdirname) / f"{no_ext_key}.nc"
+                        helper.download_file_from_s3(
+                            config.get_csv_bucket_name(),
+                            f"job_id_888/{no_ext_key}.nc",
+                            str(temp_file_path),
+                        )
+
+                        netcdf_xarray = xarray.open_dataset(temp_file_path)
+                        assert (
+                            netcdf_xarray.sizes["LATITUDE"] == 167
+                        ), f"LATITUDE dimension size expected to be 167, but got {netcdf_xarray.sizes['LATITUDE']}"
+
+                except Exception as ex:
+                    # Should not land here
+                    assert False, f"{ex}"
+                finally:
+                    # Delete temp output folder as the name is always the same for testing
+                    shutil.rmtree(config.get_temp_folder("888"), ignore_errors=True)
+
     def test_zarr_multi_bboxes(
         self,
         aws_clients,
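Note: a minimal sketch (not part of the patch above) of the xarray behaviour that form_dim_conditions works around. Label-based selection with .sel() does not reorder slice bounds, so on a descending coordinate slice(min, max) returns an empty selection while slice(max, min) returns the expected range. The dataset and variable name below are hypothetical and only illustrate the assumption behind the bound swap; they are not taken from the repository.

    import numpy as np
    import xarray as xr

    # Hypothetical dataset with a descending LATITUDE coordinate, similar to the radar product.
    ds = xr.Dataset(
        {"WSPD": ("LATITUDE", np.arange(5.0))},
        coords={"LATITUDE": [-28.0, -28.5, -29.0, -29.5, -30.0]},
    )

    # slice(min, max) against a descending coordinate selects nothing ...
    assert ds.sel(LATITUDE=slice(-30.0, -28.0)).sizes["LATITUDE"] == 0

    # ... while slice(max, min) selects all five rows, which is why form_dim_conditions
    # swaps the bounds when the first coordinate value is greater than the last.
    assert ds.sel(LATITUDE=slice(-28.0, -30.0)).sizes["LATITUDE"] == 5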