Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion data_access_service/tasks/subset_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,14 @@ def __get_zarr_dataset_for_(self, key: str) -> xarray.Dataset | None:
for k, val_range in conditions.items():
print("forming condition for key", k, "with range", val_range)
if is_dim(key=k, dataset=dataset):
dim_conditions[k] = slice(val_range[0], val_range[1])
# dim_conditions[k] = slice(val_range[0], val_range[1])
form_dim_conditions(
existing_conditions=dim_conditions,
key=k,
min_value=val_range[0],
max_value=val_range[1],
dataset=dataset,
)
elif is_var(key=k, dataset=dataset):
mask = form_mask(
existing_mask=mask,
Expand Down Expand Up @@ -525,3 +532,27 @@ def form_mask(
return var_mask
else:
return existing_mask & var_mask


def form_dim_conditions(
existing_conditions: dict[str, slice] | None,
key: str,
min_value: any,
max_value: any,
dataset: xarray.Dataset,
) -> dict[str, slice]:
# try to know the dim is ascending or descending
slice_from = min_value
slice_to = max_value

# if descending, swap
if dataset[key][0] > dataset[key][-1]:
slice_from = max_value
slice_to = min_value

dim_condition = {key: slice(slice_from, slice_to)}
if existing_conditions is None:
return dim_condition
else:
existing_conditions.update(dim_condition)
return existing_conditions
68 changes: 68 additions & 0 deletions tests/tasks/test_subset_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,74 @@ def test_zarr_processor(
# Delete temp output folder as the name always same for testing
shutil.rmtree(config.get_temp_folder("888"), ignore_errors=True)

@patch("aodn_cloud_optimised.lib.DataQuery.REGION", REGION)
def test_zarr_descending_dims(
    self,
    aws_clients,
    upload_test_case_to_s3,
    mock_get_fs_token_paths,
):
    """Subset a zarr dataset and check the LATITUDE size of the NetCDF output.

    Runs ``ZarrProcessor`` over ``radar_CoffsHarbour_wind_delayed_qc.zarr``
    with a whole-globe polygon, then downloads the generated ``.nc`` file and
    asserts that its ``LATITUDE`` dimension has 167 entries.

    NOTE(review): the test name implies this dataset stores a dimension in
    descending order - confirm against the uploaded fixture.
    """
    config = Config.get_config()
    helper = AWSHelper()

    api = API()
    api.initialize_metadata()

    key = "radar_CoffsHarbour_wind_delayed_qc.zarr"
    no_ext_key = key.replace(".zarr", "")

    # Patch fsspec to fix an issue where we cannot pass the storage_options correctly
    with patch("fsspec.core.get_fs_token_paths", mock_get_fs_token_paths):
        # Replace send_email with a mock for the duration of the run
        with patch.object(AWSHelper, "send_email"):
            # No blanket ``except``: letting errors propagate keeps the
            # original traceback in the pytest report.
            try:
                zarr_processor = ZarrProcessor(
                    api,
                    uuid="ffe8f19c-de4a-4362-89be-7605b2dd6b8c",
                    job_id="job_id_888",
                    keys=[key],
                    start_date_str="03-2012",
                    end_date_str="04-2012",
                    multi_polygon='{"type":"MultiPolygon","coordinates":[[[[-180,90],[-180,-90],[180,-90],[180,90],[-180,90]]]]}',
                    recipient="example@@test.com",
                    collection_title="Test Ocean Data Collection",
                    full_metadata_link="https://metadata.imas.utas.edu.au/.../test-uuid-123",
                    suggested_citation="Cite data as: Mazor, T., Watermeyer, K., Hobley, T., Grinter, V., Holden, R., MacDonald, K. and Ferns, L. (2023). Statewide Marine Habitat Map.",
                )

                zarr_processor.process()

                # The zarr input should have been exported as a NetCDF file on S3
                files = helper.list_all_s3_objects(
                    config.get_csv_bucket_name(),
                    "",
                )

                assert (
                    "job_id_888/radar_CoffsHarbour_wind_delayed_qc.nc" in files
                ), "didn't find expected output file"

                # use tempfile to download an object from s3
                with tempfile.TemporaryDirectory() as tmpdirname:
                    temp_file_path = Path(tmpdirname) / f"{no_ext_key}.nc"
                    helper.download_file_from_s3(
                        config.get_csv_bucket_name(),
                        f"job_id_888/{no_ext_key}.nc",
                        str(temp_file_path),
                    )

                    # Close the dataset before the temporary directory is
                    # removed so no file handle is left open on the download.
                    with xarray.open_dataset(temp_file_path) as netcdf_xarray:
                        latitude_size = netcdf_xarray.sizes["LATITUDE"]

                    assert (
                        latitude_size == 167
                    ), f"LATITUDE dimension size expected to be 167, but got {latitude_size}"
            finally:
                # Delete temp output folder as the name always same for testing
                shutil.rmtree(config.get_temp_folder("888"), ignore_errors=True)

def test_zarr_multi_bboxes(
self,
aws_clients,
Expand Down