diff --git a/tests/io/test_asn.py b/tests/io/test_asn.py index 8369083a..dc7c6a20 100644 --- a/tests/io/test_asn.py +++ b/tests/io/test_asn.py @@ -2,6 +2,7 @@ import threading import time +import h5py import numpy as np import zmq @@ -162,6 +163,30 @@ def get_socket(self, address): return socket +def test_read_handles_exclusive_roi_end(tmp_path): + path = tmp_path / "sample_asn.hdf5" + with h5py.File(path, "w") as file: + header = file.create_group("header") + header["time"] = 0.0 + header["dt"] = 0.1 + header["dx"] = 10.0 + + file.create_dataset("data", data=np.zeros((4, 4), dtype=np.float32)) + + cable_spec = file.create_group("cableSpec") + cable_spec["sensorDistances"] = np.array([0.0, 10.0, 20.0, 30.0]) + + demod_spec = file.create_group("demodSpec") + demod_spec["roiStart"] = np.array([0]) + demod_spec["roiEnd"] = np.array([4]) + + da = xd.open_dataarray(path, engine="asn") + + assert da.shape == (4, 4) + assert da["distance"][0].values == 0.0 + assert da["distance"][-1].values == 30.0 + + class TestZMQSubscriber: def test_one_chunk(self): address = get_free_local_address() diff --git a/xdas/core/routines.py b/xdas/core/routines.py index 587c955e..aea4f60b 100644 --- a/xdas/core/routines.py +++ b/xdas/core/routines.py @@ -342,14 +342,23 @@ def open_mfdataarray( else: iterator = as_completed(futures_to_paths) objs = [] + failures = [] for future in iterator: try: obj = future.result() except Exception as e: path = futures_to_paths[future] + failures.append((path, e)) warnings.warn(f"could not open {path}: {e}", RuntimeWarning) else: objs.append(obj) + if len(objs) == 0: + if failures: + path, error = failures[0] + raise RuntimeError( + f"could not open any file with engine={engine!r}; first failure was {path}: {error}" + ) from error + raise FileNotFoundError("no file to open") return combine_by_coords(objs, dim, tolerance, squeeze, None, verbose) diff --git a/xdas/io/asn.py b/xdas/io/asn.py index e1a32530..b4e008f9 100644 --- a/xdas/io/asn.py +++ b/xdas/io/asn.py @@ -1,5 +1,5 @@ import json -from bisect import bisect_left +from bisect import bisect_left, bisect_right import h5py import numpy as np @@ -11,6 +11,18 @@ from .core import parse_ctype +def _get_roi_bound_indices(all_dists, n_start, n_end, dx): + start_index = bisect_left(all_dists, n_start * dx) + if start_index >= len(all_dists): + raise IndexError("ROI start lies beyond available sensor distances") + + end_index = bisect_right(all_dists, n_end * dx) - 1 + if end_index < 0: + raise IndexError("ROI end lies before available sensor distances") + + return start_index, end_index + + def read(fname, ctype=None): ctype = parse_ctype(ctype) with h5py.File(fname, "r") as file: @@ -33,17 +45,19 @@ def read(fname, ctype=None): # Loop over ROIs, get the start/stop index before downsampling for n_start, n_end in zip(demod["roiStart"], demod["roiEnd"]): + # ASN stores ROI end as an upper boundary. Use the last sampled distance + # that does not exceed that boundary instead of indexing the insertion point. + i_start, i_end = _get_roi_bound_indices(all_dists, n_start, n_end, dx) + # Get the index where the ROI starts based on the position in the # distance vector. This solves the issue of rounding during decimation - i = bisect_left(all_dists, n_start * dx) # Append the data index and optical distance to the buffers - dist_tie_inds.append(i) - dist_tie_vals.append(float(all_dists[i])) + dist_tie_inds.append(i_start) + dist_tie_vals.append(float(all_dists[i_start])) # Repeat the procedure for the index/distance at which the ROI ends. - i = bisect_left(all_dists, n_end * dx) - dist_tie_inds.append(i) - dist_tie_vals.append(float(all_dists[i])) + dist_tie_inds.append(i_end) + dist_tie_vals.append(float(all_dists[i_end])) nt = data.shape[0] time = Coordinate[ctype["time"]].from_block(t0, nt, dt, dim="time")