# Hashing protocol for ``BaseCoord`` (dascore/core/coords.py). These four
# definitions are methods of that class; ``__hash__`` is the entry point and
# the other three are its private helpers.


def _get_hashable_coord(self) -> Self:
    """
    Return a coordinate normalized for stable hashing.

    Coordinates without units, or with a time-like dtype, are returned
    unchanged. Every other coordinate is passed through ``simplify_units``
    so the hash is computed from one unit representation rather than from
    whichever units the coordinate happens to carry.
    """
    if self.units is None or dtype_time_like(self.dtype):
        return self
    return self.simplify_units()


@staticmethod
def _hash_scalar(value) -> tuple[str, str | None]:
    """
    Return a dtype-aware scalar hash token.

    Parameters
    ----------
    value
        A scalar such as a coordinate's start, stop, or step; may be None.

    Returns
    -------
    ``("none", None)`` for a missing value, otherwise ``("scalar", digest)``
    where ``digest`` is the ``hash_array`` digest of a one-element array
    holding ``value``.
    """
    if value is None:
        return ("none", None)
    # Wrapping the scalar in an array lets hash_array see a concrete dtype.
    return ("scalar", hash_array(np.asarray([value])))


@staticmethod
def _hash_array_token(
    values: np.ndarray,
) -> tuple[str, str] | tuple[str, str, tuple[int, ...]]:
    """
    Return a hash token consistent with coord array equality.

    Returns
    -------
    ``("approx-array", dtype_str, shape)`` for inexact (floating or complex)
    dtypes, otherwise ``("array", digest)`` with the ``hash_array`` digest of
    ``values``.
    """
    dtype = np.dtype(values.dtype)
    if np.issubdtype(dtype, np.inexact):
        # Coord equality for inexact arrays uses np.isclose semantics, so
        # hashing raw bytes would violate the hash contract. Fall back to
        # metadata-only hashing for these dtypes.
        return ("approx-array", str(dtype), values.shape)
    return ("array", hash_array(values))


def __hash__(self):
    """
    Hash the coordinate from its unit-normalized form.

    Range and partial coordinates are hashed from their class, unit string,
    shape, and start/stop/step tokens (partial coordinates also include the
    dtype string). Any other coordinate is hashed from its class, unit
    string, and the token of its values array.
    """
    coord = self._get_hashable_coord()
    prefix = (coord.__class__, coord.unit_str)
    if isinstance(coord, (CoordRange, CoordPartial)):
        limits = tuple(
            self._hash_scalar(getattr(coord, name))
            for name in ("start", "stop", "step")
        )
        # Range is tested first, exactly as before, so a range coord never
        # picks up the extra dtype element.
        if isinstance(coord, CoordRange):
            return hash((*prefix, coord.shape, *limits))
        # The dtype string is hashed explicitly for partial coords; no
        # values array is consulted on this path.
        return hash((*prefix, coord.shape, str(np.dtype(coord.dtype)), *limits))
    return hash((*prefix, self._hash_array_token(coord.values)))
@@ -644,18 +644,18 @@ def hash_numpy_array(arr: np.ndarray) -> str: Examples -------- >>> import numpy as np - >>> from dascore.utils.array import hash_numpy_array + >>> from dascore.utils.array import hash_array >>> a = np.array([1.0, 2.0, 3.0]) - >>> h = hash_numpy_array(a) + >>> h = hash_array(a) >>> assert isinstance(h, str) and len(h) == 32 >>> # Same data always produces the same hash - >>> assert hash_numpy_array(a) == hash_numpy_array(a.copy()) + >>> assert hash_array(a) == hash_array(a.copy()) >>> # Different dtype produces a different hash - >>> assert hash_numpy_array(a) != hash_numpy_array(a.astype(np.float32)) + >>> assert hash_array(a) != hash_array(a.astype(np.float32)) """ arr = np.asarray(arr) if arr.dtype == object: - msg = "hash_numpy_array does not support object arrays." + msg = "hash_array does not support object arrays." raise ParameterError(msg) h = hashlib.blake2b(digest_size=16) @@ -665,11 +665,17 @@ def hash_numpy_array(arr: np.ndarray) -> str: h.update(arr.dtype.str.encode("ascii")) h.update(np.asarray(arr.shape, dtype=np.int64).tobytes()) - if arr.flags.c_contiguous: + if arr.flags.c_contiguous and arr.dtype.kind not in {"M", "m"}: # Zero-copy fast path h.update(memoryview(arr).cast("B")) else: - # Canonicalize layout; this copies once + # Canonicalize layout; this also handles datetime/timedelta dtypes, + # which do not expose a Python buffer directly. 
class _ManagedH5pyFile:
    """
    Proxy an h5py file while owning the underlying file object lifecycle.

    Item access, membership tests, iteration, length, truthiness, and any
    other attribute lookup are forwarded to the wrapped handle. Closing the
    proxy (directly or by leaving a ``with`` block) closes the wrapped
    handle first and then the file object it was opened from.

    Parameters
    ----------
    handle
        The open h5py file to proxy.
    owned_fileobj
        The file-like object backing ``handle``; it is closed together
        with the handle.
    """

    # Attributes stored on the proxy itself; these are never forwarded.
    _own_attrs = frozenset({"_handle", "_owned_fileobj", "_closed"})

    def __init__(self, handle: "H5pyFile", owned_fileobj):
        self._handle = handle
        self._owned_fileobj = owned_fileobj
        self._closed = False

    def close(self):
        """
        Close both the h5py file and the owned file object.

        Calling close more than once is a no-op. An error raised while
        closing the h5py handle propagates, but the owned file object is
        still closed and the proxy is still marked as closed.
        """
        if self._closed:
            return
        # Flag first: if handle.close() raises, the file object below is
        # closed anyway, so the proxy must not report itself as open.
        self._closed = True
        try:
            self._handle.close()
        finally:
            # Best effort; a failure here must not mask a handle error.
            with suppress(Exception):
                self._owned_fileobj.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        # Never swallow exceptions raised inside the with block.
        return False

    def __getitem__(self, item):
        return self._handle[item]

    def __contains__(self, item):
        return item in self._handle

    def __iter__(self):
        return iter(self._handle)

    # Implicit special-method lookups (len(), bool()) go through the type
    # and bypass __getattr__, so they need explicit forwarding.
    def __len__(self):
        return len(self._handle)

    def __bool__(self):
        return bool(self._handle)

    @property
    def closed(self):
        """Return True when close has been called on the proxy."""
        return self._closed

    def __getattr__(self, item):
        # __getattr__ only runs after normal lookup fails. If one of the
        # proxy's own attributes is missing (an instance made via __new__,
        # e.g. by copy or pickle), forwarding would recurse without bound.
        if item in self._own_attrs:
            raise AttributeError(item)
        return getattr(self._handle, item)
UPath) -> dict[str, object]: """Return backend-specific kwargs for remote HDF5 file objects.""" @@ -495,11 +546,13 @@ def get_handle(cls, resource): Unlike PyTablesReader, h5py can consume a binary file object via the ``fileobj`` driver, so remote UPath inputs stay streaming-based here. """ - if isinstance(resource, cls | H5pyFile): + if isinstance(resource, cls | H5pyFile | _ManagedH5pyFile): return resource if isinstance(resource, io.IOBase): - return cls.constructor(resource, mode=cls.mode, driver="fileobj") + return cls._open_fileobj_handle(resource) if isinstance(resource, UPath): + if cached_path := get_cached_local_file(resource): + return super().get_handle(cached_path) mode = "rb" if cls.mode == "r" else "r+b" open_kwargs = cls._get_open_kwargs(resource) handle = FallbackFileObj( @@ -508,7 +561,7 @@ def get_handle(cls, resource): error_predicate=is_no_range_http_error, ) try: - return cls.constructor(handle, mode=cls.mode, driver="fileobj") + return cls._open_fileobj_handle(handle) except Exception: handle.close() raise diff --git a/dascore/utils/remote_io.py b/dascore/utils/remote_io.py index 7951c374..016b122a 100644 --- a/dascore/utils/remote_io.py +++ b/dascore/utils/remote_io.py @@ -12,6 +12,7 @@ from functools import lru_cache from hashlib import sha256 from pathlib import Path +from urllib.request import Request, urlopen from dascore.compat import UPath from dascore.config import get_config @@ -125,7 +126,6 @@ def _download_remote_file(path, local_path: Path): """Download a remote path into its cache location.""" resource = coerce_to_upath(path) protocol = getattr(resource, "protocol", None) - open_kwargs = {"block_size": 0} if protocol in _HTTP_PROTOCOLS else {} local_path.parent.mkdir(parents=True, exist_ok=True) fd, temp_name = tempfile.mkstemp( dir=local_path.parent, @@ -135,12 +135,20 @@ def _download_remote_file(path, local_path: Path): os.close(fd) tmp_path = Path(temp_name) try: - with ( - resource.open("rb", **open_kwargs) as remote_fi, - 
def get_cached_local_file(resource) -> Path | None:
    """
    Look up the on-disk cache entry for one remote resource.

    Nothing is downloaded; this only checks whether the cache location
    derived from the resource already exists.

    Parameters
    ----------
    resource
        The resource to look up. Inputs that are not path-like, and local
        paths, never have a cache entry.

    Returns
    -------
    The cached local path when it exists, otherwise None.
    """
    if is_pathlike(resource) and not is_local_path(resource):
        remote_path = coerce_to_upath(resource)
        root = _normalize_cache_root(get_remote_cache_path())
        # Layout: <root>/<sha256 of the normalized remote id>/<safe name>.
        digest = sha256(normalize_remote_id(remote_path).encode()).hexdigest()
        candidate = root / digest / _safe_remote_name(remote_path)
        if candidate.exists():
            return candidate
    return None
sentinel.""" + assert BaseCoord._hash_scalar(None) == ("none", None) + + def test_range_equivalent_units_same_hash(self): + """Equivalent range coords should hash the same after SI normalization.""" + coord_1 = get_coord(start=0, stop=10, step=1, units="m") + coord_2 = get_coord(start=0, stop=1000, step=100, units="cm") + assert hash(coord_1) == hash(coord_2) + + def test_array_equivalent_units_same_hash(self): + """Equivalent array coords should hash the same after SI normalization.""" + coord_1 = get_coord(data=np.arange(5.0), units="m") + coord_2 = get_coord(data=np.arange(5.0) * 100, units="cm") + assert hash(coord_1) == hash(coord_2) + + def test_approx_equal_float_array_coords_share_hash(self): + """Float array coords equal under all_close should hash the same.""" + coord_1 = get_coord(data=np.array([1.0, 2.0, 4.0])) + coord_2 = get_coord(data=np.array([1.0, 2.0 + 1e-10, 4.0])) + assert coord_1 == coord_2 + assert hash(coord_1) == hash(coord_2) + + def test_string_coord_hash_equal(self, string_coord): + """Equal string coords should share a hash.""" + other = get_coord(data=string_coord.values.copy()) + assert other == string_coord + assert hash(other) == hash(string_coord) + + def test_partial_coord_hash_respects_metadata(self): + """Partial coord hashes must include scalar metadata, not just values.""" + coord_1 = CoordPartial( + shape=(3,), start=1, stop=4, step=1, dtype=np.dtype("int64") + ) + coord_2 = CoordPartial( + shape=(3,), start=2, stop=5, step=1, dtype=np.dtype("int64") + ) + assert coord_1 != coord_2 + assert hash(coord_1) != hash(coord_2) + + def test_partial_equivalent_units_same_hash(self): + """Equivalent partial coords should hash the same after SI normalization.""" + coord_1 = CoordPartial( + shape=(3,), start=1.0, stop=4.0, step=1.0, units="m", dtype="float64" + ) + coord_2 = CoordPartial( + shape=(3,), start=100.0, stop=400.0, step=100.0, units="cm", dtype="float64" + ) + assert hash(coord_1) == hash(coord_2) + + def 
test_partial_convert_units_preserves_null_scalars(self): + """Null partial metadata should not be passed through conversion.""" + coord = CoordPartial( + shape=(3,), + start=np.nan, + stop=400.0, + step=100.0, + units="cm", + dtype="float64", + ) + seen = [] + + def _fake_convert(value, to_units=None, from_units=None): + seen.append((value, to_units, from_units)) + return value + + original = BaseCoord.convert_units.__globals__["convert_units"] + BaseCoord.convert_units.__globals__["convert_units"] = _fake_convert + try: + coord.convert_units("m") + finally: + BaseCoord.convert_units.__globals__["convert_units"] = original + + assert seen == [(400.0, "m", coord.units), (100.0, "m", coord.units)] + + def test_partial_convert_units_without_existing_units_sets_units_only(self): + """Unitless partial coords should take units without scalar conversion.""" + coord = CoordPartial( + shape=(3,), start=1.0, stop=4.0, step=1.0, units=None, dtype="float64" + ) + out = coord.convert_units("m") + assert out.units == get_quantity("m") + assert out.start == coord.start + assert out.stop == coord.stop + assert out.step == coord.step + + def test_equal_coords_share_hash(self, coord): + """Hash must be consistent with equality.""" + payload = coord.model_dump() + if "values" in payload: + payload["values"] = coord.values.copy() + other = get_coord(**payload) + assert other is not coord + assert other == coord + assert hash(other) == hash(coord) + + class TestSelect: """Generic tests for selecting values from coords.""" diff --git a/tests/test_io/test_remote_http.py b/tests/test_io/test_remote_http.py index 634bc6f6..2a7374fa 100644 --- a/tests/test_io/test_remote_http.py +++ b/tests/test_io/test_remote_http.py @@ -159,9 +159,12 @@ def test_http_range_hdf5_read_succeeds( """Range-capable HTTP servers should support DASCore HDF5 reads.""" ensure_http_fetch_file("prodml_2.1.h5") path = http_range_das_path / "prodml_2.1.h5" - assert dc.get_format(path) == ("PRODML", "2.1") - assert 
dc.read(path) - assert not list(get_remote_cache_path().rglob("prodml_2.1.h5")) + fmt = dc.get_format(path) + assert fmt == ("PRODML", "2.1") + spool = dc.read(path) + assert spool + cached = list(get_remote_cache_path().rglob("prodml_2.1.h5")) + assert not cached def test_spool_file_path(self, http_regression_das_path): """A remote HTTP file should still produce a file-backed spool.""" diff --git a/tests/test_utils/test_array_utils.py b/tests/test_utils/test_array_utils.py index 8d47ed5d..cfe6c4d5 100644 --- a/tests/test_utils/test_array_utils.py +++ b/tests/test_utils/test_array_utils.py @@ -20,6 +20,7 @@ apply_ufunc, convert_bytes_to_strings, convert_strings_to_bytes, + hash_array, hash_numpy_array, is_string_byte_serializable_array, ) @@ -661,42 +662,42 @@ def element_wise_func(data): assert np.allclose(result.data, np.abs(random_patch.data) + 1) -class TestHashNumpyArray: - """Tests for hash_numpy_array.""" +class TestHashArray: + """Tests for hash_array.""" def test_returns_hex_string_of_length_32(self): """Output is a 32-character hex string (16-byte digest).""" - result = hash_numpy_array(np.array([1, 2, 3])) + result = hash_array(np.array([1, 2, 3])) assert isinstance(result, str) assert len(result) == 32 def test_copy_same_hash(self): """A copy of an array produces the same hash.""" a = np.array([1.0, 2.0, 3.0]) - assert hash_numpy_array(a) == hash_numpy_array(a.copy()) + assert hash_array(a) == hash_array(a.copy()) def test_different_values_different_hash(self): """Different data produces a different hash.""" a = np.array([1, 2, 3]) b = np.array([1, 2, 4]) - assert hash_numpy_array(a) != hash_numpy_array(b) + assert hash_array(a) != hash_array(b) def test_different_dtype_different_hash(self): """Same raw shape but different dtype produces a different hash.""" a = np.array([1, 2, 3], dtype=np.int32) b = np.array([1, 2, 3], dtype=np.int64) - assert hash_numpy_array(a) != hash_numpy_array(b) + assert hash_array(a) != hash_array(b) def 
test_different_shape_different_hash(self): """Same values but reshaped produce a different hash.""" a = np.arange(6).reshape(2, 3) b = np.arange(6).reshape(3, 2) - assert hash_numpy_array(a) != hash_numpy_array(b) + assert hash_array(a) != hash_array(b) def test_object_array_raises(self): """Object arrays are not supported.""" with pytest.raises(ParameterError): - hash_numpy_array(np.array([1, "a"], dtype=object)) + hash_array(np.array([1, "a"], dtype=object)) def test_non_contiguous_matches_contiguous(self): """A non-C-contiguous view hashes identically to its contiguous copy.""" @@ -704,4 +705,14 @@ def test_non_contiguous_matches_contiguous(self): # Fortran-order (non-C-contiguous) non_contig = np.asfortranarray(base) assert not non_contig.flags.c_contiguous - assert hash_numpy_array(base) == hash_numpy_array(non_contig) + assert hash_array(base) == hash_array(non_contig) + + def test_datetime_array_hashes(self): + """Datetime arrays should hash without special casing at call sites.""" + arr = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]") + assert hash_array(arr) == hash_array(arr.copy()) + + def test_hash_numpy_array_alias(self): + """The legacy helper name should remain a passthrough.""" + arr = np.array([1, 2, 3]) + assert hash_numpy_array(arr) == hash_array(arr) diff --git a/tests/test_utils/test_io_utils.py b/tests/test_utils/test_io_utils.py index ac3b4032..9c819a76 100644 --- a/tests/test_utils/test_io_utils.py +++ b/tests/test_utils/test_io_utils.py @@ -36,6 +36,7 @@ from dascore.utils.remote_io import ( FallbackFileObj, clear_remote_file_cache, + get_cached_local_file, get_remote_cache_path, get_remote_cache_scope, is_no_range_http_error, @@ -228,6 +229,54 @@ def test_h5_reader_from_open_file_handle(self, tmp_path): finally: handle.close() + def test_h5_reader_close_closes_owned_fileobj(self, tmp_path): + """Closing the reader should close the file object passed to h5py.""" + path = tmp_path / "owned_handle.h5" + with h5py.File(path, 
"w") as handle: + handle.create_dataset("data", data=[1, 2, 3]) + raw = open(path, "rb") + handle = H5Reader.get_handle(raw) + assert not raw.closed + handle.close() + assert raw.closed + + def test_h5_reader_managed_handle_context_manager_and_closed(self, tmp_path): + """Managed HDF5 handles should support context-manager helpers.""" + path = tmp_path / "managed_context.h5" + with h5py.File(path, "w") as handle: + handle.create_dataset("data", data=[1, 2, 3]) + raw = open(path, "rb") + with H5Reader.get_handle(raw) as handle: + assert "data" in handle + assert list(iter(handle)) == ["data"] + assert not handle.closed + assert handle.closed + assert raw.closed + + def test_h5_reader_prefers_existing_cached_local_file(self, monkeypatch, tmp_path): + """Cached remote HDF5 resources should reopen locally, not remotely.""" + local_path = tmp_path / "cached.h5" + with h5py.File(local_path, "w") as handle: + handle.create_dataset("data", data=[1, 2, 3]) + + path = UPath("http://example.com/cached.h5") + monkeypatch.setattr( + "dascore.utils.hdf5.get_cached_local_file", lambda _: local_path + ) + monkeypatch.setattr( + type(path), + "open", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + AssertionError("remote open should not be used") + ), + ) + + handle = H5Reader.get_handle(path) + try: + assert list(handle["data"][:]) == [1, 2, 3] + finally: + handle.close() + def test_h5_reader_passthrough_h5py_handle(self, tmp_path): """Ensure h5py-backed readers return open handles unchanged.""" path = tmp_path / "passthrough.h5" @@ -513,6 +562,13 @@ def test_ensure_local_file_reuses_cached_path(self): assert first.exists() assert first.read_text() == "hello" + def test_get_cached_local_file_returns_existing_cached_path(self): + """The cache helper should find already materialized remote resources.""" + path = UPath("memory://dascore/io_resource_test_cached_lookup.txt") + path.write_text("hello") + local_path = ensure_local_file(path) + assert get_cached_local_file(path) == 
local_path + def test_ensure_local_file_respects_cache_dir_changes(self, tmp_path): """Changing the configured cache dir should change future materialization.""" path = UPath("memory://dascore/io_resource_test_reconfigure.txt") @@ -580,6 +636,97 @@ def __exit__(self, *_args): assert local_path.read_bytes() == b"a" assert handle.read_sizes == [321, 321] + def test_http_remote_download_uses_urlopen_not_upath_open( + self, monkeypatch, tmp_path + ): + """HTTP cache downloads should bypass fsspec open re-entry.""" + + class _HTTPResource: + def __init__(self): + self.protocol = "http" + self.storage_options = {"User-Agent": "dascore-test"} + + def __str__(self): + return "http://example.com/data.bin" + + def open(self, *_args, **_kwargs): + raise AssertionError( + "HTTP fallback download should not call resource.open" + ) + + class _HTTPResponse: + def __init__(self): + self._chunks = [b"ab", b"c", b""] + self.read_sizes = [] + + def read(self, size=-1): + self.read_sizes.append(size) + return self._chunks.pop(0) + + def __enter__(self): + return self + + def __exit__(self, *_args): + return False + + seen = {} + response = _HTTPResponse() + + def _fake_urlopen(request, timeout=None): + seen["url"] = request.full_url + seen["headers"] = dict(request.header_items()) + seen["timeout"] = timeout + return response + + monkeypatch.setattr(remote_io, "coerce_to_upath", lambda resource: resource) + monkeypatch.setattr(remote_io, "urlopen", _fake_urlopen) + with set_config(remote_download_block_size=2): + local_path = tmp_path / "downloaded.bin" + remote_io._download_remote_file(_HTTPResource(), local_path) + + assert local_path.read_bytes() == b"abc" + assert seen["url"] == "http://example.com/data.bin" + assert seen["headers"] == {"User-agent": "dascore-test"} + assert seen["timeout"] == 60.0 + assert response.read_sizes == [2, 2, 2] + + def test_http_remote_download_uses_configured_timeout(self, monkeypatch, tmp_path): + """HTTP cache downloads should pass through the 
configured timeout.""" + + class _HTTPResource: + def __init__(self): + self.protocol = "http" + self.storage_options = {} + + def __str__(self): + return "http://example.com/data.bin" + + class _HTTPResponse: + def read(self, _size=-1): + return b"" + + def __enter__(self): + return self + + def __exit__(self, *_args): + return False + + seen = {} + + def _fake_urlopen(request, timeout=None): + seen["url"] = request.full_url + seen["timeout"] = timeout + return _HTTPResponse() + + monkeypatch.setattr(remote_io, "coerce_to_upath", lambda resource: resource) + monkeypatch.setattr(remote_io, "urlopen", _fake_urlopen) + with set_config(remote_download_timeout=12.5): + local_path = tmp_path / "downloaded.bin" + remote_io._download_remote_file(_HTTPResource(), local_path) + + assert seen == {"url": "http://example.com/data.bin", "timeout": 12.5} + assert local_path.read_bytes() == b"" + def test_ensure_local_file_can_unwrap_io_resource_manager(self): """ensure_local_file should accept IOResourceManager instances.""" path = UPath("memory://dascore/io_resource_test_manager.txt")