From 958bcd307d80bbd483051d04772a1977d2e88563 Mon Sep 17 00:00:00 2001 From: Robert French Date: Fri, 10 Apr 2026 12:32:18 -0700 Subject: [PATCH 1/8] Fix stale locks (#56) and generalize upload exceptions (#12) Co-Authored-By: Claude Sonnet 4.6 --- filecache/__init__.py | 5 +- filecache/exceptions.py | 22 ++++ filecache/file_cache.py | 77 ++++++++++--- filecache/file_cache_source.py | 28 ++++- tests/test_file_cache.py | 193 ++++++++++++++++++++++++++++++--- 5 files changed, 290 insertions(+), 35 deletions(-) create mode 100644 filecache/exceptions.py diff --git a/filecache/__init__.py b/filecache/__init__.py index 5205899..428f5d1 100644 --- a/filecache/__init__.py +++ b/filecache/__init__.py @@ -174,6 +174,7 @@ FileCacheSourceGS, FileCacheSourceS3, FileCacheSourceFake) +from .exceptions import FileCacheError, UploadFailed # noqa: ignore E401 __all__ = ['get_global_logger', 'register_filecachesource', @@ -186,4 +187,6 @@ 'FileCacheSourceHTTP', 'FileCacheSourceGS', 'FileCacheSourceS3', - 'FileCacheSourceFake'] + 'FileCacheSourceFake', + 'FileCacheError', + 'UploadFailed'] diff --git a/filecache/exceptions.py b/filecache/exceptions.py new file mode 100644 index 0000000..5ebe4c0 --- /dev/null +++ b/filecache/exceptions.py @@ -0,0 +1,22 @@ +########################################################################################## +# filecache/exceptions.py +########################################################################################## + +from __future__ import annotations + + +class FileCacheError(Exception): + """Base class for all filecache-specific exceptions.""" + pass + + +class UploadFailed(FileCacheError): + """Raised when a file upload to a remote storage location fails. + + This exception wraps cloud-provider-specific exceptions (such as + ``boto3.exceptions.S3UploadFailedError`` or + ``google.api_core.exceptions.BadRequest``) so that callers do not need to + import or understand the underlying cloud SDK in order to handle upload + failures. + """ + pass diff --git a/filecache/file_cache.py b/filecache/file_cache.py index a98ebc9..f7f6b13 100644 --- a/filecache/file_cache.py +++ b/filecache/file_cache.py @@ -38,6 +38,7 @@ from .file_cache_types import (StrOrPathOrSeqType, UrlToPathFuncOrSeqType, UrlToUrlFuncOrSeqType) +from .exceptions import UploadFailed # Global cache of all instantiated FileCacheSource since they may involve opening @@ -1600,7 +1601,7 @@ def _retrieve_multi_locked(self, except filelock._error.Timeout: self._log_debug(f' Failed to lock: {sub_path}') wait_to_appear.append((idx, f'{source_pfx}{sub_path}', local_path, - lock_path)) + lock_path, source, sub_path)) continue lock_list.append((lock_path, lock)) source_idxes.append(idx) @@ -1644,12 +1645,29 @@ def _retrieve_multi_locked(self, # If wait_to_appear is not empty, then we failed to acquire at least one lock, # which means that another process was downloading the file. So now we just # sit here and wait for all of the missing files to magically show up, or for - # us to time out. If the lock file disappears but the destination file isn't - # present, that means the other process failed in its download. + # us to time out. + # + # In each iteration we also check for stale locks: a stale lock exists when the + # lock file is present but the process that created it has died (the OS released + # the advisory flock, but did not delete the file). We detect a stale lock by + # attempting a non-blocking acquire -- if it succeeds, no live process holds the + # lock, so we steal it and initiate the download ourselves. + # + # Race-condition guarantee: multiple waiting processes may all notice the same + # stale lock and all attempt `lock.acquire(timeout=0)` simultaneously. Because + # the underlying flock(2) call is atomic, exactly one process wins the race; the + # others receive a Timeout and remain in the wait list. The winner downloads the + # file atomically (write to temp path + rename), so the losers will find the + # completed file on their next poll iteration. + # + # If the lock file disappears without the destination file appearing, that means + # the other process failed (or cleaned up after itself on error). timed_out = False while wait_to_appear: new_wait_to_appear = [] - for idx, url, local_path, lock_path in wait_to_appear: + stale_lock_items = [] # Items whose locks we have stolen + + for idx, url, local_path, lock_path, source, sub_path in wait_to_appear: if local_path.is_file(): func_ret[idx] = local_path self._log_debug(f' Downloaded elsewhere: {url}') @@ -1659,7 +1677,36 @@ def _retrieve_multi_locked(self, f'Another process failed to download {url}') self._log_debug(f' Download elsewhere failed: {url}') continue - new_wait_to_appear.append((idx, url, local_path, lock_path)) + # Lock file exists and destination is absent. Check whether the lock is + # stale (flock released by a crashed process but file not cleaned up). + stale_lock = filelock.FileLock(lock_path, timeout=0) + try: + stale_lock.acquire() + # Acquired with timeout=0 → no live process holds this lock. + stale_lock_items.append( + (idx, url, local_path, lock_path, stale_lock, source, sub_path)) + self._log_debug(f' Stale lock detected for {url}, will re-download') + except filelock._error.Timeout: + # Lock is actively held -- another process is still downloading. + new_wait_to_appear.append( + (idx, url, local_path, lock_path, source, sub_path)) + + # Download all files whose stale locks we have just acquired. + for idx, url, local_path, lock_path, stale_lock, source, sub_path in ( + stale_lock_items): + try: + ret = source.retrieve(sub_path, local_path, preserve_mtime=False) + func_ret[idx] = ret + self._download_counter += 1 + self._log_debug(f' Re-downloaded after stale lock recovery: {url}') + except Exception as e: + func_ret[idx] = e + files_not_exist.append(url) + self._log_debug( + f' Download failed after stale lock recovery: {url}: {e!r}') + finally: + stale_lock.release() + lock_path.unlink(missing_ok=True) if not new_wait_to_appear: break @@ -1670,7 +1717,7 @@ def _retrieve_multi_locked(self, 'Timeout while waiting for another process to finish downloading') self._log_debug( ' Timeout while waiting for another process to finish downloading:') - for idx, url, local_path, lock_path in wait_to_appear: + for idx, url, local_path, lock_path, source, sub_path in wait_to_appear: func_ret[idx] = exc self._log_debug(f' {url}') if exception_on_fail: @@ -1923,15 +1970,15 @@ def _upload_multi(self, pass if exception_on_fail: - exc_str = '' - if files_not_exist: - exc_str += f"File(s) do not exist: {', '.join(files_not_exist)}" - if files_failed: - if exc_str: - exc_str += ' AND ' - exc_str += f"Failed to upload file(s): {', '.join(files_failed)}" - if exc_str: - raise FileNotFoundError(exc_str) + if files_not_exist and not files_failed: + raise FileNotFoundError( + f"File(s) do not exist: {', '.join(files_not_exist)}") + elif files_failed: + exc_str = f"Failed to upload file(s): {', '.join(files_failed)}" + if files_not_exist: + exc_str = (f"File(s) do not exist: {', '.join(files_not_exist)}" + f' AND {exc_str}') + raise UploadFailed(exc_str) return cast(list[Union[Path, Exception]], func_ret) diff --git a/filecache/file_cache_source.py b/filecache/file_cache_source.py index 6185e4d..f91a724 100644 --- a/filecache/file_cache_source.py +++ b/filecache/file_cache_source.py @@ -27,6 +27,8 @@ from google.cloud import storage as gs_storage # type: ignore import google.api_core.exceptions +from .exceptions import UploadFailed + class FileCacheSource(ABC): """Superclass for all remote file source classes. Do not use directly. @@ -1272,6 +1274,8 @@ def upload(self, Raises: FileNotFoundError: If the local file does not exist. + UploadFailed: If the upload to Google Storage fails (e.g. bad bucket name, + insufficient permissions, or network error). """ local_path = Path(local_path) @@ -1289,7 +1293,12 @@ def upload(self, # For some reason the Google API doesn't let you update this directly blob.metadata = blob_metadata - blob.upload_from_filename(str(local_path)) + try: + blob.upload_from_filename(str(local_path)) + except Exception as e: + raise UploadFailed( + f'Failed to upload file to Google Storage: ' + f'{self._src_prefix_}{sub_path}') from e return local_path @@ -1600,6 +1609,8 @@ def upload(self, Raises: FileNotFoundError: If the local file does not exist. + UploadFailed: If the upload to AWS S3 fails (e.g. bad bucket name, + insufficient permissions, or network error). """ local_path = Path(local_path) @@ -1613,11 +1624,16 @@ def upload(self, mtime_str = str(mtime_sec) extra_args['Metadata'] = {'mtime': mtime_str} - if extra_args: - self._client.upload_file(str(local_path), self._bucket_name, sub_path, - ExtraArgs=extra_args) - else: - self._client.upload_file(str(local_path), self._bucket_name, sub_path) + try: + if extra_args: + self._client.upload_file(str(local_path), self._bucket_name, sub_path, + ExtraArgs=extra_args) + else: + self._client.upload_file(str(local_path), self._bucket_name, sub_path) + except Exception as e: + raise UploadFailed( + f'Failed to upload file to AWS S3: ' + f'{self._src_prefix_}{sub_path}') from e return local_path diff --git a/tests/test_file_cache.py b/tests/test_file_cache.py index b1ea126..34e2c3a 100644 --- a/tests/test_file_cache.py +++ b/tests/test_file_cache.py @@ -10,6 +10,7 @@ from pathlib import Path import platform import tempfile +import threading import time import uuid @@ -1055,6 +1056,179 @@ def test_locking_multi_pfx_2(): assert not fc.cache_dir.exists() +@pytest.mark.skipif(platform.system() == 'Windows', + reason='Uses POSIX fcntl for stale-lock simulation') +def test_stale_lock_single(): + """A stale lock (file exists, no flock held) must not block a single-file retrieve. + + We simulate a stale lock by acquiring the flock with raw fcntl, then releasing + only the flock (leaving the lock file on disk) before the retrieve. On the + next call, filelock should acquire the lock immediately because no process + holds it. + """ + import fcntl + + with FileCache(delete_on_exit=True) as fc: + filename = EXPECTED_FILENAMES[0] + local_path = (fc.cache_dir / + (HTTP_TEST_ROOT.replace('https://', 'http_') + '/' + filename)) + lock_path = fc._lock_path(local_path) + lock_path.parent.mkdir(parents=True, exist_ok=True) + + # Create and hold the lock via raw fcntl so we can release the flock without + # deleting the file (simulating a process that crashed). + fd = os.open(str(lock_path), os.O_RDWR | os.O_CREAT, 0o644) + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + + # Release the flock but keep the file -- this is the "stale lock" state. + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + + # The lock file exists, but nobody holds the flock. The retrieve must + # succeed rather than raising a TimeoutError. + path = fc.retrieve(f'{HTTP_TEST_ROOT}/{filename}', lock_timeout=0) + assert isinstance(path, Path) + assert path.is_file() + _compare_to_expected_path(path, filename) + assert fc.download_counter == 1 + assert fc.upload_counter == 0 + + +@pytest.mark.skipif(platform.system() == 'Windows', + reason='Uses POSIX fcntl for stale-lock simulation') +def test_stale_lock_multi(): + """A stale lock must not permanently block multi-file retrieval. + + We hold the flock for the first file from a background thread, trigger the + multi-file download (which puts file[0] into wait_to_appear), then simulate + a crash by releasing only the flock (but not deleting the lock file) while + the wait loop is running. The stale-lock detection code must steal the lock + and complete the download without timing out. + + Race-condition guarantee: the background thread uses a threading.Event to + synchronise so that the flock is released only after the main thread's + retrieve call has entered the wait_to_appear loop. Because only one process + can win the flock race, the main thread is guaranteed to be the sole + downloader of file[0] after stealing the stale lock. + """ + import fcntl + + with FileCache(delete_on_exit=True) as fc: + filename0 = EXPECTED_FILENAMES[0] + local_path0 = (fc.cache_dir / + (HTTP_TEST_ROOT.replace('https://', 'http_') + '/' + filename0)) + lock_path0 = fc._lock_path(local_path0) + lock_path0.parent.mkdir(parents=True, exist_ok=True) + + # Phase-control events + flock_held = threading.Event() # signalled once the raw flock is held + flock_release = threading.Event() # signalled when the "crash" should happen + + def holder_thread() -> None: + """Hold the flock until told to "crash", then release without cleanup.""" + fd = os.open(str(lock_path0), os.O_RDWR | os.O_CREAT, 0o644) + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + flock_held.set() + flock_release.wait() + # Simulate crash: release flock, leave file on disk. + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + + t = threading.Thread(target=holder_thread, daemon=True) + t.start() + flock_held.wait() # make sure the flock is held before we start retrieve + + results: list[list[Path]] = [] + exc_holder: list[Exception] = [] + + def do_retrieve() -> None: + try: + filenames = [f'{HTTP_TEST_ROOT}/{f}' for f in EXPECTED_FILENAMES] + ret = fc.retrieve(filenames, lock_timeout=15) + results.append(ret) # type: ignore[arg-type] + except Exception as e: + exc_holder.append(e) + + retrieve_thread = threading.Thread(target=do_retrieve, daemon=True) + retrieve_thread.start() + + # Give the retrieve thread time to enter the wait_to_appear loop. + time.sleep(0.3) + + # Signal the holder to "crash" -- releases flock, keeps the lock file. + flock_release.set() + t.join(timeout=5) + assert not t.is_alive(), 'holder thread did not finish' + + retrieve_thread.join(timeout=20) + assert not retrieve_thread.is_alive(), 'retrieve thread timed out (stale lock not recovered)' + + assert not exc_holder, f'retrieve raised: {exc_holder[0]}' + assert results, 'retrieve returned no result' + for r in results[0]: + assert isinstance(r, Path), f'Expected Path, got {r!r}' + assert fc.download_counter == len(EXPECTED_FILENAMES) + assert fc.upload_counter == 0 + + +@pytest.mark.skipif(platform.system() == 'Windows', + reason='Uses POSIX fcntl for stale-lock simulation') +def test_stale_lock_multi_pfx(): + """Same as test_stale_lock_multi but exercised through the FCPath interface.""" + import fcntl + + with FileCache(delete_on_exit=True) as fc: + pfx = fc.new_path(HTTP_TEST_ROOT, lock_timeout=15) + filename0 = EXPECTED_FILENAMES[0] + local_path0 = (fc.cache_dir / + (HTTP_TEST_ROOT.replace('https://', 'http_') + '/' + filename0)) + lock_path0 = fc._lock_path(local_path0) + lock_path0.parent.mkdir(parents=True, exist_ok=True) + + flock_held = threading.Event() + flock_release = threading.Event() + + def holder_thread() -> None: + fd = os.open(str(lock_path0), os.O_RDWR | os.O_CREAT, 0o644) + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + flock_held.set() + flock_release.wait() + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + + t = threading.Thread(target=holder_thread, daemon=True) + t.start() + flock_held.wait() + + results: list[list[Path]] = [] + exc_holder: list[Exception] = [] + + def do_retrieve() -> None: + try: + ret = pfx.retrieve(EXPECTED_FILENAMES) + results.append(ret) # type: ignore[arg-type] + except Exception as e: + exc_holder.append(e) + + retrieve_thread = threading.Thread(target=do_retrieve, daemon=True) + retrieve_thread.start() + + time.sleep(0.3) + flock_release.set() + t.join(timeout=5) + assert not t.is_alive() + + retrieve_thread.join(timeout=20) + assert not retrieve_thread.is_alive(), 'retrieve thread timed out (stale lock not recovered)' + + assert not exc_holder, f'retrieve raised: {exc_holder[0]}' + assert results + for r in results[0]: + assert isinstance(r, Path) + assert fc.download_counter == len(EXPECTED_FILENAMES) + assert fc.upload_counter == 0 + + def test_cleanup_locking_bad(): logger = MyLogger() with FileCache(cache_name=None, logger=logger) as fc: @@ -1330,15 +1504,13 @@ def test_cloud_upl_bad_2(prefix): filename = 'test_file.txt' local_path = fc.get_local_path(f'{new_path}/{filename}') _copy_file(EXPECTED_DIR / EXPECTED_FILENAMES[0], local_path) - with pytest.raises(Exception) as e: + with pytest.raises(filecache.UploadFailed): fc.upload(f'{new_path}/{filename}', anonymous=True) - assert not isinstance(e.type, FileNotFoundError) assert fc.download_counter == 0 assert fc.upload_counter == 0 ret = fc.upload(f'{new_path}/{filename}', anonymous=True, exception_on_fail=False) - assert isinstance(ret, Exception) - assert not isinstance(e, FileNotFoundError) + assert isinstance(ret, filecache.UploadFailed) assert fc.download_counter == 0 assert fc.upload_counter == 0 assert not fc.cache_dir.exists() @@ -1352,17 +1524,14 @@ def test_cloud_upl_pfx_bad_2(prefix): filename = 'test_file.txt' local_path = pfx.get_local_path(filename) _copy_file(EXPECTED_DIR / EXPECTED_FILENAMES[0], local_path) - with pytest.raises(Exception) as e: + with pytest.raises(filecache.UploadFailed): pfx.upload(filename) - assert not isinstance(e.type, FileNotFoundError) assert fc.download_counter == 0 assert fc.upload_counter == 0 assert pfx.download_counter == 0 assert pfx.upload_counter == 0 ret = pfx.upload(filename, exception_on_fail=False) - assert not isinstance(e, FileNotFoundError) - assert isinstance(ret, Exception) - assert not isinstance(e, FileNotFoundError) + assert isinstance(ret, filecache.UploadFailed) assert fc.download_counter == 0 assert fc.upload_counter == 0 assert pfx.download_counter == 0 @@ -1873,8 +2042,7 @@ def test_cloud_upl_multi_bad_3(prefix): ret = fc.upload(paths, anonymous=True, exception_on_fail=False) assert isinstance(ret[0], FileNotFoundError) for r, lp in zip(ret[1:], local_paths[1:]): - assert isinstance(r, Exception) - assert not isinstance(r, FileNotFoundError) + assert isinstance(r, filecache.UploadFailed) assert not fc.exists(paths[0], anonymous=True) for path in paths[1:]: assert fc.exists(path, anonymous=True) @@ -1895,8 +2063,7 @@ def test_cloud_upl_multi_pfx_bad_3(prefix): ret = pfx.upload(paths, exception_on_fail=False) assert isinstance(ret[0], FileNotFoundError) for r, lp in zip(ret[1:], local_paths[1:]): - assert isinstance(r, Exception) - assert not isinstance(r, FileNotFoundError) + assert isinstance(r, filecache.UploadFailed) assert not pfx.exists(paths[0]) for path in paths[1:]: assert pfx.exists(path) From e954784dd46a9fb9b39964df6ec0db8455be480f Mon Sep 17 00:00:00 2001 From: Robert French Date: Fri, 10 Apr 2026 13:05:27 -0700 Subject: [PATCH 2/8] Revert #12 changes; add __repr__/__str__ to all public classes (#21) Reverts the #12 upload-exception changes (removes UploadFailed / FileCacheError, restores native exceptions). Implements #21: adds __repr__ and __str__ to FileCache, enhances FCPath.__repr__ to include non-default options (filecache, anonymous, lock_timeout, nthreads), and adds __repr__/__str__ to FileCacheSource and all subclasses via the base class. Co-Authored-By: Claude Sonnet 4.6 --- filecache/__init__.py | 6 +-- filecache/exceptions.py | 22 -------- filecache/file_cache.py | 12 ++++- filecache/file_cache_path.py | 11 +++- filecache/file_cache_source.py | 34 +++++-------- tests/test_file_cache.py | 92 +++++++++++++++++++++++++++++++--- 6 files changed, 120 insertions(+), 57 deletions(-) delete mode 100644 filecache/exceptions.py diff --git a/filecache/__init__.py b/filecache/__init__.py index 428f5d1..041fd83 100644 --- a/filecache/__init__.py +++ b/filecache/__init__.py @@ -174,8 +174,6 @@ FileCacheSourceGS, FileCacheSourceS3, FileCacheSourceFake) -from .exceptions import FileCacheError, UploadFailed # noqa: ignore E401 - __all__ = ['get_global_logger', 'register_filecachesource', 'set_easy_logger', @@ -187,6 +185,4 @@ 'FileCacheSourceHTTP', 'FileCacheSourceGS', 'FileCacheSourceS3', - 'FileCacheSourceFake', - 'FileCacheError', - 'UploadFailed'] + 'FileCacheSourceFake'] diff --git a/filecache/exceptions.py b/filecache/exceptions.py deleted file mode 100644 index 5ebe4c0..0000000 --- a/filecache/exceptions.py +++ /dev/null @@ -1,22 +0,0 @@ -########################################################################################## -# filecache/exceptions.py -########################################################################################## - -from __future__ import annotations - - -class FileCacheError(Exception): - """Base class for all filecache-specific exceptions.""" - pass - - -class UploadFailed(FileCacheError): - """Raised when a file upload to a remote storage location fails. - - This exception wraps cloud-provider-specific exceptions (such as - ``boto3.exceptions.S3UploadFailedError`` or - ``google.api_core.exceptions.BadRequest``) so that callers do not need to - import or understand the underlying cloud SDK in order to handle upload - failures. - """ - pass diff --git a/filecache/file_cache.py b/filecache/file_cache.py index f7f6b13..dd89c5a 100644 --- a/filecache/file_cache.py +++ b/filecache/file_cache.py @@ -38,7 +38,6 @@ from .file_cache_types import (StrOrPathOrSeqType, UrlToPathFuncOrSeqType, UrlToUrlFuncOrSeqType) -from .exceptions import UploadFailed # Global cache of all instantiated FileCacheSource since they may involve opening @@ -419,6 +418,15 @@ def logger(self) -> Logger | None: return _GLOBAL_LOGGER return cast(Logger, self._logger) + def __repr__(self) -> str: + return (f'FileCache({self._cache_dir.name!r}, ' + f'anonymous={self._anonymous!r}, ' + f'lock_timeout={self._lock_timeout!r}, ' + f'nthreads={self._nthreads!r})') + + def __str__(self) -> str: + return str(self._cache_dir) + def _log_debug(self, msg: str) -> None: logger = self.logger if logger: @@ -1978,7 +1986,7 @@ def _upload_multi(self, if files_not_exist: exc_str = (f"File(s) do not exist: {', '.join(files_not_exist)}" f' AND {exc_str}') - raise UploadFailed(exc_str) + raise FileNotFoundError(exc_str) return cast(list[Union[Path, Exception]], func_ret) diff --git a/filecache/file_cache_path.py b/filecache/file_cache_path.py index 3ce7a1f..154d724 100644 --- a/filecache/file_cache_path.py +++ b/filecache/file_cache_path.py @@ -539,7 +539,16 @@ def splitpath(self, search_dir: str) -> tuple[FCPath, ...]: for i, j in zip(indices[:-1], indices[1:])) def __repr__(self) -> str: - return f'FCPath({self._path!r})' + parts = [repr(self._path)] + if self._filecache is not None: + parts.append(f'filecache={self._filecache!r}') + if self._anonymous is not None: + parts.append(f'anonymous={self._anonymous!r}') + if self._lock_timeout is not None: + parts.append(f'lock_timeout={self._lock_timeout!r}') + if self._nthreads is not None: + parts.append(f'nthreads={self._nthreads!r}') + return f'FCPath({", ".join(parts)})' def __eq__(self, other: object) -> bool: if not isinstance(other, FCPath): diff --git a/filecache/file_cache_source.py b/filecache/file_cache_source.py index f91a724..fbc083b 100644 --- a/filecache/file_cache_source.py +++ b/filecache/file_cache_source.py @@ -27,7 +27,6 @@ from google.cloud import storage as gs_storage # type: ignore import google.api_core.exceptions -from .exceptions import UploadFailed class FileCacheSource(ABC): @@ -74,6 +73,13 @@ def __init__(self, # The _cache_subdir attribute is only used by the FileCache class self._cache_subdir = '' + def __repr__(self) -> str: + return (f'{type(self).__name__}({self._src_prefix!r}, ' + f'anonymous={self._anonymous!r})') + + def __str__(self) -> str: + return self._src_prefix + @classmethod @abstractmethod def schemes(self) -> tuple[str, ...]: @@ -1274,8 +1280,6 @@ def upload(self, Raises: FileNotFoundError: If the local file does not exist. - UploadFailed: If the upload to Google Storage fails (e.g. bad bucket name, - insufficient permissions, or network error). """ local_path = Path(local_path) @@ -1293,12 +1297,7 @@ def upload(self, # For some reason the Google API doesn't let you update this directly blob.metadata = blob_metadata - try: - blob.upload_from_filename(str(local_path)) - except Exception as e: - raise UploadFailed( - f'Failed to upload file to Google Storage: ' - f'{self._src_prefix_}{sub_path}') from e + blob.upload_from_filename(str(local_path)) return local_path @@ -1609,8 +1608,6 @@ def upload(self, Raises: FileNotFoundError: If the local file does not exist. - UploadFailed: If the upload to AWS S3 fails (e.g. bad bucket name, - insufficient permissions, or network error). """ local_path = Path(local_path) @@ -1624,16 +1621,11 @@ def upload(self, mtime_str = str(mtime_sec) extra_args['Metadata'] = {'mtime': mtime_str} - try: - if extra_args: - self._client.upload_file(str(local_path), self._bucket_name, sub_path, - ExtraArgs=extra_args) - else: - self._client.upload_file(str(local_path), self._bucket_name, sub_path) - except Exception as e: - raise UploadFailed( - f'Failed to upload file to AWS S3: ' - f'{self._src_prefix_}{sub_path}') from e + if extra_args: + self._client.upload_file(str(local_path), self._bucket_name, sub_path, + ExtraArgs=extra_args) + else: + self._client.upload_file(str(local_path), self._bucket_name, sub_path) return local_path diff --git a/tests/test_file_cache.py b/tests/test_file_cache.py index 34e2c3a..2ead659 100644 --- a/tests/test_file_cache.py +++ b/tests/test_file_cache.py @@ -1504,13 +1504,15 @@ def test_cloud_upl_bad_2(prefix): filename = 'test_file.txt' local_path = fc.get_local_path(f'{new_path}/{filename}') _copy_file(EXPECTED_DIR / EXPECTED_FILENAMES[0], local_path) - with pytest.raises(filecache.UploadFailed): + with pytest.raises(Exception) as e: fc.upload(f'{new_path}/{filename}', anonymous=True) + assert not isinstance(e.type, FileNotFoundError) assert fc.download_counter == 0 assert fc.upload_counter == 0 ret = fc.upload(f'{new_path}/{filename}', anonymous=True, exception_on_fail=False) - assert isinstance(ret, filecache.UploadFailed) + assert isinstance(ret, Exception) + assert not isinstance(e, FileNotFoundError) assert fc.download_counter == 0 assert fc.upload_counter == 0 assert not fc.cache_dir.exists() @@ -1524,14 +1526,17 @@ def test_cloud_upl_pfx_bad_2(prefix): filename = 'test_file.txt' local_path = pfx.get_local_path(filename) _copy_file(EXPECTED_DIR / EXPECTED_FILENAMES[0], local_path) - with pytest.raises(filecache.UploadFailed): + with pytest.raises(Exception) as e: pfx.upload(filename) + assert not isinstance(e.type, FileNotFoundError) assert fc.download_counter == 0 assert fc.upload_counter == 0 assert pfx.download_counter == 0 assert pfx.upload_counter == 0 ret = pfx.upload(filename, exception_on_fail=False) - assert isinstance(ret, filecache.UploadFailed) + assert not isinstance(e, FileNotFoundError) + assert isinstance(ret, Exception) + assert not isinstance(e, FileNotFoundError) assert fc.download_counter == 0 assert fc.upload_counter == 0 assert pfx.download_counter == 0 @@ -2042,7 +2047,8 @@ def test_cloud_upl_multi_bad_3(prefix): ret = fc.upload(paths, anonymous=True, exception_on_fail=False) assert isinstance(ret[0], FileNotFoundError) for r, lp in zip(ret[1:], local_paths[1:]): - assert isinstance(r, filecache.UploadFailed) + assert isinstance(r, Exception) + assert not isinstance(r, FileNotFoundError) assert not fc.exists(paths[0], anonymous=True) for path in paths[1:]: assert fc.exists(path, anonymous=True) @@ -2063,7 +2069,8 @@ def test_cloud_upl_multi_pfx_bad_3(prefix): ret = pfx.upload(paths, exception_on_fail=False) assert isinstance(ret[0], FileNotFoundError) for r, lp in zip(ret[1:], local_paths[1:]): - assert isinstance(r, filecache.UploadFailed) + assert isinstance(r, Exception) + assert not isinstance(r, FileNotFoundError) assert not pfx.exists(paths[0]) for path in paths[1:]: assert pfx.exists(path) @@ -3115,3 +3122,76 @@ def test_modification_time_caching_multi(time_sensitive, cache_metadata, mp_safe assert lp[0].stat().st_mtime == mtime_lp_orig[0] assert lp[1].stat().st_mtime == mtime_lp_orig[1] assert lp[2].stat().st_mtime == mtime_lp_orig[2] + + +############################################################################## +# Tests for __repr__ and __str__ (issue #21) +############################################################################## + +def test_filecache_repr_str(): + with FileCache(cache_name=None) as fc: + r = repr(fc) + assert r.startswith('FileCache(') + assert 'anonymous=False' in r + assert 'lock_timeout=60' in r + assert 'nthreads=8' in r + assert str(fc) == str(fc.cache_dir) + + # Named cache with non-default options + with FileCache('repr-test', anonymous=True, lock_timeout=30, nthreads=4, + delete_on_exit=True) as fc: + r = repr(fc) + assert 'anonymous=True' in r + assert 'lock_timeout=30' in r + assert 'nthreads=4' in r + + +def test_fcpath_repr_str(): + # Default FCPath — only the path + p = FCPath('gs://bucket/path/to/file') + assert repr(p) == "FCPath('gs://bucket/path/to/file')" + assert str(p) == 'gs://bucket/path/to/file' + + # FCPath with extra options — they appear in repr + p2 = FCPath('s3://bucket/path', anonymous=True, lock_timeout=10, nthreads=2) + r = repr(p2) + assert r.startswith('FCPath(') + assert 'anonymous=True' in r + assert 'lock_timeout=10' in r + assert 'nthreads=2' in r + + # filecache kwarg appears in repr when set + with FileCache(cache_name=None, delete_on_exit=True) as fc: + p3 = FCPath('gs://bucket/x', filecache=fc) + r3 = repr(p3) + assert 'filecache=' in r3 + + +def test_filecachesource_repr_str(): + from filecache import FileCacheSourceGS, FileCacheSourceS3, FileCacheSourceFake + from filecache import FileCacheSourceFile, FileCacheSourceHTTP + + src_gs = FileCacheSourceGS('gs', 'rms-filecache-tests', anonymous=True) + r = repr(src_gs) + assert r == "FileCacheSourceGS('gs://rms-filecache-tests', anonymous=True)" + assert str(src_gs) == 'gs://rms-filecache-tests' + + src_s3 = FileCacheSourceS3('s3', 'rms-filecache-tests', anonymous=True) + r = repr(src_s3) + assert r == "FileCacheSourceS3('s3://rms-filecache-tests', anonymous=True)" + assert str(src_s3) == 's3://rms-filecache-tests' + + src_http = FileCacheSourceHTTP('https', 'storage.googleapis.com', anonymous=False) + r = repr(src_http) + assert r == "FileCacheSourceHTTP('https://storage.googleapis.com', anonymous=False)" + assert str(src_http) == 'https://storage.googleapis.com' + + src_file = FileCacheSourceFile('file', '', anonymous=False) + r = repr(src_file) + assert r == "FileCacheSourceFile('file://', anonymous=False)" + assert str(src_file) == 'file://' + + src_fake = FileCacheSourceFake('fake', 'fake-bucket', anonymous=False) + r = repr(src_fake) + assert r == "FileCacheSourceFake('fake://fake-bucket', anonymous=False)" + assert str(src_fake) == 'fake://fake-bucket' From fe5bf9d1b462921b6f4f053ea3b9736de9005580 Mon Sep 17 00:00:00 2001 From: Robert French Date: Fri, 10 Apr 2026 13:11:23 -0700 Subject: [PATCH 3/8] Fix flake8 E303 (extra blank line after removing UploadFailed import) Co-Authored-By: Claude Sonnet 4.6 --- filecache/file_cache_source.py | 1 - 1 file changed, 1 deletion(-) diff --git a/filecache/file_cache_source.py b/filecache/file_cache_source.py index fbc083b..2c5c724 100644 --- a/filecache/file_cache_source.py +++ b/filecache/file_cache_source.py @@ -28,7 +28,6 @@ import google.api_core.exceptions - class FileCacheSource(ABC): """Superclass for all remote file source classes. Do not use directly. From 7d904b9920ad61ab22efa34af25eef0240788418 Mon Sep 17 00:00:00 2001 From: Robert French Date: Fri, 10 Apr 2026 13:15:22 -0700 Subject: [PATCH 4/8] Address CodeRabbit review comments - FileCache.__repr__: store _cache_name and show it directly so repr(FileCache(None)) prints None and repr(FileCache('global')) prints 'global', not the internal _filecache_-prefixed dir name - FileCacheSource.__repr__: use (scheme, remote) to match the actual constructor signature instead of the derived src_prefix URI - FCPath.__repr__: include url_to_url and url_to_path when non-None so two behaviourally different instances don't print identically - test_stale_lock_multi/pfx: replace brittle time.sleep(0.3) with a state-based barrier that polls until download_counter reaches len(EXPECTED_FILENAMES)-1 and the retrieve thread is still alive, guaranteeing the thread is in wait_to_appear before the flock is released Co-Authored-By: Claude Sonnet 4.6 --- filecache/file_cache.py | 3 ++- filecache/file_cache_path.py | 4 ++++ filecache/file_cache_source.py | 2 +- tests/test_file_cache.py | 37 +++++++++++++++++++++++++--------- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/filecache/file_cache.py b/filecache/file_cache.py index dd89c5a..a16870d 100644 --- a/filecache/file_cache.py +++ b/filecache/file_cache.py @@ -264,6 +264,7 @@ def __init__(self, raise TypeError(f'cache_name argument {cache_name} is of improper type') is_shared = (cache_name is not None) + self._cache_name = cache_name self._delete_on_exit = (delete_on_exit if delete_on_exit is not None else not is_shared) @@ -419,7 +420,7 @@ def logger(self) -> Logger | None: return cast(Logger, self._logger) def __repr__(self) -> str: - return (f'FileCache({self._cache_dir.name!r}, ' + return (f'FileCache({self._cache_name!r}, ' f'anonymous={self._anonymous!r}, ' f'lock_timeout={self._lock_timeout!r}, ' f'nthreads={self._nthreads!r})') diff --git a/filecache/file_cache_path.py b/filecache/file_cache_path.py index 154d724..3c979c2 100644 --- a/filecache/file_cache_path.py +++ b/filecache/file_cache_path.py @@ -548,6 +548,10 @@ def __repr__(self) -> str: parts.append(f'lock_timeout={self._lock_timeout!r}') if self._nthreads is not None: parts.append(f'nthreads={self._nthreads!r}') + if self._url_to_url is not None: + parts.append(f'url_to_url={self._url_to_url!r}') + if self._url_to_path is not None: + parts.append(f'url_to_path={self._url_to_path!r}') return f'FCPath({", ".join(parts)})' def __eq__(self, other: object) -> bool: diff --git a/filecache/file_cache_source.py b/filecache/file_cache_source.py index 2c5c724..2ad85e3 100644 --- a/filecache/file_cache_source.py +++ b/filecache/file_cache_source.py @@ -73,7 +73,7 @@ def __init__(self, self._cache_subdir = '' def __repr__(self) -> str: - return (f'{type(self).__name__}({self._src_prefix!r}, ' + return (f'{type(self).__name__}({self._scheme!r}, {self._remote!r}, ' f'anonymous={self._anonymous!r})') def __str__(self) -> str: diff --git a/tests/test_file_cache.py b/tests/test_file_cache.py index 2ead659..38fc2d0 100644 --- a/tests/test_file_cache.py +++ b/tests/test_file_cache.py @@ -1152,8 +1152,16 @@ def do_retrieve() -> None: retrieve_thread = threading.Thread(target=do_retrieve, daemon=True) retrieve_thread.start() - # Give the retrieve thread time to enter the wait_to_appear loop. - time.sleep(0.3) + # Wait until all non-blocked files have downloaded and retrieve_thread is + # still alive — at that point it must be blocked in the wait_to_appear loop. + deadline = time.monotonic() + 15 + while time.monotonic() < deadline: + if (fc.download_counter == len(EXPECTED_FILENAMES) - 1 and + retrieve_thread.is_alive()): + break + time.sleep(0.01) + else: + pytest.fail('retrieve never reached the blocked/stale-lock state') # Signal the holder to "crash" -- releases flock, keeps the lock file. flock_release.set() @@ -1213,7 +1221,17 @@ def do_retrieve() -> None: retrieve_thread = threading.Thread(target=do_retrieve, daemon=True) retrieve_thread.start() - time.sleep(0.3) + # Wait until all non-blocked files have downloaded and retrieve_thread is + # still alive — at that point it must be blocked in the wait_to_appear loop. + deadline = time.monotonic() + 15 + while time.monotonic() < deadline: + if (fc.download_counter == len(EXPECTED_FILENAMES) - 1 and + retrieve_thread.is_alive()): + break + time.sleep(0.01) + else: + pytest.fail('retrieve never reached the blocked/stale-lock state') + flock_release.set() t.join(timeout=5) assert not t.is_alive() @@ -3131,7 +3149,7 @@ def test_modification_time_caching_multi(time_sensitive, cache_metadata, mp_safe def test_filecache_repr_str(): with FileCache(cache_name=None) as fc: r = repr(fc) - assert r.startswith('FileCache(') + assert r.startswith('FileCache(None,') assert 'anonymous=False' in r assert 'lock_timeout=60' in r assert 'nthreads=8' in r @@ -3141,6 +3159,7 @@ def test_filecache_repr_str(): with FileCache('repr-test', anonymous=True, lock_timeout=30, nthreads=4, delete_on_exit=True) as fc: r = repr(fc) + assert r.startswith("FileCache('repr-test',") assert 'anonymous=True' in r assert 'lock_timeout=30' in r assert 'nthreads=4' in r @@ -3173,25 +3192,25 @@ def test_filecachesource_repr_str(): src_gs = FileCacheSourceGS('gs', 'rms-filecache-tests', anonymous=True) r = repr(src_gs) - assert r == "FileCacheSourceGS('gs://rms-filecache-tests', anonymous=True)" + assert r == "FileCacheSourceGS('gs', 'rms-filecache-tests', anonymous=True)" assert str(src_gs) == 'gs://rms-filecache-tests' src_s3 = FileCacheSourceS3('s3', 'rms-filecache-tests', anonymous=True) r = repr(src_s3) - assert r == "FileCacheSourceS3('s3://rms-filecache-tests', anonymous=True)" + assert r == "FileCacheSourceS3('s3', 'rms-filecache-tests', anonymous=True)" assert str(src_s3) == 's3://rms-filecache-tests' src_http = FileCacheSourceHTTP('https', 'storage.googleapis.com', anonymous=False) r = repr(src_http) - assert r == "FileCacheSourceHTTP('https://storage.googleapis.com', anonymous=False)" + assert r == "FileCacheSourceHTTP('https', 'storage.googleapis.com', anonymous=False)" assert str(src_http) == 'https://storage.googleapis.com' src_file = FileCacheSourceFile('file', '', anonymous=False) r = repr(src_file) - assert r == "FileCacheSourceFile('file://', anonymous=False)" + assert r == "FileCacheSourceFile('file', '', anonymous=False)" assert str(src_file) == 'file://' src_fake = FileCacheSourceFake('fake', 'fake-bucket', anonymous=False) r = repr(src_fake) - assert r == "FileCacheSourceFake('fake://fake-bucket', anonymous=False)" + assert r == "FileCacheSourceFake('fake', 'fake-bucket', anonymous=False)" assert str(src_fake) == 'fake://fake-bucket' From c44d8d72173200323e454177af892716cb2c4f76 Mon Sep 17 00:00:00 2001 From: Robert French Date: Fri, 10 Apr 2026 14:20:24 -0700 Subject: [PATCH 5/8] Parallelize stale-lock recovery downloads via retrieve_multi Stale-lock items are now grouped by source and downloaded in parallel using source.retrieve_multi(nthreads=nthreads) instead of sequentially calling source.retrieve() in a loop. Locks are released after the parallel batch completes, matching the pattern used by the initial multi-file download path. Co-Authored-By: Claude Sonnet 4.6 --- filecache/file_cache.py | 50 ++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/filecache/file_cache.py b/filecache/file_cache.py index a16870d..911db3e 100644 --- a/filecache/file_cache.py +++ b/filecache/file_cache.py @@ -1700,22 +1700,42 @@ def _retrieve_multi_locked(self, new_wait_to_appear.append( (idx, url, local_path, lock_path, source, sub_path)) - # Download all files whose stale locks we have just acquired. - for idx, url, local_path, lock_path, stale_lock, source, sub_path in ( - stale_lock_items): - try: - ret = source.retrieve(sub_path, local_path, preserve_mtime=False) + # Download all files whose stale locks we have just acquired, grouped by + # source so that retrieve_multi can fetch them in parallel. + stale_by_source: dict[str, list[tuple[ + int, str, Path, Path, filelock.FileLock, FileCacheSource, str]]] = {} + for item in stale_lock_items: + pfx = item[5]._src_prefix_ + if pfx not in stale_by_source: + stale_by_source[pfx] = [] + stale_by_source[pfx].append(item) + + for items in stale_by_source.values(): + s_idxes = [it[0] for it in items] + s_urls = [it[1] for it in items] + s_local_paths = [it[2] for it in items] + s_lock_paths = [it[3] for it in items] + s_locks = [it[4] for it in items] + s_source = items[0][5] + s_sub_paths = [it[6] for it in items] + + rets = s_source.retrieve_multi(s_sub_paths, s_local_paths, + preserve_mtime=False, nthreads=nthreads) + for idx, url, ret in zip(s_idxes, s_urls, rets): func_ret[idx] = ret - self._download_counter += 1 - self._log_debug(f' Re-downloaded after stale lock recovery: {url}') - except Exception as e: - func_ret[idx] = e - files_not_exist.append(url) - self._log_debug( - f' Download failed after stale lock recovery: {url}: {e!r}') - finally: - stale_lock.release() - lock_path.unlink(missing_ok=True) + if isinstance(ret, Exception): + files_not_exist.append(url) + self._log_debug( + f' Stale lock recovery download failed: ' + f'{url}: {ret!r}') + else: + self._download_counter += 1 + self._log_debug( + f' Re-downloaded after stale lock recovery: {url}') + + for s_lock, s_lock_path in zip(s_locks, s_lock_paths): + s_lock.release() + s_lock_path.unlink(missing_ok=True) if not new_wait_to_appear: break From 0698009a8d0b74af3943ae010258557a3c4905cb Mon Sep 17 00:00:00 2001 From: Robert French Date: Fri, 10 Apr 2026 15:08:09 -0700 Subject: [PATCH 6/8] Move repr/str tests to their corresponding test files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test_fcpath_repr_str → tests/test_file_cache_path.py - test_filecachesource_repr_str → tests/test_file_cache_source.py - test_filecache_repr_str stays in tests/test_file_cache.py Co-Authored-By: Claude Sonnet 4.6 --- tests/test_file_cache.py | 51 --------------------------------- tests/test_file_cache_path.py | 21 ++++++++++++++ tests/test_file_cache_source.py | 29 +++++++++++++++++++ 3 files changed, 50 insertions(+), 51 deletions(-) diff --git a/tests/test_file_cache.py b/tests/test_file_cache.py index 38fc2d0..dc02ae2 100644 --- a/tests/test_file_cache.py +++ b/tests/test_file_cache.py @@ -3163,54 +3163,3 @@ def test_filecache_repr_str(): assert 'anonymous=True' in r assert 'lock_timeout=30' in r assert 'nthreads=4' in r - - -def test_fcpath_repr_str(): - # Default FCPath — only the path - p = FCPath('gs://bucket/path/to/file') - assert repr(p) == "FCPath('gs://bucket/path/to/file')" - assert str(p) == 'gs://bucket/path/to/file' - - # FCPath with extra options — they appear in repr - p2 = FCPath('s3://bucket/path', anonymous=True, lock_timeout=10, nthreads=2) - r = repr(p2) - assert r.startswith('FCPath(') - assert 'anonymous=True' in r - assert 'lock_timeout=10' in r - assert 'nthreads=2' in r - - # filecache kwarg appears in repr when set - with FileCache(cache_name=None, delete_on_exit=True) as fc: - p3 = FCPath('gs://bucket/x', filecache=fc) - r3 = repr(p3) - assert 'filecache=' in r3 - - -def test_filecachesource_repr_str(): - from filecache import FileCacheSourceGS, FileCacheSourceS3, FileCacheSourceFake - from filecache import FileCacheSourceFile, FileCacheSourceHTTP - - src_gs = FileCacheSourceGS('gs', 'rms-filecache-tests', anonymous=True) - r = repr(src_gs) - assert r == "FileCacheSourceGS('gs', 'rms-filecache-tests', anonymous=True)" - assert str(src_gs) == 'gs://rms-filecache-tests' - - src_s3 = FileCacheSourceS3('s3', 'rms-filecache-tests', anonymous=True) - r = repr(src_s3) - assert r == "FileCacheSourceS3('s3', 'rms-filecache-tests', anonymous=True)" - assert str(src_s3) == 's3://rms-filecache-tests' - - src_http = FileCacheSourceHTTP('https', 'storage.googleapis.com', anonymous=False) - r = repr(src_http) - assert r == "FileCacheSourceHTTP('https', 'storage.googleapis.com', anonymous=False)" - assert str(src_http) == 'https://storage.googleapis.com' - - src_file = FileCacheSourceFile('file', '', anonymous=False) - r = repr(src_file) - assert r == "FileCacheSourceFile('file', '', anonymous=False)" - assert str(src_file) == 'file://' - - src_fake = FileCacheSourceFake('fake', 'fake-bucket', anonymous=False) - r = repr(src_fake) - assert r == "FileCacheSourceFake('fake', 'fake-bucket', anonymous=False)" - assert str(src_fake) == 'fake://fake-bucket' diff --git a/tests/test_file_cache_path.py b/tests/test_file_cache_path.py index 893e767..469217d 100644 --- a/tests/test_file_cache_path.py +++ b/tests/test_file_cache_path.py @@ -1308,3 +1308,24 @@ def test_splitpath(): FCPath('d')) assert FCPath('gs://bucket/a/c/b/b1/c/d/d1').splitpath('c') == \ (FCPath('gs://bucket/a'), FCPath('b/b1'), FCPath('d/d1')) + + +def test_fcpath_repr_str(): + # Default FCPath — only the path + p = FCPath('gs://bucket/path/to/file') + assert repr(p) == "FCPath('gs://bucket/path/to/file')" + assert str(p) == 'gs://bucket/path/to/file' + + # FCPath with extra options — they appear in repr + p2 = FCPath('s3://bucket/path', anonymous=True, lock_timeout=10, nthreads=2) + r = repr(p2) + assert r.startswith('FCPath(') + assert 'anonymous=True' in r + assert 'lock_timeout=10' in r + assert 'nthreads=2' in r + + # filecache kwarg appears in repr when set + with FileCache(cache_name=None, delete_on_exit=True) as fc: + p3 = FCPath('gs://bucket/x', filecache=fc) + r3 = repr(p3) + assert 'filecache=' in r3 diff --git a/tests/test_file_cache_source.py b/tests/test_file_cache_source.py index 767ddde..301be3e 100644 --- a/tests/test_file_cache_source.py +++ b/tests/test_file_cache_source.py @@ -404,3 +404,32 @@ def mock_rename2(src, dst): pathlib.Path.rename = original_rename finally: FileCacheSourceFake.delete_default_storage_dir() + + +def test_filecachesource_repr_str(): + src_gs = FileCacheSourceGS('gs', 'rms-filecache-tests', anonymous=True) + r = repr(src_gs) + assert r == "FileCacheSourceGS('gs', 'rms-filecache-tests', anonymous=True)" + assert str(src_gs) == 'gs://rms-filecache-tests' + + src_s3 = FileCacheSourceS3('s3', 'rms-filecache-tests', anonymous=True) + r = repr(src_s3) + assert r == "FileCacheSourceS3('s3', 'rms-filecache-tests', anonymous=True)" + assert str(src_s3) == 's3://rms-filecache-tests' + + src_http = FileCacheSourceHTTP('https', 'storage.googleapis.com', + anonymous=False) + r = repr(src_http) + assert r == ("FileCacheSourceHTTP('https', 'storage.googleapis.com', " + "anonymous=False)") + assert str(src_http) == 'https://storage.googleapis.com' + + src_file = FileCacheSourceFile('file', '', anonymous=False) + r = repr(src_file) + assert r == "FileCacheSourceFile('file', '', anonymous=False)" + assert str(src_file) == 'file://' + + src_fake = FileCacheSourceFake('fake', 'fake-bucket', anonymous=False) + r = repr(src_fake) + assert r == "FileCacheSourceFake('fake', 'fake-bucket', anonymous=False)" + assert str(src_fake) == 'fake://fake-bucket' From 8a4a8c2c69f86c11b15dd0b9abc66d01301f7196 Mon Sep 17 00:00:00 2001 From: Robert French Date: Fri, 10 Apr 2026 15:10:30 -0700 Subject: [PATCH 7/8] Remove section comment from test_file_cache.py Co-Authored-By: Claude Sonnet 4.6 --- tests/test_file_cache.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_file_cache.py b/tests/test_file_cache.py index dc02ae2..7cff655 100644 --- a/tests/test_file_cache.py +++ b/tests/test_file_cache.py @@ -3142,10 +3142,6 @@ def test_modification_time_caching_multi(time_sensitive, cache_metadata, mp_safe assert lp[2].stat().st_mtime == mtime_lp_orig[2] -############################################################################## -# Tests for __repr__ and __str__ (issue #21) -############################################################################## - def test_filecache_repr_str(): with FileCache(cache_name=None) as fc: r = repr(fc) From 160f3605a533adcaeef7c4ba9051179b10fd0b10 Mon Sep 17 00:00:00 2001 From: Robert French Date: Fri, 10 Apr 2026 15:20:16 -0700 Subject: [PATCH 8/8] Address second round of CodeRabbit review comments - Fix pre-existing bug: negative lock_timeout (meaning "never time out") was immediately triggering TimeoutError because elapsed > negative is always true. Now guarded with lock_timeout >= 0. - Bound flock_held.wait() with timeout=5 in stale lock tests so they fail fast instead of hanging if the holder thread dies. - Add url_to_url/url_to_path coverage to test_fcpath_repr_str. - Skip base FileCacheSource repr test (it's an ABC, can't instantiate). Co-Authored-By: Claude Sonnet 4.6 --- filecache/file_cache.py | 2 +- tests/test_file_cache.py | 4 ++-- tests/test_file_cache_path.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/filecache/file_cache.py b/filecache/file_cache.py index 911db3e..b33f5f1 100644 --- a/filecache/file_cache.py +++ b/filecache/file_cache.py @@ -1741,7 +1741,7 @@ def _retrieve_multi_locked(self, break wait_to_appear = new_wait_to_appear - if time.time() - start_time > lock_timeout: + if lock_timeout >= 0 and time.time() - start_time > lock_timeout: exc = TimeoutError( 'Timeout while waiting for another process to finish downloading') self._log_debug( diff --git a/tests/test_file_cache.py b/tests/test_file_cache.py index 7cff655..9cbb73e 100644 --- a/tests/test_file_cache.py +++ b/tests/test_file_cache.py @@ -1136,7 +1136,7 @@ def holder_thread() -> None: t = threading.Thread(target=holder_thread, daemon=True) t.start() - flock_held.wait() # make sure the flock is held before we start retrieve + assert flock_held.wait(timeout=5), 'holder thread never acquired flock' results: list[list[Path]] = [] exc_holder: list[Exception] = [] @@ -1206,7 +1206,7 @@ def holder_thread() -> None: t = threading.Thread(target=holder_thread, daemon=True) t.start() - flock_held.wait() + assert flock_held.wait(timeout=5), 'holder thread never acquired flock' results: list[list[Path]] = [] exc_holder: list[Exception] = [] diff --git a/tests/test_file_cache_path.py b/tests/test_file_cache_path.py index 469217d..9018780 100644 --- a/tests/test_file_cache_path.py +++ b/tests/test_file_cache_path.py @@ -1329,3 +1329,16 @@ def test_fcpath_repr_str(): p3 = FCPath('gs://bucket/x', filecache=fc) r3 = repr(p3) assert 'filecache=' in r3 + + # url_to_url and url_to_path appear in repr when set + def _identity_url(scheme, remote, path): + return None + + def _identity_path(scheme, remote, path, cache_dir, cache_subdir): + return None + + p4 = FCPath('gs://bucket/y', + url_to_url=_identity_url, url_to_path=_identity_path) + r4 = repr(p4) + assert 'url_to_url=' in r4 + assert 'url_to_path=' in r4