From cbc721bfacd0ce396dba55235703525a8feaf0ac Mon Sep 17 00:00:00 2001 From: 2xB <31772910+2xB@users.noreply.github.com> Date: Sun, 8 Jun 2025 14:34:45 +0200 Subject: [PATCH 01/11] Fix errors with multiprocessing Before, one could get OSError 22 and BadZipFile errors due to re-used file pointers in forked subprocesses. Fixes #520 --- importlib_metadata/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 03031190..79f356a8 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -803,6 +803,7 @@ class FastPath: True """ + # The following cache is cleared at fork, see os.register_at_fork below @functools.lru_cache() # type: ignore[misc] def __new__(cls, root): return super().__new__(cls) @@ -843,6 +844,10 @@ def mtime(self): def lookup(self, mtime): return Lookup(self) +# Clear FastPath.__new__ cache when forked, avoids trying to re-useing open +# file pointers from zipp.Path/zipfile.Path objects in forked process +os.register_at_fork(after_in_child=FastPath.__new__.cache_clear) + class Lookup: """ From 339d7a57c7190d462c81ee12e60875c69d60f925 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 20 Dec 2025 12:47:03 -0500 Subject: [PATCH 02/11] Added decorator to encapsulate the fork multiprocessing workaround. --- importlib_metadata/__init__.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 79f356a8..a8bf1c93 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -787,6 +787,24 @@ def find_distributions(self, context=Context()) -> Iterable[Distribution]: """ +def _clear_lru_cache_after_fork(func): + """Wrap ``func`` with ``functools.lru_cache`` and clear it after ``fork``. + + ``FastPath`` caches zip-backed ``pathlib.Path`` objects that keep a + reference to the parent's open ``ZipFile`` handle. Re-using a cached + instance in a forked child can therefore resurrect invalid file pointers + and trigger ``BadZipFile``/``OSError`` failures (python/importlib_metadata#520). + Registering ``cache_clear`` with ``os.register_at_fork`` ensures every + process gets a pristine cache and opens its own archive handles. + """ + + cached = functools.lru_cache()(func) + register = getattr(os, 'register_at_fork', None) + if register is not None: + register(after_in_child=cached.cache_clear) + return cached + + class FastPath: """ Micro-optimized class for searching a root for children. @@ -803,8 +821,7 @@ class FastPath: True """ - # The following cache is cleared at fork, see os.register_at_fork below - @functools.lru_cache() # type: ignore[misc] + @_clear_lru_cache_after_fork # type: ignore[misc] def __new__(cls, root): return super().__new__(cls) @@ -844,11 +861,6 @@ def mtime(self): def lookup(self, mtime): return Lookup(self) -# Clear FastPath.__new__ cache when forked, avoids trying to re-useing open -# file pointers from zipp.Path/zipfile.Path objects in forked process -os.register_at_fork(after_in_child=FastPath.__new__.cache_clear) - - class Lookup: """ A micro-optimized class for searching a (fast) path for metadata. From 104265b037f8994588992ebfbdd316cc78e3d457 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 20 Dec 2025 13:19:44 -0500 Subject: [PATCH 03/11] Add test capturing missed expectation. --- tests/test_zip.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/test_zip.py b/tests/test_zip.py index d4f8e2f0..aeb91e79 100644 --- a/tests/test_zip.py +++ b/tests/test_zip.py @@ -1,7 +1,10 @@ +import multiprocessing +import os import sys import unittest from importlib_metadata import ( + FastPath, PackageNotFoundError, distribution, distributions, @@ -47,6 +50,37 @@ def test_one_distribution(self): dists = list(distributions(path=sys.path[:1])) assert len(dists) == 1 + @unittest.skipUnless( + hasattr(os, 'register_at_fork') + and 'fork' in multiprocessing.get_all_start_methods(), + 'requires fork-based multiprocessing support', + ) + def test_fastpath_cache_cleared_in_forked_child(self): + zip_path = sys.path[0] + + FastPath(zip_path) + self.assertEqual(FastPath.__new__.cache_info().currsize, 1) + + ctx = multiprocessing.get_context('fork') + parent_conn, child_conn = ctx.Pipe() + + def child(conn, root): + try: + before = FastPath.__new__.cache_info().currsize + FastPath(root) + after = FastPath.__new__.cache_info().currsize + conn.send((before, after)) + finally: + conn.close() + + proc = ctx.Process(target=child, args=(child_conn, zip_path)) + proc.start() + child_conn.close() + cache_sizes = parent_conn.recv() + proc.join() + + self.assertEqual(cache_sizes, (0, 1)) + class TestEgg(TestZip): def setUp(self): From 6a30ab96290b18c0b9805268a201ca5011c1feae Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 21 Dec 2025 03:27:23 -0500 Subject: [PATCH 04/11] Allow initial currsize to be greater than one (as happens when running the test suite). --- tests/test_zip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_zip.py b/tests/test_zip.py index aeb91e79..165aa6dd 100644 --- a/tests/test_zip.py +++ b/tests/test_zip.py @@ -59,7 +59,7 @@ def test_fastpath_cache_cleared_in_forked_child(self): zip_path = sys.path[0] FastPath(zip_path) - self.assertEqual(FastPath.__new__.cache_info().currsize, 1) + assert FastPath.__new__.cache_info().currsize >= 1 ctx = multiprocessing.get_context('fork') parent_conn, child_conn = ctx.Pipe() From 4e962a8498990ba82120e7a58ce71abedefa0003 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 21 Dec 2025 03:27:37 -0500 Subject: [PATCH 05/11] =?UTF-8?q?=F0=9F=91=B9=20Feed=20the=20hobgoblins=20?= =?UTF-8?q?(delint).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- importlib_metadata/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index a8bf1c93..3e436d24 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -861,6 +861,7 @@ def mtime(self): def lookup(self, mtime): return Lookup(self) + class Lookup: """ A micro-optimized class for searching a (fast) path for metadata. From a1c25d8f2dc50abec65e4cf6d733b15d73c2f3b1 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 21 Dec 2025 03:29:36 -0500 Subject: [PATCH 06/11] =?UTF-8?q?=F0=9F=A7=8E=E2=80=8D=E2=99=80=EF=B8=8F?= =?UTF-8?q?=20Genuflect=20to=20the=20types.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- importlib_metadata/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 3e436d24..68f9b5f9 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -821,7 +821,7 @@ class FastPath: True """ - @_clear_lru_cache_after_fork # type: ignore[misc] + @_clear_lru_cache_after_fork def __new__(cls, root): return super().__new__(cls) From 1da3f456ab53832fd6e1236f2338388d9ea0b0c6 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 21 Dec 2025 04:09:52 -0500 Subject: [PATCH 07/11] Add news fragment. --- newsfragments/520.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/520.bugfix.rst diff --git a/newsfragments/520.bugfix.rst b/newsfragments/520.bugfix.rst new file mode 100644 index 00000000..1fbe7cec --- /dev/null +++ b/newsfragments/520.bugfix.rst @@ -0,0 +1 @@ +Fixed errors in FastPath under fork-multiprocessing. From 8dd2937cf852eb0d9ad96d4e45ed3470e80c1463 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 21 Dec 2025 04:17:14 -0500 Subject: [PATCH 08/11] Decouple clear_after_fork from lru_cache and then compose. --- importlib_metadata/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 68f9b5f9..15ecb0b2 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -787,18 +787,17 @@ def find_distributions(self, context=Context()) -> Iterable[Distribution]: """ -def _clear_lru_cache_after_fork(func): - """Wrap ``func`` with ``functools.lru_cache`` and clear it after ``fork``. +def _clear_after_fork(cached): + """Ensure ``func`` clears cached state after ``fork`` when supported. - ``FastPath`` caches zip-backed ``pathlib.Path`` objects that keep a + ``FastPath`` caches zip-backed ``pathlib.Path`` objects that retain a reference to the parent's open ``ZipFile`` handle. Re-using a cached instance in a forked child can therefore resurrect invalid file pointers and trigger ``BadZipFile``/``OSError`` failures (python/importlib_metadata#520). - Registering ``cache_clear`` with ``os.register_at_fork`` ensures every - process gets a pristine cache and opens its own archive handles. + Registering ``cache_clear`` with ``os.register_at_fork`` keeps each process + on its own cache. """ - cached = functools.lru_cache()(func) register = getattr(os, 'register_at_fork', None) if register is not None: register(after_in_child=cached.cache_clear) @@ -821,7 +820,8 @@ class FastPath: True """ - @_clear_lru_cache_after_fork + @_clear_after_fork + @functools.lru_cache() def __new__(cls, root): return super().__new__(cls) From a36bab926643dcd67513851d5bebc285ef9ac681 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 21 Dec 2025 04:22:17 -0500 Subject: [PATCH 09/11] Avoid if block. --- importlib_metadata/__init__.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 15ecb0b2..22824be8 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -797,10 +797,9 @@ def _clear_after_fork(cached): Registering ``cache_clear`` with ``os.register_at_fork`` keeps each process on its own cache. """ - - register = getattr(os, 'register_at_fork', None) - if register is not None: - register(after_in_child=cached.cache_clear) + getattr(os, 'register_at_fork', lambda **kw: None)( + after_in_child=cached.cache_clear, + ) return cached From 3c9510bf848fd4031e76028da0c9f60129047546 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 21 Dec 2025 04:28:24 -0500 Subject: [PATCH 10/11] Prefer noop for degenerate behavior. --- importlib_metadata/__init__.py | 6 ++---- importlib_metadata/_functools.py | 9 +++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 22824be8..df9ff61a 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -35,7 +35,7 @@ NullFinder, install, ) -from ._functools import method_cache, pass_none +from ._functools import method_cache, noop, pass_none from ._itertools import always_iterable, bucket, unique_everseen from ._meta import PackageMetadata, SimplePath from ._typing import md_none @@ -797,9 +797,7 @@ def _clear_after_fork(cached): Registering ``cache_clear`` with ``os.register_at_fork`` keeps each process on its own cache. """ - getattr(os, 'register_at_fork', lambda **kw: None)( - after_in_child=cached.cache_clear, - ) + getattr(os, 'register_at_fork', noop)(after_in_child=cached.cache_clear) return cached diff --git a/importlib_metadata/_functools.py b/importlib_metadata/_functools.py index 5dda6a21..8dcec720 100644 --- a/importlib_metadata/_functools.py +++ b/importlib_metadata/_functools.py @@ -102,3 +102,12 @@ def wrapper(param, *args, **kwargs): return func(param, *args, **kwargs) return wrapper + + +# From jaraco.functools 4.4 +def noop(*args, **kwargs): + """ + A no-operation function that does nothing. + + >>> noop(1, 2, three=3) + """ From f6eee5671a3e9e1cb56a6d3a6219145c19518713 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 21 Dec 2025 04:48:18 -0500 Subject: [PATCH 11/11] Rely on passthrough to designate a wrapper for its side effect. --- importlib_metadata/__init__.py | 6 +++--- importlib_metadata/_functools.py | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index df9ff61a..508b02e4 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -35,7 +35,7 @@ NullFinder, install, ) -from ._functools import method_cache, noop, pass_none +from ._functools import method_cache, noop, pass_none, passthrough from ._itertools import always_iterable, bucket, unique_everseen from ._meta import PackageMetadata, SimplePath from ._typing import md_none @@ -787,6 +787,7 @@ def find_distributions(self, context=Context()) -> Iterable[Distribution]: """ +@passthrough def _clear_after_fork(cached): """Ensure ``func`` clears cached state after ``fork`` when supported. @@ -798,7 +799,6 @@ def _clear_after_fork(cached): on its own cache. """ getattr(os, 'register_at_fork', noop)(after_in_child=cached.cache_clear) - return cached class FastPath: @@ -817,7 +817,7 @@ class FastPath: True """ - @_clear_after_fork + @_clear_after_fork # type: ignore[misc] @functools.lru_cache() def __new__(cls, root): return super().__new__(cls) diff --git a/importlib_metadata/_functools.py b/importlib_metadata/_functools.py index 8dcec720..b1fd04a8 100644 --- a/importlib_metadata/_functools.py +++ b/importlib_metadata/_functools.py @@ -1,5 +1,6 @@ import functools import types +from typing import Callable, TypeVar # from jaraco.functools 3.3 @@ -111,3 +112,24 @@ def noop(*args, **kwargs): >>> noop(1, 2, three=3) """ + + +_T = TypeVar('_T') + + +# From jaraco.functools 4.4 +def passthrough(func: Callable[..., object]) -> Callable[[_T], _T]: + """ + Wrap the function to always return the first parameter. + + >>> passthrough(print)('3') + 3 + '3' + """ + + @functools.wraps(func) + def wrapper(first: _T, *args, **kwargs) -> _T: + func(first, *args, **kwargs) + return first + + return wrapper # type: ignore[return-value]