Consider the following script:
from concurrent.futures import ThreadPoolExecutor
from pkgutil import walk_packages
import scipy
def worker():
    """Iterate over everything ``walk_packages`` yields for scipy, discarding it."""
    # Only the traversal itself matters here; the yielded entries are unused.
    for _ in walk_packages(scipy.__path__, scipy.__name__ + '.'):
        pass


n_threads = 10

# Submit the same traversal n_threads times to a pool of at most 4 workers.
# Setting n_threads = 1 makes the script pass.
with ThreadPoolExecutor(max_workers=min(n_threads, 4)) as tpe:
    futures = [tpe.submit(worker) for _ in range(n_threads)]
    # result() re-raises any exception raised inside a worker thread, which
    # is where the KeyError from pkgutil surfaces.
    [f.result() for f in futures]
This is based on the scipy test scipy/_lib/tests/test_public_api.py::test_all_modules_are_expected running under pytest-run-parallel.
On both the free-threaded and GIL-enabled interpreters, this script eventually fails with the following error:
Traceback (most recent call last):
File "/Users/goldbaum/Documents/test/test.py", line 16, in <module>
[f.result() for f in futures]
~~~~~~~~^^
File "/Users/goldbaum/.pyenv/versions/3.13.4/lib/python3.13/concurrent/futures/_base.py", line 456, in result
return self.__get_result()
~~~~~~~~~~~~~~~~~^^
File "/Users/goldbaum/.pyenv/versions/3.13.4/lib/python3.13/concurrent/futures/_base.py", line 401, in __get_result
raise self._exception
File "/Users/goldbaum/.pyenv/versions/3.13.4/lib/python3.13/concurrent/futures/thread.py", line 59, in run
result = self.fn(*self.args, **self.kwargs)
File "/Users/goldbaum/Documents/test/test.py", line 6, in worker
for _ in walk_packages(scipy.__path__, scipy.__name__ + '.'):
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/goldbaum/.pyenv/versions/3.13.4/lib/python3.13/pkgutil.py", line 93, in walk_packages
yield from walk_packages(path, info.name+'.', onerror)
File "/Users/goldbaum/.pyenv/versions/3.13.4/lib/python3.13/pkgutil.py", line 93, in walk_packages
yield from walk_packages(path, info.name+'.', onerror)
File "/Users/goldbaum/.pyenv/versions/3.13.4/lib/python3.13/pkgutil.py", line 93, in walk_packages
yield from walk_packages(path, info.name+'.', onerror)
File "/Users/goldbaum/.pyenv/versions/3.13.4/lib/python3.13/pkgutil.py", line 88, in walk_packages
path = getattr(sys.modules[info.name], '__path__', None) or []
~~~~~~~~~~~^^^^^^^^^^^
KeyError: 'scipy._lib.array_api_compat.dask.array'
It runs successfully if I set n_threads=1 in the script.
I think this is happening because there's a race to call clone_module:
def clone_module(mod_name: str, globals_: dict[str, object]) -> list[str]:
    """Import everything from a module, updating ``globals_`` in place.

    Parameters
    ----------
    mod_name
        Dotted name of the module to import and copy names from.
    globals_
        Namespace dictionary to update, typically the caller's ``globals()``.

    Returns
    -------
    list[str]
        The names that were copied, intended for use as ``__all__``.
    """
    mod = importlib.import_module(mod_name)
    # Neither of these two methods is sufficient by itself,
    # depending on various idiosyncrasies of the libraries we're wrapping.
    objs = {}
    # Method 1: a star-import executed into a scratch namespace.
    exec(f"from {mod.__name__} import *", objs)
    # NOTE(review): exec() also adds a ``__builtins__`` entry to ``objs``, so
    # it appears to reach ``globals_`` and the returned list -- confirm intent.

    # Method 2: every public attribute reported by dir(); these take
    # precedence over same-named entries from the star-import.
    for n in dir(mod):
        if not n.startswith("_") and hasattr(mod, n):
            objs[n] = getattr(mod, n)

    globals_.update(objs)
    return list(objs)
Consider the following script:
This is based on the scipy test
scipy/_lib/tests/test_public_api.py::test_all_modules_are_expected running under pytest-run-parallel. On both the free-threaded and GIL-enabled interpreters, this script eventually fails with the following error:
It runs successfully if I set
n_threads=1 in the script. I think this is happening because there's a race to call
clone_module: array-api-compat/array_api_compat/_internal.py
Lines 56 to 71 in 6c708d1