diff --git a/packages/bsb-core/README.md b/packages/bsb-core/README.md index 1f1725e7..7cb0e521 100644 --- a/packages/bsb-core/README.md +++ b/packages/bsb-core/README.md @@ -1,7 +1,6 @@ [![Build Status](https://github.com/dbbs-lab/bsb/actions/workflows/main.yml/badge.svg)](https://github.com/dbbs-lab/bsb/actions/workflows/main.yml) [![Documentation](https://readthedocs.org/projects/bsb-core/badge/?version=latest)](https://bsb-core.readthedocs.io/en/latest/?badge=latest) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) -[![codecov](https://codecov.io/gh/dbbs-lab/bsb-core/branch/main/graph/badge.svg)](https://codecov.io/gh/dbbs-lab/bsb-core) # bsb-core diff --git a/packages/bsb-core/bsb/config/_attrs.py b/packages/bsb-core/bsb/config/_attrs.py index 107b9c48..6d977ae3 100644 --- a/packages/bsb-core/bsb/config/_attrs.py +++ b/packages/bsb-core/bsb/config/_attrs.py @@ -473,23 +473,22 @@ def _boot_nodes(top_node, scaffold): boot(node, scaffold) booted.add(boot) # Boot node hook - try: - run_hook(node, "boot") - except Exception as e: - errr.wrap(BootError, e, prepend=f"Failed to boot {node}:") - # fixme: why is this here? Will deadlock in case of BootError on specific node only. 
- scaffold._comm.barrier() + with scaffold._comm.try_all(BootError("Boot failed on different rank.")): + try: + run_hook(node, "boot") + except Exception as e: + errr.wrap(BootError, e, prepend=f"Failed to boot {node}:") def _unset_nodes(top_node): for node in walk_nodes(top_node): + run_hook(node, "unboot") with contextlib.suppress(Exception): del node.scaffold node._config_parent = None node._config_key = None if hasattr(node, "_config_index"): node._config_index = None - run_hook(node, "unboot") class ConfigurationAttribute: diff --git a/packages/bsb-core/bsb/morphologies/selector.py b/packages/bsb-core/bsb/morphologies/selector.py index a00d4343..6c922593 100644 --- a/packages/bsb-core/bsb/morphologies/selector.py +++ b/packages/bsb-core/bsb/morphologies/selector.py @@ -1,5 +1,6 @@ import abc import concurrent +import contextlib import re import tempfile import typing @@ -12,7 +13,7 @@ from .. import config from ..config import types from ..config._attrs import cfglist -from ..exceptions import MissingMorphologyError, SelectorError +from ..exceptions import MissingMorphologyError, MorphologyRepositoryError, SelectorError from .parsers import parse_morphology_file if typing.TYPE_CHECKING: # pragma: nocover @@ -87,15 +88,19 @@ class NeuroMorphoSelector(NameSelector, classmap_entry="from_neuromorpho"): _files = "dableFiles/" def __boot__(self): - if self.scaffold.is_main_process(): - try: + with self.scaffold._comm.try_main(): + if self.scaffold.is_main_process(): morphos = self._scrape_nm(self.names) - except: - self.scaffold._comm.barrier() - raise - for name, morpho in morphos.items(): - self.scaffold.morphologies.save(name, morpho, overwrite=True) - self.scaffold._comm.barrier() + for name, morpho in morphos.items(): + self.scaffold.morphologies.save(name, morpho, overwrite=True) + + def __unboot__(self): + with self.scaffold._comm.try_main(): + if self.scaffold.is_main_process(): + for name in self.names: + with contextlib.suppress(MorphologyRepositoryError): 
+ # remove morphology if it was saved in the scaffold. + self.scaffold.morphologies.remove(name) @classmethod def _swc_url(cls, archive, name): diff --git a/packages/bsb-core/bsb/services/mpi.py b/packages/bsb-core/bsb/services/mpi.py index 09c88940..22e758cd 100644 --- a/packages/bsb-core/bsb/services/mpi.py +++ b/packages/bsb-core/bsb/services/mpi.py @@ -1,3 +1,4 @@ +import contextlib import functools import os @@ -74,6 +75,56 @@ def Unlock(self, rank): return WindowMock() + @contextlib.contextmanager + def try_all(self, default_exception=None): + """ + Create a context manager that checks if any exception is raised by any process + within the context and, if so, makes all other processes raise an exception. + + :param Exception default_exception: Exception instance to raise for all processes + that did not raise during the context. + :return: context manager + """ + exc_instance = None + default_exception = default_exception or RuntimeError( + "An error occurred on a different rank" + ) + try: + yield + except Exception as e: + exc_instance = e + + exceptions = self.allgather(exc_instance) + if any(exceptions): + raise ( + exceptions[self.get_rank()] + if exceptions[self.get_rank()] + else default_exception + ) + + @contextlib.contextmanager + def try_main(self): + """ + Create a context manager that checks if any exception is raised by the main + process within the context, and makes all other processes raise this exception in + that case. + Warning: All processes will still enter the context, but only the main process's + exception will be raised. 
+ + :return: context manager + """ + exc_instance = None + try: + # All processes have to enter the context + # contextlib will throw an error if one does not yield + yield + except Exception as e: + exc_instance = e + + exception = self.bcast(exc_instance) + if exception is not None: + raise exception + class MPIModule(MockModule): """ diff --git a/packages/bsb-core/tests/test_issues.py b/packages/bsb-core/tests/test_issues.py index 97830ccc..95f01a1c 100644 --- a/packages/bsb-core/tests/test_issues.py +++ b/packages/bsb-core/tests/test_issues.py @@ -2,13 +2,18 @@ import unittest from types import NoneType +from bsb_test import RandomStorageFixture, timeout + from bsb import ( + BootError, CellType, CfgReferenceError, Chunk, + Configuration, FixedPositions, PlacementIndications, Reference, + Scaffold, config, ) @@ -45,7 +50,7 @@ class Root430: extensions = config.dict(type=Extension, required=True) -class TestIssues(unittest.TestCase): +class TestIssues(RandomStorageFixture, unittest.TestCase, engine_name="hdf5"): def test_430(self): with self.assertRaises(CfgReferenceError, msg="Regression of issue #430"): _config = Root430( @@ -62,3 +67,15 @@ def test_802(self): Chunk((0, 0, 0), (100, 100, 100)), {CellType(spatial=dict(radius=1, count=1)): PlacementIndications()}, ) + + @timeout(3) + def test_211(self): + """ + Test if the absence of a file does not make the reconstruction + get stuck in parallel. 
+ """ + cfg = Configuration.default( + files=dict(annotations={"file": "path/to/missing/file.nrrd", "type": "nrrd"}), + ) + with self.assertRaises(BootError): + Scaffold(cfg, self.storage) diff --git a/packages/bsb-core/tests/test_selectors.py b/packages/bsb-core/tests/test_selectors.py index b4612eb3..1d7c73a9 100644 --- a/packages/bsb-core/tests/test_selectors.py +++ b/packages/bsb-core/tests/test_selectors.py @@ -1,6 +1,6 @@ import unittest -from bsb_test import RandomStorageFixture, skip_parallel, skipIfOffline +from bsb_test import RandomStorageFixture, skipIfOffline from bsb import ( MPI, @@ -105,7 +105,6 @@ def test_nm_selector(self): m = s.morphologies.select(*ct.spatial.morphologies)[0] self.assertEqual(name, m.get_meta()["neuron_name"], "meta not stored") - @skip_parallel # https://github.com/dbbs-lab/bsb/issues/187 @skipIfOffline(scheme=NeuroMorphoScheme()) def test_nm_selector_wrong_name(self): ct = CellType( diff --git a/packages/bsb-core/tests/test_util.py b/packages/bsb-core/tests/test_util.py index fbf0ee7d..e4c538d1 100644 --- a/packages/bsb-core/tests/test_util.py +++ b/packages/bsb-core/tests/test_util.py @@ -5,7 +5,6 @@ FixedPosConfigFixture, NumpyTestCase, RandomStorageFixture, - skip_parallel, skipIfOffline, ) from scipy.spatial.transform import Rotation @@ -70,7 +69,6 @@ def test_rotation_matrix_from_vectors(self): class TestUriSchemes(RandomStorageFixture, unittest.TestCase, engine_name="fs"): - @skip_parallel # see https://github.com/dbbs-lab/bsb/issues/197 @skipIfOffline(scheme=NeuroMorphoScheme()) def test_nm_scheme(self): file = FileDependency(