From 3933155c51e3e7d91724be93a23d92d302b168d0 Mon Sep 17 00:00:00 2001 From: Enning Yang Date: Wed, 27 Aug 2025 12:15:08 -0400 Subject: [PATCH 1/6] Fix test logic bug in participant filtering test - Fix test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard - Test was creating modified config but not using it in generate_inputs call - This caused intermittent test failures due to non-deterministic behavior - Now uses the modified config consistently for reliable test execution --- PR_MESSAGE.md | 34 ++++++++++++++++++++++++++++++++++ tests/test_generate_inputs.py | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 PR_MESSAGE.md diff --git a/PR_MESSAGE.md b/PR_MESSAGE.md new file mode 100644 index 00000000..bdf8a964 --- /dev/null +++ b/PR_MESSAGE.md @@ -0,0 +1,34 @@ +# Fix Test Logic Bug in Participant Filtering Test + +## Summary + +This PR fixes a bug in the test `test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard` where the test was creating a modified config but not using it in the actual test call, causing intermittent test failures. + +## Problem + +The test was experiencing flaky behavior - sometimes passing, sometimes failing. Investigation revealed that the test was: + +1. Creating a `config` from the dataset using `create_snakebids_config(rooted)` +2. Modifying this config by adding subject filters to each component +3. But then calling `generate_inputs()` with the original `create_snakebids_config(rooted)` instead of the modified `config` + +This meant the subject filters were never actually applied during testing, making the test's behavior non-deterministic and dependent on the randomly generated test data from Hypothesis. + +## Solution + +Fixed the test to use the modified `config` variable in the `generate_inputs()` call instead of recreating the config. This ensures that the subject filters are properly applied during the test, making the test behavior consistent and deterministic. + +## Changes + +- Modified `test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard` in `snakebids/tests/test_generate_inputs.py` +- Changed the `generate_inputs()` call to use the modified `config` instead of `create_snakebids_config(rooted)` + +## Testing + +- The previously flaky test now passes consistently +- All existing tests continue to pass +- Pre-commit hooks (ruff, pyright, codespell) all pass + +## Impact + +This fix ensures reliable test execution and eliminates a source of CI flakiness. The test now properly validates the intended behavior: that participant label filtering doesn't affect components when subject entities have explicit filters applied. diff --git a/tests/test_generate_inputs.py b/tests/test_generate_inputs.py index c72b2fef..529bb6a3 100644 --- a/tests/test_generate_inputs.py +++ b/tests/test_generate_inputs.py @@ -1836,7 +1836,7 @@ def test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard( comp["filters"]["subject"] = subject reindexed = generate_inputs( root, - create_snakebids_config(rooted), + config, **self.get_filter_params(mode, participant_filter), ) assert reindexed == rooted From cf7473ecf4ed46632d0ad434d84c6c734df48584 Mon Sep 17 00:00:00 2001 From: Enning Yang Date: Wed, 27 Aug 2025 12:16:48 -0400 Subject: [PATCH 2/6] rm tmp files --- PR_MESSAGE.md | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 PR_MESSAGE.md diff --git a/PR_MESSAGE.md b/PR_MESSAGE.md deleted file mode 100644 index bdf8a964..00000000 --- a/PR_MESSAGE.md +++ /dev/null @@ -1,34 +0,0 @@ -# Fix Test Logic Bug in Participant Filtering Test - -## Summary - -This PR fixes a bug in the test `test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard` where the test was creating a modified config but not using it in the actual test call, causing intermittent test failures. - -## Problem - -The test was experiencing flaky behavior - sometimes passing, sometimes failing. Investigation revealed that the test was: - -1. Creating a `config` from the dataset using `create_snakebids_config(rooted)` -2. Modifying this config by adding subject filters to each component -3. But then calling `generate_inputs()` with the original `create_snakebids_config(rooted)` instead of the modified `config` - -This meant the subject filters were never actually applied during testing, making the test's behavior non-deterministic and dependent on the randomly generated test data from Hypothesis. - -## Solution - -Fixed the test to use the modified `config` variable in the `generate_inputs()` call instead of recreating the config. This ensures that the subject filters are properly applied during the test, making the test behavior consistent and deterministic. - -## Changes - -- Modified `test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard` in `snakebids/tests/test_generate_inputs.py` -- Changed the `generate_inputs()` call to use the modified `config` instead of `create_snakebids_config(rooted)` - -## Testing - -- The previously flaky test now passes consistently -- All existing tests continue to pass -- Pre-commit hooks (ruff, pyright, codespell) all pass - -## Impact - -This fix ensures reliable test execution and eliminates a source of CI flakiness. The test now properly validates the intended behavior: that participant label filtering doesn't affect components when subject entities have explicit filters applied. From 7c7ff571ffd02874fea444baac00da6c8c0aa83f Mon Sep 17 00:00:00 2001 From: Peter Van Dyken Date: Thu, 13 Nov 2025 18:07:56 -0500 Subject: [PATCH 3/6] Return empty list when PostFilter exclusions empty Previously returned a regex with a negative look-ahead on an empty string. This would match and exclude everything. --- src/snakebids/core/_querying.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/snakebids/core/_querying.py b/src/snakebids/core/_querying.py index e491edb4..32ab2988 100644 --- a/src/snakebids/core/_querying.py +++ b/src/snakebids/core/_querying.py @@ -63,13 +63,16 @@ def add_filter( if exclusions is not None: self.exclusions[key] = self._format_exclusions(exclusions) - def _format_exclusions(self, exclusions: Iterable[str] | str): + def _format_exclusions(self, exclusions: Iterable[str] | str) -> list[str]: # if multiple items to exclude, combine with with item1|item2|... + hit = None exclude_string = "|".join( - re.escape(label) for label in itx.always_iterable(exclusions) + (hit := re.escape(label)) for label in itx.always_iterable(exclusions) ) + if hit is None: + return [] # regex to exclude subjects - return [f"^((?!({exclude_string})$).*)$"] + return [f"(?!({exclude_string})$)"] @attrs.define(slots=False) From 4f292d7c8c058f4c2bd72c6c32e7d45d97fd84b3 Mon Sep 17 00:00:00 2001 From: Peter Van Dyken Date: Thu, 13 Nov 2025 18:10:23 -0500 Subject: [PATCH 4/6] Add unit test suite for _querying Previously tested only by integration type tests in test_generate_inputs. --- src/snakebids/core/_querying.py | 16 +- tests/test_querying.py | 510 ++++++++++++++++++++++++++++++++ 2 files changed, 523 insertions(+), 3 deletions(-) create mode 100644 tests/test_querying.py diff --git a/src/snakebids/core/_querying.py b/src/snakebids/core/_querying.py index 32ab2988..b91ae0b6 100644 --- a/src/snakebids/core/_querying.py +++ b/src/snakebids/core/_querying.py @@ -17,6 +17,8 @@ CompiledFilter: TypeAlias = "Mapping[str, Sequence[str | Query]]" +_VALID_FILTER_METHODS = frozenset(("get", "match", "search")) + class PostFilter: """Filters to apply after indexing, typically derived from the CLI. @@ -28,6 +30,13 @@ def __init__(self): self.inclusions: dict[str, Sequence[str] | str] = {} self.exclusions: dict[str, Sequence[str] | str] = {} + def __eq__(self, other: object): + if not isinstance(other, self.__class__): + return False + return ( + self.inclusions == other.inclusions and self.exclusions == other.exclusions + ) + def add_filter( self, key: str, @@ -97,7 +106,7 @@ class UnifiedFilter: @classmethod def from_filter_dict( cls, - filters: Mapping[str, str | bool | Sequence[str | bool]], + filters: FilterMap, postfilter: PostFilter | None = None, ) -> Self: """Patch together a UnifiedFilter based on a basic filter dict. @@ -249,6 +258,7 @@ def get_matching_files( raise PybidsError(msg) from err if search is not None: + # intersection preserving order return [p for p in get if p in search] return get @@ -341,11 +351,11 @@ def wrap(filt: str, method: str): filt = raw_filter filt_type = "get" else: - if filt_type not in {"match", "search", "get"}: + if filt_type not in _VALID_FILTER_METHODS: raise _InvalidKeyError(key, raw_filter) if TYPE_CHECKING: assert isinstance(raw_filter, dict) - filt = cast("str | bool | Sequence[str | bool]", raw_filter[filt_type]) + filt = raw_filter[filt_type] # pyright: ignore[reportTypedDictNotRequiredAccess] # these two must not be simultaneously true or false if with_regex == (filt_type == "get"): diff --git a/tests/test_querying.py b/tests/test_querying.py new file mode 100644 index 00000000..f571a9fb --- /dev/null +++ b/tests/test_querying.py @@ -0,0 +1,510 @@ +from __future__ import annotations + +import itertools as it +import re +import string +from collections.abc import Sequence +from typing import cast + +import more_itertools as itx +import pytest +from bids.layout import Query +from hypothesis import assume, given +from hypothesis import strategies as st + +from snakebids.core._querying import ( + _VALID_FILTER_METHODS, + PostFilter, + UnifiedFilter, + _compile_filters, + _InvalidKeyError, + _TooFewKeysError, + _TooManyKeysError, + get_matching_files, +) +from snakebids.exceptions import PybidsError +from snakebids.types import FilterMap, InputConfig +from tests import strategies as sb_st + + +def _filters(*, min_size: int = 0): + return st.dictionaries( + sb_st.bids_entity().map(str), sb_st.bids_value(), min_size=min_size + ) + + +_mixed_filters: st.SearchStrategy[dict[str, str | list[str] | dict[str, None]]] = ( + st.dictionaries( + sb_st.bids_entity().map(str), + ( + st.text() + | st.lists(st.text(), min_size=1) + | st.sampled_from(tuple(_VALID_FILTER_METHODS)).map(lambda m: {m: None}) + ), + max_size=6, + ) +) + + +class TestPostFilter: + def test_postfilter_initialized_empty(self): + pf = PostFilter() + assert pf.inclusions == {} + assert pf.exclusions == {} + + @given(key=st.text(), val=st.text() | st.iterables(st.text())) + def test_add_filter_turns_inclusions_into_list( + self, key: str, val: list[str] | str + ): + pf = PostFilter() + pf.add_filter(key, val, None) + assert isinstance(pf.inclusions[key], list) + + @given(key=st.text(), val=st.text() | st.lists(st.text())) + def test_add_filter_copies_inclusions_without_modification( + self, key: str, val: list[str] | str + ): + pf = PostFilter() + pf.add_filter(key, val, None) + if isinstance(val, str): + assert pf.inclusions[key] == [val] + else: + assert pf.inclusions[key] == list(val) + + @given(key=st.text(), vals=st.text() | st.lists(st.text(), min_size=1)) + def test_add_filter_turns_exclusions_into_regex(self, key: str, vals: list[str]): + pf = PostFilter() + pf.add_filter(key, None, vals) + # exclusions are stored as a list containing a regex string + assert key in pf.exclusions + excl = pf.exclusions[key] + assert isinstance(excl, list) + assert len(excl) == 1 + assert isinstance(excl[0], str) + + @given(key=st.text()) + def test_empty_list_of_exclusions_gives_empty_list(self, key: str): + pf = PostFilter() + pf.add_filter(key, None, iter([])) + assert key in pf.exclusions + excl = pf.exclusions[key] + assert isinstance(excl, list) + assert len(excl) == 0 + + @given( + key=st.text(), + exclusion=st.text() | st.lists(st.text(), min_size=1), + test=st.text(), + ) + def test_exclusion_regex_excludes_correct_values( + self, key: str, exclusion: str | list[str], test: str + ): + excl_list = list(itx.always_iterable(exclusion)) + assume(test not in excl_list) + + pf = PostFilter() + pf.add_filter(key, None, exclusion) + # normalize exclusion into a list for testing + pattern = pf.exclusions[key][0] + # should not match excluded values + for ex in excl_list: + assert re.match(pattern, ex) is None + # should match other values + assert re.match(pattern, "".join(excl_list) + "_") is not None + assert re.match(pattern, "_" + "".join(excl_list)) is not None + assert re.match(pattern, test) is not None + + @given(key=st.text()) + def test_add_filter_overwrites_duplicate_keys(self, key: str): + pf = PostFilter() + pf.add_filter(key, "a", None) + assert pf.inclusions[key] == ["a"] + pf.add_filter(key, "b", None) + assert pf.inclusions[key] == ["b"] + + def test_multiple_filters_in_postfilter(self): + pf = PostFilter() + pf.add_filter("subject", "a", None) + pf.add_filter("session", None, "s1") + assert "subject" in pf.inclusions + + assert "session" in pf.exclusions + + +class TestUnifiedFilter: + @pytest.mark.parametrize("attr", ["prefilters", "get"]) + @given(comp=sb_st.input_configs(wildcards=False, paths=False)) + def test_regex_search_removed_from_prefilters(self, comp: InputConfig, attr: str): + # ensure regex_search present in component filters + filters = dict(comp.get("filters", {}), regex_search=True) + comp["filters"] = filters + uf = UnifiedFilter(component=comp, postfilters=PostFilter()) + compiled = getattr(uf, attr) + + assert "regex_search" in filters + assert "regex_search" not in compiled + # other filters should be preserved (presence for .get, exact for .prefilters) + filters.pop("regex_search") + if attr == "prefilters": + assert compiled == filters + else: + assert set(compiled) == set(filters) + + @pytest.mark.parametrize("inclusion", [True, False]) + @given(filters=_filters()) + def test_postfilters_incorporated_into_unified_filter( + self, filters: dict[str, str], inclusion: bool + ): + pf = PostFilter() + for k, v in filters.items(): + if inclusion: + pf.add_filter(k, v, None) + else: + pf.add_filter(k, None, v) + dummy = "".join(filters.keys()) + "_" + uf = UnifiedFilter( + component={ + "filters": {dummy: ""}, + "wildcards": list(filters.keys()), + }, + postfilters=pf, + ) + if inclusion: + assert set(uf.get) == set(filters) | {dummy} + else: + assert set(uf.post_exclusions) == set(filters) + + def test_empty_postfilter_inclusion_turned_into_pybids_query(self): + # An empty inclusion postfilter should be turned into Query.ANY in uf.get + pf = PostFilter() + pf.add_filter("", [], None) + + # component should list the entity as a wildcard so postfilter applies + uf = UnifiedFilter( + component={"wildcards": [""]}, + postfilters=pf, + ) + + assert uf.get[""][0] is Query.ANY + + @pytest.mark.parametrize("inclusion", [True, False]) + @given(filters=_filters(min_size=2)) + def test_postfilters_skipped_when_not_wildcards( + self, filters: dict[str, str], inclusion: bool + ): + pf = PostFilter() + for k, v in filters.items(): + if inclusion: + pf.add_filter(k, v, None) + else: + pf.add_filter(k, None, v) + filters.popitem() + # All but withdrawn entity in wildcards + uf = UnifiedFilter(component={"wildcards": list(filters)}, postfilters=pf) + assert set(filters) == set(uf.get if inclusion else uf.post_exclusions) + + @pytest.mark.parametrize("inclusion", [True, False]) + @given(filters=_filters(min_size=2)) + def test_post_exclusions_skips_entities_that_are_filters( + self, filters: dict[str, str], inclusion: bool + ): + pf = PostFilter() + for k, v in filters.items(): + if inclusion: + pf.add_filter(k, v, None) + else: + pf.add_filter(k, None, v) + wildcards = list(filters) + entity, value = filters.popitem() + uf = UnifiedFilter( + component={"wildcards": wildcards, "filters": {entity: value * 2}}, + postfilters=pf, + ) + # since entity is present in prefilters, it should be skipped + if inclusion: + assert uf.get[entity] == [value * 2] + else: + assert entity not in uf.post_exclusions + + @pytest.mark.parametrize("attr", ["get", "search"]) + @given(filters=_mixed_filters) + def test_get_and_search_return_appropriate_methods( + self, + filters: dict[str, str | list[str] | dict[str, None]], + attr: str, + ): + # Build UnifiedFilter from the mixed filters and check that `get` only + # contains 'get' (including simple string/list forms) and `search` only + # contains 'match'/'search' filters. + uf = UnifiedFilter.from_filter_dict(filters) # type: ignore[arg-type] + compiled = getattr(uf, attr) + + for key, raw in filters.items(): + method = next(iter(raw.keys())) if isinstance(raw, dict) else "get" + + if attr == "get": + expected = method == "get" + else: + expected = method in ("match", "search") + + assert (key in compiled) == expected + + @pytest.mark.parametrize("empty_list", [True, False]) + @given( + filters=st.dictionaries( + sb_st.bids_entity().map(str), st.text() | st.lists(st.text(), min_size=1) + ) + ) + def test_has_empty_prefilter_detects_empty_list( + self, filters: dict[str, str | list[str]], empty_list: bool + ): + if empty_list: + filters["".join(filters)] = [] + uf = UnifiedFilter.from_filter_dict(filters) + assert uf.has_empty_prefilter == empty_list + + @pytest.mark.parametrize("empty_list", [True, False]) + @given( + filters=st.dictionaries( + sb_st.bids_entity().map(str), st.text() | st.lists(st.text(), min_size=1) + ) + ) + def test_has_empty_postfilter_detects_empty_list( + self, filters: dict[str, str | list[str]], empty_list: bool + ): + pf = PostFilter() + for k, v in filters.items(): + pf.add_filter(k, v, None) + dummy = "".join(filters) + if empty_list: + pf.add_filter(dummy, [], None) + uf = UnifiedFilter(component={"wildcards": [*filters, dummy]}, postfilters=pf) + assert uf.has_empty_postfilter == empty_list + + # without_bools behavior tested below with parameterized cases + @given( + value=st.booleans() + | st.lists(st.booleans(), min_size=1) + | st.tuples(st.booleans(), st.text()).map(list) + ) + def test_without_bools_raises_when_bools_present(self, value: bool): + """Accessing without_bools must raise when boolean filters are present.""" + uf = UnifiedFilter.from_filter_dict({"": value}) + with pytest.raises( + ValueError, + match=("Boolean filters in items with custom paths are not supported"), + ): + _ = uf.without_bools + + @given(value=st.text() | st.lists(st.text())) + def test_without_bools_returns_when_no_bools(self, value: str | list[str]): + """without_bools should return the mapping when no boolean filters exist.""" + uf = UnifiedFilter.from_filter_dict({"": value}) + result = uf.without_bools + assert "" in result + + +class TestCompileFilters: + @pytest.mark.parametrize( + ("method_a", "method_b"), + list(it.combinations(tuple(_VALID_FILTER_METHODS), 2)), + ) + def test_multiple_filtering_methods_raises_error( + self, method_a: str, method_b: str + ): + # dict with two different filtering methods should raise a _TooManyKeysError + filt = {method_a: "_", method_b: "_"} + with pytest.raises(_TooManyKeysError): + _compile_filters({"": filt}, with_regex=False) # type: ignore[arg-type] + + def test_dict_with_no_filtering_methods_raises_error(self): + # empty dict as a filter value should raise a _TooFewKeysError + with pytest.raises(_TooFewKeysError): + _compile_filters({"": {}}, with_regex=False) # type: ignore[arg-type] + + @given(method=st.text().filter(lambda t: t not in _VALID_FILTER_METHODS)) + def test_invalid_filtering_method_raises_error(self, method: str): + # unknown filtering key should raise a _InvalidKeyError + with pytest.raises(_InvalidKeyError): + _compile_filters({"": {method: ""}}, with_regex=False) # type: ignore[arg-type] + + @given(entity=sb_st.bids_entity().map(str), val=st.text()) + def test_string_filter_wrapped_with_list(self, entity: str, val: str): + # plain string filters should be wrapped into a single-element list + res = _compile_filters({entity: val}, with_regex=False) + assert isinstance(res, dict) + assert entity in res + assert isinstance(res[entity], list) + assert res[entity][0] == val + + @pytest.mark.parametrize( + ("kind", "with_regex"), + [ + ("simple", False), + ("list", False), + ("get", False), + ("match", True), + ("search", True), + ], + ) + @pytest.mark.parametrize( + ("val", "expected"), + [ + (True, Query.ANY), + (False, Query.NONE), + ], + ) + @given(entity=sb_st.bids_entity().map(str)) + def test_boolean_filters_converted_to_pybids_queries( + self, entity: str, kind: str, with_regex: bool, val: bool, expected: object + ): + """Boolean filters (True/False) should convert to Query.ANY / Query.NONE + + Test three shapes for each boolean: simple boolean, boolean in a list, and + boolean nested under each of the valid methods ('get','match','search'). + """ + if kind == "simple": + filt = {entity: val} + elif kind == "list": + filt = {entity: [val]} + else: + filt = {entity: {kind: val}} + res = _compile_filters(filt, with_regex=with_regex) # type: ignore[arg-type] + assert res[entity][0] is expected + + @given(txt=st.text()) + def test_match_filters_converted_to_valid_regex(self, txt: str): + # match should produce anchored regex that matches exactly + res = _compile_filters({"": {"match": re.escape(txt)}}, with_regex=True) + pattern = cast(str, res[""][0]) + assert re.match(pattern, txt) is not None + assert re.match(pattern, txt + "_") is None + + @pytest.mark.parametrize("method", ["match", "search"]) + @given(txt=st.text(alphabet=string.ascii_lowercase, min_size=1)) + def test_regex_filters_case_sensitive(self, method: str, txt: str): + res = _compile_filters({"": {method: txt}}, with_regex=True) # pyright: ignore[reportArgumentType] + pattern = cast(str, res[""][0]) + assert getattr(re, method)(pattern, txt.upper()) is None + + @given(txt=st.text()) + def test_search_filters_converted_to_valid_regex(self, txt: str): + # search should produce regex that matches when substring present + res = _compile_filters({"": {"search": re.escape(txt)}}, with_regex=True) + pattern = cast(str, res[""][0]) + assert re.search(pattern, "_" + txt + "_") is not None + + @given(txt=st.text(min_size=2).filter(lambda t: t != t[::-1])) + def test_search_filters_only_match_query(self, txt: str): + # search should produce regex that matches when substring present + res = _compile_filters({"": {"search": re.escape(txt)}}, with_regex=True) + pattern = cast(str, res[""][0]) + assert re.search(pattern, txt[::-1]) is None + + @given(txt=st.text()) + def test_get_filters_stay_as_str(self, txt: str): + # explicit get method should leave the string as-is + res = _compile_filters({"": {"get": txt}}, with_regex=False) + assert cast(str, res[""][0]) == txt + + @pytest.mark.parametrize( + ("method", "with_regex"), + [ + ("get", True), + ("match", False), + ("search", False), + (None, True), + ], + ) + @given(entity=sb_st.bids_entity().map(str)) + def test_filter_skipped_when_with_regex_not_matching( + self, entity: str, method: str | None, with_regex: bool + ): + filt = {entity: ""} if method is None else {entity: {method: ""}} + + res = _compile_filters(filt, with_regex=with_regex) # type: ignore[arg-type] + + assert entity not in res + + +class _FakeBIDSLayout: + def __init__(self, get: list[str] | None = None, search: list[str] | None = None): + self.get_called = False + self.search_called = False + self.get_return = get or [] + self.search_return = search or [] + + def get(self, regex_search: bool, **filters: Sequence[str]): + query = re.search( + filters.popitem()[1][0] if filters else "get", "searchgeterror" + ) + assert query + query = query[0] + if query == "error": + raise AttributeError + assert regex_search == (query == "search") + if query == "search": + self.search_called = True + return self.search_return + assert query == "get" + self.get_called = True + return self.get_return + + +class TestGetMatchingFiles: + def _create_unified_filter(self, *, get: bool, search: bool, error: bool): + """Create a small UnifiedFilter for tests. + + Arguments are keyword-only to make call sites explicit about intent. + """ + filter_dict: FilterMap = {} + if get: + filter_dict["get"] = "error" if error else "get" + if search: + filter_dict["search"] = {"search": "error" if error else "search"} + return UnifiedFilter.from_filter_dict(filter_dict) + + @pytest.mark.parametrize( + ("get", "search"), [(False, False), (True, True), (True, False), (False, True)] + ) + def test_get_and_search_called_correctly(self, get: bool, search: bool): + """Test appropriate call of BIDSLayout.get. + + When called with filters from UnifiedFilter.get, regex_search should be False. A + call with regex_search == False should be made every time. + + When called with filters from UnifiedFilter.search, regex_search should be True. + A call with regex_search == True should only be made when UnifiedFilter.search + returns a Filter. + """ + layout = _FakeBIDSLayout() + filters = self._create_unified_filter(get=get, search=search, error=False) + get_matching_files(layout, filters) # pyright: ignore[reportArgumentType] + assert layout.get_called + assert layout.search_called == search + + @pytest.mark.parametrize(("get", "search"), [(True, False), (False, True)]) + def test_get_and_search_catches_attribute_error(self, get: bool, search: bool): + layout = _FakeBIDSLayout() + filters = self._create_unified_filter(get=get, search=search, error=True) + with pytest.raises( + PybidsError, + match="Pybids has encountered a problem that Snakebids cannot handle. ", + ): + get_matching_files(layout, filters) # pyright: ignore[reportArgumentType] + + @given(get=st.sets(st.text()), search=st.sets(st.text())) + def test_returns_union_of_get_and_search(self, get: set[str], search: set[str]): + files = get_matching_files( + _FakeBIDSLayout(get, search), # pyright: ignore[reportArgumentType] + self._create_unified_filter(get=True, search=True, error=False), + ) + assert set(files) == get & search + + def test_preserves_order_of_files(self): + get = list(map(str, range(1000))) + search = list(map(str, range(1000)[::2])) + layout = _FakeBIDSLayout(get, search) + filters = self._create_unified_filter(get=True, search=True, error=False) + files = get_matching_files(layout, filters) # pyright: ignore[reportArgumentType] + assert files == search From 23d46765917b8cd8f2d235c8dfe5eeaadc81d6e2 Mon Sep 17 00:00:00 2001 From: Peter Van Dyken Date: Thu, 13 Nov 2025 18:11:43 -0500 Subject: [PATCH 5/6] Make participant-label tests more efficient Tests involve pybids indexing, and previously used hypothesis to generate examples. Remove hypothesis from all cases and parametrize with focused cases. Test logic between generate_inputs and get_matching_files with patches --- tests/test_generate_inputs.py | 255 ++++++++++++---------------------- 1 file changed, 85 insertions(+), 170 deletions(-) diff --git a/tests/test_generate_inputs.py b/tests/test_generate_inputs.py index 529bb6a3..2a5c577f 100644 --- a/tests/test_generate_inputs.py +++ b/tests/test_generate_inputs.py @@ -5,6 +5,7 @@ import functools as ft import itertools as it import keyword +import logging import os import re import shutil @@ -14,12 +15,12 @@ from collections import defaultdict from collections.abc import Iterable from pathlib import Path, PosixPath -from typing import Any, Literal, NamedTuple, TypedDict, TypeVar +from typing import Any, NamedTuple, TypeVar import attrs import more_itertools as itx import pytest -from bids import BIDSLayout +from bids.layout import BIDSLayout from hypothesis import HealthCheck, assume, example, given, settings from hypothesis import strategies as st from pytest_mock import MockerFixture @@ -40,7 +41,7 @@ from snakebids.exceptions import ConfigError, PybidsError, RunError from snakebids.paths._presets import bids from snakebids.snakemake_compat import expand as sb_expand -from snakebids.types import InputsConfig +from snakebids.types import InputConfig, InputsConfig from snakebids.utils.containers import MultiSelectDict from snakebids.utils.utils import DEPRECATION_FLAG, BidsEntity, BidsParseError from tests import strategies as sb_st @@ -1687,197 +1688,111 @@ def test_generate_inputs(dataset: BidsDataset, tmpdir: Path): assert reindexed.layout is not None -@st.composite -def dataset_with_subject(draw: st.DrawFn): - entities = draw(sb_st.bids_entity_lists(blacklist_entities=["subject"])) - entities += ["subject"] - return BidsDataset.from_iterable( - [ - draw( - sb_st.bids_components( - whitelist_entities=entities, - min_entities=len(entities), - max_entities=len(entities), - restrict_patterns=True, - unique=True, - ) - ) - ] - ) +class _FakeBIDSLayout: + def __init__(self, *args: Any, **kwargs: Any): + pass + def get(self, regex_search: bool, **kwargs: Any) -> list[str]: + return [] -class TestParticipantFiltering: - MODE = Literal["include", "exclude"] - - def get_filter_params(self, mode: MODE, filters: list[str] | str): - class FiltParams(TypedDict, total=False): - participant_label: list[str] | str - exclude_participant_label: list[str] | str + def __eq__(self, other: object): + return isinstance(other, self.__class__) - if mode == "include": - return FiltParams({"participant_label": filters}) - if mode == "exclude": - return FiltParams({"exclude_participant_label": filters}) - msg = f"Invalid mode specification: {mode}" - raise ValueError(msg) - @given( - data=st.data(), - dataset=dataset_with_subject(), - ) - @settings( - deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture] +class TestParticipantFiltering: + @pytest.mark.parametrize( + ("include", "exclude"), + [ + ("01", None), + (None, "01"), + ("01", "02"), + (["01", "02"], "02"), + (None, ["02", "03"]), + (["01", "02"], ["02", "03"]), + ], ) def test_exclude_and_participant_label_filter_correctly( - self, data: st.DataObject, dataset: BidsDataset, tmpdir: Path - ): - root = tempfile.mkdtemp(dir=tmpdir) - rooted = BidsDataset.from_iterable( - attrs.evolve(comp, path=os.path.join(root, comp.path)) - for comp in dataset.values() - ) - sampler = st.sampled_from(itx.first(rooted.values()).entities["subject"]) - excluded = data.draw(st.lists(sampler, unique=True) | sampler | st.none()) - included = data.draw(st.lists(sampler, unique=True) | sampler | st.none()) - reindexed = reindex_dataset( - root, rooted, exclude_participant_label=excluded, participant_label=included - ) - reindexed_subjects = set(itx.first(reindexed.values()).entities["subject"]) - expected_subjects = set(itx.first(rooted.values()).entities["subject"]) - if included is not None: - expected_subjects &= set(itx.always_iterable(included)) - if excluded is not None: - expected_subjects -= set(itx.always_iterable(excluded)) - - assert reindexed_subjects == expected_subjects - - @pytest.mark.parametrize("mode", ["include", "exclude"]) - @given( - dataset=sb_st.datasets_one_comp(blacklist_entities=["subject"], unique=True), - participant_filter=st.lists(st.text(min_size=1)) | st.text(min_size=1), - ) - @settings( - deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture] - ) - def test_participant_label_doesnt_filter_comps_without_subject( self, - mode: MODE, - dataset: BidsDataset, - participant_filter: list[str] | str, + include: str | list[str] | None, + exclude: str | list[str] | None, tmpdir: Path, ): - root = tempfile.mkdtemp(dir=tmpdir) - rooted = BidsDataset.from_iterable( - attrs.evolve(comp, path=os.path.join(root, comp.path)) - for comp in dataset.values() - ) - reindexed = reindex_dataset( - root, rooted, **self.get_filter_params(mode, participant_filter) + zip_lists = { + "subject": ["01", "02", "03"], + "acq": ["x", "y", "z"], + } + component = BidsComponent( + name="0", path=str(tmpdir / get_bids_path(zip_lists)), zip_lists=zip_lists ) - assert reindexed == rooted + dataset = BidsDataset.from_iterable([component]) - @pytest.mark.parametrize("mode", ["include", "exclude"]) - @given( - dataset=dataset_with_subject(), - participant_filter=st.lists(st.text(min_size=1)) | st.text(min_size=1), - ) - @settings( - deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture] - ) - def test_participant_label_doesnt_filter_comps_when_subject_has_filter( - self, - mode: MODE, - dataset: BidsDataset, - participant_filter: list[str] | str, - tmpdir: Path, - ): - root = tempfile.mkdtemp(dir=tmpdir) - rooted = BidsDataset.from_iterable( - attrs.evolve(comp, path=os.path.join(root, comp.path)) - for comp in dataset.values() - ) - create_dataset(Path("/"), rooted) - reindexed = generate_inputs( - root, - create_snakebids_config(rooted), - **self.get_filter_params(mode, participant_filter), + reindexed = reindex_dataset( + str(tmpdir), + dataset, + participant_label=include, + exclude_participant_label=exclude, ) - assert reindexed == rooted - - @pytest.mark.parametrize("mode", ["include", "exclude"]) - @given( - dataset=dataset_with_subject(), - participant_filter=st.lists(st.text(min_size=1)) | st.text(min_size=1), - data=st.data(), - ) - @settings( - deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture] - ) - def test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard( + expected = set(component.entities["subject"]) + expected -= set(itx.always_iterable(exclude)) + if include is not None: + expected &= set(itx.always_iterable(include)) + assert set(itx.first(reindexed.values()).entities["subject"]) == expected + + @pytest.mark.parametrize("include", ["include", ["include"], None]) + @pytest.mark.parametrize("exclude", ["exclude", ["exclude"], None]) + @pytest.mark.parametrize("filters", ["filters", None]) + def test_participant_flags_and_filters_merged( self, - mode: MODE, - dataset: BidsDataset, - participant_filter: list[str] | str, - data: st.DataObject, - tmpdir: Path, + include: str | list[str] | None, + exclude: str | list[str] | None, + filters: str | None, + mocker: MockerFixture, + caplog: pytest.LogCaptureFixture, ): - root = tempfile.mkdtemp(dir=tmpdir) - rooted = BidsDataset.from_iterable( - attrs.evolve(comp, path=os.path.join(root, comp.path)) - for comp in dataset.values() - ) - subject = data.draw( - st.sampled_from(itx.first(rooted.values()).entities["subject"]) - ) - create_dataset(Path("/"), rooted) - config = create_snakebids_config(rooted) - for comp in config.values(): - comp["filters"] = dict(comp.get("filters", {})) - comp["filters"]["subject"] = subject - reindexed = generate_inputs( - root, - config, - **self.get_filter_params(mode, participant_filter), - ) - assert reindexed == rooted + mocker.patch.object(input_generation, "BIDSLayout", _FakeBIDSLayout) + patch = mocker.patch.object(input_generation, "get_matching_files") + component: InputConfig = {"filters": {"subject": filters}} if filters else {} + with caplog.at_level(logging.ERROR, "snakebids.core.input_generation"): + generate_inputs( + "", + {"": component}, + participant_label=include, + exclude_participant_label=exclude, + ) + pf = PostFilter() + pf.add_filter("subject", include, exclude) + uf = UnifiedFilter(component, pf) + patch.assert_called_once_with(_FakeBIDSLayout(), uf) - @given( - data=st.data(), - dataset=dataset_with_subject().filter( - lambda ds: set(itx.first(ds.values()).wildcards) != {"subject", "extension"} - ), - ) - @settings( - deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture] - ) def test_exclude_participant_does_not_make_all_other_filters_regex( - self, data: st.DataObject, dataset: BidsDataset, tmpdir: Path + self, tmpdir: Path ): - root = tempfile.mkdtemp(dir=tmpdir) - rooted = BidsDataset.from_iterable( - attrs.evolve(comp, path=os.path.join(root, comp.path)) - for comp in dataset.values() + # Construct a small deterministic BIDS dataset with a subject entity and + # at least one other entity so we can mutate that other entity's values. + # Single-component dataset with a valid BIDS-style path and entities. + mut_entity = "acq" + zip_lists = { + "subject": ["01", "02"], + mut_entity: ["x", "y"], + } + component = BidsComponent( + name="0", path=str(tmpdir / get_bids_path(zip_lists)), zip_lists=zip_lists ) + dataset = BidsDataset.from_iterable([component]) # Create an extra set of paths by modifying one of the existing components to - # put foo after a set of entity values. If that filter gets changed to a regex, - # all of the suffixed decoys will get picked up by pybids - ziplist = dict(itx.first(rooted.values()).zip_lists) - mut_entity = itx.first( - filter(lambda e: e not in {"subject", "extension"}, ziplist) - ) - ziplist[mut_entity] = ["foo" + v for v in ziplist[mut_entity]] - for path in sb_expand(itx.first(rooted.values()).path, zip, **ziplist): + # put 'foo' after a set of entity values. If that filter gets changed to a + # regex, all of the suffixed decoys will get picked up by pybids. + zip_lists[mut_entity] = ["foo" + v for v in zip_lists[mut_entity]] + for path in sb_expand(itx.first(dataset.values()).path, zip, **zip_lists): p = Path(path) p.parent.mkdir(parents=True, exist_ok=True) p.touch() - sampler = st.sampled_from(itx.first(rooted.values()).entities["subject"]) - label = data.draw(st.lists(sampler, unique=True) | sampler) - reindexed = reindex_dataset(root, rooted, exclude_participant_label=label) - reindexed_subjects = set(itx.first(reindexed.values()).entities["subject"]) - original_subjects = set(itx.first(rooted.values()).entities["subject"]) - assert reindexed_subjects == original_subjects - set(itx.always_iterable(label)) + reindexed = reindex_dataset( + str(tmpdir), dataset, exclude_participant_label="01" + ) + assert itx.first(reindexed.values()) == component.filter(subject="02") # The content of the dataset is irrelevant to this test, so one example suffices From aa1d53c4271ec8856c2238cc34b4b2a107692f98 Mon Sep 17 00:00:00 2001 From: Peter Van Dyken Date: Thu, 13 Nov 2025 18:20:43 -0500 Subject: [PATCH 6/6] Update paths filter on tests action --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 26cea428..a443bcd8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,8 @@ on: - '*' - '!push-action/*' paths: - - snakebids/** + - src/** + - tests/** - scripts/** jobs: