From 3933155c51e3e7d91724be93a23d92d302b168d0 Mon Sep 17 00:00:00 2001
From: Enning Yang <enning2000@gmail.com>
Date: Wed, 27 Aug 2025 12:15:08 -0400
Subject: [PATCH 1/6] Fix test logic bug in participant filtering test

- Fix test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard
- Test was creating modified config but not using it in generate_inputs call
- This caused intermittent test failures due to non-deterministic behavior
- Now uses the modified config consistently for reliable test execution
---
 PR_MESSAGE.md                 | 34 ++++++++++++++++++++++++++++++++++
 tests/test_generate_inputs.py |  2 +-
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 PR_MESSAGE.md

diff --git a/PR_MESSAGE.md b/PR_MESSAGE.md
new file mode 100644
index 00000000..bdf8a964
--- /dev/null
+++ b/PR_MESSAGE.md
@@ -0,0 +1,34 @@
+# Fix Test Logic Bug in Participant Filtering Test
+
+## Summary
+
+This PR fixes a bug in the test `test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard` where the test was creating a modified config but not using it in the actual test call, causing intermittent test failures.
+
+## Problem
+
+The test was experiencing flaky behavior - sometimes passing, sometimes failing. Investigation revealed that the test was:
+
+1. Creating a `config` from the dataset using `create_snakebids_config(rooted)`
+2. Modifying this config by adding subject filters to each component
+3. But then calling `generate_inputs()` with the original `create_snakebids_config(rooted)` instead of the modified `config`
+
+This meant the subject filters were never actually applied during testing, making the test's behavior non-deterministic and dependent on the randomly generated test data from Hypothesis.
+
+## Solution
+
+Fixed the test to use the modified `config` variable in the `generate_inputs()` call instead of recreating the config. This ensures that the subject filters are properly applied during the test, making the test behavior consistent and deterministic.
+
+## Changes
+
+- Modified `test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard` in `snakebids/tests/test_generate_inputs.py`
+- Changed the `generate_inputs()` call to use the modified `config` instead of `create_snakebids_config(rooted)`
+
+## Testing
+
+- The previously flaky test now passes consistently
+- All existing tests continue to pass
+- Pre-commit hooks (ruff, pyright, codespell) all pass
+
+## Impact
+
+This fix ensures reliable test execution and eliminates a source of CI flakiness. The test now properly validates the intended behavior: that participant label filtering doesn't affect components when subject entities have explicit filters applied.
diff --git a/tests/test_generate_inputs.py b/tests/test_generate_inputs.py
index c72b2fef..529bb6a3 100644
--- a/tests/test_generate_inputs.py
+++ b/tests/test_generate_inputs.py
@@ -1836,7 +1836,7 @@ def test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard(
             comp["filters"]["subject"] = subject
         reindexed = generate_inputs(
             root,
-            create_snakebids_config(rooted),
+            config,
             **self.get_filter_params(mode, participant_filter),
         )
         assert reindexed == rooted

From cf7473ecf4ed46632d0ad434d84c6c734df48584 Mon Sep 17 00:00:00 2001
From: Enning Yang <enning2000@gmail.com>
Date: Wed, 27 Aug 2025 12:16:48 -0400
Subject: [PATCH 2/6] rm tmp files

---
 PR_MESSAGE.md | 34 ----------------------------------
 1 file changed, 34 deletions(-)
 delete mode 100644 PR_MESSAGE.md

diff --git a/PR_MESSAGE.md b/PR_MESSAGE.md
deleted file mode 100644
index bdf8a964..00000000
--- a/PR_MESSAGE.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Fix Test Logic Bug in Participant Filtering Test
-
-## Summary
-
-This PR fixes a bug in the test `test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard` where the test was creating a modified config but not using it in the actual test call, causing intermittent test failures.
-
-## Problem
-
-The test was experiencing flaky behavior - sometimes passing, sometimes failing. Investigation revealed that the test was:
-
-1. Creating a `config` from the dataset using `create_snakebids_config(rooted)`
-2. Modifying this config by adding subject filters to each component
-3. But then calling `generate_inputs()` with the original `create_snakebids_config(rooted)` instead of the modified `config`
-
-This meant the subject filters were never actually applied during testing, making the test's behavior non-deterministic and dependent on the randomly generated test data from Hypothesis.
-
-## Solution
-
-Fixed the test to use the modified `config` variable in the `generate_inputs()` call instead of recreating the config. This ensures that the subject filters are properly applied during the test, making the test behavior consistent and deterministic.
-
-## Changes
-
-- Modified `test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard` in `snakebids/tests/test_generate_inputs.py`
-- Changed the `generate_inputs()` call to use the modified `config` instead of `create_snakebids_config(rooted)`
-
-## Testing
-
-- The previously flaky test now passes consistently
-- All existing tests continue to pass
-- Pre-commit hooks (ruff, pyright, codespell) all pass
-
-## Impact
-
-This fix ensures reliable test execution and eliminates a source of CI flakiness. The test now properly validates the intended behavior: that participant label filtering doesn't affect components when subject entities have explicit filters applied.

From 7c7ff571ffd02874fea444baac00da6c8c0aa83f Mon Sep 17 00:00:00 2001
From: Peter Van Dyken <pvandyk2@uwo.ca>
Date: Thu, 13 Nov 2025 18:07:56 -0500
Subject: [PATCH 3/6] Return empty list when PostFilter exclusions empty

Previously returned a regex with a negative look-ahead on an empty
string. This would match and exclude everything.
---
 src/snakebids/core/_querying.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/snakebids/core/_querying.py b/src/snakebids/core/_querying.py
index e491edb4..32ab2988 100644
--- a/src/snakebids/core/_querying.py
+++ b/src/snakebids/core/_querying.py
@@ -63,13 +63,16 @@ def add_filter(
         if exclusions is not None:
             self.exclusions[key] = self._format_exclusions(exclusions)
 
-    def _format_exclusions(self, exclusions: Iterable[str] | str):
+    def _format_exclusions(self, exclusions: Iterable[str] | str) -> list[str]:
         # if multiple items to exclude, combine with with item1|item2|...
+        hit = None
         exclude_string = "|".join(
-            re.escape(label) for label in itx.always_iterable(exclusions)
+            (hit := re.escape(label)) for label in itx.always_iterable(exclusions)
         )
+        if hit is None:
+            return []
         # regex to exclude subjects
-        return [f"^((?!({exclude_string})$).*)$"]
+        return [f"(?!({exclude_string})$)"]
 
 
 @attrs.define(slots=False)

From 4f292d7c8c058f4c2bd72c6c32e7d45d97fd84b3 Mon Sep 17 00:00:00 2001
From: Peter Van Dyken <pvandyk2@uwo.ca>
Date: Thu, 13 Nov 2025 18:10:23 -0500
Subject: [PATCH 4/6] Add unit test suite for _querying

Previously tested only by integration type tests in
test_generate_inputs.
---
 src/snakebids/core/_querying.py |  16 +-
 tests/test_querying.py          | 510 ++++++++++++++++++++++++++++++++
 2 files changed, 523 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_querying.py

diff --git a/src/snakebids/core/_querying.py b/src/snakebids/core/_querying.py
index 32ab2988..b91ae0b6 100644
--- a/src/snakebids/core/_querying.py
+++ b/src/snakebids/core/_querying.py
@@ -17,6 +17,8 @@
 
 CompiledFilter: TypeAlias = "Mapping[str, Sequence[str | Query]]"
 
+_VALID_FILTER_METHODS = frozenset(("get", "match", "search"))
+
 
 class PostFilter:
     """Filters to apply after indexing, typically derived from the CLI.
@@ -28,6 +30,13 @@ def __init__(self):
         self.inclusions: dict[str, Sequence[str] | str] = {}
         self.exclusions: dict[str, Sequence[str] | str] = {}
 
+    def __eq__(self, other: object):
+        if not isinstance(other, self.__class__):
+            return False
+        return (
+            self.inclusions == other.inclusions and self.exclusions == other.exclusions
+        )
+
     def add_filter(
         self,
         key: str,
@@ -97,7 +106,7 @@ class UnifiedFilter:
     @classmethod
     def from_filter_dict(
         cls,
-        filters: Mapping[str, str | bool | Sequence[str | bool]],
+        filters: FilterMap,
         postfilter: PostFilter | None = None,
     ) -> Self:
         """Patch together a UnifiedFilter based on a basic filter dict.
@@ -249,6 +258,7 @@ def get_matching_files(
         raise PybidsError(msg) from err
 
     if search is not None:
+        # intersection preserving order
         return [p for p in get if p in search]
     return get
 
@@ -341,11 +351,11 @@ def wrap(filt: str, method: str):
             filt = raw_filter
             filt_type = "get"
         else:
-            if filt_type not in {"match", "search", "get"}:
+            if filt_type not in _VALID_FILTER_METHODS:
                 raise _InvalidKeyError(key, raw_filter)
             if TYPE_CHECKING:
                 assert isinstance(raw_filter, dict)
-            filt = cast("str | bool | Sequence[str | bool]", raw_filter[filt_type])
+            filt = raw_filter[filt_type]  # pyright: ignore[reportTypedDictNotRequiredAccess]
 
         # these two must not be simultaneously true or false
         if with_regex == (filt_type == "get"):
diff --git a/tests/test_querying.py b/tests/test_querying.py
new file mode 100644
index 00000000..f571a9fb
--- /dev/null
+++ b/tests/test_querying.py
@@ -0,0 +1,510 @@
+from __future__ import annotations
+
+import itertools as it
+import re
+import string
+from collections.abc import Sequence
+from typing import cast
+
+import more_itertools as itx
+import pytest
+from bids.layout import Query
+from hypothesis import assume, given
+from hypothesis import strategies as st
+
+from snakebids.core._querying import (
+    _VALID_FILTER_METHODS,
+    PostFilter,
+    UnifiedFilter,
+    _compile_filters,
+    _InvalidKeyError,
+    _TooFewKeysError,
+    _TooManyKeysError,
+    get_matching_files,
+)
+from snakebids.exceptions import PybidsError
+from snakebids.types import FilterMap, InputConfig
+from tests import strategies as sb_st
+
+
+def _filters(*, min_size: int = 0):
+    return st.dictionaries(
+        sb_st.bids_entity().map(str), sb_st.bids_value(), min_size=min_size
+    )
+
+
+_mixed_filters: st.SearchStrategy[dict[str, str | list[str] | dict[str, None]]] = (
+    st.dictionaries(
+        sb_st.bids_entity().map(str),
+        (
+            st.text()
+            | st.lists(st.text(), min_size=1)
+            | st.sampled_from(tuple(_VALID_FILTER_METHODS)).map(lambda m: {m: None})
+        ),
+        max_size=6,
+    )
+)
+
+
+class TestPostFilter:
+    def test_postfilter_initialized_empty(self):
+        pf = PostFilter()
+        assert pf.inclusions == {}
+        assert pf.exclusions == {}
+
+    @given(key=st.text(), val=st.text() | st.iterables(st.text()))
+    def test_add_filter_turns_inclusions_into_list(
+        self, key: str, val: list[str] | str
+    ):
+        pf = PostFilter()
+        pf.add_filter(key, val, None)
+        assert isinstance(pf.inclusions[key], list)
+
+    @given(key=st.text(), val=st.text() | st.lists(st.text()))
+    def test_add_filter_copies_inclusions_without_modification(
+        self, key: str, val: list[str] | str
+    ):
+        pf = PostFilter()
+        pf.add_filter(key, val, None)
+        if isinstance(val, str):
+            assert pf.inclusions[key] == [val]
+        else:
+            assert pf.inclusions[key] == list(val)
+
+    @given(key=st.text(), vals=st.text() | st.lists(st.text(), min_size=1))
+    def test_add_filter_turns_exclusions_into_regex(self, key: str, vals: list[str]):
+        pf = PostFilter()
+        pf.add_filter(key, None, vals)
+        # exclusions are stored as a list containing a regex string
+        assert key in pf.exclusions
+        excl = pf.exclusions[key]
+        assert isinstance(excl, list)
+        assert len(excl) == 1
+        assert isinstance(excl[0], str)
+
+    @given(key=st.text())
+    def test_empty_list_of_exclusions_gives_empty_list(self, key: str):
+        pf = PostFilter()
+        pf.add_filter(key, None, iter([]))
+        assert key in pf.exclusions
+        excl = pf.exclusions[key]
+        assert isinstance(excl, list)
+        assert len(excl) == 0
+
+    @given(
+        key=st.text(),
+        exclusion=st.text() | st.lists(st.text(), min_size=1),
+        test=st.text(),
+    )
+    def test_exclusion_regex_excludes_correct_values(
+        self, key: str, exclusion: str | list[str], test: str
+    ):
+        excl_list = list(itx.always_iterable(exclusion))
+        assume(test not in excl_list)
+
+        pf = PostFilter()
+        pf.add_filter(key, None, exclusion)
+        # normalize exclusion into a list for testing
+        pattern = pf.exclusions[key][0]
+        # should not match excluded values
+        for ex in excl_list:
+            assert re.match(pattern, ex) is None
+        # should match other values
+        assert re.match(pattern, "".join(excl_list) + "_") is not None
+        assert re.match(pattern, "_" + "".join(excl_list)) is not None
+        assert re.match(pattern, test) is not None
+
+    @given(key=st.text())
+    def test_add_filter_overwrites_duplicate_keys(self, key: str):
+        pf = PostFilter()
+        pf.add_filter(key, "a", None)
+        assert pf.inclusions[key] == ["a"]
+        pf.add_filter(key, "b", None)
+        assert pf.inclusions[key] == ["b"]
+
+    def test_multiple_filters_in_postfilter(self):
+        pf = PostFilter()
+        pf.add_filter("subject", "a", None)
+        pf.add_filter("session", None, "s1")
+        assert "subject" in pf.inclusions
+
+        assert "session" in pf.exclusions
+
+
+class TestUnifiedFilter:
+    @pytest.mark.parametrize("attr", ["prefilters", "get"])
+    @given(comp=sb_st.input_configs(wildcards=False, paths=False))
+    def test_regex_search_removed_from_prefilters(self, comp: InputConfig, attr: str):
+        # ensure regex_search present in component filters
+        filters = dict(comp.get("filters", {}), regex_search=True)
+        comp["filters"] = filters
+        uf = UnifiedFilter(component=comp, postfilters=PostFilter())
+        compiled = getattr(uf, attr)
+
+        assert "regex_search" in filters
+        assert "regex_search" not in compiled
+        # other filters should be preserved (presence for .get, exact for .prefilters)
+        filters.pop("regex_search")
+        if attr == "prefilters":
+            assert compiled == filters
+        else:
+            assert set(compiled) == set(filters)
+
+    @pytest.mark.parametrize("inclusion", [True, False])
+    @given(filters=_filters())
+    def test_postfilters_incorporated_into_unified_filter(
+        self, filters: dict[str, str], inclusion: bool
+    ):
+        pf = PostFilter()
+        for k, v in filters.items():
+            if inclusion:
+                pf.add_filter(k, v, None)
+            else:
+                pf.add_filter(k, None, v)
+        dummy = "".join(filters.keys()) + "_"
+        uf = UnifiedFilter(
+            component={
+                "filters": {dummy: ""},
+                "wildcards": list(filters.keys()),
+            },
+            postfilters=pf,
+        )
+        if inclusion:
+            assert set(uf.get) == set(filters) | {dummy}
+        else:
+            assert set(uf.post_exclusions) == set(filters)
+
+    def test_empty_postfilter_inclusion_turned_into_pybids_query(self):
+        # An empty inclusion postfilter should be turned into Query.ANY in uf.get
+        pf = PostFilter()
+        pf.add_filter("", [], None)
+
+        # component should list the entity as a wildcard so postfilter applies
+        uf = UnifiedFilter(
+            component={"wildcards": [""]},
+            postfilters=pf,
+        )
+
+        assert uf.get[""][0] is Query.ANY
+
+    @pytest.mark.parametrize("inclusion", [True, False])
+    @given(filters=_filters(min_size=2))
+    def test_postfilters_skipped_when_not_wildcards(
+        self, filters: dict[str, str], inclusion: bool
+    ):
+        pf = PostFilter()
+        for k, v in filters.items():
+            if inclusion:
+                pf.add_filter(k, v, None)
+            else:
+                pf.add_filter(k, None, v)
+        filters.popitem()
+        # All but withdrawn entity in wildcards
+        uf = UnifiedFilter(component={"wildcards": list(filters)}, postfilters=pf)
+        assert set(filters) == set(uf.get if inclusion else uf.post_exclusions)
+
+    @pytest.mark.parametrize("inclusion", [True, False])
+    @given(filters=_filters(min_size=2))
+    def test_post_exclusions_skips_entities_that_are_filters(
+        self, filters: dict[str, str], inclusion: bool
+    ):
+        pf = PostFilter()
+        for k, v in filters.items():
+            if inclusion:
+                pf.add_filter(k, v, None)
+            else:
+                pf.add_filter(k, None, v)
+        wildcards = list(filters)
+        entity, value = filters.popitem()
+        uf = UnifiedFilter(
+            component={"wildcards": wildcards, "filters": {entity: value * 2}},
+            postfilters=pf,
+        )
+        # since entity is present in prefilters, it should be skipped
+        if inclusion:
+            assert uf.get[entity] == [value * 2]
+        else:
+            assert entity not in uf.post_exclusions
+
+    @pytest.mark.parametrize("attr", ["get", "search"])
+    @given(filters=_mixed_filters)
+    def test_get_and_search_return_appropriate_methods(
+        self,
+        filters: dict[str, str | list[str] | dict[str, None]],
+        attr: str,
+    ):
+        # Build UnifiedFilter from the mixed filters and check that `get` only
+        # contains 'get' (including simple string/list forms) and `search` only
+        # contains 'match'/'search' filters.
+        uf = UnifiedFilter.from_filter_dict(filters)  # type: ignore[arg-type]
+        compiled = getattr(uf, attr)
+
+        for key, raw in filters.items():
+            method = next(iter(raw.keys())) if isinstance(raw, dict) else "get"
+
+            if attr == "get":
+                expected = method == "get"
+            else:
+                expected = method in ("match", "search")
+
+            assert (key in compiled) == expected
+
+    @pytest.mark.parametrize("empty_list", [True, False])
+    @given(
+        filters=st.dictionaries(
+            sb_st.bids_entity().map(str), st.text() | st.lists(st.text(), min_size=1)
+        )
+    )
+    def test_has_empty_prefilter_detects_empty_list(
+        self, filters: dict[str, str | list[str]], empty_list: bool
+    ):
+        if empty_list:
+            filters["".join(filters)] = []
+        uf = UnifiedFilter.from_filter_dict(filters)
+        assert uf.has_empty_prefilter == empty_list
+
+    @pytest.mark.parametrize("empty_list", [True, False])
+    @given(
+        filters=st.dictionaries(
+            sb_st.bids_entity().map(str), st.text() | st.lists(st.text(), min_size=1)
+        )
+    )
+    def test_has_empty_postfilter_detects_empty_list(
+        self, filters: dict[str, str | list[str]], empty_list: bool
+    ):
+        pf = PostFilter()
+        for k, v in filters.items():
+            pf.add_filter(k, v, None)
+        dummy = "".join(filters)
+        if empty_list:
+            pf.add_filter(dummy, [], None)
+        uf = UnifiedFilter(component={"wildcards": [*filters, dummy]}, postfilters=pf)
+        assert uf.has_empty_postfilter == empty_list
+
+    # without_bools behavior tested below with parameterized cases
+    @given(
+        value=st.booleans()
+        | st.lists(st.booleans(), min_size=1)
+        | st.tuples(st.booleans(), st.text()).map(list)
+    )
+    def test_without_bools_raises_when_bools_present(self, value: bool):
+        """Accessing without_bools must raise when boolean filters are present."""
+        uf = UnifiedFilter.from_filter_dict({"": value})
+        with pytest.raises(
+            ValueError,
+            match=("Boolean filters in items with custom paths are not supported"),
+        ):
+            _ = uf.without_bools
+
+    @given(value=st.text() | st.lists(st.text()))
+    def test_without_bools_returns_when_no_bools(self, value: str | list[str]):
+        """without_bools should return the mapping when no boolean filters exist."""
+        uf = UnifiedFilter.from_filter_dict({"": value})
+        result = uf.without_bools
+        assert "" in result
+
+
+class TestCompileFilters:
+    @pytest.mark.parametrize(
+        ("method_a", "method_b"),
+        list(it.combinations(tuple(_VALID_FILTER_METHODS), 2)),
+    )
+    def test_multiple_filtering_methods_raises_error(
+        self, method_a: str, method_b: str
+    ):
+        # dict with two different filtering methods should raise a _TooManyKeysError
+        filt = {method_a: "_", method_b: "_"}
+        with pytest.raises(_TooManyKeysError):
+            _compile_filters({"": filt}, with_regex=False)  # type: ignore[arg-type]
+
+    def test_dict_with_no_filtering_methods_raises_error(self):
+        # empty dict as a filter value should raise a _TooFewKeysError
+        with pytest.raises(_TooFewKeysError):
+            _compile_filters({"": {}}, with_regex=False)  # type: ignore[arg-type]
+
+    @given(method=st.text().filter(lambda t: t not in _VALID_FILTER_METHODS))
+    def test_invalid_filtering_method_raises_error(self, method: str):
+        # unknown filtering key should raise a _InvalidKeyError
+        with pytest.raises(_InvalidKeyError):
+            _compile_filters({"": {method: ""}}, with_regex=False)  # type: ignore[arg-type]
+
+    @given(entity=sb_st.bids_entity().map(str), val=st.text())
+    def test_string_filter_wrapped_with_list(self, entity: str, val: str):
+        # plain string filters should be wrapped into a single-element list
+        res = _compile_filters({entity: val}, with_regex=False)
+        assert isinstance(res, dict)
+        assert entity in res
+        assert isinstance(res[entity], list)
+        assert res[entity][0] == val
+
+    @pytest.mark.parametrize(
+        ("kind", "with_regex"),
+        [
+            ("simple", False),
+            ("list", False),
+            ("get", False),
+            ("match", True),
+            ("search", True),
+        ],
+    )
+    @pytest.mark.parametrize(
+        ("val", "expected"),
+        [
+            (True, Query.ANY),
+            (False, Query.NONE),
+        ],
+    )
+    @given(entity=sb_st.bids_entity().map(str))
+    def test_boolean_filters_converted_to_pybids_queries(
+        self, entity: str, kind: str, with_regex: bool, val: bool, expected: object
+    ):
+        """Boolean filters (True/False) should convert to Query.ANY / Query.NONE
+
+        Test three shapes for each boolean: simple boolean, boolean in a list, and
+        boolean nested under each of the valid methods ('get','match','search').
+        """
+        if kind == "simple":
+            filt = {entity: val}
+        elif kind == "list":
+            filt = {entity: [val]}
+        else:
+            filt = {entity: {kind: val}}
+        res = _compile_filters(filt, with_regex=with_regex)  # type: ignore[arg-type]
+        assert res[entity][0] is expected
+
+    @given(txt=st.text())
+    def test_match_filters_converted_to_valid_regex(self, txt: str):
+        # match should produce anchored regex that matches exactly
+        res = _compile_filters({"": {"match": re.escape(txt)}}, with_regex=True)
+        pattern = cast(str, res[""][0])
+        assert re.match(pattern, txt) is not None
+        assert re.match(pattern, txt + "_") is None
+
+    @pytest.mark.parametrize("method", ["match", "search"])
+    @given(txt=st.text(alphabet=string.ascii_lowercase, min_size=1))
+    def test_regex_filters_case_sensitive(self, method: str, txt: str):
+        res = _compile_filters({"": {method: txt}}, with_regex=True)  # pyright: ignore[reportArgumentType]
+        pattern = cast(str, res[""][0])
+        assert getattr(re, method)(pattern, txt.upper()) is None
+
+    @given(txt=st.text())
+    def test_search_filters_converted_to_valid_regex(self, txt: str):
+        # search should produce regex that matches when substring present
+        res = _compile_filters({"": {"search": re.escape(txt)}}, with_regex=True)
+        pattern = cast(str, res[""][0])
+        assert re.search(pattern, "_" + txt + "_") is not None
+
+    @given(txt=st.text(min_size=2).filter(lambda t: t != t[::-1]))
+    def test_search_filters_only_match_query(self, txt: str):
+        # search should produce regex that matches when substring present
+        res = _compile_filters({"": {"search": re.escape(txt)}}, with_regex=True)
+        pattern = cast(str, res[""][0])
+        assert re.search(pattern, txt[::-1]) is None
+
+    @given(txt=st.text())
+    def test_get_filters_stay_as_str(self, txt: str):
+        # explicit get method should leave the string as-is
+        res = _compile_filters({"": {"get": txt}}, with_regex=False)
+        assert cast(str, res[""][0]) == txt
+
+    @pytest.mark.parametrize(
+        ("method", "with_regex"),
+        [
+            ("get", True),
+            ("match", False),
+            ("search", False),
+            (None, True),
+        ],
+    )
+    @given(entity=sb_st.bids_entity().map(str))
+    def test_filter_skipped_when_with_regex_not_matching(
+        self, entity: str, method: str | None, with_regex: bool
+    ):
+        filt = {entity: ""} if method is None else {entity: {method: ""}}
+
+        res = _compile_filters(filt, with_regex=with_regex)  # type: ignore[arg-type]
+
+        assert entity not in res
+
+
+class _FakeBIDSLayout:
+    def __init__(self, get: list[str] | None = None, search: list[str] | None = None):
+        self.get_called = False
+        self.search_called = False
+        self.get_return = get or []
+        self.search_return = search or []
+
+    def get(self, regex_search: bool, **filters: Sequence[str]):
+        query = re.search(
+            filters.popitem()[1][0] if filters else "get", "searchgeterror"
+        )
+        assert query
+        query = query[0]
+        if query == "error":
+            raise AttributeError
+        assert regex_search == (query == "search")
+        if query == "search":
+            self.search_called = True
+            return self.search_return
+        assert query == "get"
+        self.get_called = True
+        return self.get_return
+
+
+class TestGetMatchingFiles:
+    def _create_unified_filter(self, *, get: bool, search: bool, error: bool):
+        """Create a small UnifiedFilter for tests.
+
+        Arguments are keyword-only to make call sites explicit about intent.
+        """
+        filter_dict: FilterMap = {}
+        if get:
+            filter_dict["get"] = "error" if error else "get"
+        if search:
+            filter_dict["search"] = {"search": "error" if error else "search"}
+        return UnifiedFilter.from_filter_dict(filter_dict)
+
+    @pytest.mark.parametrize(
+        ("get", "search"), [(False, False), (True, True), (True, False), (False, True)]
+    )
+    def test_get_and_search_called_correctly(self, get: bool, search: bool):
+        """Test appropriate call of BIDSLayout.get.
+
+        When called with filters from UnifiedFilter.get, regex_search should be False. A
+        call with regex_search == False should be made every time.
+
+        When called with filters from UnifiedFilter.search, regex_search should be True.
+        A call with regex_search == True should only be made when UnifiedFilter.search
+        returns a Filter.
+        """
+        layout = _FakeBIDSLayout()
+        filters = self._create_unified_filter(get=get, search=search, error=False)
+        get_matching_files(layout, filters)  # pyright: ignore[reportArgumentType]
+        assert layout.get_called
+        assert layout.search_called == search
+
+    @pytest.mark.parametrize(("get", "search"), [(True, False), (False, True)])
+    def test_get_and_search_catches_attribute_error(self, get: bool, search: bool):
+        layout = _FakeBIDSLayout()
+        filters = self._create_unified_filter(get=get, search=search, error=True)
+        with pytest.raises(
+            PybidsError,
+            match="Pybids has encountered a problem that Snakebids cannot handle. ",
+        ):
+            get_matching_files(layout, filters)  # pyright: ignore[reportArgumentType]
+
+    @given(get=st.sets(st.text()), search=st.sets(st.text()))
+    def test_returns_union_of_get_and_search(self, get: set[str], search: set[str]):
+        files = get_matching_files(
+            _FakeBIDSLayout(get, search),  # pyright: ignore[reportArgumentType]
+            self._create_unified_filter(get=True, search=True, error=False),
+        )
+        assert set(files) == get & search
+
+    def test_preserves_order_of_files(self):
+        get = list(map(str, range(1000)))
+        search = list(map(str, range(1000)[::2]))
+        layout = _FakeBIDSLayout(get, search)
+        filters = self._create_unified_filter(get=True, search=True, error=False)
+        files = get_matching_files(layout, filters)  # pyright: ignore[reportArgumentType]
+        assert files == search

From 23d46765917b8cd8f2d235c8dfe5eeaadc81d6e2 Mon Sep 17 00:00:00 2001
From: Peter Van Dyken <pvandyk2@uwo.ca>
Date: Thu, 13 Nov 2025 18:11:43 -0500
Subject: [PATCH 5/6] Make participant-label tests more efficient

Tests involve pybids indexing, and previously used hypothesis to
generate examples. Remove hypothesis from all cases and parametrize with
focused cases. Test logic between generate_inputs and get_matching_files
with patches
---
 tests/test_generate_inputs.py | 255 ++++++++++++----------------------
 1 file changed, 85 insertions(+), 170 deletions(-)

diff --git a/tests/test_generate_inputs.py b/tests/test_generate_inputs.py
index 529bb6a3..2a5c577f 100644
--- a/tests/test_generate_inputs.py
+++ b/tests/test_generate_inputs.py
@@ -5,6 +5,7 @@
 import functools as ft
 import itertools as it
 import keyword
+import logging
 import os
 import re
 import shutil
@@ -14,12 +15,12 @@
 from collections import defaultdict
 from collections.abc import Iterable
 from pathlib import Path, PosixPath
-from typing import Any, Literal, NamedTuple, TypedDict, TypeVar
+from typing import Any, NamedTuple, TypeVar
 
 import attrs
 import more_itertools as itx
 import pytest
-from bids import BIDSLayout
+from bids.layout import BIDSLayout
 from hypothesis import HealthCheck, assume, example, given, settings
 from hypothesis import strategies as st
 from pytest_mock import MockerFixture
@@ -40,7 +41,7 @@
 from snakebids.exceptions import ConfigError, PybidsError, RunError
 from snakebids.paths._presets import bids
 from snakebids.snakemake_compat import expand as sb_expand
-from snakebids.types import InputsConfig
+from snakebids.types import InputConfig, InputsConfig
 from snakebids.utils.containers import MultiSelectDict
 from snakebids.utils.utils import DEPRECATION_FLAG, BidsEntity, BidsParseError
 from tests import strategies as sb_st
@@ -1687,197 +1688,111 @@ def test_generate_inputs(dataset: BidsDataset, tmpdir: Path):
     assert reindexed.layout is not None
 
 
-@st.composite
-def dataset_with_subject(draw: st.DrawFn):
-    entities = draw(sb_st.bids_entity_lists(blacklist_entities=["subject"]))
-    entities += ["subject"]
-    return BidsDataset.from_iterable(
-        [
-            draw(
-                sb_st.bids_components(
-                    whitelist_entities=entities,
-                    min_entities=len(entities),
-                    max_entities=len(entities),
-                    restrict_patterns=True,
-                    unique=True,
-                )
-            )
-        ]
-    )
+class _FakeBIDSLayout:
+    def __init__(self, *args: Any, **kwargs: Any):
+        pass
 
+    def get(self, regex_search: bool, **kwargs: Any) -> list[str]:
+        return []
 
-class TestParticipantFiltering:
-    MODE = Literal["include", "exclude"]
-
-    def get_filter_params(self, mode: MODE, filters: list[str] | str):
-        class FiltParams(TypedDict, total=False):
-            participant_label: list[str] | str
-            exclude_participant_label: list[str] | str
+    def __eq__(self, other: object):
+        return isinstance(other, self.__class__)
 
-        if mode == "include":
-            return FiltParams({"participant_label": filters})
-        if mode == "exclude":
-            return FiltParams({"exclude_participant_label": filters})
-        msg = f"Invalid mode specification: {mode}"
-        raise ValueError(msg)
 
-    @given(
-        data=st.data(),
-        dataset=dataset_with_subject(),
-    )
-    @settings(
-        deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture]
+class TestParticipantFiltering:
+    @pytest.mark.parametrize(
+        ("include", "exclude"),
+        [
+            ("01", None),
+            (None, "01"),
+            ("01", "02"),
+            (["01", "02"], "02"),
+            (None, ["02", "03"]),
+            (["01", "02"], ["02", "03"]),
+        ],
     )
     def test_exclude_and_participant_label_filter_correctly(
-        self, data: st.DataObject, dataset: BidsDataset, tmpdir: Path
-    ):
-        root = tempfile.mkdtemp(dir=tmpdir)
-        rooted = BidsDataset.from_iterable(
-            attrs.evolve(comp, path=os.path.join(root, comp.path))
-            for comp in dataset.values()
-        )
-        sampler = st.sampled_from(itx.first(rooted.values()).entities["subject"])
-        excluded = data.draw(st.lists(sampler, unique=True) | sampler | st.none())
-        included = data.draw(st.lists(sampler, unique=True) | sampler | st.none())
-        reindexed = reindex_dataset(
-            root, rooted, exclude_participant_label=excluded, participant_label=included
-        )
-        reindexed_subjects = set(itx.first(reindexed.values()).entities["subject"])
-        expected_subjects = set(itx.first(rooted.values()).entities["subject"])
-        if included is not None:
-            expected_subjects &= set(itx.always_iterable(included))
-        if excluded is not None:
-            expected_subjects -= set(itx.always_iterable(excluded))
-
-        assert reindexed_subjects == expected_subjects
-
-    @pytest.mark.parametrize("mode", ["include", "exclude"])
-    @given(
-        dataset=sb_st.datasets_one_comp(blacklist_entities=["subject"], unique=True),
-        participant_filter=st.lists(st.text(min_size=1)) | st.text(min_size=1),
-    )
-    @settings(
-        deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture]
-    )
-    def test_participant_label_doesnt_filter_comps_without_subject(
         self,
-        mode: MODE,
-        dataset: BidsDataset,
-        participant_filter: list[str] | str,
+        include: str | list[str] | None,
+        exclude: str | list[str] | None,
         tmpdir: Path,
     ):
-        root = tempfile.mkdtemp(dir=tmpdir)
-        rooted = BidsDataset.from_iterable(
-            attrs.evolve(comp, path=os.path.join(root, comp.path))
-            for comp in dataset.values()
-        )
-        reindexed = reindex_dataset(
-            root, rooted, **self.get_filter_params(mode, participant_filter)
+        zip_lists = {
+            "subject": ["01", "02", "03"],
+            "acq": ["x", "y", "z"],
+        }
+        component = BidsComponent(
+            name="0", path=str(tmpdir / get_bids_path(zip_lists)), zip_lists=zip_lists
         )
-        assert reindexed == rooted
+        dataset = BidsDataset.from_iterable([component])
 
-    @pytest.mark.parametrize("mode", ["include", "exclude"])
-    @given(
-        dataset=dataset_with_subject(),
-        participant_filter=st.lists(st.text(min_size=1)) | st.text(min_size=1),
-    )
-    @settings(
-        deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture]
-    )
-    def test_participant_label_doesnt_filter_comps_when_subject_has_filter(
-        self,
-        mode: MODE,
-        dataset: BidsDataset,
-        participant_filter: list[str] | str,
-        tmpdir: Path,
-    ):
-        root = tempfile.mkdtemp(dir=tmpdir)
-        rooted = BidsDataset.from_iterable(
-            attrs.evolve(comp, path=os.path.join(root, comp.path))
-            for comp in dataset.values()
-        )
-        create_dataset(Path("/"), rooted)
-        reindexed = generate_inputs(
-            root,
-            create_snakebids_config(rooted),
-            **self.get_filter_params(mode, participant_filter),
+        reindexed = reindex_dataset(
+            str(tmpdir),
+            dataset,
+            participant_label=include,
+            exclude_participant_label=exclude,
         )
-        assert reindexed == rooted
-
-    @pytest.mark.parametrize("mode", ["include", "exclude"])
-    @given(
-        dataset=dataset_with_subject(),
-        participant_filter=st.lists(st.text(min_size=1)) | st.text(min_size=1),
-        data=st.data(),
-    )
-    @settings(
-        deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture]
-    )
-    def test_participant_label_doesnt_filter_comps_when_subject_has_filter_no_wcard(
+        expected = set(component.entities["subject"])
+        expected -= set(itx.always_iterable(exclude))
+        if include is not None:
+            expected &= set(itx.always_iterable(include))
+        assert set(itx.first(reindexed.values()).entities["subject"]) == expected
+
+    @pytest.mark.parametrize("include", ["include", ["include"], None])
+    @pytest.mark.parametrize("exclude", ["exclude", ["exclude"], None])
+    @pytest.mark.parametrize("filters", ["filters", None])
+    def test_participant_flags_and_filters_merged(
         self,
-        mode: MODE,
-        dataset: BidsDataset,
-        participant_filter: list[str] | str,
-        data: st.DataObject,
-        tmpdir: Path,
+        include: str | list[str] | None,
+        exclude: str | list[str] | None,
+        filters: str | None,
+        mocker: MockerFixture,
+        caplog: pytest.LogCaptureFixture,
     ):
-        root = tempfile.mkdtemp(dir=tmpdir)
-        rooted = BidsDataset.from_iterable(
-            attrs.evolve(comp, path=os.path.join(root, comp.path))
-            for comp in dataset.values()
-        )
-        subject = data.draw(
-            st.sampled_from(itx.first(rooted.values()).entities["subject"])
-        )
-        create_dataset(Path("/"), rooted)
-        config = create_snakebids_config(rooted)
-        for comp in config.values():
-            comp["filters"] = dict(comp.get("filters", {}))
-            comp["filters"]["subject"] = subject
-        reindexed = generate_inputs(
-            root,
-            config,
-            **self.get_filter_params(mode, participant_filter),
-        )
-        assert reindexed == rooted
+        mocker.patch.object(input_generation, "BIDSLayout", _FakeBIDSLayout)
+        patch = mocker.patch.object(input_generation, "get_matching_files")
+        component: InputConfig = {"filters": {"subject": filters}} if filters else {}
+        with caplog.at_level(logging.ERROR, "snakebids.core.input_generation"):
+            generate_inputs(
+                "",
+                {"": component},
+                participant_label=include,
+                exclude_participant_label=exclude,
+            )
+        pf = PostFilter()
+        pf.add_filter("subject", include, exclude)
+        uf = UnifiedFilter(component, pf)
+        patch.assert_called_once_with(_FakeBIDSLayout(), uf)
 
-    @given(
-        data=st.data(),
-        dataset=dataset_with_subject().filter(
-            lambda ds: set(itx.first(ds.values()).wildcards) != {"subject", "extension"}
-        ),
-    )
-    @settings(
-        deadline=None, suppress_health_check=[HealthCheck.function_scoped_fixture]
-    )
     def test_exclude_participant_does_not_make_all_other_filters_regex(
-        self, data: st.DataObject, dataset: BidsDataset, tmpdir: Path
+        self, tmpdir: Path
     ):
-        root = tempfile.mkdtemp(dir=tmpdir)
-        rooted = BidsDataset.from_iterable(
-            attrs.evolve(comp, path=os.path.join(root, comp.path))
-            for comp in dataset.values()
+        # Construct a small deterministic BIDS dataset with a subject entity and
+        # at least one other entity so we can mutate that other entity's values.
+        # Single-component dataset with a valid BIDS-style path and entities.
+        mut_entity = "acq"
+        zip_lists = {
+            "subject": ["01", "02"],
+            mut_entity: ["x", "y"],
+        }
+        component = BidsComponent(
+            name="0", path=str(tmpdir / get_bids_path(zip_lists)), zip_lists=zip_lists
         )
+        dataset = BidsDataset.from_iterable([component])
 
         # Create an extra set of paths by modifying one of the existing components to
-        # put foo after a set of entity values. If that filter gets changed to a regex,
-        # all of the suffixed decoys will get picked up by pybids
-        ziplist = dict(itx.first(rooted.values()).zip_lists)
-        mut_entity = itx.first(
-            filter(lambda e: e not in {"subject", "extension"}, ziplist)
-        )
-        ziplist[mut_entity] = ["foo" + v for v in ziplist[mut_entity]]
-        for path in sb_expand(itx.first(rooted.values()).path, zip, **ziplist):
+        # put 'foo' after a set of entity values. If that filter gets changed to a
+        # regex, all of the suffixed decoys will get picked up by pybids.
+        zip_lists[mut_entity] = ["foo" + v for v in zip_lists[mut_entity]]
+        for path in sb_expand(itx.first(dataset.values()).path, zip, **zip_lists):
             p = Path(path)
             p.parent.mkdir(parents=True, exist_ok=True)
             p.touch()
 
-        sampler = st.sampled_from(itx.first(rooted.values()).entities["subject"])
-        label = data.draw(st.lists(sampler, unique=True) | sampler)
-        reindexed = reindex_dataset(root, rooted, exclude_participant_label=label)
-        reindexed_subjects = set(itx.first(reindexed.values()).entities["subject"])
-        original_subjects = set(itx.first(rooted.values()).entities["subject"])
-        assert reindexed_subjects == original_subjects - set(itx.always_iterable(label))
+        reindexed = reindex_dataset(
+            str(tmpdir), dataset, exclude_participant_label="01"
+        )
+        assert itx.first(reindexed.values()) == component.filter(subject="02")
 
 
 # The content of the dataset is irrelevant to this test, so one example suffices

From aa1d53c4271ec8856c2238cc34b4b2a107692f98 Mon Sep 17 00:00:00 2001
From: Peter Van Dyken <pvandyk2@uwo.ca>
Date: Thu, 13 Nov 2025 18:20:43 -0500
Subject: [PATCH 6/6] Update paths filter on tests action

---
 .github/workflows/test.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 26cea428..a443bcd8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -13,7 +13,8 @@ on:
       - '*'
       - '!push-action/*'
     paths:
-      - snakebids/**
+      - src/**
+      - tests/**
       - scripts/**
 
 jobs: