From 85495235bf56eab19be576e39cc7f5b6b96b7ca5 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 11:57:16 -0500 Subject: [PATCH 01/14] Improvements for dev testing - Use base Docker image directly instead of rebuilding for tests - Avoid double overlay/install - Avoid container rebuild for every change - Remove docker build step from test scripts and CI - Fix hardcoded paths to work from any checkout - CI e2e job calls e2e_in_docker.sh directly - script was broken and untested Co-Authored-By: Claude Opus 4.6 (1M context) --- .circleci/config.yml | 29 +++++++---------------------- Dockerfile_testing | 6 ------ docs/developer_how_to_test.rst | 21 +++++++++++++++++++-- tests/e2e_in_docker.sh | 19 +++++++------------ tests/pytest_in_docker.sh | 11 +++++------ 5 files changed, 38 insertions(+), 48 deletions(-) delete mode 100644 Dockerfile_testing diff --git a/.circleci/config.yml b/.circleci/config.yml index 77deed95..12b7d3d0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,18 +18,15 @@ jobs: name: pytest of BABS no_output_timeout: 1h command: | - docker build \ - -t pennlinc/slurm-docker-ci:unstable \ - -f Dockerfile_testing . 
- - # Make a directory that will hold the test artifacts mkdir -p ${HOME}/e2e-testing docker run -it \ - -v ${PWD}:/tests \ + -v ${PWD}:/babs \ -v ${HOME}/e2e-testing:/test-temp:rw \ + -w /babs \ -h slurmctl --cap-add sys_admin \ --privileged \ - pennlinc/slurm-docker-ci:unstable \ + pennlinc/slurm-docker-ci:0.14 \ + bash -c "pip install -e .[tests] && \ pytest -n 4 -sv \ --durations=0 \ --timeout=300 \ @@ -37,7 +34,7 @@ jobs: --cov-report term-missing \ --cov-report xml:/test-temp/coverage.xml \ --cov=babs \ - /babs + /babs/tests/" - store_test_results: path: /home/circleci/e2e-testing/junit.xml @@ -54,22 +51,10 @@ jobs: - checkout: path: /home/circleci/src/babs - run: - name: pytest of BABS + name: e2e SLURM tests no_output_timeout: 1h command: | - docker build \ - -t pennlinc/slurm-docker-ci:unstable \ - -f Dockerfile_testing . - - # Make a directory that will hold the test artifacts - mkdir -p ${HOME}/e2e-testing - docker run -it \ - -v ${PWD}:/tests \ - -v ${HOME}/e2e-testing:/test-temp:rw \ - -h slurmctl --cap-add sys_admin \ - --privileged \ - pennlinc/slurm-docker-ci:unstable \ - /tests/tests/e2e-slurm/container/walkthrough-tests.sh + E2E_DIR=${HOME}/e2e-testing bash tests/e2e_in_docker.sh - run: name: clean up test artifacts diff --git a/Dockerfile_testing b/Dockerfile_testing deleted file mode 100644 index 4b0cff97..00000000 --- a/Dockerfile_testing +++ /dev/null @@ -1,6 +0,0 @@ -FROM pennlinc/slurm-docker-ci:0.14 - -# install BABS in development mode for proper coverage tracking -COPY . /babs -WORKDIR /babs -RUN pip install -e .[tests] diff --git a/docs/developer_how_to_test.rst b/docs/developer_how_to_test.rst index a3292243..2cd0bf13 100644 --- a/docs/developer_how_to_test.rst +++ b/docs/developer_how_to_test.rst @@ -25,8 +25,25 @@ Manually run pytest The easiest way to run pytest is to run the ``tests/pytest_in_docker.sh`` script from the root directory of BABS. -This will build a docker container that has a running SLURM job scheduler system. 
-It will also install the local copy of BABS and run the pytests in the container. +This runs the tests inside a Docker container that has a running SLURM job scheduler system, +and installs the local copy of BABS at runtime via volume mount. + +.. code-block:: bash + + bash tests/pytest_in_docker.sh + +To run the end-to-end walkthrough tests (``babs init`` → ``submit`` → ``merge``): + +.. code-block:: bash + + bash tests/e2e_in_docker.sh + +By default, e2e test artifacts go to a temporary directory under ``/tmp``. +To specify a location (e.g., for inspection after the run): + +.. code-block:: bash + + E2E_DIR=/path/to/output bash tests/e2e_in_docker.sh ----------------------------- Automatic pytest via CircleCI diff --git a/tests/e2e_in_docker.sh b/tests/e2e_in_docker.sh index 9de3de14..a571a449 100755 --- a/tests/e2e_in_docker.sh +++ b/tests/e2e_in_docker.sh @@ -1,17 +1,12 @@ #!/bin/bash -mkdir -p "${HOME}"/projects/e2e-testing -docker build --platform linux/amd64 \ - -t pennlinc/slurm-docker-ci:unstable \ - -f Dockerfile_testing . +E2E_DIR="${E2E_DIR:-$(mktemp -d /tmp/babs-e2e-XXXXXX)}" +mkdir -p "${E2E_DIR}" +echo "E2E_DIR=${E2E_DIR}" docker run -it \ --platform linux/amd64 \ - -v "${HOME}"/projects/babs:/tests \ - -v "${HOME}"/projects/e2e-testing:/test-temp:rw \ + -v "$(pwd)":/tests \ + -v "${E2E_DIR}":/test-temp:rw \ -h slurmctl --cap-add sys_admin \ --privileged \ - pennlinc/slurm-docker-ci:unstable #\ - #/babs/tests/e2e-slurm/container/walkthrough-tests.sh - - - #pytest -svx --pdb \ - #/babs/tests \ No newline at end of file + pennlinc/slurm-docker-ci:0.14 \ + /tests/tests/e2e-slurm/container/walkthrough-tests.sh diff --git a/tests/pytest_in_docker.sh b/tests/pytest_in_docker.sh index c8646923..509c3252 100755 --- a/tests/pytest_in_docker.sh +++ b/tests/pytest_in_docker.sh @@ -1,15 +1,14 @@ #!/bin/bash -docker build --platform linux/amd64 -t pennlinc/slurm-docker-ci:unstable -f Dockerfile_testing . 
docker run -it \ --platform linux/amd64 \ -h slurmctl --cap-add sys_admin \ --privileged \ - -v "${HOME}"/projects/babs:/babs \ - pennlinc/slurm-docker-ci:unstable \ - pytest -svx \ + -v "$(pwd)":/babs \ + -w /babs \ + pennlinc/slurm-docker-ci:0.14 \ + bash -c "pip install -e .[tests] && pytest -svx \ --cov-report=term-missing \ --cov-report=xml \ --cov=babs \ --pdb \ - /babs/tests/ - \ No newline at end of file + /babs/tests/" From ef2b97ff14c9eb5691afaf4337a711384672b6ef Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 12:30:23 -0500 Subject: [PATCH 02/14] Write test artifacts to tmp_path instead of repo root Tests were writing generated shell scripts to the working directory, which left root-owned files in the checkout when run via Docker. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_generate_bidsapp_runscript.py | 8 ++++---- tests/test_generate_submit_script.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_generate_bidsapp_runscript.py b/tests/test_generate_bidsapp_runscript.py index 9c454475..19a34402 100644 --- a/tests/test_generate_bidsapp_runscript.py +++ b/tests/test_generate_bidsapp_runscript.py @@ -112,7 +112,7 @@ def test_get_input_unipping_cmds(): @pytest.mark.parametrize(('input_datasets', 'config_file', 'processing_level'), testing_pairs) -def test_generate_bidsapp_runscript(input_datasets, config_file, processing_level): +def test_generate_bidsapp_runscript(input_datasets, config_file, processing_level, tmp_path): """Test that the bidsapp runscript is generated correctly.""" config_path = NOTEBOOKS_DIR / config_file container_name = config_file.split('_')[1] @@ -130,7 +130,7 @@ def test_generate_bidsapp_runscript(input_datasets, config_file, processing_leve templateflow_home='/path/to/templateflow_home', ) - out_fn = Path('.') / f'{config_path.name}_{processing_level}.sh' + out_fn = tmp_path / f'{config_path.name}_{processing_level}.sh' with open(out_fn, 'w') as f: 
f.write(script_content) passed, status = run_shellcheck(str(out_fn)) @@ -164,7 +164,7 @@ def run_shellcheck(script_path): return False, str(e) -def test_generate_pipeline_runscript(): +def test_generate_pipeline_runscript(tmp_path): """Test that the pipeline runscript is generated correctly.""" config_path = NOTEBOOKS_DIR / 'eg_nordic-fmriprep_pipeline.yaml' config = read_yaml(config_path) @@ -180,7 +180,7 @@ def test_generate_pipeline_runscript(): final_zip_foldernames=config.get('zip_foldernames', {}), ) - out_fn = Path('.') / f'{config_path.name}_pipeline.sh' + out_fn = tmp_path / f'{config_path.name}_pipeline.sh' with open(out_fn, 'w') as f: f.write(script_content) passed, status = run_shellcheck(str(out_fn)) diff --git a/tests/test_generate_submit_script.py b/tests/test_generate_submit_script.py index 6d0868a3..62184ef0 100644 --- a/tests/test_generate_submit_script.py +++ b/tests/test_generate_submit_script.py @@ -91,7 +91,7 @@ @pytest.mark.parametrize(('input_datasets', 'config_file', 'processing_level'), testing_pairs) -def test_generate_submit_script(input_datasets, config_file, processing_level): +def test_generate_submit_script(input_datasets, config_file, processing_level, tmp_path): """Test that the bidsapp runscript is generated correctly.""" config_path = NOTEBOOKS_DIR / config_file container_name = config_file.split('_')[1] @@ -107,7 +107,7 @@ def test_generate_submit_script(input_datasets, config_file, processing_level): zip_foldernames=config['zip_foldernames'], ) - out_fn = Path('.') / f'participant_job_{config_path.name}_{processing_level}.sh' + out_fn = tmp_path / f'participant_job_{config_path.name}_{processing_level}.sh' with open(out_fn, 'w') as f: f.write(script_content) passed, status = run_shellcheck(str(out_fn)) From c7a8ed54e2441694d84b6694714e9770c8f3c93e Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 12:33:42 -0500 Subject: [PATCH 03/14] Write coverage.xml to /tmp instead of repo root Co-Authored-By: Claude 
Opus 4.6 (1M context) --- tests/pytest_in_docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pytest_in_docker.sh b/tests/pytest_in_docker.sh index 509c3252..4251d601 100755 --- a/tests/pytest_in_docker.sh +++ b/tests/pytest_in_docker.sh @@ -8,7 +8,7 @@ docker run -it \ pennlinc/slurm-docker-ci:0.14 \ bash -c "pip install -e .[tests] && pytest -svx \ --cov-report=term-missing \ - --cov-report=xml \ + --cov-report=xml:/tmp/coverage.xml \ --cov=babs \ --pdb \ /babs/tests/" From 54aff89bee720afe5762a1936f5fa5fc9b73acca Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 14:40:22 -0500 Subject: [PATCH 04/14] Discover container image path via containers-list and register at analysis level Instead of hardcoding the container path as containers/.datalad/environments/{name}/image, use datalad containers-list to discover it from the container dataset, then register it at the analysis level via containers-add. This makes babs work with any container dataset layout. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 46 +++++++++++++++++++++++++++++++++++++++------- babs/container.py | 7 +++---- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 878f91f5..5cba7472 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -223,10 +223,38 @@ def babs_bootstrap( ) # into `analysis/containers` folder - # Create initial container for sanity check - container = Container(container_ds, container_name, container_config) + # Discover container image path from the container dataset: + containers_path = op.join(self.analysis_path, 'containers') + result = dlapi.containers_list(dataset=containers_path, result_renderer='disabled') + container_info = [r for r in result + if r['action'] == 'containers' and r['name'] == container_name] + if not container_info: + available = [r['name'] for r in result if r['action'] == 'containers'] + raise ValueError( + f"Container '{container_name}' not found in container dataset. 
" + f"Available: {available}" + ) + image_path_in_ds = op.relpath(container_info[0]['path'], containers_path) + container_image_path = op.join('containers', image_path_in_ds) + + # Build call_fmt from user's singularity_args: + with open(container_config) as f: + user_config = yaml.safe_load(f) + singularity_args = user_config.get('singularity_args', []) + singularity_args_str = ' '.join(singularity_args) if singularity_args else '' + call_fmt = f'singularity run -B $PWD --pwd $PWD {singularity_args_str} {{img}} {{cmd}}' + + # Register container at analysis level so datalad containers-run works: + print(f'\nRegistering container at analysis level: {container_image_path}') + dlapi.containers_add( + dataset=self.analysis_path, + name=container_name, + image=container_image_path, + call_fmt=call_fmt, + ) - # sanity check of container ds: + container = Container(container_ds, container_name, container_config, + container_image_path=container_image_path) container.sanity_check(self.analysis_path) # ============================================================== @@ -250,9 +278,11 @@ def babs_bootstrap( container = containers[0] else: self._bootstrap_single_app_scripts( - container_ds, container_name, container_config, system + container_ds, container_name, container_config, system, + container_image_path=container_image_path, ) - container = Container(container_ds, container_name, container_config) + container = Container(container_ds, container_name, container_config, + container_image_path=container_image_path) # Copy in any other files needed: self._init_import_files(container.config.get('imported_files', [])) @@ -390,10 +420,12 @@ def babs_bootstrap( print('`babs init` was successful!') def _bootstrap_single_app_scripts( - self, container_ds, container_name, container_config, system + self, container_ds, container_name, container_config, system, + container_image_path=None, ): """Bootstrap scripts for single BIDS app configuration.""" - container = 
Container(container_ds, container_name, container_config) + container = Container(container_ds, container_name, container_config, + container_image_path=container_image_path) # Generate `_zip.sh`: ---------------------------------- # which is a bash script of singularity run + zip diff --git a/babs/container.py b/babs/container.py index f3296286..36cad45b 100644 --- a/babs/container.py +++ b/babs/container.py @@ -14,7 +14,8 @@ class Container: """This class is for the BIDS App Container""" - def __init__(self, container_ds, container_name, config_yaml_file): + def __init__(self, container_ds, container_name, config_yaml_file, + container_image_path=None): """ This is to initialize Container class. @@ -67,9 +68,7 @@ def __init__(self, container_ds, container_name, config_yaml_file): with open(self.config_yaml_file) as f: self.config = yaml.safe_load(f) - self.container_path_relToAnalysis = op.join( - 'containers', '.datalad', 'environments', self.container_name, 'image' - ) + self.container_path_relToAnalysis = container_image_path def sanity_check(self, analysis_path): """ From dd8b44b982c61522c318b994debc65be6d7e4a6d Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 15:44:24 -0500 Subject: [PATCH 05/14] Replace singularity run with datalad containers-run in job template The job template now uses three steps instead of one: 1. datalad containers-run for the BIDS app 2. datalad run for zipping outputs 3. git rm for raw output cleanup (datalad run --explicit doesn't track deletions) Container args parsed via existing bids_app_args_from_config and passed through generate_submit_script to the template. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/container.py | 25 +++++++++++- babs/generate_submit_script.py | 10 +++++ babs/templates/participant_job.sh.jinja2 | 49 +++++++++++++++--------- 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/babs/container.py b/babs/container.py index 36cad45b..d5e56664 100644 --- a/babs/container.py +++ b/babs/container.py @@ -6,7 +6,10 @@ import yaml from jinja2 import Environment, PackageLoader, StrictUndefined -from babs.generate_bidsapp_runscript import generate_bidsapp_runscript +from babs.generate_bidsapp_runscript import ( + bids_app_args_from_config, + generate_bidsapp_runscript, +) from babs.generate_submit_script import generate_submit_script, generate_test_submit_script from babs.utils import app_output_settings_from_config @@ -164,16 +167,34 @@ def generate_bash_participant_job( Shown in the script error message when PROJECT_ROOT is unset. """ + input_datasets = input_ds.as_records() + _, bids_app_output_dir = app_output_settings_from_config(self.config) + + raw_bids_app_args = self.config.get('bids_app_args', None) + if raw_bids_app_args: + bids_app_args, subject_selection_flag, _, _, bids_app_input_dir = ( + bids_app_args_from_config(raw_bids_app_args, input_datasets) + ) + else: + bids_app_args = [] + subject_selection_flag = '--participant-label' + bids_app_input_dir = input_datasets[0]['unzipped_path_containing_subject_dirs'] + script_content = generate_submit_script( queue_system=system.type, cluster_resources_config=self.config['cluster_resources'], script_preamble=self.config['script_preamble'], job_scratch_directory=self.config['job_compute_space'], - input_datasets=input_ds.as_records(), + input_datasets=input_datasets, processing_level=processing_level, container_name=self.container_name, zip_foldernames=self.config['zip_foldernames'], project_root=project_root, + container_image_path=self.container_path_relToAnalysis, + bids_app_args=bids_app_args, + 
bids_app_input_dir=bids_app_input_dir, + bids_app_output_dir=bids_app_output_dir, + subject_selection_flag=subject_selection_flag, ) with open(bash_path, 'w') as f: diff --git a/babs/generate_submit_script.py b/babs/generate_submit_script.py index c586619d..0ed9fd8a 100644 --- a/babs/generate_submit_script.py +++ b/babs/generate_submit_script.py @@ -30,6 +30,11 @@ def generate_submit_script( container_images=None, datalad_run_message=None, project_root=None, + container_image_path=None, + bids_app_args=None, + bids_app_input_dir=None, + bids_app_output_dir=None, + subject_selection_flag=None, ): """ Generate a bash script that runs the BIDS App singularity image. @@ -122,6 +127,11 @@ def generate_submit_script( container_images=container_images, datalad_run_message=datalad_run_message, project_root=project_root, + container_image_path=container_image_path, + bids_app_args=bids_app_args or [], + bids_app_input_dir=bids_app_input_dir or '', + bids_app_output_dir=bids_app_output_dir or '', + subject_selection_flag=subject_selection_flag or '', ) diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index 9b4786b6..8a134d0a 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -107,9 +107,8 @@ fi {{ zip_locator_text }} # Link to shared container image so each job does not re-clone the same image. -# If shared path is not available (e.g. Slurm Docker workers), retrieve image in this clone. -CONTAINER_SHARED="${PROJECT_ROOT}/analysis/containers/.datalad/environments/{{ container_name }}/image" -CONTAINER_JOB="containers/.datalad/environments/{{ container_name }}/image" +CONTAINER_SHARED="${PROJECT_ROOT}/analysis/{{ container_image_path }}" +CONTAINER_JOB="{{ container_image_path }}" if [ ! -L "${CONTAINER_SHARED}" ]; then echo "ERROR: shared container image not found at ${CONTAINER_SHARED}" >&2 @@ -125,35 +124,49 @@ if [ ! 
-L "${CONTAINER_JOB}" ]; then exit 1 fi -# datalad run: -datalad run \ - -i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }}" \ +# Step 1: Run BIDS app via containers-run +datalad containers-run \ + -n {{ container_name }} \ + --explicit \ {% for input_dataset in input_datasets %} {% if not input_dataset['is_zipped'] %} - -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \ - -i "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \ + --input "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \ + --input "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \ {% else %} - -i "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \ + --input "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \ {% endif %} {% endfor %} -{% if container_images %} -{% for image_path in container_images %} - -i "{{ image_path }}" \ -{% endfor %} -{% else %} - -i "containers/.datalad/environments/{{container_name}}/image" \ -{% endif %} {% if datalad_expand_inputs %} --expand inputs \ {% endif %} + --output "{{ bids_app_output_dir }}" \ + -m "{{ container_name }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ + -- \ + {{ bids_app_input_dir }} \ + {{ bids_app_output_dir }} \ + participant \ +{% for bids_app_arg in bids_app_args %} +{% if bids_app_arg %} + {{ bids_app_arg }} \ +{% endif %} +{% endfor %} + {{ subject_selection_flag }} "{% raw %}${subid}{% endraw %}" + +# Step 2: Zip outputs +datalad run \ --explicit \ {% if zip_foldernames is not none %} {% for key, value in zip_foldernames.items() %} -o "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}_{% 
raw %}${sesid}{% endraw %}{% endif %}_{{ key }}-{{ value }}.zip" \ {% endfor %} {% endif %} - -m "{{ (datalad_run_message if datalad_run_message is defined and datalad_run_message else container_name) }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ - "bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} {% raw %}${subid}{% endraw %} {% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}{%endif%}{%endfor%}" + -m "Zip {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ + -- \ + bash ./code/{{ container_name }}_zip.sh "{% raw %}${subid}{% endraw %}"{% if processing_level == 'session' %} "{% raw %}${sesid}{% endraw %}"{% endif %} + +# Step 3: Remove raw outputs (datalad run --explicit doesn't track deletions) +git rm -rf {{ bids_app_output_dir }} +git commit -m "Remove raw outputs for {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %} (zipped)" # Finish up: # push result file content to output RIA storage: From bc6ab49fcbad464638ae62323f0f60013ba627d9 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 16:04:16 -0500 Subject: [PATCH 06/14] Generate zip-only script instead of singularity+zip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The zip script no longer runs the container or removes outputs — both are now handled by the job template (containers-run and git rm). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 11 +++++------ babs/container.py | 23 +++++++++++++++++++++++ babs/templates/zip_outputs.sh.jinja2 | 9 +++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 babs/templates/zip_outputs.sh.jinja2 diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 5cba7472..9b116537 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -428,15 +428,14 @@ def _bootstrap_single_app_scripts( container_image_path=container_image_path) # Generate `_zip.sh`: ---------------------------------- - # which is a bash script of singularity run + zip - # in folder: `analysis/code` - print('\nGenerating a bash script for running container and zipping the outputs...') - print('This bash script will be named as `' + container_name + '_zip.sh`') + # Zip-only script (container execution is now handled by containers-run + # in participant_job.sh) + print('\nGenerating zip script: ' + container_name + '_zip.sh') bash_path = op.join(self.analysis_path, 'code', container_name + '_zip.sh') - container.generate_bash_run_bidsapp(bash_path, self.input_datasets, self.processing_level) + container.generate_bash_zip_outputs(bash_path, self.processing_level) self.datalad_save( path='code/' + container_name + '_zip.sh', - message='Generate script of running container', + message='Generate zip script', ) # make another folder within `code` for test jobs: diff --git a/babs/container.py b/babs/container.py index d5e56664..0c5d6b22 100644 --- a/babs/container.py +++ b/babs/container.py @@ -9,6 +9,7 @@ from babs.generate_bidsapp_runscript import ( bids_app_args_from_config, generate_bidsapp_runscript, + get_output_zipping_cmds, ) from babs.generate_submit_script import generate_submit_script, generate_test_submit_script from babs.utils import app_output_settings_from_config @@ -103,6 +104,28 @@ def sanity_check(self, analysis_path): + "'." 
) + def generate_bash_zip_outputs(self, bash_path, processing_level): + """Generate a bash script that only zips BIDS App outputs.""" + dict_zip_foldernames, _ = app_output_settings_from_config(self.config) + cmd_zip = get_output_zipping_cmds(dict_zip_foldernames, processing_level) + + env = Environment( + loader=PackageLoader('babs', 'templates'), + trim_blocks=True, + lstrip_blocks=True, + autoescape=False, + undefined=StrictUndefined, + ) + template = env.get_template('zip_outputs.sh.jinja2') + script_content = template.render( + processing_level=processing_level, + cmd_zip=cmd_zip, + ) + + with open(bash_path, 'w') as f: + f.write(script_content) + os.chmod(bash_path, 0o700) + def generate_bash_run_bidsapp(self, bash_path, input_ds, processing_level): """ This is to generate a bash script that runs the BIDS App singularity image. diff --git a/babs/templates/zip_outputs.sh.jinja2 b/babs/templates/zip_outputs.sh.jinja2 new file mode 100644 index 00000000..b6e59c29 --- /dev/null +++ b/babs/templates/zip_outputs.sh.jinja2 @@ -0,0 +1,9 @@ +#!/bin/bash +set -e -u -x + +subid="$1" +{% if processing_level == 'session' %} +sesid="$2" +{% endif %} + +{{ cmd_zip }} From e6afd81e7bcb1f1d69c3fa0c7f9136e9804dfbb8 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 16:36:14 -0500 Subject: [PATCH 07/14] Include .datalad in sparse-checkout for containers-run The job clone uses --no-checkout + sparse-checkout. Cone mode does not include dotfile directories by default, so .datalad/config (where containers-add registers the container) was missing. This caused "No known containers" errors at job runtime. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/templates/participant_job.sh.jinja2 | 1 + 1 file changed, 1 insertion(+) diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index 8a134d0a..052c7713 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -70,6 +70,7 @@ if ! git sparse-checkout init --cone; then fi git sparse-checkout set \ + .datalad \ code \ containers \ {% for input_dataset in input_datasets %} From 29d63f16fa9ce5a51e0bf51046e009c7d25f4ae3 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 16:55:16 -0500 Subject: [PATCH 08/14] Use git rm --sparse for output cleanup Output dirs created by containers-run are outside the sparse-checkout cone, so git rm refuses to touch them without --sparse. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/templates/participant_job.sh.jinja2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index 052c7713..3ed23125 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -166,7 +166,7 @@ datalad run \ bash ./code/{{ container_name }}_zip.sh "{% raw %}${subid}{% endraw %}"{% if processing_level == 'session' %} "{% raw %}${sesid}{% endraw %}"{% endif %} # Step 3: Remove raw outputs (datalad run --explicit doesn't track deletions) -git rm -rf {{ bids_app_output_dir }} +git rm -rf --sparse {{ bids_app_output_dir }} git commit -m "Remove raw outputs for {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %} (zipped)" # Finish up: From 295d4c826f53dacda3a26f252ec41f6037798b33 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 17:34:50 -0500 Subject: [PATCH 09/14] Fix writable check for NFS/ACL filesystems os.access() is unreliable on NFS and ACL-based filesystems. 
Use tempfile.TemporaryFile() to actually test write access. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 9b116537..13f1ae8f 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -3,6 +3,7 @@ import os import os.path as op import subprocess +import tempfile from pathlib import Path import datalad.api as dlapi @@ -77,8 +78,11 @@ def babs_bootstrap( f"The parent folder '{parent_dir}' does not exist! `babs init` won't proceed." ) - # check if parent directory is writable: - if not os.access(parent_dir, os.W_OK): + # check if parent directory is writable (os.access unreliable on NFS/ACL): + try: + with tempfile.TemporaryFile(dir=parent_dir): + pass + except OSError: raise ValueError( f"The parent folder '{parent_dir}' is not writable! `babs init` won't proceed." ) From 0e50464e389c23bfeae7602e330ac33944a79bef Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 17:45:08 -0500 Subject: [PATCH 10/14] Fetch container image during init for job symlink access The PROJECT_ROOT symlink in job clones points to the analysis dataset's container image. Without fetching the actual content during init, it's just an annex pointer and singularity can't open it. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 13f1ae8f..57f2ace6 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -257,6 +257,14 @@ def babs_bootstrap( call_fmt=call_fmt, ) + # Fetch container image so the PROJECT_ROOT symlink at job time + # resolves to actual content, not a dangling annex pointer: + print('\nFetching container image...') + dlapi.get( + dataset=self.analysis_path, + path=op.join(self.analysis_path, container_image_path), + ) + container = Container(container_ds, container_name, container_config, container_image_path=container_image_path) container.sanity_check(self.analysis_path) From 5e34541628e12e05d259ce3c0fb558cdba2a4b2d Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 18:51:35 -0500 Subject: [PATCH 11/14] Add design doc for containers-run support Co-Authored-By: Claude Opus 4.6 (1M context) --- design/containers-run.md | 211 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 design/containers-run.md diff --git a/design/containers-run.md b/design/containers-run.md new file mode 100644 index 00000000..f3cdc545 --- /dev/null +++ b/design/containers-run.md @@ -0,0 +1,211 @@ +# Design: `datalad containers-run` support in BABS + +Issue: https://github.com/PennLINC/babs/issues/328 + +## Goal + +Use `datalad containers-run` instead of direct `singularity run` for BIDS app +execution. This provides: + +- **Dynamic container path discovery** — `datalad containers-list` reads the + path from the container dataset's `.datalad/config`, so BABS works with any + container dataset layout (not just the hardcoded + `containers/.datalad/environments/{name}/image`). +- **Provenance tracking** — datalad records exactly which container ran and how. 
+- **Compatibility with repronim/containers** — which uses + `images/bids/{name}.sif` instead of the default datalad-containers layout. + +## How it works + +### During `babs init` + +1. Clone container dataset as subdataset of analysis (unchanged) +2. `datalad containers-list` discovers the image path in the container dataset +3. `datalad containers-add` registers the container at the analysis level with: + - The discovered image path (relative to analysis) + - A `call_fmt` built from the user's `singularity_args` config +4. `datalad get` fetches the container image content so it's available for job + symlinks + +### During job execution (`participant_job.sh`) + +Three-step flow replaces the single `datalad run` that called a combined +singularity+zip script: + +1. **`datalad containers-run`** — runs the BIDS app. Container is accessed via + a PROJECT_ROOT symlink to the analysis dataset's fetched image. +2. **`datalad run`** — calls a zip-only script (no longer contains singularity + invocation or output removal). +3. **`git rm --sparse`** — removes raw outputs. Needed because `datalad run + --explicit` doesn't track file deletions. The `--sparse` flag is required + because outputs are outside the sparse-checkout cone. + +## Prior work + +An initial proof-of-concept was developed before upstream's sparse-checkout PR +(#337) landed: https://github.com/asmacdo/babs/pull/6 + +That POC was validated end-to-end on a SLURM cluster (Discovery) with both +handmade container datasets and repronim/containers. Key findings from that +work: + +- **Concurrent `datalad get` failures** — when multiple jobs run simultaneously, + they all try to fetch the same container image. Git-annex uses transfer locks, + causing all but one job to fail. This is a known git-annex bug: + https://git-annex.branchable.com/bugs/concurrent_get_from_separate_clones_fails/ + The POC solved this with ephemeral clones for the containers subdataset. 
+ +- **Ephemeral clones work for containers but not inputs** — container images are + resolved on the host before singularity launches, so annex symlinks outside + `$PWD` are fine. Input data needs to be accessible inside the container with + `--containall -B $PWD`, so symlinks outside `$PWD` break. + +- **`datalad run --explicit` doesn't track deletions** — discovered and + documented as an upstream issue. The workaround (separate `git rm` step) is + carried forward into this implementation. + +The current implementation was recreated from scratch on top of the +sparse-checkout changes, using the POC as reference. The PROJECT_ROOT symlink +approach (from upstream) combined with `datalad get` of the image during init +avoids the concurrent get problem entirely — jobs never call `datalad get` for +the container image, they just follow the symlink to the pre-fetched content. +Ephemeral clones remain a potential future alternative. + +## Problems encountered + +### `git sparse-checkout` interaction + +The upstream sparse-checkout PR (#337) changed job clones to use +`--no-checkout` + `git sparse-checkout`. This caused two issues: + +- **`.datalad/config` not checked out** — cone mode doesn't include + dot-directories by default. Fixed by adding `.datalad` to the sparse-checkout + set. Without it, `containers-run` fails with "No known containers." + +- **`git rm` blocked by sparse-checkout** — outputs created by `containers-run` + are outside the cone, so `git rm` refuses to touch them. Fixed with + `git rm --sparse`. + +### Container image access in job clones + +Job clones don't have the container image content — they clone from input RIA +with `--no-checkout`. The existing PROJECT_ROOT symlink approach works: symlink +from the job clone's container path to the analysis dataset's copy. But this +requires the analysis dataset to have the actual content (not just an annex +pointer), so `datalad get` during init is necessary. 
+
+### `--containall` requires `--pwd $PWD` and `-B $PWD`
+
+When users specify `--containall` in `singularity_args`, singularity disables
+auto-binding of `$PWD` and sets the working directory to `$HOME` inside the
+container. This breaks relative paths to inputs/outputs. The `call_fmt` built
+during init always includes `-B $PWD --pwd $PWD` to handle this.
+
+### datalad `{placeholder}` conflicts with shell `${variables}`
+
+`datalad run` interprets `{name}` as a substitution placeholder, which
+conflicts with shell variable expansion like `${subid}`. The POC initially
+tried inlining zip commands in `datalad run` but this caused placeholder
+conflicts. The solution is a separate zip script called by `datalad run`.
+
+In the jinja2 templates, shell variables use `{%raw%}${subid}{%endraw%}`
+blocks to prevent jinja2 from interpreting them, and datalad sees the literal
+`${subid}` which it passes through to bash.
+
+### `datalad run --explicit` doesn't track deletions
+
+When outputs are deleted inside a `datalad run --explicit` command, datalad
+doesn't record the deletion — only additions/modifications are tracked. This is
+why raw output removal is a separate `git rm` step outside of datalad run.
+
+This was fixed upstream in https://github.com/datalad/datalad/pull/7823 but
+we keep the `git rm` workaround to avoid requiring the latest datalad version.
+
+## Open questions
+
+### Pipeline support
+
+Pipeline currently uses a separate code path (`_bootstrap_pipeline_scripts`)
+that hardcodes container paths and generates a combined script
+(`pipeline_zip.sh`) with direct `singularity run` calls per step.
+
+Questions:
+- Should pipeline be "a pipeline of 1"? i.e., unify single-app and pipeline
+  into one code path where single-app is just a pipeline with one step.
+- If not unified, pipeline needs the same `containers-list` / `containers-add`
+  treatment per container step. 
+- Pipeline currently puts all singularity invocations in one script called by + one `datalad run`. With `containers-run`, each step would be a separate + `datalad containers-run` call — better provenance but different structure. + +### Reusing `call_fmt` / `cmdexec` from container datasets + +Currently we build `call_fmt` from the user's `singularity_args` config: +``` +singularity run -B $PWD --pwd $PWD {user_args} {img} {cmd} +``` + +Some container datasets (like repronim/containers) already define `cmdexec` in +their `.datalad/config`. Should we reuse that instead of building our own? + +Related: datalad-container's freeze script approach +(https://github.com/datalad/datalad-container/issues/287) could provide a +standardized way to handle this. + +### Container image fetch during init + +`datalad get` of the container image during `babs init` ensures the symlink +approach works at job time. But this makes init slow if the image isn't already +local (e.g., pulling a multi-GB .sif from a remote). + +Options: +- Accept the cost during init (current approach) +- Advise users to `datalad get` the image in their source container dataset + before running `babs init`, so the fetch is a fast local copy +- Add a separate `babs prepare` step for data orchestration that runs after + init but before submit + +### Container access mechanism + +Currently using PROJECT_ROOT symlink (inherited from upstream). This depends on: +- `PROJECT_ROOT` environment variable propagating through SLURM +- Shared filesystem visible from compute nodes + +An alternative is ephemeral clones (`datalad get -n --reckless ephemeral +containers`), which removes these dependencies but requires changing the clone +source from input RIA to the analysis dataset path. Deferred for now. + +### Backwards-incompatible: features from `bidsapp_run.sh.jinja2` not yet ported + +The old `bidsapp_run.sh.jinja2` template had several BABS-managed features that +are not yet in the `containers-run` path. 
These are **backwards-incompatible** +for users who depend on them and need to be restored or replaced. + +- **TemplateFlow bind mount** — BABS read `TEMPLATEFLOW_HOME` from the + environment and added `-B path:path_in_container` + `--env` to the singularity + command. Could be restored in BABS, or users could handle this via + `singularity_args` in their config. + +- **FreeSurfer license bind mount** — BABS intercepted `--fs-license-file` from + `bids_app_args` and converted it to a `-B` bind mount. Same options: restore + in BABS or let users specify via `singularity_args`. + +- **BIDS filter file** — dynamically generated per-job for session-level + processing, restricting BIDS layout indexing to the current session. This is + BIDS-app-specific (fmriprep gets `bold`, qsiprep gets `dwi`, etc.) and can't + be handled by user config alone. Could potentially be supported via a hook or + user-provided script mechanism (nothing like this exists in BABS today). + +- **`${PWD}/` prefix on paths** — the old script passed absolute paths + (`${PWD}/inputs/data/BIDS`). The `containers-run` path passes relative paths, + which should work because `call_fmt` includes `--pwd $PWD`. Needs verification + with BIDS apps that resolve paths differently. + +### `--explicit` flag + +We use `--explicit` on both `containers-run` and the zip `datalad run`. The +POC required `--explicit` because the old subject pruning (`rm -rf` of other +subjects' directories) dirtied the dataset, and datalad refuses to run on a +dirty dataset without `--explicit`. Upstream's sparse-checkout PR (#337) +eliminated subject pruning, so the dataset should stay clean. `--explicit` may +no longer be strictly necessary — kept for safety, could test removing it. 
From 802dae996c21e938c4b84c76b04f1def98592c72 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 16 Mar 2026 19:22:18 -0500 Subject: [PATCH 12/14] fixup: ruff --- babs/bootstrap.py | 42 +++++++++++++++++++++++++++++++----------- babs/container.py | 3 +-- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 57f2ace6..5d37749e 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -230,13 +230,14 @@ def babs_bootstrap( # Discover container image path from the container dataset: containers_path = op.join(self.analysis_path, 'containers') result = dlapi.containers_list(dataset=containers_path, result_renderer='disabled') - container_info = [r for r in result - if r['action'] == 'containers' and r['name'] == container_name] + container_info = [ + r for r in result if r['action'] == 'containers' and r['name'] == container_name + ] if not container_info: available = [r['name'] for r in result if r['action'] == 'containers'] raise ValueError( f"Container '{container_name}' not found in container dataset. 
" - f"Available: {available}" + f'Available: {available}' ) image_path_in_ds = op.relpath(container_info[0]['path'], containers_path) container_image_path = op.join('containers', image_path_in_ds) @@ -265,8 +266,12 @@ def babs_bootstrap( path=op.join(self.analysis_path, container_image_path), ) - container = Container(container_ds, container_name, container_config, - container_image_path=container_image_path) + container = Container( + container_ds, + container_name, + container_config, + container_image_path=container_image_path, + ) container.sanity_check(self.analysis_path) # ============================================================== @@ -290,11 +295,18 @@ def babs_bootstrap( container = containers[0] else: self._bootstrap_single_app_scripts( - container_ds, container_name, container_config, system, + container_ds, + container_name, + container_config, + system, + container_image_path=container_image_path, + ) + container = Container( + container_ds, + container_name, + container_config, container_image_path=container_image_path, ) - container = Container(container_ds, container_name, container_config, - container_image_path=container_image_path) # Copy in any other files needed: self._init_import_files(container.config.get('imported_files', [])) @@ -432,12 +444,20 @@ def babs_bootstrap( print('`babs init` was successful!') def _bootstrap_single_app_scripts( - self, container_ds, container_name, container_config, system, + self, + container_ds, + container_name, + container_config, + system, container_image_path=None, ): """Bootstrap scripts for single BIDS app configuration.""" - container = Container(container_ds, container_name, container_config, - container_image_path=container_image_path) + container = Container( + container_ds, + container_name, + container_config, + container_image_path=container_image_path, + ) # Generate `_zip.sh`: ---------------------------------- # Zip-only script (container execution is now handled by containers-run diff --git 
a/babs/container.py b/babs/container.py index 0c5d6b22..cc12ac72 100644 --- a/babs/container.py +++ b/babs/container.py @@ -18,8 +18,7 @@ class Container: """This class is for the BIDS App Container""" - def __init__(self, container_ds, container_name, config_yaml_file, - container_image_path=None): + def __init__(self, container_ds, container_name, config_yaml_file, container_image_path=None): """ This is to initialize Container class. From 3d959e194dd3d64925ddef72403c81189b0405ac Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Sun, 22 Mar 2026 16:48:51 -0500 Subject: [PATCH 13/14] Remove container image fetch from init The datalad get during init made init slow and coupled data orchestration with project setup. Users should fetch the container image content in the analysis dataset before submitting jobs. Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/bootstrap.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 5d37749e..b93b57e5 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -258,14 +258,6 @@ def babs_bootstrap( call_fmt=call_fmt, ) - # Fetch container image so the PROJECT_ROOT symlink at job time - # resolves to actual content, not a dangling annex pointer: - print('\nFetching container image...') - dlapi.get( - dataset=self.analysis_path, - path=op.join(self.analysis_path, container_image_path), - ) - container = Container( container_ds, container_name, From bfd2bc9c12f81f53a2ee9f2e309a282f22f2a624 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Tue, 24 Mar 2026 11:28:47 -0500 Subject: [PATCH 14/14] Unlock outputs before zipping to get real content After containers-run, datalad saves outputs as annex symlinks. The zip step was archiving the symlinks instead of file content, producing tiny zips with broken links when cloned elsewhere. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- babs/templates/participant_job.sh.jinja2 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index 3ed23125..1000df04 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -153,6 +153,9 @@ datalad containers-run \ {% endfor %} {{ subject_selection_flag }} "{% raw %}${subid}{% endraw %}" +# Unlock outputs so zip gets real content, not annex symlinks +datalad unlock {{ bids_app_output_dir }} + # Step 2: Zip outputs datalad run \ --explicit \