From 54da6ee1540372ada43a41d0c2c06b398650ce2e Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Wed, 31 Mar 2021 22:31:47 -0400 Subject: [PATCH 01/30] add submit script --- scripts/cubic/bootstrap-c-pac.sh | 263 +++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 scripts/cubic/bootstrap-c-pac.sh diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh new file mode 100644 index 0000000..3cfe015 --- /dev/null +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -0,0 +1,263 @@ +## NOTE ## +# This workflow is derived from the Datalad Handbook + +## Ensure the environment is ready to bootstrap the analysis workspace +# Check that we have conda installed +conda activate +if [ $? -gt 0 ]; then + echo "Error initializing conda. Exiting" + exit $? +fi + +DATALAD_VERSION=$(datalad --version) + +if [ $? -gt 0 ]; then + echo "No datalad available in your conda environment." + echo "Try pip install datalad" + exit 1 +fi + +echo USING DATALAD VERSION ${DATALAD_VERSION} + +set -e -u + + +## Set up the directory that will contain the necessary directories +PROJECTROOT=${PWD}/fmriprep +if [[ -d ${PROJECTROOT} ]] +then + echo ${PROJECTROOT} already exists + #exit 1 +fi + +if [[ ! -w $(dirname ${PROJECTROOT}) ]] +then + echo Unable to write to ${PROJECTROOT}\'s parent. Change permissions and retry + #exit 1 +fi + + +## Check the BIDS input +BIDSINPUT=/cbica/projects/RBC/testing/way2/exemplars +if [[ -z ${BIDSINPUT} ]] +then + echo "Required argument is an identifier of the BIDS source" + # exit 1 +fi + +# Is it a directory on the filesystem? +BIDS_INPUT_METHOD=clone +if [[ -d "${BIDSINPUT}" ]] +then + # Check if it's datalad + set +e + BIDS_DATALAD_ID=$(datalad -f '{infos[dataset][id]}' wtf -S dataset -d ${BIDSINPUT}) + #set -e + [ "${BIDS_DATALAD_ID}" = 'N/A' ] && BIDS_INPUT_METHOD=copy +fi + + +## Start making things +mkdir -p ${PROJECTROOT} +cd ${PROJECTROOT} + +# Jobs are set up to not require a shared filesystem (except for the lockfile) +# ------------------------------------------------------------------------------ +# RIA-URL to a different RIA store from which the dataset will be cloned from. +# Both RIA stores will be created +input_store="ria+file://${PROJECTROOT}/input_ria" +output_store="ria+file://${PROJECTROOT}/output_ria" + +# Create a source dataset with all analysis components as an analysis access +# point. +datalad create -c yoda analysis +cd analysis + +# create dedicated input and output locations. Results will be pushed into the +# output sibling and the analysis will start with a clone from the input sibling. +datalad create-sibling-ria -s output "${output_store}" +pushremote=$(git remote get-url --push output) +datalad create-sibling-ria -s input --storage-sibling off "${input_store}" + +# register the input dataset +if [[ "${BIDS_INPUT_METHOD}" == "clone" ]] +then + echo "Cloning input dataset into analysis dataset" + datalad clone -d . ${BIDSINPUT} inputs/data + # amend the previous commit with a nicer commit message + git commit --amend -m 'Register input data dataset as a subdataset' +else + echo "Copying input data into " + mkdir inputs + datalad create -d . inputs/data + cp -rv ${BIDSINPUT}/* inputs/data + datalad save -r -m "added input data" +fi + +SUBJECTS=$(ls -d inputs/data/* | grep sub- | cut -d "/" -f 3 ) +if [ -z "${SUBJECTS}" ] +then + echo "No subjects found in input data" + exit 1 +fi + + +## Add the containers as a subdataset +datalad clone -d . ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_project/containers#~pennlinc-containers +# download the image so we don't ddos pmacs +datalad get pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image +.datalad/environments/fmriprep-20-2-1 + + +## the actual compute job specification +cat > code/participant_job.sh << "EOT" +#!/bin/bash +#$ -S /bin/bash +#$ -l h_vmem=25G +#$ -l s_vmem=23.5G +# Set up the correct conda environment +source ${CONDA_PREFIX}/bin/activate base +echo I\'m in $PWD using `which python` + +# fail whenever something is fishy, use -x to get verbose logfiles +set -e -u -x + +# Set up the remotes and get the subject id from the call +dssource="$1" +pushgitremote="$2" +subid="$3" + +# change into the cluster-assigned temp directory. Not done by default in SGE +cd ${CBICA_TMPDIR} + +# Used for the branch names and the temp dir +BRANCH="job-${JOB_ID}-${subid}" +mkdir ${BRANCH} +cd ${BRANCH} + +# get the analysis dataset, which includes the inputs as well +# importantly, we do not clone from the lcoation that we want to push the +# results to, in order to avoid too many jobs blocking access to +# the same location and creating a throughput bottleneck +datalad clone "${dssource}" ds + +# all following actions are performed in the context of the superdataset +cd ds + +# in order to avoid accumulation temporary git-annex availability information +# and to avoid a syncronization bottleneck by having to consolidate the +# git-annex branch across jobs, we will only push the main tracking branch +# back to the output store (plus the actual file content). Final availability +# information can be establish via an eventual `git-annex fsck -f joc-storage`. +# this remote is never fetched, it accumulates a larger number of branches +# and we want to avoid progressive slowdown. Instead we only ever push +# a unique branch per each job (subject AND process specific name) +git remote add outputstore "$pushgitremote" + +# all results of this job will be put into a dedicated branch +git checkout -b "${BRANCH}" + +# we pull down the input subject manually in order to discover relevant +# files. We do this outside the recorded call, because on a potential +# re-run we want to be able to do fine-grained recomputing of individual +# outputs. The recorded calls will have specific paths that will enable +# recomputation outside the scope of the original setup +datalad get -n "inputs/data/${subid}" + +# Reomve all subjects we're not working on +(cd inputs/data && rm -rf `find . -type d -name 'sub*' | grep -v $subid`) + +# ------------------------------------------------------------------------------ +# Do the run! + +datalad run \ + -m "copy ${subid}" \ + -i code/fmriprep_zip.sh \ + -i inputs/data/${subid} \ + -i pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image +.datalad/environments/fmriprep-20-2-1 \ + -o ${subid}_fmriprep-20-2-1.tar.gz \ + -o ${subid}_freesurfer-20-2-1.tar.gz \ + "./code/fmriprep_zip.sh ${subid}" + +# file content first -- does not need a lock, no interaction with Git +datalad push --to output-storage +# and the output branch +flock $DSLOCKFILE git push outputstore + +echo SUCCESS +# job handler should clean up workspace +EOT + +chmod +x code/participant_job.sh + +cat > code/fmriprep_zip.sh << "EOT" +#!/bin/bash +set -e -u -x + +subid="$1" +mkdir -p ${PWD}/.git/tmp/wdir +singularity run --cleanenv -B ${PWD} \ + pennlinc-containers.datalad/environments/fmriprep-20-2-1/image +.datalad/environments/fmriprep-20-2-1/image \ + inputs/data \ + prep \ + participant \ + -w ${PWD}/.git/wkdir \ + --n_cpus 1 \ + --skip-bids-validation \ + --participant-label "$subid" \ + --force-bbr \ + --cifti-output 91k -v -v + +tar cvfz -C prep ${subid}_fmriprep-20-2-1.tar.gz fmriprep +tar cvfz -C prep ${subid}_freesurfer-20-2-1.tar.gz freesurfer +rm -rf prep .git/tmp/wkdir + +EOT + +chmod +x code/fmriprep_zip.sh +cp ${FREESURFER_HOME}/license.txt code/license.txt + +datalad save -m "Participant compute job implementation" + +mkdir logs +echo logs >> .gitignore + +mkdir logs +echo logs >> .gitignore + + +################################################################################ +# SGE SETUP START - remove or adjust to your needs +################################################################################ + +echo .SGE_datalad_lock >> .gitignore +env_flags="-v DSLOCKFILE=${PWD}/.SGE_datalad_lock" +eo_args="-e ${PWD}/logs -o ${PWD}/logs" +echo '#!/bin/bash' > code/qsub_calls.sh +dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" +pushgitremote=$(git remote get-url --push output) +for subject in sub-01 sub-02 sub-03 sub-04; do + echo "qsub -cwd ${env_flags} ${eo_args} \ + ${PWD}/code/participant_job \ + ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh +done +datalad save -m "SGE submission setup" code/ .gitignore + +################################################################################ +# SGE SETUP END +################################################################################ + +# cleanup - we have generated the job definitions, we do not need to keep a +# massive input dataset around. Having it around wastes resources and makes many +# git operations needlessly slow +datalad uninstall -r --nocheck inputs/data + +# make sure the fully configured output dataset is available from the designated +# store for initial cloning and pushing the results. +datalad push --to input +datalad push --to output + +# if we get here, we are happy +echo SUCCESS \ No newline at end of file From d5b68c883f21d7c11b1b5dc27ff7d3137c12199d Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Thu, 1 Apr 2021 14:33:32 -0400 Subject: [PATCH 02/30] working scripts --- scripts/cubic/bootstrap-c-pac.sh | 72 +++++++++++++++++--------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 3cfe015..612e38c 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -14,7 +14,7 @@ DATALAD_VERSION=$(datalad --version) if [ $? -gt 0 ]; then echo "No datalad available in your conda environment." echo "Try pip install datalad" - exit 1 + # exit 1 fi echo USING DATALAD VERSION ${DATALAD_VERSION} @@ -27,18 +27,18 @@ PROJECTROOT=${PWD}/fmriprep if [[ -d ${PROJECTROOT} ]] then echo ${PROJECTROOT} already exists - #exit 1 + # exit 1 fi if [[ ! -w $(dirname ${PROJECTROOT}) ]] then echo Unable to write to ${PROJECTROOT}\'s parent. Change permissions and retry - #exit 1 + # exit 1 fi ## Check the BIDS input -BIDSINPUT=/cbica/projects/RBC/testing/way2/exemplars +BIDSINPUT=/cbica/projects/RBC/testing/way2/exemplars_ds if [[ -z ${BIDSINPUT} ]] then echo "Required argument is an identifier of the BIDS source" @@ -50,9 +50,8 @@ BIDS_INPUT_METHOD=clone if [[ -d "${BIDSINPUT}" ]] then # Check if it's datalad - set +e - BIDS_DATALAD_ID=$(datalad -f '{infos[dataset][id]}' wtf -S dataset -d ${BIDSINPUT}) - #set -e + BIDS_DATALAD_ID=$(datalad -f '{infos[dataset][id]}' wtf -S \ + dataset -d ${BIDSINPUT} 2> /dev/null || true) [ "${BIDS_DATALAD_ID}" = 'N/A' ] && BIDS_INPUT_METHOD=copy fi @@ -87,27 +86,28 @@ then # amend the previous commit with a nicer commit message git commit --amend -m 'Register input data dataset as a subdataset' else - echo "Copying input data into " - mkdir inputs - datalad create -d . inputs/data - cp -rv ${BIDSINPUT}/* inputs/data + echo "WARNING: copying input data into repository" + mkdir -p inputs/data + cp -r ${BIDSINPUT}/* inputs/data datalad save -r -m "added input data" fi -SUBJECTS=$(ls -d inputs/data/* | grep sub- | cut -d "/" -f 3 ) +SUBJECTS=$(find inputs/data -type d -name 'sub-*' | cut -d '/' -f 3 ) if [ -z "${SUBJECTS}" ] then echo "No subjects found in input data" - exit 1 + # exit 1 fi ## Add the containers as a subdataset -datalad clone -d . ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_project/containers#~pennlinc-containers +cd ${PROJECTROOT} +datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers pennlinc-containers # download the image so we don't ddos pmacs -datalad get pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image -.datalad/environments/fmriprep-20-2-1 - +cd pennlinc-containers +datalad get . +cd ${PROJECTROOT}/analysis +datalad install -d . --source ${PROJECTROOT}/pennlinc-containers ## the actual compute job specification cat > code/participant_job.sh << "EOT" @@ -128,7 +128,8 @@ pushgitremote="$2" subid="$3" # change into the cluster-assigned temp directory. Not done by default in SGE -cd ${CBICA_TMPDIR} +# cd ${CBICA_TMPDIR} +cd /cbica/comp_space/RBC/way2 # Used for the branch names and the temp dir BRANCH="job-${JOB_ID}-${subid}" @@ -171,13 +172,13 @@ datalad get -n "inputs/data/${subid}" # Do the run! datalad run \ - -m "copy ${subid}" \ -i code/fmriprep_zip.sh \ -i inputs/data/${subid} \ - -i pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image -.datalad/environments/fmriprep-20-2-1 \ + -i inputs/data/*json \ + -i pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image \ -o ${subid}_fmriprep-20-2-1.tar.gz \ -o ${subid}_freesurfer-20-2-1.tar.gz \ + -m "fmriprep:20.2.1 ${subid}" \ "./code/fmriprep_zip.sh ${subid}" # file content first -- does not need a lock, no interaction with Git @@ -198,13 +199,14 @@ set -e -u -x subid="$1" mkdir -p ${PWD}/.git/tmp/wdir singularity run --cleanenv -B ${PWD} \ - pennlinc-containers.datalad/environments/fmriprep-20-2-1/image -.datalad/environments/fmriprep-20-2-1/image \ + pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image \ inputs/data \ prep \ participant \ -w ${PWD}/.git/wkdir \ --n_cpus 1 \ + --stop-on-first-crash \ + --fs-license-file code/license.txt \ --skip-bids-validation \ --participant-label "$subid" \ --force-bbr \ @@ -219,28 +221,28 @@ EOT chmod +x code/fmriprep_zip.sh cp ${FREESURFER_HOME}/license.txt code/license.txt -datalad save -m "Participant compute job implementation" - mkdir logs +echo .SGE_datalad_lock >> .gitignore echo logs >> .gitignore -mkdir logs -echo logs >> .gitignore +datalad save -m "Participant compute job implementation" + ################################################################################ # SGE SETUP START - remove or adjust to your needs ################################################################################ -echo .SGE_datalad_lock >> .gitignore + env_flags="-v DSLOCKFILE=${PWD}/.SGE_datalad_lock" -eo_args="-e ${PWD}/logs -o ${PWD}/logs" + echo '#!/bin/bash' > code/qsub_calls.sh dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) -for subject in sub-01 sub-02 sub-03 sub-04; do - echo "qsub -cwd ${env_flags} ${eo_args} \ - ${PWD}/code/participant_job \ +eo_args="-e ${PWD}/logs -o ${PWD}/logs" +for subject in ${SUBJECTS}; do + echo "qsub -cwd ${env_flags} -N fp${subject} ${eo_args} \ + ${PWD}/code/participant_job.sh \ ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh done datalad save -m "SGE submission setup" code/ .gitignore @@ -252,7 +254,11 @@ datalad save -m "SGE submission setup" code/ .gitignore # cleanup - we have generated the job definitions, we do not need to keep a # massive input dataset around. Having it around wastes resources and makes many # git operations needlessly slow -datalad uninstall -r --nocheck inputs/data +if [ "${BIDS_INPUT_METHOD}" = "clone" ] +then + datalad uninstall -r --nocheck inputs/data +fi + # make sure the fully configured output dataset is available from the designated # store for initial cloning and pushing the results. From 2b76e0767fc9af4dca213458cdbcad08e287c064 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Thu, 1 Apr 2021 23:09:57 -0400 Subject: [PATCH 03/30] use explicit for prep run --- scripts/cubic/bootstrap-c-pac.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 612e38c..9549779 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -176,6 +176,7 @@ datalad run \ -i inputs/data/${subid} \ -i inputs/data/*json \ -i pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image \ + --explicit \ -o ${subid}_fmriprep-20-2-1.tar.gz \ -o ${subid}_freesurfer-20-2-1.tar.gz \ -m "fmriprep:20.2.1 ${subid}" \ From 704232b9202ad8fab88747039b9f1d7a942f97d9 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Tue, 13 Apr 2021 15:24:48 -0400 Subject: [PATCH 04/30] Add merging script --- scripts/cubic/bootstrap-c-pac.sh | 60 ++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 9549779..cd345e8 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -177,8 +177,8 @@ datalad run \ -i inputs/data/*json \ -i pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image \ --explicit \ - -o ${subid}_fmriprep-20-2-1.tar.gz \ - -o ${subid}_freesurfer-20-2-1.tar.gz \ + -o ${subid}_fmriprep-20.2.1.zip \ + -o ${subid}_freesurfer-20.2.1.zip \ -m "fmriprep:20.2.1 ${subid}" \ "./code/fmriprep_zip.sh ${subid}" @@ -213,8 +213,9 @@ singularity run --cleanenv -B ${PWD} \ --force-bbr \ --cifti-output 91k -v -v -tar cvfz -C prep ${subid}_fmriprep-20-2-1.tar.gz fmriprep -tar cvfz -C prep ${subid}_freesurfer-20-2-1.tar.gz freesurfer +cd prep +7z a ../${subid}_fmriprep-20.2.1.zip fmriprep +7z a ../${subid}_freesurfer-20.2.1.zip freesurfer rm -rf prep .git/tmp/wkdir EOT @@ -228,11 +229,58 @@ echo logs >> .gitignore datalad save -m "Participant compute job implementation" - - ################################################################################ # SGE SETUP START - remove or adjust to your needs ################################################################################ +cat > code/merge_outputs.sh << "EOT" +#!/bin/bash +set -e -u -x +EOT + +echo "outputsource=${output_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" \ + >> code/merge_outputs.sh + +cat >> code/merge_outputs.sh << "EOT" + +datalad clone ${outputsource} merge_ds +cd merge_ds +NBRANCHES=$(git branch -a | grep job- | sort | wc -l) +echo "Found $NBRANCHES branches to merge" + +gitref=$(git show-ref master | cut -d ' ' -f1 | head -n 1) + +# query all branches for the most recent commit and check if it is identical. +# Write all branch identifiers for jobs without outputs into a file. +for i in $(git branch -a | grep job- | sort); do [ x"$(git show-ref $i \ + | cut -d ' ' -f1)" = x"${gitref}" ] && \ + echo $i; done | tee code/noresults.txt | wc -l + + +for i in $(git branch -a | grep job- | sort); \ + do [ x"$(git show-ref $i \ + | cut -d ' ' -f1)" != x"${gitref}" ] && \ + echo $i; \ +done | tee code/has_results.txt + +mkdir -p code/merge_batches +num_branches=$(wc -l < code/has_results.txt) +CHUNKSIZE=5000 +num_chunks=$(expr ${num_branches} / ${CHUNKSIZE}) +[[ $num_chunks == 0 ]] && num_chunks=1 + +for chunknum in {1..$num_chunks} +do + startnum=$(expr $(expr ${chunknum} - 1) \* ${CHUNKSIZE} + 1) + endnum=$(expr ${chunknum} \* ${CHUNKSIZE}) + batch_file=code/merge_branches_$(printf %04d ${chunknum}).txt + [[ ${num_branches} -lt ${endnum} ]] && endnum=${num_branches} + branches=$(sed -n "${startnum},${endnum}p;$(expr ${endnum} + 1)q" code/has_results.txt) + echo ${branches} > ${batch_file} + git merge -m "fmriprep results batch ${chunknum}/${num_chunks}" $(cat ${batch_file}) + +done + +EOT env_flags="-v DSLOCKFILE=${PWD}/.SGE_datalad_lock" From ac0b59330f49b3f0bd808839d8d4157f5769cb26 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Thu, 15 Apr 2021 11:43:39 -0400 Subject: [PATCH 05/30] add projectroot to merge script --- scripts/cubic/bootstrap-c-pac.sh | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index cd345e8..27ec529 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -239,6 +239,7 @@ EOT echo "outputsource=${output_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" \ >> code/merge_outputs.sh +echo "cd ${PROJECTROOT}" >> code/merge_outputs.sh cat >> code/merge_outputs.sh << "EOT" @@ -268,7 +269,7 @@ CHUNKSIZE=5000 num_chunks=$(expr ${num_branches} / ${CHUNKSIZE}) [[ $num_chunks == 0 ]] && num_chunks=1 -for chunknum in {1..$num_chunks} +for chunknum in $(seq 1 $num_chunks) do startnum=$(expr $(expr ${chunknum} - 1) \* ${CHUNKSIZE} + 1) endnum=$(expr ${chunknum} \* ${CHUNKSIZE}) @@ -280,6 +281,27 @@ do done +# Push the merge back +git push + +# Get the file availability info +git annex fsck --fast -f output-storage + +# This should not print anything +MISSING=$(git annex find --not --in output-storage) + +if [[ ! -z "$MISSING"]] +then + echo Unable to find data for $MISSING + exit 1 +fi + +# stop tracking this branch +git annex dead here + +datalad push --data nothing +echo SUCCESS + EOT From e01188d9b6310f0247af450e01211d6421c69a05 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Thu, 15 Apr 2021 12:00:57 -0400 Subject: [PATCH 06/30] use arg --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 27ec529..adfb443 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -38,7 +38,7 @@ fi ## Check the BIDS input -BIDSINPUT=/cbica/projects/RBC/testing/way2/exemplars_ds +BIDSINPUT=$1 if [[ -z ${BIDSINPUT} ]] then echo "Required argument is an identifier of the BIDS source" From 4946f5175e0a2193bfeab7b9462ccb7d48cdb440 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Thu, 15 Apr 2021 14:42:23 -0400 Subject: [PATCH 07/30] Don't check for conda, use tempdir --- scripts/cubic/bootstrap-c-pac.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index adfb443..0d6e382 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -3,11 +3,11 @@ ## Ensure the environment is ready to bootstrap the analysis workspace # Check that we have conda installed -conda activate -if [ $? -gt 0 ]; then - echo "Error initializing conda. Exiting" - exit $? -fi +#conda activate +#if [ $? -gt 0 ]; then +# echo "Error initializing conda. Exiting" +# exit $? +#fi DATALAD_VERSION=$(datalad --version) @@ -115,6 +115,7 @@ cat > code/participant_job.sh << "EOT" #$ -S /bin/bash #$ -l h_vmem=25G #$ -l s_vmem=23.5G +#$ -l tmpfree=200G # Set up the correct conda environment source ${CONDA_PREFIX}/bin/activate base echo I\'m in $PWD using `which python` @@ -128,8 +129,9 @@ pushgitremote="$2" subid="$3" # change into the cluster-assigned temp directory. Not done by default in SGE -# cd ${CBICA_TMPDIR} -cd /cbica/comp_space/RBC/way2 +cd ${CBICA_TMPDIR} +# OR Run it on a shared network drive +# cd /cbica/comp_space/$(basename $HOME) # Used for the branch names and the temp dir BRANCH="job-${JOB_ID}-${subid}" From b0a3cccf8a08f722ac12cd8367663a9961739aa1 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Fri, 16 Apr 2021 12:22:09 -0400 Subject: [PATCH 08/30] space --- scripts/cubic/bootstrap-c-pac.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 0d6e382..05b6444 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -269,7 +269,9 @@ mkdir -p code/merge_batches num_branches=$(wc -l < code/has_results.txt) CHUNKSIZE=5000 num_chunks=$(expr ${num_branches} / ${CHUNKSIZE}) -[[ $num_chunks == 0 ]] && num_chunks=1 +if [[ $num_chunks == 0 ]]; then + num_chunks=1 +fi for chunknum in $(seq 1 $num_chunks) do @@ -292,7 +294,7 @@ git annex fsck --fast -f output-storage # This should not print anything MISSING=$(git annex find --not --in output-storage) -if [[ ! -z "$MISSING"]] +if [[ ! -z "$MISSING" ]] then echo Unable to find data for $MISSING exit 1 From c3c092dddcc9511531b7630b4bd4dfa0d7230f33 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Wed, 21 Apr 2021 15:46:55 -0400 Subject: [PATCH 09/30] Write to 2mm mni by default --- scripts/cubic/bootstrap-c-pac.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 05b6444..b78e598 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -211,6 +211,7 @@ singularity run --cleanenv -B ${PWD} \ --stop-on-first-crash \ --fs-license-file code/license.txt \ --skip-bids-validation \ + --output-spaces --output-spaces MNI152NLin6Asym:res-2 \ --participant-label "$subid" \ --force-bbr \ --cifti-output 91k -v -v From 6efb6dd8d698cc900ce9bb1916eae8c1ca600d18 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Wed, 21 Apr 2021 18:06:44 -0400 Subject: [PATCH 10/30] remove double arg --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index b78e598..2ad8114 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -211,7 +211,7 @@ singularity run --cleanenv -B ${PWD} \ --stop-on-first-crash \ --fs-license-file code/license.txt \ --skip-bids-validation \ - --output-spaces --output-spaces MNI152NLin6Asym:res-2 \ + --output-spaces MNI152NLin6Asym:res-2 \ --participant-label "$subid" \ --force-bbr \ --cifti-output 91k -v -v From b3f9fc8bad82cbafa5c6d43a3675b65d191ee464 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Thu, 22 Apr 2021 16:25:53 -0400 Subject: [PATCH 11/30] run zip with bash --- scripts/cubic/bootstrap-c-pac.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 2ad8114..84be3fd 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -182,7 +182,7 @@ datalad run \ -o ${subid}_fmriprep-20.2.1.zip \ -o ${subid}_freesurfer-20.2.1.zip \ -m "fmriprep:20.2.1 ${subid}" \ - "./code/fmriprep_zip.sh ${subid}" + "bash ./code/fmriprep_zip.sh ${subid}" # file content first -- does not need a lock, no interaction with Git datalad push --to output-storage @@ -269,11 +269,12 @@ done | tee code/has_results.txt mkdir -p code/merge_batches num_branches=$(wc -l < code/has_results.txt) CHUNKSIZE=5000 +set +e num_chunks=$(expr ${num_branches} / ${CHUNKSIZE}) if [[ $num_chunks == 0 ]]; then num_chunks=1 fi - +set -e for chunknum in $(seq 1 $num_chunks) do startnum=$(expr $(expr ${chunknum} - 1) \* ${CHUNKSIZE} + 1) From 037047b490dd4eb199efa870e5f1c74289786ff5 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Thu, 29 Apr 2021 16:39:03 -0400 Subject: [PATCH 12/30] clear out remotes from the containers ds --- scripts/cubic/bootstrap-c-pac.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 84be3fd..1813048 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -106,6 +106,12 @@ datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers# # download the image so we don't ddos pmacs cd pennlinc-containers datalad get . +# get rid of the references to pmacs +set +e +datalad siblings remove -s pmacs-ria-storage +datalad siblings remove -s origin +set -e + cd ${PROJECTROOT}/analysis datalad install -d . --source ${PROJECTROOT}/pennlinc-containers From 175eea50b0de17d6211ac40e11a80c3b980aa1e9 Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Tue, 4 May 2021 16:52:00 -0400 Subject: [PATCH 13/30] add script for fmriprep audit --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 1813048..674a050 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -105,7 +105,7 @@ cd ${PROJECTROOT} datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers pennlinc-containers # download the image so we don't ddos pmacs cd pennlinc-containers -datalad get . +datalad get -r . # get rid of the references to pmacs set +e datalad siblings remove -s pmacs-ria-storage From 48627f3753048193623240ab05beaa483fa918ea Mon Sep 17 00:00:00 2001 From: Matt Cieslak Date: Tue, 8 Jun 2021 22:54:37 -0400 Subject: [PATCH 14/30] wget a merge script --- scripts/cubic/bootstrap-c-pac.sh | 83 +++++--------------------------- 1 file changed, 11 insertions(+), 72 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 674a050..4f4cd65 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -238,87 +238,22 @@ echo logs >> .gitignore datalad save -m "Participant compute job implementation" -################################################################################ -# SGE SETUP START - remove or adjust to your needs -################################################################################ +# Add a script for merging outputs +MERGE_POSTSCRIPT=https://raw.githubusercontent.com/PennLINC/TheWay/main/scripts/cubic/merge_outputs_postscript.sh cat > code/merge_outputs.sh << "EOT" #!/bin/bash set -e -u -x EOT - echo "outputsource=${output_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" \ >> code/merge_outputs.sh echo "cd ${PROJECTROOT}" >> code/merge_outputs.sh - -cat >> code/merge_outputs.sh << "EOT" - -datalad clone ${outputsource} merge_ds -cd merge_ds -NBRANCHES=$(git branch -a | grep job- | sort | wc -l) -echo "Found $NBRANCHES branches to merge" - -gitref=$(git show-ref master | cut -d ' ' -f1 | head -n 1) - -# query all branches for the most recent commit and check if it is identical. -# Write all branch identifiers for jobs without outputs into a file. -for i in $(git branch -a | grep job- | sort); do [ x"$(git show-ref $i \ - | cut -d ' ' -f1)" = x"${gitref}" ] && \ - echo $i; done | tee code/noresults.txt | wc -l - - -for i in $(git branch -a | grep job- | sort); \ - do [ x"$(git show-ref $i \ - | cut -d ' ' -f1)" != x"${gitref}" ] && \ - echo $i; \ -done | tee code/has_results.txt - -mkdir -p code/merge_batches -num_branches=$(wc -l < code/has_results.txt) -CHUNKSIZE=5000 -set +e -num_chunks=$(expr ${num_branches} / ${CHUNKSIZE}) -if [[ $num_chunks == 0 ]]; then - num_chunks=1 -fi -set -e -for chunknum in $(seq 1 $num_chunks) -do - startnum=$(expr $(expr ${chunknum} - 1) \* ${CHUNKSIZE} + 1) - endnum=$(expr ${chunknum} \* ${CHUNKSIZE}) - batch_file=code/merge_branches_$(printf %04d ${chunknum}).txt - [[ ${num_branches} -lt ${endnum} ]] && endnum=${num_branches} - branches=$(sed -n "${startnum},${endnum}p;$(expr ${endnum} + 1)q" code/has_results.txt) - echo ${branches} > ${batch_file} - git merge -m "fmriprep results batch ${chunknum}/${num_chunks}" $(cat ${batch_file}) - -done - -# Push the merge back -git push - -# Get the file availability info -git annex fsck --fast -f output-storage - -# This should not print anything -MISSING=$(git annex find --not --in output-storage) - -if [[ ! -z "$MISSING" ]] -then - echo Unable to find data for $MISSING - exit 1 -fi - -# stop tracking this branch -git annex dead here - -datalad push --data nothing -echo SUCCESS - -EOT +wget -qO- ${MERGE_POSTSCRIPT} >> code/merge_outputs.sh +################################################################################ +# SGE SETUP START - remove or adjust to your needs +################################################################################ env_flags="-v DSLOCKFILE=${PWD}/.SGE_datalad_lock" - echo '#!/bin/bash' > code/qsub_calls.sh dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) @@ -342,11 +277,15 @@ then datalad uninstall -r --nocheck inputs/data fi - # make sure the fully configured output dataset is available from the designated # store for initial cloning and pushing the results. datalad push --to input datalad push --to output +# Add an alias to the data in the RIA store +RIA_DIR=$(find $PROJECTROOT/output_ria/??? -maxdepth 1 -type d) +mkdir -p ${PROJECTROOT}/output_ria/alias +ln -s ${RIA_DIR} ${PROJECTROOT}/output_ria/alias/data + # if we get here, we are happy echo SUCCESS \ No newline at end of file From 8edb98cd19d9ade76ecf767223cf4b5a754ffde1 Mon Sep 17 00:00:00 2001 From: Maxwell Bertolero Date: Wed, 9 Jun 2021 13:49:09 -0400 Subject: [PATCH 15/30] new alias --- scripts/cubic/bootstrap-c-pac.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 4f4cd65..1c00bae 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -283,9 +283,9 @@ datalad push --to input datalad push --to output # Add an alias to the data in the RIA store -RIA_DIR=$(find $PROJECTROOT/output_ria/??? -maxdepth 1 -type d) +RIA_DIR=$(find $PROJECTROOT/output_ria/???/ -maxdepth 1 -type d | sort | tail -n 1) mkdir -p ${PROJECTROOT}/output_ria/alias ln -s ${RIA_DIR} ${PROJECTROOT}/output_ria/alias/data # if we get here, we are happy -echo SUCCESS \ No newline at end of file +echo SUCCESS From dbe801963bd68fc4229a7014eee565ee5946d714 Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Fri, 13 Aug 2021 14:44:20 -0400 Subject: [PATCH 16/30] Update bootstrap-fmriprep.sh --- scripts/cubic/bootstrap-c-pac.sh | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 1c00bae..2e7ac83 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -99,10 +99,14 @@ then # exit 1 fi - -## Add the containers as a subdataset +set +u +CONTAINERDS=$2 +set -u +#if [[ ! -z "${CONTAINERDS}" ]]; then cd ${PROJECTROOT} -datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers pennlinc-containers +datalad clone ${CONTAINERDS} pennlinc-containers +## Add the containers as a subdataset +#datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers pennlinc-containers # download the image so we don't ddos pmacs cd pennlinc-containers datalad get -r . @@ -183,11 +187,11 @@ datalad run \ -i code/fmriprep_zip.sh \ -i inputs/data/${subid} \ -i inputs/data/*json \ - -i pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image \ + -i pennlinc-containers/.datalad/environments/fmriprep-20-2-3/image \ --explicit \ - -o ${subid}_fmriprep-20.2.1.zip \ - -o ${subid}_freesurfer-20.2.1.zip \ - -m "fmriprep:20.2.1 ${subid}" \ + -o ${subid}_fmriprep-20.2.3.zip \ + -o ${subid}_freesurfer-20.2.3.zip \ + -m "fmriprep:20.2.3 ${subid}" \ "bash ./code/fmriprep_zip.sh ${subid}" # file content first -- does not need a lock, no interaction with Git @@ -208,7 +212,7 @@ set -e -u -x subid="$1" mkdir -p ${PWD}/.git/tmp/wdir singularity run --cleanenv -B ${PWD} \ - pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image \ + pennlinc-containers/.datalad/environments/fmriprep-20-2-3/image \ inputs/data \ prep \ participant \ @@ -223,8 +227,8 @@ singularity run --cleanenv -B ${PWD} \ --cifti-output 91k -v -v cd prep -7z a ../${subid}_fmriprep-20.2.1.zip fmriprep -7z a ../${subid}_freesurfer-20.2.1.zip freesurfer +7z a ../${subid}_fmriprep-20.2.3.zip fmriprep +7z a ../${subid}_freesurfer-20.2.3.zip freesurfer rm -rf prep .git/tmp/wkdir EOT From 5ebab2912247f4e1cb13c7aef6b43009d0fc7412 Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Fri, 13 Aug 2021 14:44:20 -0400 Subject: [PATCH 17/30] Update bootstrap-fmriprep.sh --- scripts/cubic/bootstrap-c-pac.sh | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 1c00bae..2e7ac83 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -99,10 +99,14 @@ then # exit 1 fi - -## Add the containers as a subdataset +set +u +CONTAINERDS=$2 +set -u +#if [[ ! -z "${CONTAINERDS}" ]]; then cd ${PROJECTROOT} -datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers pennlinc-containers +datalad clone ${CONTAINERDS} pennlinc-containers +## Add the containers as a subdataset +#datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers pennlinc-containers # download the image so we don't ddos pmacs cd pennlinc-containers datalad get -r . @@ -183,11 +187,11 @@ datalad run \ -i code/fmriprep_zip.sh \ -i inputs/data/${subid} \ -i inputs/data/*json \ - -i pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image \ + -i pennlinc-containers/.datalad/environments/fmriprep-20-2-3/image \ --explicit \ - -o ${subid}_fmriprep-20.2.1.zip \ - -o ${subid}_freesurfer-20.2.1.zip \ - -m "fmriprep:20.2.1 ${subid}" \ + -o ${subid}_fmriprep-20.2.3.zip \ + -o ${subid}_freesurfer-20.2.3.zip \ + -m "fmriprep:20.2.3 ${subid}" \ "bash ./code/fmriprep_zip.sh ${subid}" # file content first -- does not need a lock, no interaction with Git @@ -208,7 +212,7 @@ set -e -u -x subid="$1" mkdir -p ${PWD}/.git/tmp/wdir singularity run --cleanenv -B ${PWD} \ - pennlinc-containers/.datalad/environments/fmriprep-20-2-1/image \ + pennlinc-containers/.datalad/environments/fmriprep-20-2-3/image \ inputs/data \ prep \ participant \ @@ -223,8 +227,8 @@ singularity run --cleanenv -B ${PWD} \ --cifti-output 91k -v -v cd prep -7z a ../${subid}_fmriprep-20.2.1.zip fmriprep -7z a ../${subid}_freesurfer-20.2.1.zip freesurfer +7z a ../${subid}_fmriprep-20.2.3.zip fmriprep +7z a ../${subid}_freesurfer-20.2.3.zip freesurfer rm -rf prep .git/tmp/wkdir EOT From 140c375c7df01b5552db183a2882e5ea0936c655 Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Thu, 19 Aug 2021 11:48:48 -0400 Subject: [PATCH 18/30] added garbage collection --- scripts/cubic/bootstrap-c-pac.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 2e7ac83..4ea98cd 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -199,6 +199,15 @@ datalad push --to output-storage # and the output branch flock $DSLOCKFILE git push outputstore +echo TMPDIR TO DELETE +echo ${BRANCH} + +datalad drop -r . --nocheck +datalad uninstall -r inputs/data +git annex dead here +cd ../.. +rm -rf $BRANCH + echo SUCCESS # job handler should clean up workspace EOT From cbe3b9e96c315bd3883d1f723c03a0fb6f248ace Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Mon, 13 Sep 2021 11:54:26 -0400 Subject: [PATCH 19/30] Update bootstrap-fmriprep.sh --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 4ea98cd..2a15e76 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -202,8 +202,8 @@ flock $DSLOCKFILE git push outputstore echo TMPDIR TO DELETE echo ${BRANCH} +datalad uninstall -r --nocheck --if-dirty ignore inputs/data datalad drop -r . --nocheck -datalad uninstall -r inputs/data git annex dead here cd ../.. rm -rf $BRANCH From f49cec45771bf06fea8f1f94028b993e1fa21baf Mon Sep 17 00:00:00 2001 From: Mengjia Lyu <53891017+YumekaMengjiaLYU@users.noreply.github.com> Date: Mon, 20 Sep 2021 21:23:50 -0400 Subject: [PATCH 20/30] Update bootstrap-fmriprep.sh (#25) modified the -w directory in singularity run of fmriprep_zip.sh --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 2a15e76..6058c49 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -225,7 +225,7 @@ singularity run --cleanenv -B ${PWD} \ inputs/data \ prep \ participant \ - -w ${PWD}/.git/wkdir \ + -w ${PWD}/.git/tmp/wkdir \ --n_cpus 1 \ --stop-on-first-crash \ --fs-license-file code/license.txt \ From 84a5f98b04aa8106552746b6a3791d604ba478ed Mon Sep 17 00:00:00 2001 From: Mengjia Lyu <53891017+YumekaMengjiaLYU@users.noreply.github.com> Date: Mon, 20 Sep 2021 21:24:23 -0400 Subject: [PATCH 21/30] Update bootstrap-qsirecon.sh (#30) fixed typo From 25e6dea2f2599b70745cb55b7f49a9daecbd8140 Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Wed, 27 Oct 2021 14:59:46 -0400 Subject: [PATCH 22/30] added reservation flag to qsub_calls.sh --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 6058c49..d8693f2 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -272,7 +272,7 @@ dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) eo_args="-e ${PWD}/logs -o ${PWD}/logs" for subject in ${SUBJECTS}; do - echo "qsub -cwd ${env_flags} -N fp${subject} ${eo_args} \ + echo "qsub -ry -cwd ${env_flags} -N fp${subject} ${eo_args} \ ${PWD}/code/participant_job.sh \ ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh done From 919747aef2510ed02ce21b8846138d04c913f57b Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Wed, 27 Oct 2021 15:28:21 -0400 Subject: [PATCH 23/30] added -h_rt flag for 24h --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index d8693f2..cd47726 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -272,7 +272,7 @@ dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) eo_args="-e ${PWD}/logs -o ${PWD}/logs" for subject in ${SUBJECTS}; do - echo "qsub -ry -cwd ${env_flags} -N fp${subject} ${eo_args} \ + echo "qsub -ry -h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ ${PWD}/code/participant_job.sh \ ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh done From 537a722254f7b76cbb56bb83ca91505bb68a900b Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Wed, 27 Oct 2021 16:12:09 -0400 Subject: [PATCH 24/30] Update bootstrap-fmriprep.sh --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index cd47726..27c5a2a 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -272,7 +272,7 @@ dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) eo_args="-e ${PWD}/logs -o ${PWD}/logs" for subject in ${SUBJECTS}; do - echo "qsub -ry -h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ + echo "qsub -ry --l h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ ${PWD}/code/participant_job.sh \ ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh done From 9b86fe6602c98cc566bfe0e69205029436b77453 Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Wed, 27 Oct 2021 16:18:13 -0400 Subject: [PATCH 25/30] Update bootstrap-fmriprep.sh --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 27c5a2a..add922f 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -272,7 +272,7 @@ dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) eo_args="-e ${PWD}/logs -o ${PWD}/logs" for subject in ${SUBJECTS}; do - echo "qsub -ry --l h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ + echo "qsub -r y --l h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ ${PWD}/code/participant_job.sh \ ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh done From 9fd70f138f7e9eba724f678f784e332d02652def Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Wed, 27 Oct 2021 16:20:34 -0400 Subject: [PATCH 26/30] Update bootstrap-fmriprep.sh --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index add922f..6161dd7 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -272,7 +272,7 @@ dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) eo_args="-e ${PWD}/logs -o ${PWD}/logs" for subject in ${SUBJECTS}; do - echo "qsub -r y --l h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ + echo "qsub -r y -l h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ ${PWD}/code/participant_job.sh \ ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh done From 773e0c542f55bfc6ed9a4f48a4d787c510cbb2e7 Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Thu, 28 Oct 2021 12:09:22 -0400 Subject: [PATCH 27/30] -r to -R --- scripts/cubic/bootstrap-c-pac.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index 6161dd7..a027225 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -272,7 +272,7 @@ dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) eo_args="-e ${PWD}/logs -o ${PWD}/logs" for subject in ${SUBJECTS}; do - echo "qsub -r y -l h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ + echo "qsub -R y -l h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ ${PWD}/code/participant_job.sh \ ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh done From 9f5f4a54348a21cec31b3b63098d4b6059535ca5 Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Mon, 1 Nov 2021 13:52:18 -0400 Subject: [PATCH 28/30] Update bootstrap-fmriprep.sh --- scripts/cubic/bootstrap-c-pac.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index a027225..e42163f 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -124,8 +124,9 @@ cat > code/participant_job.sh << "EOT" #!/bin/bash #$ -S /bin/bash #$ -l h_vmem=25G -#$ -l s_vmem=23.5G #$ -l tmpfree=200G +#$ -R y +#$ -l h_rt=24:00:00 # Set up the correct conda environment source ${CONDA_PREFIX}/bin/activate base echo I\'m in $PWD using `which python` @@ -272,7 +273,7 @@ dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) eo_args="-e ${PWD}/logs -o ${PWD}/logs" for subject in ${SUBJECTS}; do - echo "qsub -R y -l h_rt=24:00:0 -cwd ${env_flags} -N fp${subject} ${eo_args} \ + echo "qsub -cwd ${env_flags} -N fp${subject} ${eo_args} \ ${PWD}/code/participant_job.sh \ ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh done From 9f31fd1cce2e48d141975bb1a02826deac63a2ec Mon Sep 17 00:00:00 2001 From: Sydney Covitz <70981267+scovitz@users.noreply.github.com> Date: Fri, 7 Jan 2022 17:17:52 -0500 Subject: [PATCH 29/30] Update bootstrap-fmriprep.sh --- scripts/cubic/bootstrap-c-pac.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index e42163f..f60cb14 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -111,10 +111,10 @@ datalad clone ${CONTAINERDS} pennlinc-containers cd pennlinc-containers datalad get -r . # get rid of the references to pmacs -set +e -datalad siblings remove -s pmacs-ria-storage -datalad siblings remove -s origin -set -e +#set +e +#datalad siblings remove -s pmacs-ria-storage +#datalad siblings remove -s origin +#set -e cd ${PROJECTROOT}/analysis datalad install -d . --source ${PROJECTROOT}/pennlinc-containers From b1583e9f6b41c062fdeeb3b39a37693bc0743b95 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 7 Jul 2022 10:23:50 -0400 Subject: [PATCH 30/30] :twisted_rightwards_arrows: Merge multises setup from fmriprep-multises bootstrap into c-pac bootstrap --- scripts/cubic/bootstrap-c-pac.sh | 269 +++++++++++++++++++++++-------- 1 file changed, 206 insertions(+), 63 deletions(-) diff --git a/scripts/cubic/bootstrap-c-pac.sh b/scripts/cubic/bootstrap-c-pac.sh index f60cb14..d18dcca 100644 --- a/scripts/cubic/bootstrap-c-pac.sh +++ b/scripts/cubic/bootstrap-c-pac.sh @@ -1,6 +1,12 @@ ## NOTE ## # This workflow is derived from the Datalad Handbook +# In addition to the positional arguments described in https://pennlinc.github.io/docs/TheWay/RunningDataLadPipelines/#preparing-the-analysis-dataset , +# this bootstrap script also takes a /full/path/to/callback.log i.e., +# `bash bootstrap-c-pac.sh /full/path/to/BIDS /full/path/to/cpac-container /full/path/to/callback.log` +# for optimizing memory (see https://fcp-indi.github.io/docs/nightly/user/tutorials/observed_usage for C-PAC optimization tutorial, and see +# sections marked "C-PAC-specific memory optimization" in this script for details). + ## Ensure the environment is ready to bootstrap the analysis workspace # Check that we have conda installed #conda activate @@ -23,7 +29,7 @@ set -e -u ## Set up the directory that will contain the necessary directories -PROJECTROOT=${PWD}/fmriprep +PROJECTROOT=${PWD}/c-pac-1.8.5 if [[ -d ${PROJECTROOT} ]] then echo ${PROJECTROOT} already exists @@ -37,6 +43,11 @@ then fi +# C-PAC-specific memory optimization +CALLBACK_LOG=$3 +# ---------------------------------- + + ## Check the BIDS input BIDSINPUT=$1 if [[ -z ${BIDSINPUT} ]] @@ -92,41 +103,55 @@ else datalad save -r -m "added input data" fi -SUBJECTS=$(find inputs/data -type d -name 'sub-*' | cut -d '/' -f 3 ) +SUBJECTS=$(find inputs/data -type d -name 'sub-*' | cut -d '/' -f 3 | sort) if [ -z "${SUBJECTS}" ] then echo "No subjects found in input data" # exit 1 fi -set +u -CONTAINERDS=$2 -set -u -#if [[ ! -z "${CONTAINERDS}" ]]; then -cd ${PROJECTROOT} -datalad clone ${CONTAINERDS} pennlinc-containers + ## Add the containers as a subdataset -#datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers pennlinc-containers -# download the image so we don't ddos pmacs -cd pennlinc-containers -datalad get -r . -# get rid of the references to pmacs -#set +e -#datalad siblings remove -s pmacs-ria-storage -#datalad siblings remove -s origin -#set -e +cd ${PROJECTROOT} + +# Clone the containers dataset. If specified on the command, use that path +CONTAINERDS=$2 +if [[ ! -z "${CONTAINERDS}" ]]; then + datalad clone ${CONTAINERDS} pennlinc-containers +else + echo "No containers dataset specified, attempting to clone from pmacs" + datalad clone \ + ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers \ + pennlinc-containers + cd pennlinc-containers + datalad get -r . + # get rid of the references to pmacs + set +e + datalad siblings remove -s pmacs-ria-storage + git annex dead pmacs-ria-storage + datalad siblings remove -s origin + git annex dead origin + set -e +fi cd ${PROJECTROOT}/analysis datalad install -d . --source ${PROJECTROOT}/pennlinc-containers + +# C-PAC-specific memory optimization --------- +if [[ ! -z "${CALLBACK_LOG}" ]]; then + ln $CALLBACK_LOG code/runtime_callback.log +fi +# -------------------------------------------- + + ## the actual compute job specification cat > code/participant_job.sh << "EOT" #!/bin/bash #$ -S /bin/bash -#$ -l h_vmem=25G +#$ -l h_vmem=32G +#$ -l s_vmem=32G #$ -l tmpfree=200G -#$ -R y -#$ -l h_rt=24:00:00 # Set up the correct conda environment source ${CONDA_PREFIX}/bin/activate base echo I\'m in $PWD using `which python` @@ -138,6 +163,7 @@ set -e -u -x dssource="$1" pushgitremote="$2" subid="$3" +sesid="$4" # change into the cluster-assigned temp directory. Not done by default in SGE cd ${CBICA_TMPDIR} @@ -145,7 +171,7 @@ cd ${CBICA_TMPDIR} # cd /cbica/comp_space/$(basename $HOME) # Used for the branch names and the temp dir -BRANCH="job-${JOB_ID}-${subid}" +BRANCH="job-${JOB_ID}-${subid}-${sesid}" mkdir ${BRANCH} cd ${BRANCH} @@ -184,22 +210,38 @@ datalad get -n "inputs/data/${subid}" # ------------------------------------------------------------------------------ # Do the run! -datalad run \ - -i code/fmriprep_zip.sh \ - -i inputs/data/${subid} \ - -i inputs/data/*json \ - -i pennlinc-containers/.datalad/environments/fmriprep-20-2-3/image \ - --explicit \ - -o ${subid}_fmriprep-20.2.3.zip \ - -o ${subid}_freesurfer-20.2.3.zip \ - -m "fmriprep:20.2.3 ${subid}" \ - "bash ./code/fmriprep_zip.sh ${subid}" +# C-PAC-specific memory optimization -------------------------------- +if [[ -f code/runtime_callback.log ]] +then + datalad run \ + -i code/c-pac_zip.sh \ + -i code/runtime_callback.log \ + -i inputs/data/${subid}/${sesid} \ + -i inputs/data/*json \ + -i pennlinc-containers/.datalad/environments/cpac-1-8-5/image \ + --explicit \ + -o ${subid}_${sesid}_c-pac-1.8.5.zip \ + -m "C-PAC:1.8.5 ${subid} ${sesid}" \ + "bash ./code/c-pac_zip.sh ${subid} ${sesid}" +# ------------------------------------------------------------------- +else + datalad run \ + -i code/c-pac_zip.sh \ + -i inputs/data/${subid} \ + -i inputs/data/*json \ + -i pennlinc-containers/.datalad/environments/cpac-1-8-5/image \ + --explicit \ + -o ${subid}_${sesid}_c-pac-1.8.5.zip \ + -m "C-PAC:1.8.5 ${subid}" \ + "bash ./code/c-pac_zip.sh ${subid}" +fi # file content first -- does not need a lock, no interaction with Git datalad push --to output-storage # and the output branch flock $DSLOCKFILE git push outputstore +# remove tempdir echo TMPDIR TO DELETE echo ${BRANCH} @@ -215,36 +257,71 @@ EOT chmod +x code/participant_job.sh -cat > code/fmriprep_zip.sh << "EOT" +cat > code/c-pac_zip.sh << "EOT" #!/bin/bash set -e -u -x subid="$1" -mkdir -p ${PWD}/.git/tmp/wdir -singularity run --cleanenv -B ${PWD} \ - pennlinc-containers/.datalad/environments/fmriprep-20-2-3/image \ - inputs/data \ - prep \ - participant \ - -w ${PWD}/.git/tmp/wkdir \ - --n_cpus 1 \ - --stop-on-first-crash \ - --fs-license-file code/license.txt \ - --skip-bids-validation \ - --output-spaces MNI152NLin6Asym:res-2 \ - --participant-label "$subid" \ - --force-bbr \ - --cifti-output 91k -v -v - -cd prep -7z a ../${subid}_fmriprep-20.2.3.zip fmriprep -7z a ../${subid}_freesurfer-20.2.3.zip freesurfer -rm -rf prep .git/tmp/wkdir +sesid="$2" + +# Create a filter file that only allows this session +filterfile=${PWD}/${sesid}_filter.json +echo "{" > ${filterfile} +echo "'fmap': {'datatype': 'fmap'}," >> ${filterfile} +echo "'bold': {'datatype': 'func', 'session': '$sesid', 'suffix': 'bold'}," >> ${filterfile} +echo "'sbref': {'datatype': 'func', 'session': '$sesid', 'suffix': 'sbref'}," >> ${filterfile} +echo "'flair': {'datatype': 'anat', 'session': '$sesid', 'suffix': 'FLAIR'}," >> ${filterfile} +echo "'t2w': {'datatype': 'anat', 'session': '$sesid', 'suffix': 'T2w'}," >> ${filterfile} +echo "'t1w': {'datatype': 'anat', 'session': '$sesid', 'suffix': 'T1w'}," >> ${filterfile} +echo "'roi': {'datatype': 'anat', 'session': '$sesid', 'suffix': 'roi'}" >> ${filterfile} +echo "}" >> ${filterfile} + +# remove ses and get valid json +sed -i "s/'/\"/g" ${filterfile} +sed -i "s/ses-//g" ${filterfile} + +mkdir -p ${subid}_${sesid}_outputs +# C-PAC-specific memory optimization ----------------------------- +if [[ -f code/runtime_callback.log ]] +then + singularity run --cleanenv \ + -B ${PWD} \ + -B ${PWD}/${subid}_${sesid}_outputs:/outputs \ + pennlinc-containers/.datalad/environments/cpac-1-8-5/image \ + inputs/data \ + /outputs \ + participant \ + --preconfig rbc-options \ + --skip_bids_validator \ + --n_cpus 4 \ + --mem_gb 32 \ + --participant_label "$subid" \ + --runtime_usage=code/runtime_callback.log \ + --runtime_buffer=30 +# ---------------------------------------------------------------- +else + singularity run --cleanenv \ + -B ${PWD} \ + -B ${PWD}/${subid}_${sesid}_outputs:/outputs \ + pennlinc-containers/.datalad/environments/cpac-1-8-5/image \ + inputs/data \ + /outputs \ + participant \ + --preconfig rbc-options \ + --skip_bids_validator \ + --n_cpus 4 \ + --mem_gb 32 \ + --participant_label "$subid" +fi + +rm -rf ${subid}_${sesid}_outputs/working +7z a ${subid}_${sesid}_c-pac-1.8.5.zip ${subid}_${sesid}_outputs +rm -rf ${subid}_${sesid}_outputs +rm ${filterfile} EOT -chmod +x code/fmriprep_zip.sh -cp ${FREESURFER_HOME}/license.txt code/license.txt +chmod +x code/c-pac_zip.sh mkdir logs echo .SGE_datalad_lock >> .gitignore @@ -252,8 +329,9 @@ echo logs >> .gitignore datalad save -m "Participant compute job implementation" -# Add a script for merging outputs -MERGE_POSTSCRIPT=https://raw.githubusercontent.com/PennLINC/TheWay/main/scripts/cubic/merge_outputs_postscript.sh +################################################################################ +# SGE SETUP START - remove or adjust to your needs +################################################################################ cat > code/merge_outputs.sh << "EOT" #!/bin/bash set -e -u -x @@ -261,21 +339,86 @@ EOT echo "outputsource=${output_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" \ >> code/merge_outputs.sh echo "cd ${PROJECTROOT}" >> code/merge_outputs.sh -wget -qO- ${MERGE_POSTSCRIPT} >> code/merge_outputs.sh + +cat >> code/merge_outputs.sh << "EOT" + +datalad clone ${outputsource} merge_ds +cd merge_ds +NBRANCHES=$(git branch -a | grep job- | sort | wc -l) +echo "Found $NBRANCHES branches to merge" + +gitref=$(git show-ref master | cut -d ' ' -f1 | head -n 1) + +# query all branches for the most recent commit and check if it is identical. +# Write all branch identifiers for jobs without outputs into a file. +for i in $(git branch -a | grep job- | sort); do [ x"$(git show-ref $i \ + | cut -d ' ' -f1)" = x"${gitref}" ] && \ + echo $i; done | tee code/noresults.txt | wc -l + + +for i in $(git branch -a | grep job- | sort); \ + do [ x"$(git show-ref $i \ + | cut -d ' ' -f1)" != x"${gitref}" ] && \ + echo $i; \ +done | tee code/has_results.txt + +mkdir -p code/merge_batches +num_branches=$(wc -l < code/has_results.txt) +CHUNKSIZE=5000 +set +e +num_chunks=$(expr ${num_branches} / ${CHUNKSIZE}) +if [[ $num_chunks == 0 ]]; then + num_chunks=1 +fi +set -e +for chunknum in $(seq 1 $num_chunks) +do + startnum=$(expr $(expr ${chunknum} - 1) \* ${CHUNKSIZE} + 1) + endnum=$(expr ${chunknum} \* ${CHUNKSIZE}) + batch_file=code/merge_branches_$(printf %04d ${chunknum}).txt + [[ ${num_branches} -lt ${endnum} ]] && endnum=${num_branches} + branches=$(sed -n "${startnum},${endnum}p;$(expr ${endnum} + 1)q" code/has_results.txt) + echo ${branches} > ${batch_file} + git merge -m "C-PAC results batch ${chunknum}/${num_chunks}" $(cat ${batch_file}) + +done + +# Push the merge back +git push + +# Get the file availability info +git annex fsck --fast -f output-storage + +# This should not print anything +MISSING=$(git annex find --not --in output-storage) + +if [[ ! -z "$MISSING" ]] +then + echo Unable to find data for $MISSING + exit 1 +fi + +# stop tracking this branch +git annex dead here + +datalad push --data nothing +echo SUCCESS + +EOT -################################################################################ -# SGE SETUP START - remove or adjust to your needs -################################################################################ env_flags="-v DSLOCKFILE=${PWD}/.SGE_datalad_lock" echo '#!/bin/bash' > code/qsub_calls.sh dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" pushgitremote=$(git remote get-url --push output) eo_args="-e ${PWD}/logs -o ${PWD}/logs" for subject in ${SUBJECTS}; do - echo "qsub -cwd ${env_flags} -N fp${subject} ${eo_args} \ - ${PWD}/code/participant_job.sh \ - ${dssource} ${pushgitremote} ${subject} " >> code/qsub_calls.sh + SESSIONS=$(ls inputs/data/$subject | grep ses- | cut -d '/' -f 1) + for session in ${SESSIONS}; do + echo "qsub -cwd ${env_flags} -N c-pac_${subject}_${session} ${eo_args} \ + ${PWD}/code/participant_job.sh \ + ${dssource} ${pushgitremote} ${subject} ${session}" >> code/qsub_calls.sh + done done datalad save -m "SGE submission setup" code/ .gitignore