diff --git a/hcpd_main.py b/hcpd_main.py
new file mode 100644
index 0000000..a0b75c5
--- /dev/null
+++ b/hcpd_main.py
@@ -0,0 +1,361 @@
+"""
+Rename the HCP-D data downloaded from NDA S3 into a BIDS layout.
+
+No datalad Python API is used in this script.
+"""
+import os
+import glob
+import shutil
+import csv
+import subprocess
+
+import json
+import sys
+
+
+def move_glob_files(glob_pattern, dest, cur_dir):
+    """Move files matching a glob pattern to a new directory in bulk."""
+    glob_pattern = cur_dir + glob_pattern
+    dest = cur_dir + dest
+    for matches in glob.glob(glob_pattern):
+        try:
+            filename = os.path.basename(matches)
+            rename_dest = os.path.join(dest, filename)
+            os.rename(matches, rename_dest)
+            # dl.save(message=f'Move {filename} to {dest}')
+        # suppress the "File exists" errors, which cannot be avoided
+        except OSError as e:
+            # Errno 17 is "File exists" and is inevitable during the re-organization
+            if e.errno != 17:
+                print("Error:", e)
+    # dl.save(message=f'Moving files to {dest}')
+
+
+def rename_list_of_files(pattern, replacement, cur_dir):
+    """Rename files matching a substring pattern in bulk."""
+    file_list = os.listdir(cur_dir)
+    for ii in file_list:
+        newName = ii.replace(pattern, replacement)
+        if newName != ii:
+            source = os.path.join(cur_dir, ii)
+            dest = os.path.join(cur_dir, newName)
+            os.rename(source, dest)
+            # dl.save(dest)
+            # dl.save(message=f'Renaming {ii} to {newName}')
+
+
+def remove_glob_files(glob_pattern, cur_dir):
+    """Delete directories matching a glob pattern in bulk."""
+    fileList = glob.glob(cur_dir + glob_pattern)
+    for filePath in fileList:
+        try:
+            shutil.rmtree(os.path.join(cur_dir, filePath))
+        except OSError:
+            print("Error while deleting:", filePath)
+    # dl.save(message=f'Deleting files with pattern {glob_pattern}')
+
+
+def main():
+    """Entry point; run from the directory containing the HCD*_V1_MR folders."""
+    subid = sys.argv[1]
+    fileList = glob.glob(str(subid))
+
+    # HCD<ID>_V1_MR -> sub-<ID>
+    for ii in fileList:
+        newName = ii.replace('HCD', 'sub-').replace('_V1_MR', '')
+        if newName != ii:
+            os.rename(ii, newName)
+    subjects = glob.glob("./sub*")
+    # list of new directories to be created
+    dir_names = ['ses-V1/anat', 'ses-V1/func', 'ses-V1/dwi', 'ses-V1/fmap',
+                 'ses-V1/S1', 'ses-V1/S2', 'ses-V1/S3', 'ses-V1/S4',
+                 'ses-V1/S5', 'ses-V1/S6', 'ses-V1/S7']
+    for sub in subjects:
+        os.chdir(sub)
+        cur_dir = os.getcwd()
+        try:
+            os.rename('unprocessed', 'ses-V1')
+        except OSError:
+            # 'unprocessed' does not exist; nothing to rename
+            pass
+
+        for folder in dir_names:
+            try:
+                if not os.path.isdir(os.path.join(cur_dir, folder)):
+                    os.mkdir(os.path.join(cur_dir, folder))
+                    # dl.save(os.path.join(cur_dir, folder), message=f"creating new folder {folder}")
+            except OSError:
+                print("Error while creating directory:", folder)
+
+        # collect the SpinEchoFieldMaps next to the scans they were acquired with
+        move_glob_files('/ses-V1/Diffusion/*/*SpinEchoFieldMap*', '/ses-V1/Diffusion/', cur_dir)
+        move_glob_files('/ses-V1/T2w_SPC_vNav/*/*SpinEchoFieldMap*', '/ses-V1/T2w_SPC_vNav/', cur_dir)
+        move_glob_files('/ses-V1/T1w_MPR_vNav_4e_e1e2_mean/*/*SpinEchoFieldMap*', '/ses-V1/T1w_MPR_vNav_4e_e1e2_mean/', cur_dir)
+
+        os.chdir('ses-V1/')
+
+        rest_files = glob.glob('*/*REST*')
"dir-AP_sbref.nii.gz") + .replace("AP_SBRef.json", "dir-AP_sbref.json") + .replace("PA_SBRef.nii.gz", "dir-PA_sbref.nii.gz") + .replace("PA_SBRef.json", "dir-PA_sbref.json") + .replace("_rfMRI_", "_task-rest_") + .replace("rest_REST", "rest_acq-REST")) + os.rename(rest_file,newName) + + # rename sbref nii.gz files by the number of run of the same task + counter_sbref_nii=1 + cur_dir = os.getcwd() + for sbref_file in glob.glob('*/*REST*_sbref.nii.gz'): + if 'run' in sbref_file: + continue + + newName = sbref_file.replace("_sbref",f"_run-0{counter_sbref_nii}_sbref") + if newName != sbref_file: + source = os.path.join(cur_dir,sbref_file) + dest = os.path.join(cur_dir,newName) + os.rename(source,dest) + + + counter_sbref_nii += 1 + + # rename sbref json files by the number of run of the same task + counter_sbref_json=1 + for sbref_json in glob.glob('*/*REST*_sbref.json' ): + if 'run' in sbref_json: + continue + newName = sbref_json.replace("_sbref",f"_run-0{counter_sbref_json}_sbref") + if newName != sbref_json: + source = os.path.join(cur_dir,sbref_json) + dest = os.path.join(cur_dir,newName) + os.rename(source,dest) + counter_sbref_json += 1 + + # rename bold nii.gz files by the number of run of the same task + counter_bold_nii=1 + for bold_file in glob.glob('*/*REST*_bold.nii.gz' ): + if 'run' in bold_file: + continue + newName = bold_file.replace("_bold",f"_run-0{counter_bold_nii}_bold") + if newName != bold_file: + source = os.path.join(cur_dir,bold_file) + dest = os.path.join(cur_dir,newName) + os.rename(source,dest) + counter_bold_nii += 1 + + # rename bold json files by the number of run of the same task + counter_bold_json=1 + for bold_json in glob.glob('*/*REST*_bold.json' ): + if 'run' in bold_json: + continue + newName = bold_json.replace("_bold",f"_run-0{counter_bold_json}_bold") + if newName != bold_json: + source = os.path.join(cur_dir,bold_json) + dest = os.path.join(cur_dir, newName) + os.rename(source,dest) + counter_bold_json += 1 + + directories_in_curdir = list(filter(os.path.isdir, os.listdir(os.curdir))) + # loop all subject folders + for sub_dir in directories_in_curdir: + os.chdir(sub_dir) #into each existing folder + + cur_dir = os.getcwd() + + source_list =["HCD", + "V1_MR", + "_SpinEchoFieldMap1_AP", + "_SpinEchoFieldMap1_PA", + "_SpinEchoFieldMap2_AP", + "_SpinEchoFieldMap2_PA", + "_SpinEchoFieldMap3_AP", + "_SpinEchoFieldMap3_PA", + "_SpinEchoFieldMap4_AP", + "_SpinEchoFieldMap4_PA", + "_SpinEchoFieldMap5_AP", + "_SpinEchoFieldMap5_PA", + "_SpinEchoFieldMap6_AP", + "_SpinEchoFieldMap6_PA", + "_SpinEchoFieldMap7_AP", + "_SpinEchoFieldMap7_PA", + "tfMRI_CARIT_AP_SBRef", + "tfMRI_CARIT_AP", + "tfMRI_CARIT_PA_SBRef", + "tfMRI_CARIT_PA", + "tfMRI_EMOTION_AP_SBRef", + "tfMRI_EMOTION_AP", + "tfMRI_EMOTION_PA_SBRef", + "tfMRI_EMOTION_PA", + "tfMRI_GUESSING_AP_SBRef", + "tfMRI_GUESSING_AP", + "tfMRI_GUESSING_PA_SBRef", + "tfMRI_GUESSING_PA", + "dMRI_dir98_AP_SBRef", + "dMRI_dir98_AP", + "dMRI_dir98_PA_SBRef", + "dMRI_dir98_PA", + "dMRI_dir99_AP_SBRef", + "dMRI_dir99_AP", + "dMRI_dir99_PA_SBRef", + "dMRI_dir99_PA", + "T1w_MPR_vNav_4e_e1e2_mean", + "T2w_SPC_vNav"] + dest_list = ["sub-", + "ses-V1", + "_dir-AP_run-01_epi", + "_dir-PA_run-01_epi", + "_dir-AP_run-02_epi", + "_dir-PA_run-02_epi", + "_dir-AP_run-03_epi", + "_dir-PA_run-03_epi", + "_dir-AP_run-04_epi", + "_dir-PA_run-04_epi", + "_dir-AP_run-05_epi", + "_dir-PA_run-05_epi", + "_dir-AP_run-06_epi", + "_dir-PA_run-06_epi", + "_dir-AP_run-07_epi", + "_dir-PA_run-07_epi", + "task-carit_dir-AP_run-01_sbref", + 
"task-carit_dir-AP_run-01_bold", + "task-carit_dir-PA_run-02_sbref", + "task-carit_dir-PA_run-02_bold", + "task-emotion_dir-AP_run-01_sbref", + "task-emotion_dir-AP_run-01_bold", + "task-emotion_dir-PA_run-02_sbref", + "task-emotion_dir-PA_run-02_bold", + "task-guessing_dir-AP_run-01_sbref", + "task-guessing_dir-AP_run-01_bold", + "task-guessing_dir-PA_run-02_sbref", + "task-guessing_dir-PA_run-02_bold", + "acq-dir98_dir-AP_run-01_sbref", + "acq-dir98_dir-AP_run-01_dwi", + "acq-dir98_dir-PA_run-02_sbref", + "acq-dir98_dir-PA_run-02_dwi", + "acq-dir99_dir-AP_run-03_sbref", + "acq-dir99_dir-AP_run-03_dwi", + "acq-dir99_dir-PA_run-04_sbref", + "acq-dir99_dir-PA_run-04_dwi", + "T1w", + "T2w"] + for index, item in enumerate(source_list): + rename_list_of_files(source_list[index], dest_list[index], cur_dir) + if glob.glob('*run-01_epi*'): + move_glob_files('/*','/../S1',cur_dir ) + if glob.glob('*run-02_epi*'): + move_glob_files('/*','/../S2',cur_dir) + if glob.glob('*run-03_epi*'): + move_glob_files('/*','/../S3',cur_dir) + if glob.glob('*run-04_epi*'): + move_glob_files('/*','/../S4',cur_dir) + if glob.glob('*run-05_epi*'): + move_glob_files('/*','/../S5',cur_dir) + if glob.glob('*run-06_epi*'): + move_glob_files('/*','/../S6',cur_dir) + if glob.glob('*run-07_epi*'): + move_glob_files('/*','/../S7',cur_dir) + os.chdir('..') #out of ses-V1 + + # add IntendedFor fields for EPI fieldmap jsons + fmap_poss = glob.glob('*/*epi.json') + folders = glob.glob('*/') + cur_dir = os.getcwd() + folders = [ os.path.join(cur_dir, ls) for ls in folders] # using list comprehension + fmap_poss = [ os.path.join(cur_dir, ls) for ls in fmap_poss] + for b in range (0, len(fmap_poss)): + intended_for = list() + for m in range (0, len(folders)): + folders[m] = os.path.join(cur_dir, folders[m]) + os.chdir(folders[m]) + folder_contents = glob.glob('*') + intended_subset = list(set(glob.glob('*.nii.gz')) - set(glob.glob('*epi.nii.gz'))) + basename = os.path.basename(fmap_poss[b]) + if basename in folder_contents: + intended_for.append(intended_subset) + else: + pass + os.chdir('..') + + + with open(fmap_poss[b]) as json_file: + data = json.load(json_file) + + [new_list] = intended_for + if intended_for == list(): + data['IntendedFor'] = str() + else: + data['IntendedFor'] = new_list + with open(fmap_poss[b], 'w') as json_file: + json.dump(data, json_file) + + os.chdir('..') + cur_dir = os.getcwd() + move_glob_files('/ses-V1/*/*epi*','/ses-V1/fmap',cur_dir ) + move_glob_files('/ses-V1/*/*task-*','/ses-V1/func',cur_dir) + move_glob_files('/ses-V1/*/*acq-dir*','/ses-V1/dwi',cur_dir) + move_glob_files('/ses-V1/*/*T1w*','/ses-V1/anat',cur_dir) + move_glob_files('/ses-V1/*/*T2w*','/ses-V1/anat',cur_dir) + #remove excess files / folders + remove_glob_files('/ses-V1/*fMRI*/',cur_dir) + remove_glob_files('/ses-V1/*vNav*/',cur_dir) + remove_glob_files('/ses-V1/*PCAS*/',cur_dir) + if os.path.isdir('./ses-V1/Diffusion'): + shutil.rmtree(os.path.join(cur_dir, 'ses-V1/Diffusion')) + if os.path.isdir('ses-V1/S1'): + shutil.rmtree(os.path.join(cur_dir,'ses-V1/S1')) + if os.path.isdir('ses-V1/S2'): + shutil.rmtree(os.path.join(cur_dir,'ses-V1/S2')) + if os.path.isdir('./ses-V1/S3'): + shutil.rmtree(os.path.join(cur_dir,'ses-V1/S3')) + if os.path.isdir('./ses-V1/S4'): + shutil.rmtree(os.path.join(cur_dir,'ses-V1/S4')) + if os.path.isdir('./ses-V1/S5'): + shutil.rmtree(os.path.join(cur_dir,'ses-V1/S5')) + if os.path.isdir('./ses-V1/S6'): + shutil.rmtree(os.path.join(cur_dir,'ses-V1/S6')) + if os.path.isdir('./ses-V1/S7'): + 
+        os.chdir('..')  # out of the subject directory
+
+    # create participants.tsv file
+    subjects = glob.glob('sub-*')
+
+    # create problem_fmapjsons.txt
+    fmap_json = glob.glob('sub*/*/fmap/*epi.json')
+    t1w = '_T1w'
+    t2w = '_T2w'
+    dir99 = 'dir99'
+    dir98 = 'dir98'
+
+    # prefix ses-V1/dwi, ses-V1/anat or ses-V1/func onto each IntendedFor entry
+    for i in range(0, len(fmap_json)):
+        with open(fmap_json[i]) as json_file:
+            data = json.load(json_file)
+        if 'IntendedFor' in data:
+            for j in range(0, len(data['IntendedFor'])):
+                if dir99 in data['IntendedFor'][j]:
+                    data['IntendedFor'][j] = 'ses-V1/dwi/' + str(data['IntendedFor'][j])
+                elif dir98 in data['IntendedFor'][j]:
+                    data['IntendedFor'][j] = 'ses-V1/dwi/' + str(data['IntendedFor'][j])
+                elif t1w in data['IntendedFor'][j]:
+                    data['IntendedFor'][j] = 'ses-V1/anat/' + str(data['IntendedFor'][j])
+                elif t2w in data['IntendedFor'][j]:
+                    data['IntendedFor'][j] = 'ses-V1/anat/' + str(data['IntendedFor'][j])
+                else:
+                    data['IntendedFor'][j] = 'ses-V1/func/' + str(data['IntendedFor'][j])
+
+        with open(fmap_json[i], 'w') as json_file:
+            json.dump(data, json_file)
+
+
+if __name__ == '__main__':
+    main()
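+
+# A minimal usage sketch (subject ID illustrative). Run from the directory
+# holding the downloaded HCD*_V1_MR folders, one subject at a time:
+#
+#     python hcpd_main.py HCD0000000_V1_MR
+#
+# which should leave a BIDS-style tree such as
+#     sub-0000000/ses-V1/{anat,dwi,fmap,func}/...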
diff --git a/scripts/cubic/bootstrap-mriqc-unzip-full-outputs.sh b/scripts/cubic/bootstrap-mriqc-unzip-full-outputs.sh
new file mode 100644
index 0000000..579a056
--- /dev/null
+++ b/scripts/cubic/bootstrap-mriqc-unzip-full-outputs.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+PROJECTROOT=/cbica/projects/RBC/production/PNC/mriqc
+cd ${PROJECTROOT}
+RIA=${PROJECTROOT}/output_ria
+datalad create -c yoda -D "extract mriqc results" unzipped-results-duplicate
+cd unzipped-results-duplicate
+datalad clone -d . --reckless ephemeral "ria+file://${RIA}#~data" inputs/data
+datalad clone -d . ../pennlinc-containers
+
+## the actual compute job specification
+cat > code/get_files.sh << "EOT"
+#!/bin/bash
+set -e -u -x
+
+ZIP_FILE=$1
+
+#unzip -j $ZIP_FILE "mriqc/sub-4238772277/ses-PNC1/anat/sub-4238772277_ses-PNC1_acq-refaced_T1w.json" -d "mriqc_results"
+
+#subid=$(basename "${ZIP_FILE%.*}")
+#subid=${subid%_*}
+#echo $subid
+
+# Create a mriqc/ directory
+unzip -o $ZIP_FILE -x 'mriqc/*.html'
+
+EOT
+
+cat > code/mriqc-group.sh << "EOT"
+#!/bin/bash
+set -e -u -x
+datalad get ${PWD}/pennlinc-containers/.datalad/environments/mriqc-0-16-1/image
+# create group reports for the anatomical T1w data
+singularity exec --cleanenv -B ${PWD} \
+    pennlinc-containers/.datalad/environments/mriqc-0-16-1/image \
+    python code/group_results.py
+
+EOT
+
+cat > code/group_results.py << "EOT"
+
+from pathlib import Path
+from mriqc.reports import group_html
+from mriqc.utils.bids import DEFAULT_TYPES
+from mriqc.utils.misc import generate_tsv
+
+if __name__ == '__main__':
+    output_dir = Path(".") / "mriqc"
+    # Generate reports
+    mod_group_reports = []
+    for mod in DEFAULT_TYPES:
+        dataframe, out_tsv = generate_tsv(output_dir, mod)
+        # If there are no iqm.json files, nothing to do.
+        if dataframe is None:
+            continue
+
+        print(f"Generated summary TSV table for the {mod} data ({out_tsv})")
+
+        # out_pred = generate_pred(derivatives_dir, settings['output_dir'], mod)
+        # if out_pred is not None:
+        #     log.info('Predicted QA CSV table for the %s data generated (%s)',
+        #              mod, out_pred)
+
+        out_html = output_dir / f"group_{mod}.html"
+        group_html(
+            out_tsv,
+            mod,
+            csv_failed=output_dir / f"group_variant-failed_{mod}.csv",
+            out_file=out_html,
+        )
+
+        print(f"Group-{mod} report generated ({out_html})")
+        mod_group_reports.append(mod)
+
+    if not mod_group_reports:
+        raise Exception("No data found. No group level reports were generated.")
+
+    print("Group level finished successfully.")
+EOT
+
+datalad save -m "Add data extraction code" code
+
+zip_files=$(find inputs/data/ -name '*.zip')
+for input_zip in ${zip_files}
+do
+    subid=$(basename "${input_zip%.*}")
+    subid=${subid%_*}
+    outdir=.
+
+    datalad run \
+        -i pennlinc-containers/.datalad/environments/fmriprep-20-2-3/image \
+        -i ${input_zip} \
+        -o ${outdir}/${subid} \
+        --explicit \
+        "bash code/get_files.sh ${input_zip}"
+done
+
+# CRITICAL: Don't uninstall the inputs - it will delete your data
+rm -rf inputs
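+
+# For reference, the two parameter expansions in the loop above turn a zip name
+# into a subject ID, e.g. (name illustrative):
+#   input_zip=inputs/data/sub-0000000_mriqc-0.16.1.zip
+#   basename "${input_zip%.*}"   ->  sub-0000000_mriqc-0.16.1
+#   ${subid%_*}                  ->  sub-0000000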
diff --git a/scripts/cubic/bootstrap-mriqc.sh b/scripts/cubic/bootstrap-mriqc.sh
new file mode 100644
index 0000000..ab450a9
--- /dev/null
+++ b/scripts/cubic/bootstrap-mriqc.sh
@@ -0,0 +1,282 @@
+## NOTE ##
+# This workflow is derived from the Datalad Handbook
+
+## Ensure the environment is ready to bootstrap the analysis workspace
+# Check that we have conda installed
+#conda activate
+#if [ $? -gt 0 ]; then
+#    echo "Error initializing conda. Exiting"
+#    exit $?
+#fi
+
+DATALAD_VERSION=$(datalad --version)
+
+if [ $? -gt 0 ]; then
+    echo "No datalad available in your conda environment."
+    echo "Try pip install datalad"
+    # exit 1
+fi
+
+echo USING DATALAD VERSION ${DATALAD_VERSION}
+
+set -e -u
+
+
+## Set up the directory that will contain the necessary directories
+PROJECTROOT=${PWD}/mriqc
+if [[ -d ${PROJECTROOT} ]]
+then
+    echo ${PROJECTROOT} already exists
+    # exit 1
+fi
+
+if [[ ! -w $(dirname ${PROJECTROOT}) ]]
+then
+    echo Unable to write to ${PROJECTROOT}\'s parent. Change permissions and retry
+    # exit 1
+fi
+
+
+## Check the BIDS input
+BIDSINPUT=$1
+if [[ -z ${BIDSINPUT} ]]
+then
+    echo "Required argument is an identifier of the BIDS source"
+    # exit 1
+fi
+
+# Is it a directory on the filesystem?
+BIDS_INPUT_METHOD=clone
+if [[ -d "${BIDSINPUT}" ]]
+then
+    # Check if it's datalad
+    BIDS_DATALAD_ID=$(datalad -f '{infos[dataset][id]}' wtf -S \
+        dataset -d ${BIDSINPUT} 2> /dev/null || true)
+    [ "${BIDS_DATALAD_ID}" = 'N/A' ] && BIDS_INPUT_METHOD=copy
+fi
+
+
+## Start making things
+mkdir -p ${PROJECTROOT}
+cd ${PROJECTROOT}
+
+# Jobs are set up to not require a shared filesystem (except for the lockfile)
+# ------------------------------------------------------------------------------
+# RIA-URL to a different RIA store from which the dataset will be cloned from.
+# Both RIA stores will be created
+input_store="ria+file://${PROJECTROOT}/input_ria"
+output_store="ria+file://${PROJECTROOT}/output_ria"
+
+# Create a source dataset with all analysis components as an analysis access
+# point.
+datalad create -c yoda analysis
+cd analysis
+
+# create dedicated input and output locations. Results will be pushed into the
+# output sibling and the analysis will start with a clone from the input sibling.
+datalad create-sibling-ria -s output "${output_store}"
+pushremote=$(git remote get-url --push output)
+datalad create-sibling-ria -s input --storage-sibling off "${input_store}"
+
+# register the input dataset
+if [[ "${BIDS_INPUT_METHOD}" == "clone" ]]
+then
+    echo "Cloning input dataset into analysis dataset"
+    datalad clone -d . ${BIDSINPUT} inputs/data
+    # amend the previous commit with a nicer commit message
+    git commit --amend -m 'Register input data dataset as a subdataset'
+else
+    echo "WARNING: copying input data into repository"
+    mkdir -p inputs/data
+    cp -r ${BIDSINPUT}/* inputs/data
+    datalad save -r -m "added input data"
+fi
+
+SUBJECTS=$(find inputs/data -type d -name 'sub-*' | cut -d '/' -f 3)
+if [ -z "${SUBJECTS}" ]
+then
+    echo "No subjects found in input data"
+    # exit 1
+fi
+
+set +u
+CONTAINERDS=$2
+set -u
+#if [[ ! -z "${CONTAINERDS}" ]]; then
+cd ${PROJECTROOT}
+datalad clone ${CONTAINERDS} pennlinc-containers
+## Add the containers as a subdataset
+#datalad clone ria+ssh://sciget.pmacs.upenn.edu:/project/bbl_projects/containers#~pennlinc-containers pennlinc-containers
+# download the image so we don't ddos pmacs
+cd pennlinc-containers
+datalad get -r .
+# get rid of the references to pmacs
+set +e
+datalad siblings remove -s pmacs-ria-storage
+datalad siblings remove -s origin
+set -e
+
+cd ${PROJECTROOT}/analysis
+datalad install -d . --source ${PROJECTROOT}/pennlinc-containers
+
+## the actual compute job specification
+cat > code/participant_job.sh << "EOT"
+#!/bin/bash
+#$ -S /bin/bash
+#$ -l h_vmem=15G
+#$ -l tmpfree=100G
+# Set up the correct conda environment
+source ${CONDA_PREFIX}/bin/activate base
+echo I\'m in $PWD using `which python`
+# fail whenever something is fishy, use -x to get verbose logfiles
+set -e -u -x
+# Set up the remotes and get the subject id from the call
+dssource="$1"
+pushgitremote="$2"
+subid="$3"
+# change into the cluster-assigned temp directory. Not done by default in SGE
+cd ${CBICA_TMPDIR}
+# OR run it on a shared network drive
+# cd /cbica/comp_space/$(basename $HOME)
+# Used for the branch names and the temp dir
+BRANCH="job-${JOB_ID}-${subid}"
+mkdir ${BRANCH}
+cd ${BRANCH}
+# get the analysis dataset, which includes the inputs as well
+# importantly, we do not clone from the location that we want to push the
+# results to, in order to avoid too many jobs blocking access to
+# the same location and creating a throughput bottleneck
+datalad clone "${dssource}" ds
+# all following actions are performed in the context of the superdataset
+cd ds
+# in order to avoid accumulating temporary git-annex availability information
+# and to avoid a synchronization bottleneck by having to consolidate the
+# git-annex branch across jobs, we will only push the main tracking branch
+# back to the output store (plus the actual file content). Final availability
+# information can be established via an eventual `git annex fsck -f joc-storage`.
+# This remote is never fetched; it accumulates a large number of branches,
+# and we want to avoid progressive slowdown. Instead we only ever push
+# a unique branch per job (subject AND process specific name)
+git remote add outputstore "$pushgitremote"
+# all results of this job will be put into a dedicated branch
+git checkout -b "${BRANCH}"
+# we pull down the input subject manually in order to discover relevant
+# files. We do this outside the recorded call, because on a potential
+# re-run we want to be able to do fine-grained recomputing of individual
+# outputs. The recorded calls will have specific paths that will enable
+# recomputation outside the scope of the original setup
+datalad get -n "inputs/data/${subid}"
+# Remove all subjects we're not working on
+(cd inputs/data && rm -rf `find . -type d -name 'sub*' | grep -v $subid`)
+# ------------------------------------------------------------------------------
+# Do the run!
+datalad run \
+    -i code/mriqc_zip.sh \
+    -i inputs/data/${subid} \
+    -i inputs/data/*json \
+    -i pennlinc-containers/.datalad/environments/mriqc-0-16-1/image \
+    --explicit \
+    -o ${subid}_mriqc-0.16.1.zip \
+    -m "mriqc:0.16.1 ${subid}" \
+    "bash ./code/mriqc_zip.sh ${subid}"
+# file content first -- does not need a lock, no interaction with Git
+datalad push --to output-storage
+# and the output branch
+flock $DSLOCKFILE git push outputstore
+echo TMPDIR TO DELETE
+echo ${BRANCH}
+datalad uninstall --nocheck --if-dirty ignore -r inputs/data
+datalad drop -r . --nocheck
+git annex dead here
+cd ../..
+rm -rf $BRANCH
+echo SUCCESS
+# job handler should clean up workspace
+EOT
+
+chmod +x code/participant_job.sh
+
+cat > code/mriqc_zip.sh << "EOT"
+#!/bin/bash
+set -e -u -x
+subid="$1"
+mkdir -p ${PWD}/.git/tmp/wkdir
+singularity run --cleanenv -B ${PWD} \
+    pennlinc-containers/.datalad/environments/mriqc-0-16-1/image \
+    inputs/data \
+    qc/mriqc \
+    participant \
+    -w ${PWD}/.git/tmp/wkdir \
+    --n_cpus $NSLOTS \
+    --ants-nthreads 2 \
+    --float32 \
+    -m T1w \
+    --participant-label "$subid" \
+    --verbose-reports --no-sub -v -v
+
+cd qc
+7z a ../${subid}_mriqc-0.16.1.zip mriqc
+cd ..
+
+rm -rf qc .git/tmp/wkdir
+EOT
+
+chmod +x code/mriqc_zip.sh
+#cp ${FREESURFER_HOME}/license.txt code/license.txt
+
+mkdir logs
+echo .SGE_datalad_lock >> .gitignore
+echo logs >> .gitignore
+
+datalad save -m "Participant compute job implementation"
+
+# Add a script for merging outputs
+MERGE_POSTSCRIPT=https://raw.githubusercontent.com/PennLINC/TheWay/main/scripts/cubic/merge_outputs_postscript.sh
+cat > code/merge_outputs.sh << "EOT"
+#!/bin/bash
+set -e -u -x
+EOT
+echo "outputsource=${output_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" \
+    >> code/merge_outputs.sh
+echo "cd ${PROJECTROOT}" >> code/merge_outputs.sh
+wget -qO- ${MERGE_POSTSCRIPT} >> code/merge_outputs.sh
+
+
+################################################################################
+# SGE SETUP START - remove or adjust to your needs
+################################################################################
+env_flags="-v DSLOCKFILE=${PWD}/.SGE_datalad_lock"
+echo '#!/bin/bash' > code/qsub_calls.sh
+dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)"
+pushgitremote=$(git remote get-url --push output)
+eo_args="-e ${PWD}/logs -o ${PWD}/logs"
+for subject in ${SUBJECTS}; do
+    echo "qsub -cwd ${env_flags} -N fp${subject} ${eo_args} \
+        ${PWD}/code/participant_job.sh \
+        ${dssource} ${pushgitremote} ${subject}" >> code/qsub_calls.sh
+done
+datalad save -m "SGE submission setup" code/ .gitignore
+
+################################################################################
+# SGE SETUP END
+################################################################################
+
+# cleanup - we have generated the job definitions, we do not need to keep a
+# massive input dataset around. Having it around wastes resources and makes many
+# git operations needlessly slow
+if [ "${BIDS_INPUT_METHOD}" = "clone" ]
+then
+    datalad uninstall -r --nocheck inputs/data
+fi
+
+# make sure the fully configured output dataset is available from the designated
+# store for initial cloning and pushing the results.
+datalad push --to input
+datalad push --to output
+
+# Add an alias to the data in the RIA store
+RIA_DIR=$(find $PROJECTROOT/output_ria/???/ -maxdepth 1 -type d | sort | tail -n 1)
+mkdir -p ${PROJECTROOT}/output_ria/alias
+ln -s ${RIA_DIR} ${PROJECTROOT}/output_ria/alias/data
+
+# if we get here, we are happy
+echo SUCCESS
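+
+# A minimal usage sketch (paths illustrative): point the bootstrap at a BIDS
+# datalad dataset and a clone-able container dataset, then submit the generated
+# job calls:
+#   bash bootstrap-mriqc.sh /path/to/bids ria+ssh://example.host/containers#~pennlinc-containers
+#   bash mriqc/analysis/code/qsub_calls.sh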
diff --git a/scripts/cubic/bootstrap_hcpd_bids.sh b/scripts/cubic/bootstrap_hcpd_bids.sh
new file mode 100644
index 0000000..1905e84
--- /dev/null
+++ b/scripts/cubic/bootstrap_hcpd_bids.sh
@@ -0,0 +1,262 @@
+DATALAD_VERSION=$(datalad --version)
+
+if [ $? -gt 0 ]; then
+    echo "No datalad available in your conda environment."
+    echo "Try pip install datalad"
+    # exit 1
+fi
+
+echo USING DATALAD VERSION ${DATALAD_VERSION}
+
+set -e -u
+
+
+## Set up the directory that will contain the necessary directories
+PROJECTROOT=${PWD}/bootstrap_hcpd_bids
+if [[ -d ${PROJECTROOT} ]]
+then
+    echo ${PROJECTROOT} already exists
+    # exit 1
+fi
+
+if [[ ! -w $(dirname ${PROJECTROOT}) ]]
+then
+    echo Unable to write to ${PROJECTROOT}\'s parent. Change permissions and retry
+    # exit 1
+fi
+
+SUBJECTIDCSV=$1
+HCPDCSV=$2
+if [[ -z ${SUBJECTIDCSV} ]]
+then
+    echo "Required argument is an identifier of the HCPD csv source"
+    # exit 1
+fi
+
+## Start making things
+mkdir -p ${PROJECTROOT}
+cd ${PROJECTROOT}
+
+# Jobs are set up to not require a shared filesystem (except for the lockfile)
+# ------------------------------------------------------------------------------
+# RIA-URL to a different RIA store from which the dataset will be cloned from.
+# Both RIA stores will be created
+input_store="ria+file://${PROJECTROOT}/input_ria"
+output_store="ria+file://${PROJECTROOT}/output_ria"
+
+# Create a source dataset with all analysis components as an analysis access
+# point.
+datalad create -c yoda analysis
+cd analysis
+
+# create dedicated input and output locations. Results will be pushed into the
+# output sibling and the analysis will start with a clone from the input sibling.
+datalad create-sibling-ria -s output "${output_store}"
+pushremote=$(git remote get-url --push output)
+datalad create-sibling-ria -s input --storage-sibling off "${input_store}"
+
+# read the list of subject IDs (first column of the csv)
+SUBJECTS=$(cut -d, -f1 ${SUBJECTIDCSV})
+
+git annex initremote datalad type=external externaltype=datalad encryption=none
+
+cat > code/participant_csv.py << "EOT"
+#!/usr/bin/env python
+"""
+USAGE:
+
+    python participant_csv.py subid hcpdcsv
+
+Run this inside of participant_job.sh.
+
+Creates the csv for one single participant.
+"""
+import pandas as pd
+import sys
+
+hcpdcsv = sys.argv[2]
+df = pd.read_csv(hcpdcsv)
+
+# the HCD* prefix identifying this participant
+prefix = sys.argv[1]
+
+df2 = df[df.filename.str.startswith(prefix)]
+
+df3 = df2.drop_duplicates(subset="filename", keep='first', ignore_index=True)
+
+df3.to_csv(f"{prefix}.csv", index=False)
+EOT
+
+chmod +x code/participant_csv.py
+
+datalad save -m "Participant csv implementation"
+
+cat > code/participant_job.sh << "EOT"
+#!/bin/bash
+#$ -S /bin/bash
+#$ -l h_vmem=25G
+#$ -l tmpfree=200G
+#$ -R y
+#$ -l h_rt=24:00:00
+# Set up the correct conda environment
+source ${CONDA_PREFIX}/bin/activate base
+echo I\'m in $PWD using `which python`
+
+# fail whenever something is fishy, use -x to get verbose logfiles
+set -e -u -x
+
+dssource="$1"
+pushgitremote="$2"
+subid="$3"
+hcpdcsv="$4"
+
+rename_subid="sub-${subid:3:7}"
+
+cd ${CBICA_TMPDIR}
+BRANCH="job-${JOB_ID}-${subid}"
+mkdir ${BRANCH}
+cd ${BRANCH}
+datalad clone "${dssource}" ds
+cd ds
+
+# create the csv for this one subject
+python code/participant_csv.py ${subid} ${hcpdcsv}
+
+SUBJECTCSV="${subid}.csv"
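+# ${SUBJECTCSV} is assumed to carry (at least) a 'filename' column and an
+# 'associated_file' column holding the download URL for each file; those are
+# the two fields consumed by the datalad addurls call below. Sketch of a row
+# (values illustrative):
+#   filename,associated_file
+#   HCD0000000_V1_MR/unprocessed/file.nii.gz,s3://nda-bucket/path/file.nii.gz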
+git annex enableremote datalad type=external externaltype=datalad encryption=none
+datalad addurls -d . ${SUBJECTCSV} '{associated_file}' '{filename}'
+rm ${SUBJECTCSV}
+
+git remote add outputstore "$pushgitremote"
+
+git checkout -b "${BRANCH}"
+
+# ------------------------------------------------------------------------------
+# Do the run! Pull the raw subject directory and rename it into BIDS
+datalad run \
+    -i ${subid}_V1_MR \
+    --explicit \
+    -o ${subid}_V1_MR \
+    -o ${rename_subid} \
+    -m "rename for ${subid}" \
+    "python /cbica/projects/RBC/mengjia_space/hcpd_main.py ${subid}_V1_MR"
+
+datalad save -m "Record the deletion of raw non-BIDS directories"
+
+# file content first -- does not need a lock, no interaction with Git
+datalad push --to output-storage ${rename_subid}
+# and the output branch
+flock $DSLOCKFILE git push outputstore
+
+echo TMPDIR TO DELETE
+echo ${BRANCH}
+
+datalad drop -r . --nocheck
+git annex dead here
+cd ../..
+
+chmod +w -R $BRANCH
+rm -rf $BRANCH
+
+echo SUCCESS
+EOT
+
+chmod +x code/participant_job.sh
+
+mkdir logs
+echo .SGE_datalad_lock >> .gitignore
+echo logs >> .gitignore
+
+datalad save -m "Participant compute job implementation"
+
+# Add a script for merging outputs
+MERGE_POSTSCRIPT=https://raw.githubusercontent.com/PennLINC/TheWay/main/scripts/cubic/merge_outputs_postscript.sh
+cat > code/merge_outputs.sh << "EOT"
+#!/bin/bash
+set -e -u -x
+EOT
+echo "outputsource=${output_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)" \
+    >> code/merge_outputs.sh
+echo "cd ${PROJECTROOT}" >> code/merge_outputs.sh
+wget -qO- ${MERGE_POSTSCRIPT} >> code/merge_outputs.sh
+
+
+################################################################################
+# SGE SETUP START - remove or adjust to your needs
+################################################################################
+env_flags="-v DSLOCKFILE=${PWD}/.SGE_datalad_lock"
+echo '#!/bin/bash' > code/qsub_calls.sh
+dssource="${input_store}#$(datalad -f '{infos[dataset][id]}' wtf -S dataset)"
+#dssource="~/mengjia_space/hcpd_single_subject.csv"
+pushgitremote=$(git remote get-url --push output)
+eo_args="-e ${PWD}/logs -o ${PWD}/logs"
+
+for subject in ${SUBJECTS}; do
+    echo "qsub -cwd ${env_flags} -N ${subject} ${eo_args} \
+        ${PWD}/code/participant_job.sh \
+        ${dssource} ${pushgitremote} ${subject} ${HCPDCSV}" >> code/qsub_calls.sh
+done
+datalad save -m "SGE submission setup" code/ .gitignore
+
+################################################################################
+# SGE SETUP END
+################################################################################
+
+# cleanup - we have generated the job definitions, we do not need to keep a
+# massive input dataset around. Having it around wastes resources and makes many
+# git operations needlessly slow
+#if [ "${BIDS_INPUT_METHOD}" = "clone" ]
+#then
+#    datalad uninstall -r --nocheck inputs/data
+#fi
+
+# make sure the fully configured output dataset is available from the designated
+# store for initial cloning and pushing the results.
+datalad push --to input
+datalad push --to output
+
+# Add an alias to the data in the RIA store
+RIA_DIR=$(find $PROJECTROOT/output_ria/???/ -maxdepth 1 -type d | sort | tail -n 1)
+mkdir -p ${PROJECTROOT}/output_ria/alias
+ln -s ${RIA_DIR} ${PROJECTROOT}/output_ria/alias/data
+
+# if we get here, we are happy
+echo SUCCESS
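+
+# A minimal usage sketch (file names illustrative): bootstrap with a csv whose
+# first column lists the HCD subject IDs plus the full HCP-D S3 manifest csv,
+# then submit the generated calls:
+#   bash bootstrap_hcpd_bids.sh hcpd_subject_ids.csv hcpd_s3_manifest.csv
+#   bash bootstrap_hcpd_bids/analysis/code/qsub_calls.sh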