diff --git a/scripts/new_patient_pipeline/new_patient_pipeline.py b/scripts/new_patient_pipeline/new_patient_pipeline.py index b0d9e01..e57c094 100644 --- a/scripts/new_patient_pipeline/new_patient_pipeline.py +++ b/scripts/new_patient_pipeline/new_patient_pipeline.py @@ -118,7 +118,8 @@ def flush(self): extract_features_hdf5(list_ids=args.list_ids, sub_id=args.id, data_dir=HIPPUNFOLD_SUBJECTS_PATH, - output_dir=BASE_PATH) + output_dir=BASE_PATH, + demographic_file=args.demographic_file) #--------------------------------------------------------------------------------- ### PREPROCESSING ### @@ -129,6 +130,7 @@ def flush(self): list_ids=args.list_ids, sub_id=args.id, harmonisation_only = args.harmo_only, + demographic_file_path = args.demographic_file, verbose = args.debug_mode, ) diff --git a/scripts/new_patient_pipeline/run_pipeline_preprocessing.py b/scripts/new_patient_pipeline/run_pipeline_preprocessing.py index 9f56f54..224fd0e 100755 --- a/scripts/new_patient_pipeline/run_pipeline_preprocessing.py +++ b/scripts/new_patient_pipeline/run_pipeline_preprocessing.py @@ -54,7 +54,7 @@ def check_demographic_file(demographic_file, subject_ids): -def run_data_processing_new_subjects(subject_ids, harmo_code, output_dir=BASE_PATH, compute_harmonisation=False, harmonisation_only=False ): +def run_data_processing_new_subjects(subject_ids, harmo_code, output_dir=BASE_PATH, compute_harmonisation=False, harmonisation_only=False, demographic_file_path=None): # initialise surface_features and smoothing kernel surface_features = { @@ -169,7 +169,7 @@ def run_data_processing_new_subjects(subject_ids, harmo_code, output_dir=BASE_PA if len(np.unique(subject_ids))<20: print(get_m(f'We recommend to use at least 20 subjects for an acurate harmonisation of the data. 
Here you are using only {len(np.unique(subject_ids))}', None, 'WARNING')) - demographic_file = os.path.join(BASE_PATH, DEMOGRAPHIC_FEATURES_FILE) + demographic_file = demographic_file_path if demographic_file_path is not None else os.path.join(BASE_PATH, DEMOGRAPHIC_FEATURES_FILE) check_demographic_file(demographic_file, subject_ids) ### COMBAT DISTRIBUTED DATA ### @@ -241,7 +241,7 @@ def run_data_processing_new_subjects(subject_ids, harmo_code, output_dir=BASE_PA print(time.asctime(time.localtime(time.time()))) -def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_dir=BASE_PATH, harmonisation_only=False, verbose=False): +def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_dir=BASE_PATH, harmonisation_only=False, demographic_file_path=None, verbose=False): harmo_code = str(harmo_code) subject_ids=None if list_ids != None: @@ -269,7 +269,7 @@ def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_di print(get_m(f'Compute combat parameters for {harmo_code} with subjects {subject_ids}', None, 'INFO')) compute_harmonisation = True #check that demographic file exist and is adequate - demographic_file = os.path.join(DATA_PATH, DEMOGRAPHIC_FEATURES_FILE) + demographic_file = demographic_file_path if demographic_file_path is not None else os.path.join(DATA_PATH, DEMOGRAPHIC_FEATURES_FILE) if os.path.isfile(demographic_file): print(get_m(f'Use demographic file {demographic_file}', None, 'INFO')) demographic_file = check_demographic_file(demographic_file, subject_ids) @@ -286,6 +286,7 @@ def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_di harmo_code=harmo_code, output_dir=output_dir, compute_harmonisation = compute_harmonisation, + demographic_file_path=demographic_file_path, harmonisation_only=harmonisation_only) if __name__ == '__main__': @@ -308,6 +309,12 @@ def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_di help="harmonisation code", 
required=False, ) + parser.add_argument('-demos', '--demographic_file', + type=str, + help='provide the demographic file for the harmonisation', + required=False, + default=None, + ) parser.add_argument('--harmo_only', action="store_true", help='only compute the harmonisation combat parameters, no further process', @@ -330,6 +337,7 @@ def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_di list_ids=args.list_ids, sub_id=args.id, harmonisation_only = args.harmo_only, + demographic_file_path = args.demographic_file, verbose = args.debug_mode, ) diff --git a/scripts/preprocess/extract_features_hdf5.py b/scripts/preprocess/extract_features_hdf5.py index 358e7ad..46e6b80 100755 --- a/scripts/preprocess/extract_features_hdf5.py +++ b/scripts/preprocess/extract_features_hdf5.py @@ -18,6 +18,8 @@ def get_group_site(fs_id, csv_path): """ Read demographic features from csv file and extract harmo code and group """ + if csv_path is None: + csv_path = os.path.join(BASE_PATH, DEMOGRAPHIC_FEATURES_FILE) features_name=["Harmo code", "Group"] df = pd.read_csv(csv_path, header=0, encoding="latin") # get index column @@ -164,7 +166,7 @@ def convert_bids_id(bids_id=None): bids_id = 'sub-'+bids_id return bids_id -def extract_features_hdf5(list_ids=None, sub_id=None, data_dir=None, output_dir=None): +def extract_features_hdf5(list_ids=None, sub_id=None, data_dir=None, output_dir=None, demographic_file=None): subject_id=None subject_ids=None if list_ids != None: @@ -218,7 +220,7 @@ def extract_features_hdf5(list_ids=None, sub_id=None, data_dir=None, output_dir= f_name='.'+feature_name, feature = overlay, base_path= output_dir, - demographic_file= DEMOGRAPHIC_FEATURES_FILE, + demographic_file=demographic_file, hdf5_file_root='{}_{}_featurematrix.hdf5', label = label) else: