Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion scripts/new_patient_pipeline/new_patient_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ def flush(self):
extract_features_hdf5(list_ids=args.list_ids,
sub_id=args.id,
data_dir=HIPPUNFOLD_SUBJECTS_PATH,
output_dir=BASE_PATH)
output_dir=BASE_PATH,
demographic_file=args.demographic_file)

#---------------------------------------------------------------------------------
### PREPROCESSING ###
Expand All @@ -129,6 +130,7 @@ def flush(self):
list_ids=args.list_ids,
sub_id=args.id,
harmonisation_only = args.harmo_only,
demographic_file_path = args.demographic_file,
verbose = args.debug_mode,
)

Expand Down
16 changes: 12 additions & 4 deletions scripts/new_patient_pipeline/run_pipeline_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def check_demographic_file(demographic_file, subject_ids):



def run_data_processing_new_subjects(subject_ids, harmo_code, output_dir=BASE_PATH, compute_harmonisation=False, harmonisation_only=False ):
def run_data_processing_new_subjects(subject_ids, harmo_code, output_dir=BASE_PATH, compute_harmonisation=False, harmonisation_only=False, demographic_file_path=None):

# initialise surface_features and smoothing kernel
surface_features = {
Expand Down Expand Up @@ -169,7 +169,7 @@ def run_data_processing_new_subjects(subject_ids, harmo_code, output_dir=BASE_PA
if len(np.unique(subject_ids))<20:
print(get_m(f'We recommend to use at least 20 subjects for an accurate harmonisation of the data. Here you are using only {len(np.unique(subject_ids))}', None, 'WARNING'))

demographic_file = os.path.join(BASE_PATH, DEMOGRAPHIC_FEATURES_FILE)
demographic_file = demographic_file_path if demographic_file_path is not None else os.path.join(BASE_PATH, DEMOGRAPHIC_FEATURES_FILE)
check_demographic_file(demographic_file, subject_ids)

### COMBAT DISTRIBUTED DATA ###
Expand Down Expand Up @@ -241,7 +241,7 @@ def run_data_processing_new_subjects(subject_ids, harmo_code, output_dir=BASE_PA

print(time.asctime(time.localtime(time.time())))

def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_dir=BASE_PATH, harmonisation_only=False, verbose=False):
def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_dir=BASE_PATH, harmonisation_only=False, demographic_file_path=None, verbose=False):
harmo_code = str(harmo_code)
subject_ids=None
if list_ids != None:
Expand Down Expand Up @@ -269,7 +269,7 @@ def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_di
print(get_m(f'Compute combat parameters for {harmo_code} with subjects {subject_ids}', None, 'INFO'))
compute_harmonisation = True
#check that demographic file exist and is adequate
demographic_file = os.path.join(DATA_PATH, DEMOGRAPHIC_FEATURES_FILE)
demographic_file = demographic_file_path if demographic_file_path is not None else os.path.join(DATA_PATH, DEMOGRAPHIC_FEATURES_FILE)
if os.path.isfile(demographic_file):
print(get_m(f'Use demographic file {demographic_file}', None, 'INFO'))
demographic_file = check_demographic_file(demographic_file, subject_ids)
Expand All @@ -286,6 +286,7 @@ def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_di
harmo_code=harmo_code,
output_dir=output_dir,
compute_harmonisation = compute_harmonisation,
demographic_file_path=demographic_file_path,
harmonisation_only=harmonisation_only)

if __name__ == '__main__':
Expand All @@ -308,6 +309,12 @@ def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_di
help="harmonisation code",
required=False,
)
parser.add_argument('-demos', '--demographic_file',
type=str,
help='provide the demographic file for the harmonisation',
required=False,
default=None,
)
parser.add_argument('--harmo_only',
action="store_true",
help='only compute the harmonisation combat parameters, no further process',
Expand All @@ -330,6 +337,7 @@ def run_pipeline_preprocessing(harmo_code, list_ids=None, sub_id=None, output_di
list_ids=args.list_ids,
sub_id=args.id,
harmonisation_only = args.harmo_only,
demographic_file_path = args.demographic_file,
verbose = args.debug_mode,
)

Expand Down
6 changes: 4 additions & 2 deletions scripts/preprocess/extract_features_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def get_group_site(fs_id, csv_path):
"""
Read demographic features from csv file and extract harmo code and group
"""
if csv_path is None:
csv_path = os.path.join(BASE_PATH, DEMOGRAPHIC_FEATURES_FILE)
features_name=["Harmo code", "Group"]
df = pd.read_csv(csv_path, header=0, encoding="latin")
# get index column
Expand Down Expand Up @@ -164,7 +166,7 @@ def convert_bids_id(bids_id=None):
bids_id = 'sub-'+bids_id
return bids_id

def extract_features_hdf5(list_ids=None, sub_id=None, data_dir=None, output_dir=None):
def extract_features_hdf5(list_ids=None, sub_id=None, data_dir=None, output_dir=None, demographic_file=None):
subject_id=None
subject_ids=None
if list_ids != None:
Expand Down Expand Up @@ -218,7 +220,7 @@ def extract_features_hdf5(list_ids=None, sub_id=None, data_dir=None, output_dir=
f_name='.'+feature_name,
feature = overlay,
base_path= output_dir,
demographic_file= DEMOGRAPHIC_FEATURES_FILE,
demographic_file=demographic_file,
hdf5_file_root='{}_{}_featurematrix.hdf5',
label = label)
else:
Expand Down