From 3043335b5ba98073bba7603f29b285e5d3159085 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Wed, 11 Jun 2025 10:25:43 -0400 Subject: [PATCH 1/2] fix bids compliance --- freesurfer_post/interfaces/tabular.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/freesurfer_post/interfaces/tabular.py b/freesurfer_post/interfaces/tabular.py index aa5a393..55953d9 100644 --- a/freesurfer_post/interfaces/tabular.py +++ b/freesurfer_post/interfaces/tabular.py @@ -138,8 +138,18 @@ def sanity_check_columns(reference_column, redundant_column, atol=0): output_dir.mkdir(parents=True, exist_ok=True) output_prefix = f'{subject_id}_{session_id}' if session_id else subject_id cleaned_atlas_name = atlas.replace('.', '').replace('_order', '').replace('_', '') + # Convert column names to snake case + out_df.columns = [col.lower().replace('-', '_').replace('.', '_') for col in out_df.columns] + # Rename subject_id to participant_id + out_df = out_df.rename(columns={'subject_id': 'participant_id'}) + # Reorder columns to have participant_id first + cols = out_df.columns.tolist() + cols.remove('participant_id') + out_df = out_df[['participant_id'] + cols] + # Replace missing values with "n/a" + out_df = out_df.fillna('n/a') out_df.to_csv( - output_dir / f'{output_prefix}_atlas-{cleaned_atlas_name}_surfacestats.tsv', + output_dir / f'{output_prefix}_seg-{cleaned_atlas_name}_surfacestats.tsv', sep='\t', index=False, ) @@ -359,11 +369,21 @@ def _run_interface(self, runtime): out_tsv = output_dir / f'{output_prefix}_brainmeasures.tsv' out_json = output_dir / f'{output_prefix}_brainmeasures.json' - metadata = {key: value['meta'] for key, value in fs_audit.items()} + metadata = {key: {'Description': value['meta']} for key, value in fs_audit.items()} with out_json.open('w') as jsonf: json.dump(metadata, jsonf, indent=2) real_data = {key: value['value'] for key, value in fs_audit.items()} data_df = pd.DataFrame([real_data]) + # Convert column names to snake case + data_df.columns = [col.lower().replace('-', '_').replace('.', '_') for col in data_df.columns] + # Rename subject_id to participant_id + data_df = data_df.rename(columns={'subject_id': 'participant_id'}) + # Reorder columns to have participant_id first + cols = data_df.columns.tolist() + cols.remove('participant_id') + data_df = data_df[['participant_id'] + cols] + # Replace missing values with "n/a" + data_df = data_df.fillna('n/a') data_df.to_csv(out_tsv, sep='\t', index=False) return runtime From 0ab2ddfcd27fd90c4a9a3e5cddd6dc30c607c0a4 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Wed, 11 Jun 2025 12:39:25 -0400 Subject: [PATCH 2/2] continue --- freesurfer_post/interfaces/tabular.py | 44 +++++++++++++++++++++++---- freesurfer_post/utils.py | 2 +- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/freesurfer_post/interfaces/tabular.py b/freesurfer_post/interfaces/tabular.py index 55953d9..a3e279c 100644 --- a/freesurfer_post/interfaces/tabular.py +++ b/freesurfer_post/interfaces/tabular.py @@ -138,6 +138,7 @@ def sanity_check_columns(reference_column, redundant_column, atol=0): output_dir.mkdir(parents=True, exist_ok=True) output_prefix = f'{subject_id}_{session_id}' if session_id else subject_id cleaned_atlas_name = atlas.replace('.', '').replace('_order', '').replace('_', '') + # Convert column names to snake case out_df.columns = [col.lower().replace('-', '_').replace('.', '_') for col in out_df.columns] # Rename subject_id to participant_id @@ -148,6 +149,27 @@ def sanity_check_columns(reference_column, redundant_column, atol=0): out_df = out_df[['participant_id'] + cols] # Replace missing values with "n/a" out_df = out_df.fillna('n/a') + + # Create metadata with the same column names + metadata = {} + for col in out_df.columns: + if col == 'participant_id': + metadata[col] = {'Description': 'BIDS participant ID'} + elif col == 'session_id': + metadata[col] = {'Description': 'BIDS session ID'} + elif col == 'hemisphere': + metadata[col] = {'Description': 'Brain hemisphere (lh or rh)'} + elif col == 'atlas': + metadata[col] = {'Description': 'Atlas used for parcellation'} + else: + metadata[col] = {'Description': f'Surface statistic: {col}'} + + # Save metadata + out_json = output_dir / f'{output_prefix}_seg-{cleaned_atlas_name}_surfacestats.json' + with out_json.open('w') as jsonf: + json.dump(metadata, jsonf, indent=2) + + # Save data out_df.to_csv( output_dir / f'{output_prefix}_seg-{cleaned_atlas_name}_surfacestats.tsv', sep='\t', @@ -369,12 +391,9 @@ def _run_interface(self, runtime): out_tsv = output_dir / f'{output_prefix}_brainmeasures.tsv' out_json = output_dir / f'{output_prefix}_brainmeasures.json' - metadata = {key: {'Description': value['meta']} for key, value in fs_audit.items()} - with out_json.open('w') as jsonf: - json.dump(metadata, jsonf, indent=2) - - real_data = {key: value['value'] for key, value in fs_audit.items()} - data_df = pd.DataFrame([real_data]) + # Extract just the values from the audit data + data_value = {key: value['value'] for key, value in fs_audit.items()} + data_df = pd.DataFrame([data_value]) # Convert column names to snake case data_df.columns = [col.lower().replace('-', '_').replace('.', '_') for col in data_df.columns] # Rename subject_id to participant_id @@ -385,5 +404,18 @@ def _run_interface(self, runtime): data_df = data_df[['participant_id'] + cols] # Replace missing values with "n/a" data_df = data_df.fillna('n/a') + + # Create metadata with the same column names as the TSV + metadata = {} + for key, value in fs_audit.items(): + # Convert the key to match TSV column name + new_key = key.lower().replace('-', '_').replace('.', '_') + if key == 'subject_id': + new_key = 'participant_id' + metadata[new_key] = {'Description': value['meta']} + + with out_json.open('w') as jsonf: + json.dump(metadata, jsonf, indent=2) + data_df.to_csv(out_tsv, sep='\t', index=False) return runtime diff --git a/freesurfer_post/utils.py b/freesurfer_post/utils.py index 2e53a64..a81f736 100644 --- a/freesurfer_post/utils.py +++ b/freesurfer_post/utils.py @@ -35,7 +35,7 @@ def find_freesurfer_dir(subjects_dir: str | Path, subject_id: str, session_id: s if warn_no_session: warnings.warn( f'{subjects_dir}/{subject_id}_{session_id} not found in {subjects_dir}' - f'using {subjects_dir}/{subject_id} instead', + f' using {subjects_dir}/{subject_id} instead', stacklevel=2, ) return subjects_dir / subject_id