@@ -33,19 +33,19 @@ def _dynamic_import_chebi_cls(
3333
def migrate(self):
    """Run the migration into the new data-folder structure.

    Steps, in order:

    1. Ensure ``self._chebi_cls.base_dir`` exists (creating it, and any
       missing parents, if necessary).
    2. Migrate the old raw data via ``self._migrate_old_raw_data()``.
    3. Build the processed data via ``self._chebi_cls.setup_processed()``.

    Progress messages are printed to stdout before the first and after the
    last step.
    """
    # exist_ok=True makes re-running the migration safe when the target
    # directory is already present.
    os.makedirs(self._chebi_cls.base_dir, exist_ok=True)
    print("Migration started.....")
    self._migrate_old_raw_data()

    # Two ways of producing the processed `data.pt` file were considered:
    #   (a) combine the old `.pt` split files into a single `data.pt`
    #       (see `self._migrate_old_processed_data()`), or
    #   (b) transform the migrated `data.pkl` into `data.pt`.
    # Option (b) is used here; per the original author's note it seems
    # more efficient and needs less code.
    self._chebi_cls.setup_processed()
    print("Migration completed.....")
4545
4646 def _migrate_old_raw_data (self ):
4747 print ("-" * 50 )
48- print ("Migrating old raw Data..................... " )
48+ print ("Migrating old raw Data...." )
4949
5050 self ._copy_file (self ._old_raw_dir , self ._chebi_cls .raw_dir , "chebi.obo" )
5151 self ._copy_file (
@@ -66,16 +66,17 @@ def _migrate_old_raw_data(self):
6666 self ._old_raw_dir , old_splits_file_names
6767 )
6868
69- data_df .to_pickle (data_file_path )
69+ # data_df.to_pickle(data_file_path)
70+ self ._chebi_cls .save_processed (data_df , "data.pkl" )
7071 print (f"File { data_file_path } saved to new data-folder structure" )
7172
7273 split_file = os .path .join (self ._chebi_cls .processed_dir_main , "splits.csv" )
73- split_ass_df .to_csv (split_file )
74+ split_ass_df .to_csv (split_file ) # overwrites the files with same name
7475 print (f"File { split_file } saved to new data-folder structure" )
7576
7677 def _migrate_old_processed_data (self ):
7778 print ("-" * 50 )
78- print ("Migrating old processed data..................... " )
79+ print ("Migrating old processed data....." )
7980
8081 data_file_path = os .path .join (self ._chebi_cls .processed_dir , "data.pt" )
8182 if os .path .isfile (data_file_path ):
@@ -120,12 +121,11 @@ def _combine_pkl_splits(
120121 print ("Combining `.pkl` splits..." )
121122 for split , file_name in old_splits_file_names .items ():
122123 file_path = os .path .join (old_dir , file_name )
123- file_df = pd .DataFrame (self ._chebi_cls ._load_data_from_file (path = file_path ))
124- file_df ["split" ] = split # Assign the split label to the DataFrame
124+ file_df = pd .read_pickle (file_path )
125125 df_list .append (file_df )
126126
127127 # Create split assignment for the current DataFrame
128- split_assignment = pd .DataFrame ({"id" : file_df ["ident " ], "split" : split })
128+ split_assignment = pd .DataFrame ({"id" : file_df ["id " ], "split" : split })
129129 split_assignment_list .append (split_assignment )
130130
131131 # Concatenate all dataframes and split assignments
0 commit comments