Skip to content

Commit 9c25543

Browse files
committed
migration - raw data error fix + id col error
1 parent ae61d10 commit 9c25543

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

chebai/preprocessing/migration/chebi_data_migration.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,19 @@ def _dynamic_import_chebi_cls(
3333

3434
def migrate(self):
3535
os.makedirs(self._chebi_cls.base_dir, exist_ok=True)
36-
print("Migration started..................")
36+
print("Migration started.....")
3737
self._migrate_old_raw_data()
3838

3939
# Either we can combine the `.pt` split files to form the `data.pt` file
4040
# self._migrate_old_processed_data()
4141
# OR
4242
# we can transform the `data.pkl` file into a `data.pt` file (this seems more efficient and needs less code)
4343
self._chebi_cls.setup_processed()
44-
print("Migration completed..................")
44+
print("Migration completed.....")
4545

4646
def _migrate_old_raw_data(self):
4747
print("-" * 50)
48-
print("Migrating old raw Data.....................")
48+
print("Migrating old raw Data....")
4949

5050
self._copy_file(self._old_raw_dir, self._chebi_cls.raw_dir, "chebi.obo")
5151
self._copy_file(
@@ -66,16 +66,17 @@ def _migrate_old_raw_data(self):
6666
self._old_raw_dir, old_splits_file_names
6767
)
6868

69-
data_df.to_pickle(data_file_path)
69+
# data_df.to_pickle(data_file_path)
70+
self._chebi_cls.save_processed(data_df, "data.pkl")
7071
print(f"File {data_file_path} saved to new data-folder structure")
7172

7273
split_file = os.path.join(self._chebi_cls.processed_dir_main, "splits.csv")
73-
split_ass_df.to_csv(split_file)
74+
split_ass_df.to_csv(split_file) # overwrites any existing file with the same name
7475
print(f"File {split_file} saved to new data-folder structure")
7576

7677
def _migrate_old_processed_data(self):
7778
print("-" * 50)
78-
print("Migrating old processed data.....................")
79+
print("Migrating old processed data.....")
7980

8081
data_file_path = os.path.join(self._chebi_cls.processed_dir, "data.pt")
8182
if os.path.isfile(data_file_path):
@@ -120,12 +121,11 @@ def _combine_pkl_splits(
120121
print("Combining `.pkl` splits...")
121122
for split, file_name in old_splits_file_names.items():
122123
file_path = os.path.join(old_dir, file_name)
123-
file_df = pd.DataFrame(self._chebi_cls._load_data_from_file(path=file_path))
124-
file_df["split"] = split # Assign the split label to the DataFrame
124+
file_df = pd.read_pickle(file_path)
125125
df_list.append(file_df)
126126

127127
# Create split assignment for the current DataFrame
128-
split_assignment = pd.DataFrame({"id": file_df["ident"], "split": split})
128+
split_assignment = pd.DataFrame({"id": file_df["id"], "split": split})
129129
split_assignment_list.append(split_assignment)
130130

131131
# Concatenate all dataframes and split assignments

0 commit comments

Comments
 (0)