Skip to content

Commit d8e2efb

Browse files
committed
Revert "new data reader for protein pretraining data"
This reverts commit 2c446dc.
1 parent ad4fc95 commit d8e2efb

File tree

2 files changed

+2
-19
lines changed

2 files changed

+2
-19
lines changed

chebai/preprocessing/datasets/protein_pretraining.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
EXPERIMENTAL_EVIDENCE_CODES,
1818
GOUniProtOver250,
1919
)
20-
from chebai.preprocessing.reader import ProteinPretrainReader
20+
from chebai.preprocessing.reader import ProteinDataReader
2121

2222

2323
class _ProteinPretrainingData(_DynamicDataset, ABC):
@@ -248,7 +248,7 @@ class SwissProteinPretrain(_ProteinPretrainingData):
248248
READER (Type): The data reader class used to load and process protein pretraining data.
249249
"""
250250

251-
READER = ProteinPretrainReader
251+
READER = ProteinDataReader
252252

253253
@property
254254
def _name(self) -> str:

chebai/preprocessing/reader.py

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -469,20 +469,3 @@ def on_finish(self) -> None:
469469
print(f"Saving {len(self.cache)} tokens to {self.token_path}...")
470470
print(f"First 10 tokens: {self.cache[:10]}")
471471
pk.writelines([f"{c}\n" for c in self.cache])
472-
473-
474-
class ProteinPretrainReader(ProteinDataReader):
475-
def _read_components(self, row: Dict[str, Any]) -> Dict[str, Any]:
476-
"""Read and return components from the row."""
477-
return dict(
478-
features=self._get_raw_data(row),
479-
ident=self._get_raw_id(row),
480-
)
481-
482-
def to_data(self, row: Dict[str, Any]) -> Dict[str, Any]:
483-
"""Convert raw row data to processed data."""
484-
d = self._read_components(row)
485-
return dict(
486-
features=self._read_data(d["features"]),
487-
ident=self._read_id(d["ident"]),
488-
)

0 commit comments

Comments
 (0)