diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
deleted file mode 100644
index b04fb15..0000000
--- a/.github/workflows/black.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: Lint
-
-on: [push, pull_request]
-
-jobs:
-  lint:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v2
-    - uses: psf/black@stable
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000..bb9154f
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,26 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10' # or any version your project uses
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black==25.1.0 ruff==0.12.2
+
+      - name: Run Black
+        run: black --check .
+
+      - name: Run Ruff (no formatting)
+        run: ruff check . --no-fix
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0ad2115..d01839c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,7 +9,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]

     steps:
       - uses: actions/checkout@v4
diff --git a/.gitignore b/.gitignore
index 05cdfb7..587b4f3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,3 +170,4 @@ electra_pretrained.ckpt
 .jupyter
 .virtual_documents
 .isort.cfg
+.vscode
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 108b91d..cbb7284 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
 - repo: https://github.com/psf/black
-  rev: "24.2.0"
+  rev: "25.1.0"
   hooks:
   - id: black
   - id: black-jupyter # for formatting jupyter-notebook
@@ -23,3 +23,9 @@
   - id: check-yaml
   - id: end-of-file-fixer
   - id: trailing-whitespace
+
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.12.2
+  hooks:
+  - id: ruff
+    args: [--fix]
diff --git a/README.md b/README.md
index f0033da..cd8069a 100644
--- a/README.md
+++ b/README.md
@@ -7,19 +7,22 @@

 ## 🔧 Installation

-To install, follow these steps:
+To install this repository, download [`python-chebai`](https://github.com/ChEB-AI/python-chebai) and this repository, then run

-1. Clone the repository:
 ```
-git clone https://github.com/ChEB-AI/python-chebai-proteins.git
+cd python-chebai
+pip install .
+
+cd python-chebai-proteins
+pip install .
 ```

-2. Install the package:
+_Note for developers_: If you want to install the package in editable mode, use the following command instead:
+```bash
+pip install -e .
 ```
-cd python-chebai
-pip install .
-```
+

 ## 🗂 Recommended Folder Structure
@@ -43,39 +46,6 @@
 This setup enables shared access to data and model configurations.

 ## 🚀 Training & Pretraining Guide

-### ⚠️ Important Setup Instructions
-
-Before running any training scripts, ensure the environment is correctly configured:
-
-* Either:
-
-  * Install the `python-chebai` repository as a package using:
-
-    ```bash
-    pip install .
-    ```
-* **OR**
-
-  * Manually set the `PYTHONPATH` environment variable if working across multiple directories (`python-chebai` and `python-chebai-proteins`):
-
-    * If your current working directory is `python-chebai-proteins`, set:
-
-      ```bash
-      export PYTHONPATH=path/to/python-chebai
-      ```
-      or vice versa.
-
-    * If you're working within both repositories simultaneously or facing module not found errors, we **recommend configuring both directories**:
-
-      ```bash
-      # Linux/macOS
-      export PYTHONPATH=path/to/python-chebai:path/to/python-chebai-proteins
-
-      # Windows (use semicolon instead of colon)
-      set PYTHONPATH=path\to\python-chebai;path\to\python-chebai-proteins
-      ```
-
-> 🔎 See the [PYTHONPATH Explained](#-pythonpath-explained) section below for more details.

 ### 📊 SCOPE hierarchy prediction

 ```
 python -m chebai fit --trainer=../configs/training/default_trainer.yml --trainer...
 ```

 Same command can be used for **DeepGO** just by changing the config path for data.
-
-
-
-
-
-
-
-## 🧭 PYTHONPATH Explained
-
-### What is `PYTHONPATH`?
-
-`PYTHONPATH` is an environment variable that tells Python where to search for modules that aren't installed via `pip` or not in your current working directory.
-
-### Why You Need It
-
-If your config refers to a custom module like:
-
-```yaml
-class_path: chebai_proteins.preprocessing.datasets.scope.scope.SCOPe50
-```
-
-...and you're running the code from `python-chebai`, Python won't know where to find `chebai_proteins` (from another repo like `python-chebai-proteins/`) unless you add it to `PYTHONPATH`.
-
-
-### How Python Finds Modules
-
-Python looks for imports in this order:
-
-1. Current directory
-2. Standard library
-3. Paths in `PYTHONPATH`
-4. Installed packages (`site-packages`)
-
-You can inspect the full search paths:
-
-```bash
-python -c "import sys; print(sys.path)"
-```
-
-
-
-### ✅ Setting `PYTHONPATH`
-
-#### 🐧 Linux / macOS
-
-```bash
-export PYTHONPATH=/path/to/python-chebai-graph
-echo $PYTHONPATH
-```
-
-#### 🪟 Windows CMD
-
-```cmd
-set PYTHONPATH=C:\path\to\python-chebai-graph
-echo %PYTHONPATH%
-```
-
-> 💡 Note: This is temporary for your terminal session. To make it permanent, add it to your system environment variables.
diff --git a/chebai_proteins/loss/bce_logits_loss.py b/chebai_proteins/loss/bce_logits_loss.py
new file mode 100644
index 0000000..dd629a4
--- /dev/null
+++ b/chebai_proteins/loss/bce_logits_loss.py
@@ -0,0 +1,7 @@
+import torch
+
+
+class WrappedBCEWithLogitsLoss(torch.nn.BCEWithLogitsLoss):
+    def forward(self, input, target, **kwargs):
+        # As the custom passed kwargs are not used in BCEWithLogitsLoss, we can ignore them
+        return super().forward(input, target)
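A minimal usage sketch of the wrapper added above (assuming `chebai-proteins` is installed): any extra keyword arguments a caller forwards are dropped before delegating to `torch.nn.BCEWithLogitsLoss`. The `current_epoch` keyword below is only a hypothetical example of such a kwarg, not something this patch passes.

```python
import torch

from chebai_proteins.loss.bce_logits_loss import WrappedBCEWithLogitsLoss

loss_fn = WrappedBCEWithLogitsLoss()

logits = torch.randn(4, 3)                     # raw model outputs, no sigmoid applied
targets = torch.randint(0, 2, (4, 3)).float()  # multi-label 0/1 targets

# Extra keyword arguments are silently ignored by the wrapper.
loss = loss_fn(logits, targets, current_epoch=0)
print(loss.item())
```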
diff --git a/chebai_proteins/preprocessing/datasets/deepGO/go_uniprot.py b/chebai_proteins/preprocessing/datasets/deepGO/go_uniprot.py
index dbdf93e..e9a7ac0 100644
--- a/chebai_proteins/preprocessing/datasets/deepGO/go_uniprot.py
+++ b/chebai_proteins/preprocessing/datasets/deepGO/go_uniprot.py
@@ -102,31 +102,43 @@ class _GOUniProtDataExtractor(_DynamicDataset, ABC):
     # Gene Ontology (GO) has three major branches, one for biological processes (BP), molecular functions (MF) and
     # cellular components (CC). The value "all" will take data related to all three branches into account.
+    # TODO: should we be really allowing all branches for single dataset?
     _ALL_GO_BRANCHES: str = "all"
     _GO_BRANCH_NAMESPACE: Dict[str, str] = {
-        "BP": "biological_process",
-        "MF": "molecular_function",
-        "CC": "cellular_component",
+        "BP": "biological_process",  # Huge branch, with 20,000+ GO terms
+        "MF": "molecular_function",  # smaller branch, with 6000+ GO terms
+        "CC": "cellular_component",  # smallest branch, with 2,000+ GO terms
     }

-    def __init__(self, **kwargs):
-        self.go_branch: str = self._get_go_branch(**kwargs)
+    READER = None
+
+    def __init__(
+        self,
+        go_branch: str,
+        max_sequence_len: int = 1002,
+        use_esm2_embeddings: bool = False,
+        **kwargs,
+    ):
+        if bool(use_esm2_embeddings):
+            self.READER = dr.ESM2EmbeddingReader

-        self.max_sequence_length: int = int(kwargs.get("max_sequence_length", 1002))
+        self.go_branch: str = self._get_go_branch(go_branch)
+
+        self.max_sequence_length: int = int(max_sequence_len)
         assert (
             self.max_sequence_length >= 1
         ), "Max sequence length should be greater than or equal to 1."

         super(_GOUniProtDataExtractor, self).__init__(**kwargs)

-        if self.reader.n_gram is not None:
+        if hasattr(self.reader, "n_gram") and self.reader.n_gram is not None:
             assert self.max_sequence_length >= self.reader.n_gram, (
                 f"max_sequence_length ({self.max_sequence_length}) must be greater than "
                 f"or equal to n_gram ({self.reader.n_gram})."
             )

     @classmethod
-    def _get_go_branch(cls, **kwargs) -> str:
+    def _get_go_branch(cls, go_branch_value: str, **kwargs) -> str:
         """
         Retrieves the Gene Ontology (GO) branch based on provided keyword arguments.

         This method checks if a valid GO branch value is provided in the keyword arguments.
@@ -141,7 +153,6 @@ def _get_go_branch(cls, **kwargs) -> str:
         Raises:
             ValueError: If the provided 'go_branch' value is not in the allowed list of values.
         """
-        go_branch_value = kwargs.get("go_branch", cls._ALL_GO_BRANCHES)
         allowed_values = list(cls._GO_BRANCH_NAMESPACE.keys()) + [cls._ALL_GO_BRANCHES]
         if go_branch_value not in allowed_values:
             raise ValueError(
@@ -181,7 +192,7 @@ def _download_gene_ontology_data(self) -> str:

         if not os.path.isfile(go_path):
             print("Missing Gene Ontology raw data")
-            print(f"Downloading Gene Ontology data....")
+            print("Downloading Gene Ontology data....")
             r = requests.get(self._GO_DATA_URL, allow_redirects=True)
             r.raise_for_status()  # Check if the request was successful
             open(go_path, "wb").write(r.content)
@@ -207,7 +218,7 @@ def _download_swiss_uni_prot_data(self) -> Optional[str]:
         os.makedirs(os.path.dirname(uni_prot_file_path), exist_ok=True)

         if not os.path.isfile(uni_prot_file_path):
-            print(f"Downloading Swiss UniProt data....")
+            print("Downloading Swiss UniProt data....")

             # Create a temporary file
             with NamedTemporaryFile(delete=False) as tf:
@@ -223,7 +234,7 @@

             # Unpack the gzipped file
             try:
-                print(f"Unzipping the file....")
+                print("Unzipping the file....")
                 with gzip.open(temp_filename, "rb") as f_in:
                     output_file_path = uni_prot_file_path
                     with open(output_file_path, "wb") as f_out:
@@ -375,7 +386,7 @@ def _graph_to_raw_dataset(self, g: nx.DiGraph) -> pd.DataFrame:
         Returns:
             pd.DataFrame: The raw dataset created from the graph.
         """
-        print(f"Processing graph")
+        print("Processing graph")

         data_df = self._get_swiss_to_go_mapping()
         # add ancestors to go ids
@@ -457,6 +468,14 @@ def _get_swiss_to_go_mapping(self) -> pd.DataFrame:

             if not record.sequence or len(record.sequence) > self.max_sequence_length:
                 # Consider protein with only sequence representation and seq. length not greater than max seq. length
+
+                # DeepGO1 paper ignores proteins with sequence length greater than 1002: https://github.com/bio-ontology-research-group/deepgo/blob/master/aaindex.py#L9-L14
+                # But DeepGO2 paper truncates the sequence to 1000: https://github.com/bio-ontology-research-group/deepgo2/blob/main/deepgo/aminoacids.py#L26-L33
+                # Latest Discussion: https://github.com/ChEB-AI/python-chebai/issues/36#issuecomment-2385693976
+                # So, we ignore proteins with sequence length greater than max_sequence_length
+                # The rationale is that with only a partial representation of the protein sequence, the model may not learn effectively.
+                # Also, proteins longer than 1002 are only 3.32% of the total proteins in Swiss-Prot dataset.
+                # https://github.com/ChEB-AI/python-chebai/issues/36#issuecomment-2431460448
                 continue

             if any(aa in AMBIGUOUS_AMINO_ACIDS for aa in record.sequence):
@@ -559,8 +578,8 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
             )
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"File data.pt doesn't exists. "
-                f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
+                "File data.pt doesn't exists. "
+                "Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
             )

         df_go_data = pd.DataFrame(data_go)
@@ -586,7 +605,7 @@ def base_dir(self) -> str:
         Returns:
             str: The path to the base directory, which is "data/GO_UniProt".
         """
-        return os.path.join("data", f"GO_UniProt")
+        return os.path.join("data", "GO_UniProt")

     @property
     def raw_file_names_dict(self) -> dict:
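As a rough sketch of the reworked constructor interface: `go_branch` is now an explicit argument validated against `BP`, `MF`, `CC` and `all`, and `use_esm2_embeddings=True` swaps the class-level `READER` to the ESM2 embedding reader. Only `go_branch="all"` appears verbatim elsewhere in this patch (`protein_pretraining.py` and the tests); the import path, the `"MF"` value and the absence of further required kwargs are assumptions based on the surrounding code.

```python
from chebai_proteins.preprocessing.datasets.deepGO.go_uniprot import GOUniProtOver250

# Single-branch dataset; an invalid go_branch value raises ValueError.
mf_data = GOUniProtOver250(go_branch="MF", max_sequence_len=1002)

# Same dataset, but read via ESM2 embeddings instead of the default protein token reader.
mf_esm_data = GOUniProtOver250(go_branch="MF", use_esm2_embeddings=True)
```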
diff --git a/chebai_proteins/preprocessing/datasets/deepGO/protein_pretraining.py b/chebai_proteins/preprocessing/datasets/deepGO/protein_pretraining.py
index 8c39d86..d1f615a 100644
--- a/chebai_proteins/preprocessing/datasets/deepGO/protein_pretraining.py
+++ b/chebai_proteins/preprocessing/datasets/deepGO/protein_pretraining.py
@@ -38,7 +38,7 @@ def __init__(self, **kwargs):
         Args:
             **kwargs: Additional arguments for the superclass initialization.
         """
-        self._go_uniprot_extractor = GOUniProtOver250()
+        self._go_uniprot_extractor = GOUniProtOver250(go_branch="all")
         assert self._go_uniprot_extractor.go_branch == GOUniProtOver250._ALL_GO_BRANCHES

         self.max_sequence_length: int = int(kwargs.get("max_sequence_length", 1002))
@@ -143,7 +143,6 @@ def _parse_protein_data_for_pretraining(self) -> pd.DataFrame:
             has_valid_associated_go_label = False
             for cross_ref in record.cross_references:
                 if cross_ref[0] == self._go_uniprot_extractor._GO_DATA_INIT:
-
                     if len(cross_ref) <= 3:
                         # No evidence code
                         continue
@@ -223,8 +222,8 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
             )
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"File data.pt doesn't exists. "
-                f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
+                "File data.pt doesn't exists. "
+                "Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
             )

         df_go_data = pd.DataFrame(data_go)
diff --git a/chebai_proteins/preprocessing/datasets/scope/scope.py b/chebai_proteins/preprocessing/datasets/scope/scope.py
index bf3540e..842ce92 100644
--- a/chebai_proteins/preprocessing/datasets/scope/scope.py
+++ b/chebai_proteins/preprocessing/datasets/scope/scope.py
@@ -67,17 +67,22 @@ class _SCOPeDataExtractor(_DynamicDataset, ABC):
         "sp": "species",
         "px": "domain",
     }
+    READER = None

     def __init__(
         self,
         scope_version: str,
         scope_version_train: Optional[str] = None,
-        max_sequence_len: int = 1000,
+        max_sequence_len: int = 1002,
+        use_esm2_embeddings: bool = False,
         **kwargs,
     ):
+        if bool(use_esm2_embeddings):
+            self.READER = ESM2EmbeddingReader
+
         self.scope_version: str = scope_version
         self.scope_version_train: str = scope_version_train
-        self.max_sequence_len: int = max_sequence_len
+        self.max_sequence_len: int = int(max_sequence_len)

         super(_SCOPeDataExtractor, self).__init__(**kwargs)
@@ -130,7 +135,7 @@ def _download_pdb_sequence_data(self) -> None:
         os.makedirs(os.path.dirname(pdb_seq_file_path), exist_ok=True)

         if not os.path.isfile(pdb_seq_file_path):
-            print(f"Missing PDB raw data, Downloading PDB sequence data....")
+            print("Missing PDB raw data, Downloading PDB sequence data....")

             # Create a temporary file
             with NamedTemporaryFile(delete=False) as tf:
@@ -146,7 +151,7 @@

             # Unpack the gzipped file
             try:
-                print(f"Unzipping the file....")
+                print("Unzipping the file....")
                 with gzip.open(temp_filename, "rb") as f_in:
                     output_file_path = pdb_seq_file_path
                     with open(output_file_path, "wb") as f_out:
@@ -224,7 +229,6 @@ def add_sequence_nodes_edges(chain_sequence, px_sun_id):
         # Step 1: Build the graph structure and store node attributes
         for row in df_scope.itertuples(index=False):
             if row.level == "px":
-
                 pdb_id, chain_id = row.sid[1:5], row.sid[5]

                 if pdb_id not in pdb_id_set or chain_id == "_":
@@ -422,7 +426,7 @@ def _graph_to_raw_dataset(self, graph: nx.DiGraph) -> pd.DataFrame:
         Raises:
             RuntimeError: If no sunids are selected.
         """
-        print(f"Process graph")
+        print("Process graph")

         selected_sun_ids_per_lvl = self.select_classes(graph)

@@ -546,7 +550,6 @@ def _parse_pdb_sequence_file(self) -> pd.DataFrame:
         for record in SeqIO.parse(
             os.path.join(self.scope_root_dir, self.raw_file_names_dict["PDB"]), "fasta"
         ):
-
             if not record.seq or len(record.seq) > self.max_sequence_len:
                 continue

@@ -665,8 +668,8 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
             )
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"File data.pt doesn't exists. "
-                f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
+                "File data.pt doesn't exists. "
+                "Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
             )

         df_scope_version = pd.DataFrame(data_scope_version)
@@ -934,7 +937,6 @@ class SCOPeOver2000(_SCOPeOverX):


 class SCOPeOver50(_SCOPeOverX):
-
     THRESHOLD = 50


@@ -951,10 +953,6 @@ class SCOPeOverPartial2000(_SCOPeOverXPartial):
     THRESHOLD: int = 2000


-class SCOPeOver50ESM(SCOPeOver50):
-    READER = ESM2EmbeddingReader
-
-
 if __name__ == "__main__":
     scope = SCOPeOver50(scope_version="2.08")
diff --git a/chebai_proteins/preprocessing/reader.py b/chebai_proteins/preprocessing/reader.py
index 21bdaea..640b9c5 100644
--- a/chebai_proteins/preprocessing/reader.py
+++ b/chebai_proteins/preprocessing/reader.py
@@ -181,7 +181,7 @@ def __init__(
         self.truncation_length = truncation_length
         self.toks_per_batch = toks_per_batch
         self.return_contacts = return_contacts
-        self.repr_layer = repr_layer
+        self.repr_layer = int(repr_layer)

         self._model: Optional[ESM2] = None
         self._alphabet: Optional[Alphabet] = None
@@ -355,6 +355,7 @@ def _alphabet_tokens_to_esm_embedding(self, tokens: torch.Tensor) -> torch.Tenso

         References:
             https://github.com/bio-ontology-research-group/deepgo2/blob/main/deepgo/extract_esm.py#L82-L107
+            https://github.com/facebookresearch/esm?tab=readme-ov-file#usage-

         Returns:
             torch.Tensor: Protein embedding from the specified representation layer.
@@ -393,3 +394,16 @@ def on_finish(self) -> None:
             None
         """
         pass
+
+
+if __name__ == "__main__":
+    reader = ProteinDataReader()
+    sample_sequence = "MKTFFVAGVILLLLPLVSSQCVNLTTRTQSRGDPTQKARPEPT"
+    token_indices = reader._read_data(sample_sequence)
+    print(f"Token indices for the sequence: {token_indices}")
+
+    esm_reader = ESM2EmbeddingReader(
+        model_name="esm2_t6_8M_UR50D", repr_layer="6", device=torch.device("cpu")
+    )
+    embeddings = esm_reader._read_data(sample_sequence)
+    print(f"ESM2 embeddings shape: {len(embeddings)}")
diff --git a/configs/loss/BCELoss.yml b/configs/loss/BCELoss.yml
deleted file mode 100644
index 6ee636d..0000000
--- a/configs/loss/BCELoss.yml
+++ /dev/null
@@ -1 +0,0 @@
-class_path: torch.nn.BCELoss
diff --git a/configs/loss/BCEWithLogitsLoss.yml b/configs/loss/BCEWithLogitsLoss.yml
new file mode 100644
index 0000000..606cbcb
--- /dev/null
+++ b/configs/loss/BCEWithLogitsLoss.yml
@@ -0,0 +1 @@
+class_path: chebai_proteins.loss.bce_logits_loss.WrappedBCEWithLogitsLoss
diff --git a/configs/model/electra.yml b/configs/model/electra.yml
new file mode 100644
index 0000000..da1dbf4
--- /dev/null
+++ b/configs/model/electra.yml
@@ -0,0 +1,14 @@
+class_path: chebai.models.Electra
+init_args:
+  optimizer_kwargs:
+    lr: 1e-3
+  config:
+    vocab_size: 31 # 21 unique + embedding offset (10)
+    # For classification: [Maximum sequence length (1002) (padding will be also upto 1002)] + 1 for CLS token
+    # For pretraining: [Maximum sequence length (1002) (padding will be also upto 1002)] + 10 embedding offset (includes all special tokens)
+    # Hence, use max of (classification, pretraining): max_position_embeddings = 1002 + 10 = 1012
+    max_position_embeddings: 1012
+    num_attention_heads: 8
+    num_hidden_layers: 6
+    type_vocab_size: 1
+    hidden_size: 256
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..8075481
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,33 @@
+[build-system]
+requires = ["setuptools >= 77.0.3", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "chebai-proteins"
+version = "0.0.2"
+description = "Repository for protein prediction and classification, built on top of the python-chebai codebase"
+authors = []
+readme = "README.md"
+license = { text = "AGPL-3.0" }
+requires-python = ">=3.10, <3.13"
+dependencies = [
+    "chebai @ git+https://github.com/ChEB-AI/python-chebai.git",
+    "biopython",
+    "fair-esm",
+]
+
+[project.optional-dependencies]
+dev = ["black", "isort", "pre-commit"]
+plot = ["matplotlib", "seaborn"]
+wandb = ["wandb"]
+
+[tool.setuptools]
+include-package-data = true
+license-files = ["LICEN[CS]E*"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+exclude = ["tests*"]
+
+[tool.setuptools.package-data]
+"*" = ["**/*.txt", "**/*.json"]
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 27284a0..0000000
--- a/setup.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from setuptools import find_packages, setup
-
-packages = find_packages()
-print(packages)
-setup(
-    name="chebai-proteins",
-    version="0.0.2.dev0",
-    packages=packages,
-    package_data={"": ["**/*.txt", "**/*.json"]},
-    include_package_data=True,
-    url="",
-    license="",
-    author="MGlauer",
-    author_email="martin.glauer@ovgu.de",
-    description="",
-    zip_safe=False,
-    python_requires=">=3.9, <3.13",
-    install_requires=[
-        "chebai @ git+https://github.com/ChEB-AI/python-chebai.git",
-        "biopython",
-        "fair-esm",
-    ],
-    extras_require={"dev": ["black", "isort", "pre-commit"]},
-)
diff --git a/tests/unit/dataset_classes/testGOUniProDataExtractor.py b/tests/unit/dataset_classes/testGOUniProDataExtractor.py
index 8cee8f8..d23f8e7 100644
--- a/tests/unit/dataset_classes/testGOUniProDataExtractor.py
+++ b/tests/unit/dataset_classes/testGOUniProDataExtractor.py
@@ -37,7 +37,7 @@ def setUpClass(

         _GOUniProtDataExtractor.READER = ProteinDataReader

-        cls.extractor = _GOUniProtDataExtractor()
+        cls.extractor = _GOUniProtDataExtractor(go_branch="all")

     def test_term_callback(self) -> None:
         """
diff --git a/tests/unit/dataset_classes/testGoUniProtOverX.py b/tests/unit/dataset_classes/testGoUniProtOverX.py
index ccd2d66..2ec70da 100644
--- a/tests/unit/dataset_classes/testGoUniProtOverX.py
+++ b/tests/unit/dataset_classes/testGoUniProtOverX.py
@@ -17,7 +17,7 @@ def setUpClass(cls, mock_makedirs) -> None:
         """
         Set up the class for tests by initializing the extractor, graph, and input DataFrame.
         """
-        cls.extractor = _GOUniProtOverX()
+        cls.extractor = _GOUniProtOverX(go_branch="all")
         cls.test_graph: nx.DiGraph = GOUniProtMockData.get_transitively_closed_graph()
         cls.input_df: pd.DataFrame = GOUniProtMockData.get_data_in_dataframe().iloc[
             :, :4
diff --git a/tutorials/data_exploration_scope.ipynb b/tutorials/data_exploration_scope.ipynb
index c7d17b6..a083ad5 100644
--- a/tutorials/data_exploration_scope.ipynb
+++ b/tutorials/data_exploration_scope.ipynb
@@ -1049,13 +1049,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "6dc3fd6c-7cf6-47ef-812f-54319a0cdeb9",
    "metadata": {},
    "outputs": [],
    "source": [
    "# You can specify a literal path for the `splits_file_path`, or if another `scope_class` instance is already defined,\n",
    "# you can use its existing `splits_file_path` attribute for consistency.\n",
+    "from chebai_proteins.preprocessing.datasets.scope.scope import SCOPeOver2000\n",
+    "\n",
    "scope_class_with_splits = SCOPeOver2000(\n",
    "    scope_version=\"2.08\",\n",
    "    # splits_file_path=\"data/chebi_v231/ChEBI50/processed/splits.csv\", # Literal path option\n",
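Taken together with the `scope.py` changes above, the removed `SCOPeOver50ESM` class is replaced by the `use_esm2_embeddings` flag. A usage sketch: the first call mirrors the `__main__` block in `scope.py`, while the flag-based call is an assumption built from the new constructor signature, not code from this patch.

```python
from chebai_proteins.preprocessing.datasets.scope.scope import SCOPeOver50

# Default token-based protein reader, as in scope.py's __main__ block.
scope = SCOPeOver50(scope_version="2.08")

# Same dataset, but with the class-level READER switched to ESM2EmbeddingReader.
scope_esm = SCOPeOver50(scope_version="2.08", use_esm2_embeddings=True)
```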