From 0ec6642bfe4f273dc78874de048d2151e5b22f9e Mon Sep 17 00:00:00 2001
From: zshang <makisekurisu@sjtu.edu.cn>
Date: Fri, 6 Mar 2026 14:59:17 +0800
Subject: [PATCH 1/3] Fix bug in PR #123 and update docstrings

---
 optimhc/rescore/mokapot.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/optimhc/rescore/mokapot.py b/optimhc/rescore/mokapot.py
index 53cfc06..2909f97 100644
--- a/optimhc/rescore/mokapot.py
+++ b/optimhc/rescore/mokapot.py
@@ -30,11 +30,13 @@ def rescore(
     psms : PsmContainer
         A PsmContainer object containing PSM data.
     model : object, optional
-        A trained model for rescoring PSMs.
+        An untrained mokapot-compatible model (e.g. PercolatorModel, XGBoostPercolatorModel).
+        mokapot.brew trains it internally across cross-validation folds. If None, mokapot
+        uses its default PercolatorModel.
     rescoring_features : List[str], optional
         A list of feature names to use for rescoring.
     test_fdr : float, optional
-        The FDR threshold for testing the model. Default is 0.01.
+        The FDR threshold used to evaluate and report results after training. Default is 0.01.
     **kwargs : dict
         Additional keyword arguments for mokapot.brew.
 
@@ -47,19 +49,18 @@ def rescore(
           (i.e. PSMs, peptides) when assessed using the learned score. If a list, they will be
           in the same order as provided in the psms parameter.
         - list of Model objects:
-          The learned Model objects, one for each fold.
+          The trained Model objects, one for each cross-validation fold.
 
     Notes
     -----
     This function:
-    1. Converts the PsmContainer to a mokapot dataset
-    2. Runs mokapot.brew with the specified parameters
-    3. Returns the results and models
+    1. Converts the PsmContainer to a mokapot LinearPsmDataset
+    2. Passes the dataset and untrained model to mokapot.brew, which trains across folds
+    3. Returns the per-fold confidence results and trained models
     """
     psms = convert_to_mokapot_dataset(psms, rescoring_features=rescoring_features)
     logger.info("Rescoring PSMs with mokapot.")
-    model_arg = [model] if model is not None else None
-    results, models = mokapot.brew(psms, model=model_arg, test_fdr=test_fdr, **kwargs)
+    results, models = mokapot.brew(psms, model=model, test_fdr=test_fdr, **kwargs)
     return results, models
 
 

From 50a8aca2d5ff8015cbbda23a64c43454337a4031 Mon Sep 17 00:00:00 2001
From: zshang <makisekurisu@sjtu.edu.cn>
Date: Sat, 7 Mar 2026 15:57:02 +0800
Subject: [PATCH 2/3] Fix inputFile wrapping and naming typos; clean up code

---
 optimhc/cli.py                             |  7 +------
 optimhc/core/config.py                     |  5 +++--
 optimhc/core/pipeline.py                   | 12 ++++--------
 optimhc/feature_generator/PWM.py           |  4 ++--
 optimhc/feature_generator/mhcflurry.py     | 10 +---------
 optimhc/utils.py                           |  5 +----
 optimhc/visualization/plot_features.py     |  2 +-
 optimhc/visualization/save_or_show_plot.py |  1 +
 8 files changed, 14 insertions(+), 32 deletions(-)

diff --git a/optimhc/cli.py b/optimhc/cli.py
index f39222f..2f7c589 100644
--- a/optimhc/cli.py
+++ b/optimhc/cli.py
@@ -26,11 +26,6 @@ def cli():
     pass
 
 
-def parse_cli_config(**kwargs):
-    # Remove None values and build a config dict
-    return {k: v for k, v in kwargs.items() if v is not None and v != ()}
-
-
 @cli.command()
 @click.option(
     "--config",
@@ -129,7 +124,7 @@ def pipeline(
     if visualization is not None:
         pipeline_config["visualization"] = visualization
     if numprocesses:
-        pipeline_config["numProcess"] = numprocesses
+        pipeline_config["numProcesses"] = numprocesses
     if allele:
         pipeline_config["allele"] = list(allele)
     if loglevel:
diff --git a/optimhc/core/config.py b/optimhc/core/config.py
index e3dfa4b..a88fabf 100644
--- a/optimhc/core/config.py
+++ b/optimhc/core/config.py
@@ -16,7 +16,7 @@
     "saveModels": True,
     "toFlashLFQ": True,
     "allele": [],
-    "numProcess": 4,
+    "numProcesses": 4,
     "removePreNxtAA": False,
     "showProgress": True,
     "logLevel": "INFO",
@@ -213,7 +213,8 @@ def validate(self):
         input_files = self._config["inputFile"]
         if not isinstance(input_files, (list, tuple)):
             logger.debug(f"inputFile is not a list or tuple: {input_files}. Converting to list.")
-            self._config["inputFile"] = list(input_files)
+            self._config["inputFile"] = [input_files]
+            input_files = self._config["inputFile"]
         if not input_files:
             logger.error("inputFile list cannot be empty")
             raise ValueError("inputFile list cannot be empty")
diff --git a/optimhc/core/pipeline.py b/optimhc/core/pipeline.py
index 6574ec4..32b5653 100644
--- a/optimhc/core/pipeline.py
+++ b/optimhc/core/pipeline.py
@@ -281,6 +281,8 @@ def _run_single_experiment(self, psms, exp_config, exp_name, exp_dir):
         bool
             True if experiment succeeded, False otherwise.
         """
+        results = None
+        models = None
         try:
             os.makedirs(exp_dir, exist_ok=True)
 
@@ -333,12 +335,8 @@ def _run_single_experiment(self, psms, exp_config, exp_name, exp_dir):
             return False
 
         finally:
-            # Explicit resource release to free up memory after each experiment
-            try:
-                del results
-                del models
-            except Exception:
-                pass
+            del results
+            del models
             gc.collect()
 
     def run(self):
@@ -382,8 +380,6 @@ def run_experiments(self):
 
         psms = self.read_input()
         psms = self._generate_features(psms)
-
-        # Save the generated pin file for reference
         pin_path = os.path.join(self.output_dir, f"optimhc.{self.experiment}.pin")
         psms.write_pin(pin_path)
         fig_summary_dir = os.path.join(self.output_dir, "figures")
diff --git a/optimhc/feature_generator/PWM.py b/optimhc/feature_generator/PWM.py
index bf6793c..84b541d 100644
--- a/optimhc/feature_generator/PWM.py
+++ b/optimhc/feature_generator/PWM.py
@@ -284,7 +284,7 @@ def _default_allele_pwm_files(self) -> Dict[str, Dict[int, str]]:
         logger.debug(f"Default PWM file paths set for alleles: {self.alleles}")
         return pwm_files
 
-    def _most_conserved_postions(self, pwm: pd.DataFrame, n: int = 2) -> List[int]:
+    def _most_conserved_positions(self, pwm: pd.DataFrame, n: int = 2) -> List[int]:
         """
         Find the n most conserved positions in the PWM.
 
@@ -635,7 +635,7 @@ def generate_features(self) -> pd.DataFrame:
                     min_mer = min(self.pwms[allele].keys())
                     max_mer = max(self.pwms[allele].keys())
                     for mer_len in range(min_mer, max_mer + 1):
-                        anchor_dict[mer_len] = self._most_conserved_postions(
+                        anchor_dict[mer_len] = self._most_conserved_positions(
                             self.pwms[allele][mer_len], self.anchors
                         )
                     logger.info(f"Most conserved positions for allele {allele}: {anchor_dict}")
diff --git a/optimhc/feature_generator/mhcflurry.py b/optimhc/feature_generator/mhcflurry.py
index d5fbe07..1cc3b30 100644
--- a/optimhc/feature_generator/mhcflurry.py
+++ b/optimhc/feature_generator/mhcflurry.py
@@ -297,15 +297,7 @@ def generate_features(self) -> pd.DataFrame:
         ]
         if features_df.isna().sum().sum() > 0:
             logger.warning("NaN values found in the generated features.")
-        return features_df[
-            [
-                "Peptide",
-                "mhcflurry_affinity",
-                "mhcflurry_processing_score",
-                "mhcflurry_presentation_score",
-                "mhcflurry_presentation_percentile",
-            ]
-        ]
+        return features_df
 
     def get_best_allele(self) -> pd.DataFrame:
         """
diff --git a/optimhc/utils.py b/optimhc/utils.py
index 5ee1be1..3a4e51f 100644
--- a/optimhc/utils.py
+++ b/optimhc/utils.py
@@ -1,5 +1,6 @@
 # utils.py
 
+import re
 from logging import getLogger
 from pathlib import Path
 from typing import List
@@ -80,8 +81,6 @@ def strip_flanking_and_charge(peptide: str) -> str:
     This function removes any amino acids before the first '.' and after the last '.'
     in the peptide sequence.
     """
-    import re
-
     peptide = re.sub(r"^[^.]*\.|\.[^.]*$", "", peptide)
 
     # Some PIN may have charge state at the end of the peptide, e.g., R.RRVEHHDHAVVSGR4.L
@@ -120,8 +119,6 @@ def remove_modifications(peptide: str, keep_modification=None) -> str:
     If keep_modification is provided, only those specific modifications will be
     preserved in the output sequence.
     """
-    import re
-
     if keep_modification is None:
         return re.sub(r"\[.*?\]", "", peptide)
     else:
diff --git a/optimhc/visualization/plot_features.py b/optimhc/visualization/plot_features.py
index 6a61412..bddb375 100644
--- a/optimhc/visualization/plot_features.py
+++ b/optimhc/visualization/plot_features.py
@@ -120,7 +120,7 @@ def plot_feature_importance(
     source_colors = dict(zip(rescoring_features.keys(), colors))
 
     for source, features in rescoring_features.items():
-        color = source_colors[source]  # 修改：使用预分配的颜色
+        color = source_colors[source]
         indices = [
             i for i, name in enumerate(sum(rescoring_features.values(), [])) if name in features
         ]
diff --git a/optimhc/visualization/save_or_show_plot.py b/optimhc/visualization/save_or_show_plot.py
index 4264177..26d89db 100644
--- a/optimhc/visualization/save_or_show_plot.py
+++ b/optimhc/visualization/save_or_show_plot.py
@@ -12,3 +12,4 @@ def save_or_show_plot(save_path, logger, tight_layout=True):
         logger.info(f"Plot saved to {save_path}")
     else:
         plt.show()
+    plt.close("all")

From 7f83279ad05a2803600ce409d27b5d38777b0c61 Mon Sep 17 00:00:00 2001
From: zshang <makisekurisu@sjtu.edu.cn>
Date: Sat, 7 Mar 2026 15:58:08 +0800
Subject: [PATCH 3/3] Fix typo in test configuration: changed 'numProcess' to
 'numProcesses'

---
 tests/test_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_config.py b/tests/test_config.py
index 3ca8f50..54be89d 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -130,6 +130,6 @@ def test_default_config_values(self):
         assert DEFAULT_CONFIG["inputType"] == "pepxml"
         assert DEFAULT_CONFIG["outputDir"] == "./results"
         assert DEFAULT_CONFIG["decoyPrefix"] == "DECOY_"
-        assert DEFAULT_CONFIG["numProcess"] == 4
+        assert DEFAULT_CONFIG["numProcesses"] == 4
         assert DEFAULT_CONFIG["rescore"]["testFDR"] == 0.01
         assert DEFAULT_CONFIG["rescore"]["model"] == "Percolator"