5h4ng · 5h4ng · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
diff --git a/README.md b/README.md
@@ -71,9 +71,10 @@ Rescore parameters control how the rescoring step is executed and include:
 
 | Parameter | Type    | Example      | Description                                                                                                                                                                                 |
 | --------- | ------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `testFDR` | Float   | `0.01`       | The false-discovery rate threshold at which to evaluate the learned models.                                                                                                                 |
-| `model`   | String  | `Percolator` | Model to use for rescoring (valid options include `Percolator`, `XGBoost`, or `RandomForest`).                                                                                              |
-| `numJobs` | Integer | `4`          | The number of parallel jobs to run. This value is passed to Scikit-learn's n_jobs parameter to control parallelism for model training or scoring. Set to -1 to use all available CPU cores. |
+| `testFDR`  | Float   | `0.01`       | The false-discovery rate threshold at which to evaluate the learned models and report final results.                                                                                         |
+| `trainFDR` | Float   | `0.01`       | The false-discovery rate threshold used during model training to select the target PSMs that are treated as positive examples in each iteration. Increase this value (e.g., `0.05`) if training fails with "No PSMs found below the eval_fdr" on challenging datasets. |
+| `model`    | String  | `Percolator` | Model to use for rescoring (valid options include `Percolator`, `XGBoost`, or `RandomForest`).                                                                                              |
+| `numJobs`  | Integer | `4`          | The number of parallel jobs to run. This value is passed to Scikit-learn's n_jobs parameter to control parallelism for model training or scoring. Set to -1 to use all available CPU cores. |
 
 #### Example YAML Configuration
 

diff --git a/optimhc/core/config.py b/optimhc/core/config.py
@@ -20,7 +20,7 @@
     "removePreNxtAA": False,
     "showProgress": True,
     "logLevel": "INFO",
-    "rescore": {"testFDR": 0.01, "model": "Percolator", "numJobs": 1},
+    "rescore": {"testFDR": 0.01, "trainFDR": 0.01, "model": "Percolator", "numJobs": 1},
 }
 
 

diff --git a/optimhc/core/pipeline.py b/optimhc/core/pipeline.py
@@ -73,6 +73,7 @@ def __init__(self, config):
         self.save_models = self.config.get("saveModels", True)
         self.to_flashlfq = self.config.get("toFlashLFQ", True)
         self.test_fdr = self.config.get("rescore", {}).get("testFDR", 0.01)
+        self.train_fdr = self.config.get("rescore", {}).get("trainFDR", 0.01)
         self.model_type = self.config.get("rescore", {}).get("model", "Percolator")
         self.n_jobs = self.config.get("rescore", {}).get("numJobs", 1)
 
@@ -143,7 +144,7 @@ def rescore(self, psms, model_type=None, n_jobs=None, test_fdr=None, rescoring_f
             Number of parallel jobs.
         test_fdr : float, optional
             FDR threshold.
         rescoring_features : list, optional
             List of features to use for rescoring.
 
         Returns
@@ -161,14 +162,15 @@ def rescore(self, psms, model_type=None, n_jobs=None, test_fdr=None, rescoring_f
         model_type = model_type if model_type is not None else self.model_type
         n_jobs = n_jobs if n_jobs is not None else self.n_jobs
 
+        train_fdr = getattr(self, "train_fdr", 0.01)
         if model_type == "XGBoost":
-            model = XGBoostPercolatorModel(n_jobs=n_jobs)
+            model = XGBoostPercolatorModel(train_fdr=train_fdr, n_jobs=n_jobs)
         elif model_type == "RandomForest":
-            model = RandomForestPercolatorModel(n_jobs=n_jobs)
+            model = RandomForestPercolatorModel(train_fdr=train_fdr, n_jobs=n_jobs)
         elif model_type == "Percolator":
-            model = PercolatorModel(n_jobs=n_jobs)
+            model = PercolatorModel(train_fdr=train_fdr, n_jobs=n_jobs)
         else:
-            model = PercolatorModel(n_jobs=n_jobs)
+            model = PercolatorModel(train_fdr=train_fdr, n_jobs=n_jobs)
 
         kwargs = {}
         if rescoring_features is not None:

diff --git a/optimhc/rescore/mokapot.py b/optimhc/rescore/mokapot.py
@@ -58,7 +58,8 @@ def rescore(
     """
     psms = convert_to_mokapot_dataset(psms, rescoring_features=rescoring_features)
     logger.info("Rescoring PSMs with mokapot.")
-    results, models = mokapot.brew(psms, model=model, test_fdr=test_fdr, **kwargs)
+    model_arg = [model] if model is not None else None
+    results, models = mokapot.brew(psms, model=model_arg, test_fdr=test_fdr, **kwargs)
     return results, models